Chlorophyll Bloom MATLAB Implementation

function [bloom_buffer, night_chl_buffer, status] = compute_bloom_trend (jd_chl,chl,window_width, log_file )
% COMPUTE_BLOOM_TREND:  Function to compute the bloom index for GoMOOS.  
%
%  Function to compute the bloom index for GoMOOS.  Computes
%   the rate of change of the night time averaged chlorophyll
%   concentration on a daily time step.  Here, we assume that
%   chlorophyll concentration is a proxy for growth rate, so
%   that by computing the doubling or chlorophyll growth rate,
%   we can estimate what growth would be necessary for the observed
%   chlorophyll concentrations.  
%
%   BLOOM_CONDITION = CHL growth rate = 1/[CHL] * d[CHL] / dt
%
%   computed using the night time average chlorophyll concentration
%   (in local standard time, 12 pm to 6 am).  We are using the 
%   night time average in order to minimize the effects of changes
%   in fluorescence per cell.
%
% USAGE:  [bloom_buffer, night_chl_buffer, status] = compute_bloom_trend (jd_chl,chl,window_width, log_file );
%
% PARAMETERS
% Input:
%     jd_chl:  julian time
%     chl:  the chlorophyll concentration in mg/m^3 to compute the growth rate.
%     window_width:  
%        time length over which the computation can be performed.
%        This should be 7 days???  If an event can not sustain itself for this
%        long, it will not be classified as a bloom.
%     log_file:
%        optional.  If given, diagnostics are written to this file.  Otherwise
%        they go to stdout.
%
% Output:
%     bloom_buffer:
%        matlab structure with following parameters
%           time:
%              Time series of the daily rate of change, measured in julian days.  
%              If the window width is two days, and we have two days of
%              good data, then one data record is produced, and the time value for that
%              record is for the last day.
%           interpolated_chl:
%              realtime chlorophyll time series interpolated onto rate of growth time series
%           dchl_dt:
%              Daily rate of change of chlorophyll.  Units would be micrograms/liter/day
%           CHLrate:
%              Rate of daily growth of chlorophyll ==> [ d (chl) / dt ] / chl
%              The units are therefore 1/day
%           event_delta_chl:
%              This is a cumulative daily sum of chlorophyll from the moment that either a
%              bloom or advection event starts.
%           event_demarcation_code:
%              Non-NaN values represent either the beginning or the end of either a possible bloom
%              event or an advection event.  Note the word "possible".  An event could initially
%              flag itself as a possible bloom event, for example, but later be classified as not 
%              having actually been the start of a bloom event.    There is only one code for the
%              beginning of an event, but there can be many reasons why an event would end.
%
%              EVENT_START = 0;
%              NEGATIVE_EVENT_DELTA_CHL = 1;
%              LARGE_NEGATIVE_GROWTH_RATE = 2;
%              NEGATIVE_WINDOW_MEAN_GROWTH_RATE = 3;
%              INVALID_DATA = 4;
%
%           bloom_trend
%              For each day, a numeric code describes the state of the chlorophyll growth.
%                  NO_TREND = 0;
%                  BIOMASS_DECREASING=1;
%                  BIOMASS_VARIABLE=2;
%                  BIOMASS_INCREASING=3;
%                  POSSIBLE_ADVECTION_EVENT=4;
%                  PROBABLE_ADVECTION_EVENT=5;
%                  POSSIBLE_BLOOM_EVENT=6;
%                  PROBABLE_BLOOM_EVENT=7;
%
%     night_chl_buffer:
%         This structure returns the averaged nightly chlorophyll values.  This is on a different
%         time series than the growth rates, so it is given its own structure.
%
%             niteCHL:
%                 average of realtime chlorophyll values restricted to night hours.
%             niteTIM:
%                 time of averaged nightly chlorophyll values
%
%
%


if ( nargin < 4 )
	log_fid = 1;
else
	log_fid = fopen ( log_file, 'w' );
end

fprintf ( log_fid, '%s:  starting bloom calculations.\n', mfilename );

cvs_revision = '$Revision: 1.8 $:';

%
% Tag the algorithm version for later consumption.
global Alg_Id_List;
Alg_Id_List.Chlorophyll_Bloom = cvs_revision(12:end-3);





advection_event_width = 2;

bloom_buffer = [];
night_chl_buffer = [];
status = -1;
if nargin < 3
	help CalcBloom
	return
end

if ( isempty(jd_chl) )
    fprintf(2, '%s:  empty time input\n', mfilename );
    return
end
if ( isempty(chl) )
    fprintf(2, '%s:  empty chlorophyll input\n', mfilename );
    return
end
tottime=jd_chl(end)-jd_chl(1);
if tottime < window_width 
    fprintf(2, '%s:  Must have more than %.1f days of data to compute bloom status\n', mfilename, window_width);
    return
end




jjtmp=floor(jd_chl);
jday=unique(jjtmp);



day0=jday(1);



% STEP 3 : average the chlorophyll data
k=0;
for i=day0:jday(end)

	%
	% Just look at chl between hours of 5am GMT and 11am GMT.
	% That translates into 12 midnight to 6am EST.

    itim=find(jd_chl >= i & jd_chl < (i+0.26));
    if length(itim) > 2  % found more than 2 hours to average
        k=k+1;
        niteCHL(k,1)=nanmean(chl(itim));
        niteTIM(k,1)=mean(jd_chl(itim));
    end
end

%
% Step 3.5
% find the maximum value.  Use this as a relative gauge as to how large individual
% events are.
time_series_chl_max = max(niteCHL);
chl_min_threshold = 0.20 * time_series_chl_max;


% STEP 4 : compute the rate of change

diffchl=diff(niteCHL);
difftim=diff(niteTIM);
dchl_dt = ( diffchl ./ difftim );

%
% Assume a centered difference for the dchl/dt time
dchl_dt_time = (niteTIM(1:end-1) + niteTIM(2:end)) / 2;


%
% Produce rough estimates of the actual chl at the time
% values for dchl/dt
ave_night_chl = (niteCHL(1:end-1) + niteCHL(2:end)) / 2;



%
% Now compute the rate of growth = [dchl/dt] / chl
%CHLrate = ( diffchl ./ difftim ) ./ ave_night_chl;
%
% The denominator is taken to be at the beginning of
% the timestep so as to capture truly large rates of 
% growth.  
CHLrate = ( diffchl ./ difftim ) ./ niteCHL(1:end-1);
CHLrate = CHLrate(:);
CHLrate_time = dchl_dt_time;
len_CHLrate = length(CHLrate);

dCHLRdt = NaN*zeros(size(CHLrate));
dCHLRdt(1:len_CHLrate-1) = diff(CHLrate) ./ diff ( dchl_dt_time );




%
% bloom trend codes
NO_TREND = 0;
BIOMASS_DECREASING=1;
BIOMASS_VARIABLE=2;
BIOMASS_INCREASING=3;
POSSIBLE_ADVECTION_EVENT=4;
PROBABLE_ADVECTION_EVENT=5;
POSSIBLE_BLOOM_EVENT=6;
PROBABLE_BLOOM_EVENT=7;


%
% end event codes
EVENT_START = 0;
NEGATIVE_EVENT_DELTA_CHL = 1;
LARGE_NEGATIVE_GROWTH_RATE = 2;
NEGATIVE_WINDOW_MEAN_GROWTH_RATE = 3;
INVALID_DATA = 4;
CHL_MIN_THRESHOLD_NOT_EXCEEDED = 5;
ADVECTION_EVENT = 6;



time_series_length = length(CHLrate);

i_chl = interp1 ( niteTIM, niteCHL, CHLrate_time );
time = dchl_dt_time;
event_del_chl = NaN * ones(size(time));

event_start = 0;
event_end = 0;
event_in_progress = 0;


tot_del_chl = NaN*ones(size(time));
bloom_trend = BIOMASS_VARIABLE * ones(size(time));
event_demarcation_code = NaN * ones(size(time));

for j = 1:time_series_length


	%
	% Do we have a long enough sequence of points to make any decision?
	if ( j <= window_width )
		continue
	end

	window_start = max(1,j-window_width);
	current_window_inds = window_start:j;

	%
	% Preliminarily compute the delta chl .  If we get a long running
	% event, then we may extent the window.
	tot_del_chl(j,1) = sum(dchl_dt(current_window_inds));

	bloom_trend(j,1) = BIOMASS_VARIABLE;

	ave_chl_rate = CHLrate(j);
	if ave_chl_rate > 0.075
	    bloom_trend(j,1)=BIOMASS_INCREASING;
	end
	if ave_chl_rate < 0
	    bloom_trend(j,1)=BIOMASS_DECREASING;
	end
	if ( abs(dchl_dt(j)) < 0.10 )
	    bloom_trend(j,1)=BIOMASS_VARIABLE;
	end

	%
	% 0.45 is the growth rate magic number for a bloom event.
	if ( ave_chl_rate < 0.45 )
		
		%
		% do nothing
		;
	elseif (ave_chl_rate >= 0.45 & ave_chl_rate <= 0.80 )
		
		% 
		%  but if the chlorophyll content is low compared to the overall time series maximum
		%  then do not count it as a bloom event.
		%if (i_chl(j) < 0.8) | (niteCHL(j) < 0.8)  
		if ( niteCHL(j) < chl_min_threshold )
			fprintf ( log_fid, '  %s:  Growth rate exceeded 0.45, but overall chl too small.  VARIABLE\n', datestr(jd2matday(time(j)),1) );
			bloom_trend(j,1) = BIOMASS_VARIABLE;
		else
			bloom_trend(j,1) = POSSIBLE_BLOOM_EVENT;
			if event_in_progress == 0
				event_in_progress = 1;
				event_start = j;
			end
		end

	%
	% A growth rate of 0.80 or more is probably an advection event.
	elseif ave_chl_rate > 0.80 

		%
		% But again, if the actual chlorophyll content is small ( < 0.625 micrograms/liter )
		% then don't count it as a bloom event yet.
		%if dchl_dt(j) < 0.5; 
		%if (i_chl(j) < 0.8) | (niteCHL(j) < 0.8) ; 
		%if ( niteCHL(j) < chl_max_percentage*time_series_chl_max )
		%	bloom_trend(j,1) = BIOMASS_VARIABLE;
		%else
			bloom_trend(j,1) = POSSIBLE_ADVECTION_EVENT;
			if event_in_progress == 0
				event_in_progress = 1;
				event_start = j;
			end
		%end
	end



	if j == event_start
		event_demarcation_code(j) = EVENT_START;
		fprintf ( log_fid, 'Start of event at %s.\n', datestr(jd2matday(time(j)),1) );
	end


	%
	% If in the middle of an event, try to further characterize it based upon 
	% past trend and data.
	if event_in_progress

		event_length = j-event_start;
		if event_length >= window_width
			window_width_exceeded = 1;
		else
			window_width_exceeded = 0;
		end

		if (j == (event_start+1))
			right_after_event_start = 1;
		else
			right_after_event_start = 0;
		end

		if right_after_event_start & ( bloom_trend(event_start) == POSSIBLE_ADVECTION_EVENT)
			immediately_following_possadv_event = 1;
		else
			immediately_following_possadv_event = 0;
		end





		%
		% How much has the chlorophyll changed in the last few days?
		%
		% Compute this as at least the length of the window, but possibly
		% up to the total length of the event.
		dchl_start = min(j-window_width, event_start);
		dchl_inds = [dchl_start:j];
		tot_del_chl(j,1) = sum(dchl_dt(dchl_inds));

		event_del_chl(j) = sum(dchl_dt(event_start:j));

		fprintf ( log_fid,      'date\t\tchl\tGR\tdchldt\tevent delta chl\n' );
		for k = event_start:j
			fprintf ( log_fid, '%d:%s\t%.2f\t%.2f\t%.2f\t%.2f\n', k, datestr(jd2matday(time(k)),1), i_chl(k), CHLrate(k), dchl_dt(k), event_del_chl(k) );
		end
		fprintf ( log_fid, '\n' );


		if immediately_following_possadv_event
			%
			% If the rate of decrease is very large after a possible advection event, then
			% yes, that is a probable advection event.
			if abs(dchl_dt(j)) > 0.8*dchl_dt(j-1)
				event_in_progress = 0;
				event_end = j;
				bloom_trend(event_start) = PROBABLE_ADVECTION_EVENT;
				bloom_trend(event_end) = BIOMASS_DECREASING;
				event_demarcation_code(j) = ADVECTION_EVENT;
				fprintf ( log_fid, '  Event terminated, ADVECTION\n' );
				continue
			end
		end

		%
		% This should not happen, but if it does, no choice but to terminate the event.
		if isnan(CHLrate(j))
			
			event_in_progress = 0;
			event_end = j;
			if bloom_trend(event_start) == POSSIBLE_ADVECTION_EVENT
				bloom_trend(event_start:j-1) = PROBABLE_ADVECTION_EVENT;
				bloom_trend(event_end) = NO_TREND;
			elseif window_width_exceeded
				bloom_trend(event_start:j-1) = PROBABLE_BLOOM_EVENT;
				bloom_trend(event_end) = NO_TREND;
			else
				event_inds = [event_start:j-1];
				r = [1:length(event_inds)];

				adv_inds = find(bloom_trend(event_inds) == POSSIBLE_ADVECTION_EVENT);
				bloom_trend(event_inds(adv_inds)) = PROBABLE_ADVECTION_EVENT;
				non_adv_inds = setdiff (r, adv_inds);

				bloom_trend(event_inds(non_adv_inds)) = BIOMASS_VARIABLE;
				bloom_trend(event_end) = NO_TREND;
			end
			event_demarcation_code(j) = INVALID_DATA;
			fprintf ( log_fid, '  Event terminated, invalid data\n' );
			continue

		end



		%
		% If we are past the time threshold, go ahead and label as a bloom.
		if ( window_width_exceeded )
			max_event_chl = max(niteCHL(event_start:j));
			if ( max_event_chl > chl_min_threshold )
				bloom_trend(event_start:j-1) = PROBABLE_BLOOM_EVENT;
			else
				%
				% terminate the event.
				event_in_progress = 0;
				event_end = j;
				bloom_trend(event_start:j) = BIOMASS_VARIABLE;
				bloom_trend(event_end) =  CHL_MIN_THRESHOLD_NOT_EXCEEDED;
			end
		end



		%
		% What is the trend of the growth rate?
		%
		% If the mean of the last few growth rates is less than zero, then terminate the event.
		window_start = max(event_start, j-window_width+1);
		window_inds = window_start:j;
		chl_mean_window_growth_rate = mean ( CHLrate(window_inds) );
		chl_ratio = i_chl(j) / i_chl(event_start);
		if (chl_mean_window_growth_rate < 0) & (chl_ratio < 1.2 )
			if window_width_exceeded
				fprintf ( log_fid, '  Event terminated, Growth rates have overall been too low for too long.  BLOOM\n' );
				event_demarcation_code(j) = NEGATIVE_WINDOW_MEAN_GROWTH_RATE;
				event_in_progress = 0;
				event_end = j;
				bloom_trend(event_start:event_end-1) = PROBABLE_BLOOM_EVENT;
				bloom_trend(event_end) = BIOMASS_VARIABLE;

				%
				% Was there a possible bloom event in there?
				% If so, don't leave them in "possible" status
				other_event_inds = find( ...
											( ...
											  (bloom_trend(event_start+1:j) == POSSIBLE_BLOOM_EVENT) ...
												| ...
											  (bloom_trend(event_start+1:j) == POSSIBLE_ADVECTION_EVENT) ...
											) ...
											& ...
											(i_chl(event_start+1:j) > 1.0) ...
										) ;
				if ~isempty(other_event_inds)

					bloom_trend(other_event_inds) = BIOMASS_VARIABLE;

				end


				continue
			else
				;
			end
		end



		%
		% break the chain if the total event chl delta drops below a certain level
		if ( (event_del_chl(j) < 0)  )

			fprintf ( log_fid, '  Event terminated, event delta chl now negative\n' );
			event_in_progress = 0;
			event_demarcation_code(j) = NEGATIVE_EVENT_DELTA_CHL;


			event_end = j;

			switch ( bloom_trend(event_start) )
				case POSSIBLE_ADVECTION_EVENT
					fprintf ( log_fid, '        Must have been an advection event\n' );
					bloom_trend(event_start:j-1) = PROBABLE_ADVECTION_EVENT;
					bloom_trend(j) = BIOMASS_DECREASING;

					%
					% Anything inbetween that was POSSIBLE_BLOOM?  If so, make it variable.
					event_inds = event_start+1:j;
					ind = find(bloom_trend(event_inds) == POSSIBLE_BLOOM_EVENT);
					bloom_trend(event_inds(ind)) = BIOMASS_VARIABLE;

					%
					% Anything inbetween that was POSSIBLE_ADVECTION?  If so, make it PROBABLE_ADVECTION
					event_inds = event_start+1:j-1;
					ind = find(bloom_trend(event_inds) == POSSIBLE_ADVECTION_EVENT);
					bloom_trend(event_inds(ind)) = PROBABLE_ADVECTION_EVENT;



				case POSSIBLE_BLOOM_EVENT
					if window_width_exceeded
						fprintf ( log_fid, '        Window width exceeded, genuine bloom event\n' );
						bloom_trend(event_start:j-1) = PROBABLE_BLOOM_EVENT;
						bloom_trend(j) = BIOMASS_DECREASING;
					else
						event_inds = event_start:j;
						r = [1:length(event_inds)];
						ind = find((CHLrate(event_inds)>0.80)&(i_chl(event_inds)>1));
						bloom_trend(event_inds(ind)) = PROBABLE_ADVECTION_EVENT;
						var_inds = setdiff ( r, ind );
						bloom_trend(event_inds(var_inds)) = BIOMASS_VARIABLE;

					end

			end


		end



	end

	if j == event_end
		fprintf ( log_fid, 'End of event at %s.\n\n\n', datestr(jd2matday(time(j))) );
	end


end



if ( nargin >= 4 )
	fclose ( log_fid );
end

bloom_buffer.time = time;
bloom_buffer.CHLrate = CHLrate;
bloom_buffer.bloom_trend = bloom_trend;
bloom_buffer.event_delta_chl = event_del_chl;
bloom_buffer.event_demarcation_code = event_demarcation_code;
bloom_buffer.dchl_dt = dchl_dt;
bloom_buffer.interpolated_chl = i_chl;

night_chl_buffer.niteTIM = niteTIM;
night_chl_buffer.niteCHL = niteCHL;

status = 1;


return