Shell Script for Directing NOAA Data Ingest

This script is very similar in form to the cell phone ingest script, except that it is not called with any input argument, as all the NOAA buoys keep the same ID designation. A simple loop is sufficient to step through the NOAA buoys, downloading forty-five days of data from the web page particular to each. The only operation of note is that the download must be preprocessed to remove instances of "MM" and replace them with -9999. Making all the data numeric lets MATLAB read it more easily.

#!/bin/bash


#
# Download one NDBC realtime file into the current directory.
#   $1 - station identifier, e.g. 44005 or BUZM3
#   $2 - extension of the remote file ("txt" or "drift")
# The local copy is always named <id>.txt, whatever the remote extension.
# A failed download is reported on stdout (which the caller redirects into
# the run log) but does not abort the run, so one unreachable station
# cannot block the remaining ones.
function _noaa_fetch_station {
	local station=$1
	local remote_ext=$2
	local url="http://www.ndbc.noaa.gov/data/realtime/${station}.${remote_ext}"
	local local_file="${station}.txt"
	local status=0

	echo "Current buoy is ${station}"
	echo "the url is ${url}"
	wget -O "${local_file}" "${url}" || status=$?
	if [ "${status}" -ne 0 ]; then
		echo "WARNING: download of ${url} failed (wget exit status ${status})"
	fi
	return 0
}

#
# Run one complete NOAA ingest pass from the current working directory:
#   1. download the realtime file of every known station into ./<id>.txt
#   2. run the MATLAB wrapper ingest_noaa_buoy_wrapper on the downloads
#   3. rsync each station's archive directory to gyre
#   4. delete the downloaded *.txt files
# Takes no arguments.  Progress is written to stdout.
# Exports GOMOOS_WORKING_DIRECTORY and RSYNC_PASSWORD as a side effect.
function do_it {

	# Station lists are kept local so they do not leak into the caller.
	local -a stationary_buoys=(44005 44007 44008 44011 44013 44017 44018 44025 44027 44142 44258)
	local -a cman_stations=(BUZM3 IOSN3 MISM1 MDRM1)
	local -a drifters=(44585)
	local current_buoy

	echo ----------------------------------------------------------
	echo "- Starting NOAA Ingest at $(date)"
	echo "- PID is $$"
	echo ----------------------------------------------------------
	echo

	#
	# Moored buoys and C-MAN stations publish <id>.txt
	for current_buoy in "${stationary_buoys[@]}" "${cman_stations[@]}"; do
		_noaa_fetch_station "${current_buoy}" txt
	done

	#
	# Drifting buoys publish <id>.drift instead
	for current_buoy in "${drifters[@]}"; do
		_noaa_fetch_station "${current_buoy}" drift
	done

	#
	# process the files
	# NOTE(review): the description at the top of this file says "MM" must be
	# replaced by -9999 before MATLAB reads the data; no such step is visible
	# here -- presumably the MATLAB wrapper does it, confirm.
	GOMOOS_WORKING_DIRECTORY=$(pwd)
	export GOMOOS_WORKING_DIRECTORY
	echo matlab -nojvm -nosplash -r 'ingest_noaa_buoy_wrapper'
	matlab -nojvm -nosplash -r 'ingest_noaa_buoy_wrapper'

	#
	# Ok we're done, now sync them with gyre
	echo "starting rsync transfer at $(date)"
	# NOTE(review): hard-coded secret; consider moving it out of the script
	# (for example an rsync --password-file readable only by this user).
	export RSYNC_PASSWORD=gomoos_rsync_transfer
	local -a rsync_opts=(
		--verbose --progress --stats --compress
		--recursive --owner --times --perms --links
		--exclude "*~"
	)
	for current_buoy in "${stationary_buoys[@]}" "${cman_stations[@]}" "${drifters[@]}"; do
		echo "rsyncing ${current_buoy}"
		rsync "${rsync_opts[@]}" \
				"/data/gomoos/buoy/archive/${current_buoy}" gomoos@gyre::grbt
	done
	echo "apparently finished with rsync transfer at $(date)"

	# Removes every .txt file in the current directory, not only the files
	# downloaded above.
	echo removing the input files
	rm -f -- ./*.txt

	echo ----------------------------------------------------------
	echo "- Finishing NOAA Ingest at $(date)"
	echo "- PID is $$"
	echo ----------------------------------------------------------
	echo

}


#
# How many input arguments?  If two, then
# the 2nd argument is the "project".  Set this to "blue_hill_bay", for example.
# If not supplied, "gomoos" is the default.
echo "Script invoked as $0: $*"
if [ $# -eq 2 ]; then
	project=${2}
else
	project=gomoos
fi


export DATA_ROOT="/data/${project}/buoy"
export RUNTIME_ROOT=/data/gomoos/buoy
export BUOY_PROCESSING_CALLING_ENVIRONMENT="shell script"
export BUOY_PROJECT="${project}"
# Not referenced again in this script; presumably read by the sourced
# setup script below -- TODO confirm before removing.
incoming_directory="${DATA_ROOT}/incoming"



#
# Set up the rest of the environment.
. "${RUNTIME_ROOT}/bin/setup_gomoos_environment.sh"

#
# Master log file
master_log_file="${DATA_ROOT}/log/buoy.log"


#
# check the pid file.  If it exists, then don't run.
# Test for existence directly instead of running ls, which printed an error
# on stderr on every normal run (when the file is absent).  The -L test keeps
# a dangling symlink counting as "exists", as it did with ls.
# NOTE: the pid file is only removed at the very end of this script, so a run
# that is killed part-way leaves it behind and blocks later runs until it is
# removed by hand.
pidfile="${RUNTIME_ROOT}/run/noaa_buoys"
if [[ -e "${pidfile}" || -L "${pidfile}" ]]; then
	#
	# pid file exists. must already be running?
	echo "Pid file ${pidfile} exists, too much going on.  Remove this file if in error." | mail jevans
	exit 1
fi

#
# All the incoming data is placed into a single directory.
# do_it finishes by deleting *.txt from the current directory, so never carry
# on from whatever directory we happened to be started in.
if ! cd "${RUNTIME_ROOT}/incoming/noaa"; then
	echo "Cannot cd to ${RUNTIME_ROOT}/incoming/noaa, aborting NOAA ingest." >&2
	exit 1
fi


# The pid file doubles as the log of this run: do_it's stdout is collected in
# it and copied to the master log below.
do_it >> "${pidfile}"


#
# Update the webpages that actually display NOAA data
cache_table_js_pages.sh


#
# Record the closing message BEFORE copying the run log to the master log;
# it used to be written just before the pid file was deleted and so was lost.
echo "removing pid file ${pidfile}, all done, bye" >> "${pidfile}"

#
# Save the pidfile to the master log
cat "${pidfile}" >> "${master_log_file}"


#
# removing pid file
rm -f -- "${pidfile}"