Create flat tree mass.sh
Jump to navigation
Jump to search
The following shell script allows the creation of Highland flat trees from an entire run. It should be easy to modify for running any software on entire ND280 runs (look for the line running RunCreateFlatTree.exe). You need to have the lists of files in the same directory.
Note: this will create a directory with one root file per oaAnalysis file. Run the analysis on the individual files to make sure the nRooTracker tree is also kept. Don't use hadd!
There are also a lot of log files and empty control files created in the output directory. Feel free to delete them once everything is finished and tested, but keep in mind that they are needed if you have to rerun the script to repeat failed runs!
#!/bin/bash
###################################################################################################
#
# create_flat_tree_mass.sh
#
# Set LIST to a list of file names to process, starting with "/production005/...".
# OUTDIR will be the name of the directory to put the files in.
# NCPU is the number of parallel running processes. The limit might be exceeded occasionally.
# Don't change the PREFIX, this points to our SE.
#
# Validate the command line: $1 must name a run for which a file "list_<run>"
# exists in the current directory. Otherwise list the available runs and quit.
if [[ -z "${1:-}" || ! -f "list_${1:-}" ]]; then
  echo "Specify one of the following runs:"
  # Glob instead of parsing `ls` output, and strip only the leading "list_".
  for list_file in list_run*; do
    # An unmatched pattern is left as a literal string; skip it.
    [[ -e "$list_file" ]] || continue
    printf '%s\n' "${list_file#list_}"
  done
  exit 1
fi
# File with one input path per line.
LIST="list_$1"
# Directory that receives the flat trees, logs and control files.
OUTDIR="flattree_$1"
# Target number of concurrently running processing jobs.
NCPU=4
# Prefix that is put in front of every entry of LIST when copying from the SE.
PREFIX="srm://dpm.lhep.unibe.ch/dpm/lhep.unibe.ch/home/t2k.org"
#
# Hint for real experts:
#
# If there are broken files on our SE (usually indicated by reproducible prefetch failures), delete
# them using the following command:
# for i in <put_list_failed_runs_here> ; do lcg-del -l `cat <put_outdir_here>/prefetch.$i.log | grep SURL | sed -e "s/^.*SURL //" -e "s/ ...$//" | head -n 1` ; done
# and run grid-replicate.sh again. DO THIS ONLY IF YOU KNOW WHAT YOU ARE DOING!
#
###################################################################################################
#
# set priority
#
# Apply renice with value 10 to this shell itself ($$ is the shell's own PID);
# anything renice writes to stdout is discarded.
renice 10 -p $$ > /dev/null
#
# clean up potential left-overs from last run
#
# If the output directory already exists, ask whether to delete it (d),
# resume a previous run (r) or abort (anything else). RESUME=1 is evaluated
# later by the "cleanup in case of resuming" section.
if [[ -e "$OUTDIR" ]]; then
  echo -n "Warning: output directory $OUTDIR is not empty. Delete all files in it, try to resume, or abort (d/r/a)? "
  # -n 1: react to a single key press; -r: take that key literally.
  read -r -n 1 answer
  echo ""
  case "$answer" in
    d)
      # ${OUTDIR:?} aborts instead of running "rm -rf" on an empty path.
      rm -rf -- "${OUTDIR:?}"
      ;;
    r)
      RESUME=1
      ;;
    *)
      echo "Aborted."
      exit 1
      ;;
  esac
fi
# Everything below writes into OUTDIR, so stop if it cannot be created.
mkdir -p -- "$OUTDIR" || exit 1
#
# initialise grid
#
# Export the storage-element host name (presumably read by the DPM/DPNS grid
# client tools -- confirm against their documentation).
export DPM_HOST=dpm.lhep.unibe.ch
export DPNS_HOST=dpm.lhep.unibe.ch
echo -n "Password for GRID certificate: "
# IFS= and -r keep the password exactly as typed (surrounding blanks and
# backslashes included); -s switches off terminal echo.
IFS= read -r -s PASSWORD
echo ""
# Discard any existing proxy, then create a fresh one.
voms-proxy-destroy > /dev/null 2>&1
# printf with a quoted argument hands the password over verbatim. An unquoted
# "echo $PASSWORD" would word-split it, expand glob characters and misread
# values such as "-n" as echo options.
printf '%s\n' "$PASSWORD" | voms-proxy-init --voms t2k.org -pwstdin
# Stop if voms-proxy-info reports failure (presumably: no usable proxy, e.g.
# after a wrong password).
voms-proxy-info > /dev/null 2>&1 || exit 1
#
# create temporary file for collecting PIDs of running jobs
#
# One line per running processing job; the line count is used for throttling.
# Abort if the file cannot be created: with an empty JOBLIST the later
# "cat $JOBLIST" calls would read from stdin instead.
JOBLIST=$(mktemp) || { echo "Error: could not create temporary job list." >&2; exit 1; }
#
# redirect Ctrl+C to terminate sub-processes
#
#######################################
# SIGINT (Ctrl+C) handler: removes the job list and signals every process in
# the current process group, which includes the background sub-processes.
# Globals:   JOBLIST (read)
# Arguments: none
# Outputs:   a short notice on stdout
#######################################
sigint() {
  # Further Ctrl+C presses only run "true", so the handler is not re-entered.
  trap true SIGINT
  # -f and quoting: stay silent if the file is already gone and cope with
  # temporary paths that contain whitespace.
  rm -f -- "$JOBLIST"
  echo ""
  echo "Terminating all sub-processes..."
  # PID 0 addresses all processes of the current process group.
  kill 0
  exit 1
}
trap sigint SIGINT
#
# function to print information
#
# Helper for printinfo: print how many of the given paths exist. It is meant
# to be called with an unquoted glob; a glob without matches is passed through
# literally, which is why every argument is checked for existence.
_printinfo_count() {
  local path count=0
  for path in "$@"; do
    if [[ -e "$path" ]]; then
      count=$(( count + 1 ))
    fi
  done
  printf '%s\n' "$count"
}

#######################################
# Print a one-line progress report that ends in a carriage return, so that
# the next report overwrites it.
# Globals read:    OUTDIR, JOBLIST, NFILES, NCPU, STARTDATE, FINISHED0
# Globals written: FINISHED, SUCCESS, FAILED, PREFETCH_STARTED, PREFETCH_DONE,
#                  PREFETCHING, PERCENT, RUNNING, ELAPSED_TIME, ETA
# Arguments: none
# Outputs:   the status line on stdout
#######################################
printinfo() {
  local done_now

  FINISHED=$(_printinfo_count "$OUTDIR"/finished.*)
  SUCCESS=$(_printinfo_count "$OUTDIR"/success.*)
  FAILED=$(( FINISHED - SUCCESS ))
  PREFETCH_STARTED=$(_printinfo_count "$OUTDIR"/prefetch.started.*)
  PREFETCH_DONE=$(_printinfo_count "$OUTDIR"/prefetch.done.*)
  PREFETCHING=$(( PREFETCH_STARTED - PREFETCH_DONE ))

  # An empty list must not cause a division by zero.
  if (( NFILES > 0 )); then
    PERCENT=$(( FINISHED * 100 / NFILES ))
  else
    PERCENT=0
  fi

  # "+ 0" normalises padded wc output and turns an unreadable JOBLIST into 0.
  RUNNING=$(( $(wc -l < "$JOBLIST") + 0 ))
  ELAPSED_TIME=$(( $(date +%s) - STARTDATE ))

  # Jobs finished during this invocation (FINISHED0 = count at start-up).
  done_now=$(( FINISHED - FINISHED0 ))
  if (( done_now > 0 && done_now >= NCPU )); then
    # Remaining time in whole hours. Multiply before dividing: with integer
    # arithmetic "elapsed / done" on its own is truncated to whole seconds per
    # file, which can make the estimate almost a factor of two too small.
    ETA=$(( ELAPSED_TIME * ( NFILES - FINISHED ) / done_now / 3600 ))
  else
    # Not enough finished jobs yet for a meaningful estimate.
    ETA="---"
  fi

  printf '%s\r' "${PERCENT}% | ETA: ${ETA}h | RUNNING: $RUNNING | SUCCESS: $SUCCESS | FAILED: $FAILED | PREFETCHING: $PREFETCHING"
}
#
# preparations
#
# Number of input files = number of lines in the list ("+ 0" normalises padded
# wc output and yields 0 if the list cannot be read).
NFILES=$(( $(wc -l < "$LIST") + 0 ))
# Start time in seconds since the epoch; basis for elapsed time and ETA.
STARTDATE=$(date +%s)
# 0-based index of the current list entry; part of every per-file name.
IDX=0
# Number of success markers that exist before this invocation starts any job.
FINISHED0=0
for marker in "$OUTDIR"/success.*; do
  # An unmatched glob stays literal, hence the existence check.
  if [[ -e "$marker" ]]; then
    FINISHED0=$(( FINISHED0 + 1 ))
  fi
done
#
# cleanup in case of resuming
#
# When resuming, drop all stale prefetch markers and forget every job that
# finished without a success marker, so that it counts as not yet run.
if [[ -n "${RESUME:-}" ]]; then
  rm -f -- "$OUTDIR"/prefetch.started.* "$OUTDIR"/prefetch.done.*
  for (( i = 0; i < NFILES; i++ )); do
    if [[ ! -f "$OUTDIR/success.${i}" ]]; then
      rm -f -- "$OUTDIR/finished.${i}"
    fi
  done
fi
#
# loop over all files in the list
#
# Walk through the list, one input file per line. IDX is the 0-based line
# number and appears in the name of every per-file log, marker and output.
# For each file two background sub-shells are started: one copies the file to
# local disk ("prefetch"), the other waits for that copy and for a free slot
# and then runs RunCreateFlatTree.exe on it.
# Default IFS on purpose: blanks around a list entry are stripped; -r keeps
# backslashes literal.
while read -r LINE; do
  # Jobs that already left a success marker (resume case) are not repeated.
  if [[ -e "$OUTDIR/success.${IDX}" ]]; then
    echo "Skipping already successful job $IDX"
    IDX=$(( IDX + 1 ))
    continue
  fi
  echo "${IDX}: $LINE"
  #
  # start background prefetching of this file
  #
  (
    echo "prefetch $IDX "
    touch "$OUTDIR/prefetch.started.${IDX}"
    # initialise grid certificate; every prefetch job uses its own proxy file
    X509_USER_PROXY=$(mktemp)
    export X509_USER_PROXY
    # printf + quoting passes the password verbatim (no word splitting, no
    # glob expansion, no misreading as echo options).
    printf '%s\n' "$PASSWORD" | voms-proxy-init --voms t2k.org -pwstdin >> "$OUTDIR/prefetch.${IDX}.log" 2>&1
    # copy the file to local disk (the command line is logged first)
    echo "lcg-cp -v $PREFIX$LINE $OUTDIR/prefetch.${IDX}.root" >> "$OUTDIR/prefetch.${IDX}.log"
    if lcg-cp -v "$PREFIX$LINE" "$OUTDIR/prefetch.${IDX}.root" >> "$OUTDIR/prefetch.${IDX}.log" 2>&1; then
      echo "prefetch done $IDX "
    else
      echo "prefetch FAILED $IDX "
    fi
    # The done marker is written on failure as well, so the processing job
    # below never waits forever; it then fails on the missing input file.
    touch "$OUTDIR/prefetch.done.${IDX}"
    # cleanup
    voms-proxy-destroy
  ) &
  #
  # start the actual processing of the file in the background
  #
  (
    # $! is inherited from the loop shell: the PID of the prefetch sub-shell
    # started just above. It only serves as this job's tag in JOBLIST.
    MPID=$!
    # wait until prefetching is finished
    while [[ ! -f "$OUTDIR/prefetch.done.${IDX}" ]]; do
      sleep 0.6
    done
    # wait until less than $NCPU processes are running
    # (check and registration are not atomic, so the limit can be exceeded)
    while (( $(wc -l < "$JOBLIST") + 0 >= NCPU )); do
      sleep 0.7
    done
    # start processing
    echo "start $IDX "
    flock "$JOBLIST" -c "echo $MPID >> '$JOBLIST'"
    # The success marker is only written if the program exited with status 0.
    if RunCreateFlatTree.exe "$OUTDIR/prefetch.${IDX}.root" -o "$OUTDIR/flattree_${IDX}.root" > "$OUTDIR/${IDX}.log" 2>&1 \
      && touch "$OUTDIR/success.${IDX}"; then
      echo "done $IDX "
    else
      echo "FAILED $IDX "
    fi
    touch "$OUTDIR/finished.${IDX}"
    rm -f -- "$OUTDIR/prefetch.${IDX}.root"
    rm -f -- "$OUTDIR/prefetch.done.${IDX}"
    # Remove this job's tag from JOBLIST. If the rename fails, the list is
    # truncated (": >") instead of being filled with a blank line, which
    # would be counted as a running job from then on.
    # NOTE(review): mv replaces the inode the lock is held on -- confirm this
    # is acceptable or switch to a dedicated lock file.
    flock "$JOBLIST" -c "grep -v '^$MPID\$' '$JOBLIST' > '$JOBLIST.temp' ; mv '$JOBLIST.temp' '$JOBLIST' || : > '$JOBLIST'"
    # Last step on purpose: the main script regards a job as gone once its
    # prefetch.started marker has vanished and deletes JOBLIST when no marker
    # is left, so JOBLIST must not be touched after this point.
    rm -f -- "$OUTDIR/prefetch.started.${IDX}"
  ) &
  # throttle and print info
  printinfo
  sleep 0.4
  #
  # wait if at least 2*$NCPU files are prefetched (prefetched files are deleted after processing)
  #
  while (( $(ls -- "$OUTDIR"/prefetch.started.* 2>/dev/null | wc -l) + 0 >= NCPU * 2 )); do
    # throttle and print info
    sleep 0.5
    printinfo
  done
  # increment file index
  IDX=$(( IDX + 1 ))
done < "$LIST"
#
# wait until all background jobs are completed
#
# Each processing job deletes its prefetch.started.<n> marker when it is done,
# so keep polling (and refreshing the status line) while any marker exists.
# ls succeeds only if the glob matched at least one file.
while ls -- "$OUTDIR"/prefetch.started.* > /dev/null 2>&1; do
  # throttle and print info
  sleep 0.5
  printinfo
done
#
# print information
#
#######################################
# Print the final statistics and, if any job failed, the indices of all list
# entries that have no success marker.
# Globals read:    OUTDIR, NFILES, STARTDATE
# Globals written: FINISHED, SUCCESS, FAILED, ELAPSED_TIME
# Arguments: none
# Outputs:   summary on stdout
#######################################
print_summary() {
  local marker i

  # Count marker files by globbing; an unmatched glob stays literal, hence
  # the existence checks.
  FINISHED=0
  for marker in "$OUTDIR"/finished.*; do
    if [[ -e "$marker" ]]; then
      FINISHED=$(( FINISHED + 1 ))
    fi
  done
  SUCCESS=0
  for marker in "$OUTDIR"/success.*; do
    if [[ -e "$marker" ]]; then
      SUCCESS=$(( SUCCESS + 1 ))
    fi
  done
  FAILED=$(( FINISHED - SUCCESS ))
  ELAPSED_TIME=$(( $(date +%s) - STARTDATE ))

  # The SUCCESS field shows the success count; FINISHED would also include
  # the failed jobs.
  echo "100% | SUCCESS: $SUCCESS | FAILED: $FAILED | TIME: $ELAPSED_TIME "
  if (( FAILED > 0 )); then
    echo -n "Failed runs: "
    for (( i = 0; i < NFILES; i++ )); do
      [[ -f "$OUTDIR/success.${i}" ]] || echo -n " $i"
    done
    echo ""
  fi
}
print_summary
#
# cleanup
#
# Quoted and with -f: copes with temporary paths containing blanks and stays
# silent if the job list has already been removed.
rm -f -- "$JOBLIST"