Create flat tree mass.sh: Difference between revisions

From LHEP Wiki
Jump to navigation Jump to search
No edit summary
 
m (1 revision imported)
 
(No difference)

Latest revision as of 09:49, 18 March 2015

The following shell script allows the creation of Highland flat trees from an entire run. It should be easy to modify for running any software on entire ND280 runs (look for the line running RunCreateFlatTree.exe). You need to have the lists of files in the same directory.

Note: this will create a directory with one root file per oaAnalysis file. Run the analysis on the individual files to make sure the nRooTracker tree is also kept. Don't use hadd!

There are also a lot of log files and empty control files created in the output directory. Feel free to delete them once everything is finished and tested, but keep in mind that they are needed if you have to rerun the script to repeat failed runs!

#!/bin/bash
###################################################################################################
#
# create_flat_tree_mass.sh
#
# Set LIST to a list of file names to process, starting with "/production005/...".
# OUTDIR will be the name of the directory to put the files in.
# NCPU is the number of parallel running processes. The limit might be exceeded occasionally.
# Don't change the PREFIX, this points to our SE.
#
# The first argument names the run. It is only accepted when a matching list
# file "list_<run>" exists in the current directory; otherwise the available
# runs are printed and the script stops with status 1.
if [ -n "$1" ] && [ -f "list_$1" ]; then
  # input list and output directory are both derived from the run name
  LIST="list_$1"
  OUTDIR="flattree_$1"
else
  echo "Specify one of the following runs:"
  ls list_run* | sed -e "s/list_//g"
  exit 1
fi
# number of processing jobs allowed to run in parallel
NCPU=4
# storage element prefix that is prepended to every entry of the list
PREFIX="srm://dpm.lhep.unibe.ch/dpm/lhep.unibe.ch/home/t2k.org"
#
# Hint for real experts:
#
# If there are broken files on our SE (usually indicated by reproducible prefetch failures), delete
# them using the following command:
#   for i in <put_list_failed_runs_here> ; do lcg-del -l `cat <put_outdir_here>/prefetch.$i.log | grep SURL | sed -e "s/^.*SURL //" -e "s/ ...$//" | head -n 1` ; done
# and run grid-replicate.sh again. DO THIS ONLY IF YOU KNOW WHAT YOU ARE DOING!
#
###################################################################################################

#
# set priority
#
# Lower the scheduling priority of this shell; renice's own output is discarded.
renice 10 -p $$ > /dev/null

#
# clean up potential left-overs from last run
#
# If the output directory already exists, ask with a single key press what to
# do: "d" deletes it, "r" sets RESUME=1 so that the later steps resume the
# previous run, any other key aborts with status 1.
if [ -e "$OUTDIR" ]; then
  echo -n "Warning: output directory $OUTDIR is not empty. Delete all files in it, try to resume, or abort (d/r/a)? "
  read -r -n 1 answer
  echo ""
  case "$answer" in
    d)
      # ":?" aborts instead of expanding to nothing if OUTDIR is ever empty
      rm -rf -- "${OUTDIR:?}"
      ;;
    r)
      RESUME=1
      ;;
    *)
      echo "Aborted."
      exit 1
      ;;
  esac
fi
# nothing below can work without the output directory, so stop if it cannot
# be created
mkdir -p -- "$OUTDIR" || exit 1

#
# initialise grid
#
export DPM_HOST=dpm.lhep.unibe.ch
export DPNS_HOST=dpm.lhep.unibe.ch

# Ask for the certificate password without echoing it. "IFS=" and "-r" keep
# leading/trailing whitespace and backslashes exactly as typed.
echo -n "Password for GRID certificate: "
IFS= read -r -s PASSWORD
echo ""

# Drop any existing proxy, create a fresh one and stop if no valid proxy
# results. The password is handed over on stdin; printf with a quoted
# argument passes it verbatim (no word splitting or glob expansion).
voms-proxy-destroy > /dev/null 2>&1
printf '%s\n' "$PASSWORD" | voms-proxy-init --voms t2k.org -pwstdin
voms-proxy-info > /dev/null 2>&1 || exit 1

#
# create temporary file for collecting PIDs of running jobs
#
# The job list holds one line per processing job that is currently running.
JOBLIST=$(mktemp) || exit 1

#
# redirect Ctrl+C to terminate sub-processes
#
# sigint: SIGINT handler. Removes the job list, signals every process in the
# current process group and leaves with exit status 1.
function sigint() {
  # a second Ctrl+C during shutdown must not re-enter this handler
  trap true SIGINT
  # "kill 0" below signals the whole process group, which includes this shell;
  # ignore SIGTERM here so that the "exit 1" afterwards is actually reached
  trap '' SIGTERM
  # -f: stay quiet if the job list has already been removed
  rm -f -- "$JOBLIST"
  echo ""
  echo Terminating all sub-processes...
  kill 0
  exit 1
}
trap sigint SIGINT

#
# function to print information
#
# Print the number of entries in $OUTDIR that match the glob pattern $1
# (0 when nothing matches).
function _printinfo_count() {
  local entry count=0
  # $1 is left unquoted on purpose so that it is expanded as a glob
  for entry in "$OUTDIR"/$1; do
    # an unmatched glob stays literal; the existence test filters that out
    [ -e "$entry" ] && count=$(( count + 1 ))
  done
  echo "$count"
}

# printinfo: write a one-line progress report, terminated by a carriage return
# instead of a newline, to stdout.
# Globals read:    OUTDIR, JOBLIST, NFILES, NCPU, STARTDATE, FINISHED0
# Globals written: FINISHED, SUCCESS, FAILED, PREFETCH_STARTED, PREFETCH_DONE,
#                  PREFETCHING, PERCENT, RUNNING, ELAPSED_TIME, ETA
function printinfo() {
  local processed remaining

  FINISHED=$(_printinfo_count 'finished.*')
  SUCCESS=$(_printinfo_count 'success.*')
  FAILED=$(( FINISHED - SUCCESS ))
  PREFETCH_STARTED=$(_printinfo_count 'prefetch.started.*')
  PREFETCH_DONE=$(_printinfo_count 'prefetch.done.*')
  PREFETCHING=$(( PREFETCH_STARTED - PREFETCH_DONE ))

  # an empty list must not cause a division by zero
  if (( NFILES > 0 )); then
    PERCENT=$(( FINISHED * 100 / NFILES ))
  else
    PERCENT=0
  fi

  # one line in the job list per running processing job
  RUNNING=$(wc -l < "$JOBLIST")
  ELAPSED_TIME=$(( $(date +%s) - STARTDATE ))

  # jobs finished since FINISHED0 was recorded, and jobs still to go
  processed=$(( FINISHED - FINISHED0 ))
  remaining=$(( NFILES - FINISHED ))
  if (( processed >= NCPU && processed > 0 )); then
    # Whole hours. Multiply before dividing: with integer arithmetic,
    # elapsed/processed alone would truncate (to 0 when more jobs than
    # seconds have passed) and distort the estimate.
    ETA=$(( ELAPSED_TIME * remaining / ( processed * 3600 ) ))
  else
    # too few jobs finished for an estimate
    ETA="---"
  fi

  echo -n -e "${PERCENT}% | ETA: ${ETA}h | RUNNING: $RUNNING | SUCCESS: $SUCCESS | FAILED: $FAILED | PREFETCHING: $PREFETCHING\r"
}

#
# preparations
#
# number of entries (lines) in the list
NFILES=$(wc -l < "$LIST")
# start time in seconds since the epoch, used for elapsed time and ETA
STARTDATE=$(date +%s)
# index of the next list entry; all per-job file names are built from it
IDX=0
# jobs that had already succeeded before this invocation started
FINISHED0=$(ls "$OUTDIR"/success.* 2>/dev/null | wc -l)

#
# cleanup in case of resuming
#
# Drop all prefetch markers of the previous run and forget the "finished"
# marker of every job that has no "success" marker.
if [ -n "$RESUME" ]; then
  rm -f -- "$OUTDIR"/prefetch.started.* "$OUTDIR"/prefetch.done.*
  for (( i=0; i<NFILES; i++ )); do
    [ -f "$OUTDIR/success.${i}" ] || rm -f -- "$OUTDIR/finished.${i}"
  done
fi

#
# loop over all files in the list
#
# For every list entry two background subshells are started: one copies the
# file to local disk ("prefetch"), the other waits for that copy and for a
# free slot, then runs the flat-tree creation. Marker files in $OUTDIR
# (prefetch.started.N, prefetch.done.N, success.N, finished.N) carry the state
# between the subshells and this loop.
#
# The loop is deliberately kept on the right-hand side of a pipeline, so it
# runs in a subshell; IDX changes made here are not visible after "done", and
# nothing after the loop uses IDX.
cat "$LIST" | while read -r LINE ; do
  if [ -e "$OUTDIR/success.${IDX}" ]; then
    echo "Skipping already successful job $IDX"
    IDX=$(( IDX + 1 ))
    continue
  fi
  echo "${IDX}: $LINE"

  #
  # start background prefetching of this file
  #
  (
    echo "prefetch $IDX                                                                   "
    touch "$OUTDIR/prefetch.started.${IDX}"
    # initialise grid certificate (separate proxy file for this subshell)
    export X509_USER_PROXY=$(mktemp)
    # printf with a quoted argument hands the password over verbatim
    printf '%s\n' "$PASSWORD" | voms-proxy-init --voms t2k.org -pwstdin >> "$OUTDIR/prefetch.${IDX}.log" 2>&1
    # copy the file to local disk; the command line is logged first
    echo "lcg-cp -v $PREFIX$LINE $OUTDIR/prefetch.${IDX}.root" >> "$OUTDIR/prefetch.${IDX}.log"
    if lcg-cp -v "$PREFIX$LINE" "$OUTDIR/prefetch.${IDX}.root" >> "$OUTDIR/prefetch.${IDX}.log" 2>&1; then
      echo "prefetch done $IDX                                                            "
    else
      echo "prefetch FAILED $IDX                                                          "
    fi
    # written on success and on failure, so the processing subshell never
    # waits forever
    touch "$OUTDIR/prefetch.done.${IDX}"
    # cleanup
    voms-proxy-destroy
  ) &

  #
  # start the actual processing of the file in the background
  #
  (
    # PID of the prefetch subshell started just above; used only as a unique
    # token for this job's line in the job list
    MPID=$!
    # wait until prefetching is finished
    while [ ! -f "$OUTDIR/prefetch.done.${IDX}" ]; do
      sleep 0.6
    done
    # wait until less than $NCPU processes are running
    # (check and registration are not atomic, so the limit can be exceeded)
    while [ "$(wc -l < "$JOBLIST")" -ge "$NCPU" ]; do
       sleep 0.7
    done
    # start processing
    echo "start $IDX                                                                      "
    flock "$JOBLIST" -c "echo $MPID >> '$JOBLIST'"
    if RunCreateFlatTree.exe "$OUTDIR/prefetch.${IDX}.root" -o "$OUTDIR/flattree_${IDX}.root" > "$OUTDIR/${IDX}.log" 2>&1 &&
       touch "$OUTDIR/success.${IDX}"; then
      echo "done $IDX                                                                     "
    else
      echo "FAILED $IDX                                                                   "
    fi
    touch "$OUTDIR/finished.${IDX}"
    # the prefetched copy and its markers are no longer needed; removing
    # prefetch.started.N also releases the throttle at the end of the loop body
    rm -f -- "$OUTDIR/prefetch.started.${IDX}" "$OUTDIR/prefetch.${IDX}.root" "$OUTDIR/prefetch.done.${IDX}"
    # remove this job's line from the job list; if the rename fails, empty the
    # list with ":" ("echo >" would leave one blank line that counts as a job)
    flock "$JOBLIST" -c "grep -v '^$MPID\$' '$JOBLIST' > '$JOBLIST.temp' ; mv '$JOBLIST.temp' '$JOBLIST' || : > '$JOBLIST'"
  ) &


  # throttle and print info
  printinfo
  sleep 0.4

  #
  # wait if at least 2*$NCPU files are prefetched (prefetched files are deleted after processing)
  #
  while [ "$(ls "$OUTDIR"/prefetch.started.* 2>/dev/null | wc -l)" -ge $(( NCPU * 2 )) ]; do
    # throttle and print info
    sleep 0.5
    printinfo
  done

  # increment file index
  IDX=$(( IDX + 1 ))
done

#
# wait until all background jobs are completed
#
# print_summary: print the final result line and, if any job failed, the
# indices of all jobs that have no success marker.
# Globals read:    OUTDIR, NFILES, STARTDATE
# Globals written: FINISHED, SUCCESS, FAILED, ELAPSED_TIME
function print_summary() {
  local i
  FINISHED=$(ls "$OUTDIR"/finished.* 2>/dev/null | wc -l)
  SUCCESS=$(ls "$OUTDIR"/success.* 2>/dev/null | wc -l)
  FAILED=$(( FINISHED - SUCCESS ))
  ELAPSED_TIME=$(( $(date +%s) - STARTDATE ))
  # report the number of successful jobs, not the number of finished ones
  echo "100% | SUCCESS: $SUCCESS | FAILED: $FAILED | TIME: $ELAPSED_TIME                            "
  if [ "$FAILED" -gt 0 ]; then
    echo -n "Failed runs: "
    for (( i=0; i<NFILES; i++ )); do
      [ -f "$OUTDIR/success.${i}" ] || echo -n " $i"
    done
    echo ""
  fi
}

# A job removes its prefetch.started marker when its processing has ended, so
# no marker left means no job left.
while [ "$(ls "$OUTDIR"/prefetch.started.* 2>/dev/null | wc -l)" -gt 0 ]; do
  # throttle and print info
  sleep 0.5
  printinfo
done

#
# print information
#
print_summary

#
# cleanup
#
rm -f -- "$JOBLIST"