#!/bin/bash
# set -x
#
#  Based on globus submission script for pbs
#
#  Submits job to PBS.
#  Input: path to grami file (same as Globus).
#
# The temporary job script is created for the submission and then removed
# at the end of this script.

echo "----- starting submit_pbs_job -----" 1>&2

joboption_lrms=pbs

# ARC1 passes first the config file: consume "--config <file>" so that the
# grami file ends up in $1 again.
if [ "$1" = "--config" ]; then shift; ARC_CONFIG=$1; shift; fi

# Absolute directory holding this script. Everything is quoted so that an
# installation path containing whitespace does not get word-split.
basedir=$(dirname "$0")
basedir=$(cd "$basedir" > /dev/null && pwd) || exit $?

pkgdatadir="$basedir"

# Load the shared submit helpers that live next to this script; the rest of
# the script calls functions it does not define itself, so give up if the
# file cannot be sourced.
. "${pkgdatadir}/submit_common.sh" || exit $?

##############################################################
# Parse grami file, read arc config
##############################################################

init $1
read_arc_conf

# Failure reasons are appended to this file in the control directory.
failures_file="$joboption_controldir/job.$joboption_gridid.failed"

# When RUNTIME_NODE_SEES_FRONTEND is empty a scratch directory is mandatory;
# refuse to continue if neither setting is available.
if [ -z "${RUNTIME_NODE_SEES_FRONTEND}" ] && [ -z "${RUNTIME_LOCAL_SCRATCH_DIR}" ] ; then
  echo "Need to know at which directory to run job: RUNTIME_LOCAL_SCRATCH_DIR must be set if RUNTIME_NODE_SEES_FRONTEND is empty" 1>&2
  echo "Submission: Configuration error.">>"$failures_file"
  exit 1
fi

##############################################################
# Zero stage of runtime environments
##############################################################
RTE_stage0

##############################################################
# create job script
##############################################################
mktempscript

# qsub command line used for the submission (deliberately a plain string:
# it is word-split when executed). It is prefixed with PBS_BIN_PATH when
# that variable is non-empty.
PBS_QSUB="${PBS_BIN_PATH:+${PBS_BIN_PATH}/}qsub -r n -S /bin/bash -m n "

is_cluster=true

##############################################################
# Start job script
##############################################################
# job name for convenience: prefix with N when the first character is not a
# letter, turn every non-alphanumeric character into '_' and keep at most
# the first 15 characters
if [ -n "${joboption_jobname}" ] ; then
  jobname=$(echo "$joboption_jobname" | \
            sed -e 's/^\([^[:alpha:]]\)/N\1/' \
                -e 's/[^[:alnum:]]/_/g' \
                -e 's/\(...............\).*/\1/')
fi

# The job script is (re)created here; everything in the group goes into it.
{
  echo "# PBS batch job script built by grid-manager"
  # write PBS output to 'comment' file
  echo "#PBS -e '${joboption_directory}.comment'"
  echo "#PBS -j eo"
  echo ""
  # choose queue
  [ -z "${joboption_queue}" ] || echo "#PBS -q $joboption_queue"
  # project name for accounting
  [ -z "${joboption_rsl_project}" ] || echo "#PBS -A $joboption_rsl_project"
  # sanitized job name computed above
  [ -z "${joboption_jobname}" ] || echo "#PBS -N '$jobname'"
} > $LRMS_JOB_SCRIPT
echo "PBS jobname: $jobname" 1>&2

##############################################################
# (non-)parallel jobs
##############################################################
# A missing or non-positive count means a single node/slot.
if [ -z "$joboption_count" ] || [ "$joboption_count" -le 0 ] ; then
  joboption_count=1
fi

# Base request, followed by the queue-wide node string when configured.
nodes_string="#PBS -l nodes=${joboption_count}${CONFIG_queue_node_string:+:${CONFIG_queue_node_string}}"

# Append joboption_nodeproperty_0, _1, ... in order, stopping at the first
# index whose variable is not set at all (an empty but set value still counts).
i=0
while : ; do
  eval "var_is_set=\${joboption_nodeproperty_$i+yes}"
  [ -n "${var_is_set}" ] || break
  eval "var_value=\${joboption_nodeproperty_$i}"
  nodes_string="${nodes_string}:${var_value}"
  i=$(( i + 1 ))
done
echo "$nodes_string" >> $LRMS_JOB_SCRIPT

##############################################################
# Execution times (input in seconds, written as minutes:seconds)
##############################################################

# pbs_write_time_limits
#   Appends the "#PBS -l cput=" and "#PBS -l walltime=" directives to
#   $LRMS_JOB_SCRIPT.
#   Reads:  joboption_cputime, joboption_walltime, walltime_ratio,
#           LRMS_JOB_SCRIPT
#   Writes: joboption_cputime / joboption_walltime (negative values are
#           clamped to 0; walltime is derived from cputime when missing),
#           maxcputime, cputime_min, cputime_sec, maxwalltime, walltime_min,
#           walltime_sec (left global, as before).
#   Nothing is written for a limit whose value is empty.
pbs_write_time_limits () {
  if [ -n "$joboption_cputime" ] ; then
    # TODO: parallel jobs, add initialization time, make walltime bigger, ...
    # is cputime for every process ?
    if [ "$joboption_cputime" -lt 0 ] ; then
      joboption_cputime=0
    fi
    maxcputime="$joboption_cputime"
    cputime_min=$(( maxcputime / 60 ))
    cputime_sec=$(( maxcputime - cputime_min * 60 ))
    echo "#PBS -l cput=${cputime_min}:${cputime_sec}" >> "$LRMS_JOB_SCRIPT"
  fi

  if [ -z "$joboption_walltime" ] && [ -n "$joboption_cputime" ] ; then
    # Set walltime for backward compatibility or incomplete requests.
    # walltime_ratio is not set in this file; default to 1 so that a missing
    # value does not turn the expression into a syntax error and silently
    # drop the walltime limit.
    joboption_walltime=$(( joboption_cputime * ${walltime_ratio:-1} ))
  fi

  if [ -n "$joboption_walltime" ] ; then
    if [ "$joboption_walltime" -lt 0 ] ; then
      joboption_walltime=0
    fi
    maxwalltime="$joboption_walltime"
    walltime_min=$(( maxwalltime / 60 ))
    walltime_sec=$(( maxwalltime - walltime_min * 60 ))
    echo "#PBS -l walltime=${walltime_min}:${walltime_sec}" >> "$LRMS_JOB_SCRIPT"
  fi
}

pbs_write_time_limits

##############################################################
# Requested memory (mb)
##############################################################

set_req_mem

# pmem and pvmem are per process, so no need to modify based on count.
# Valid for both PBS and Torque.
if [ -n "$joboption_memory" ] ; then
  {
    echo "#PBS -l pvmem=${joboption_memory}mb"
    echo "#PBS -l pmem=${joboption_memory}mb"
  } >> $LRMS_JOB_SCRIPT
fi

# Name of this (submitting) host; the staging directives below need it.
gate_host=$(uname -n)
[ -n "$gate_host" ] || {
  echo "Can't get own hostname" 1>&2
  rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
  echo "Submission: Configuration error.">>"$failures_file"
  exit 1
}

##############################################################
#  PBS stage in/out
##############################################################
# Only used when RUNTIME_NODE_SEES_FRONTEND is empty: the session directory
# is staged to RUNTIME_LOCAL_SCRATCH_DIR before the job, and the job
# directory plus its .diag file are staged back afterwards.
if [ -z "${RUNTIME_NODE_SEES_FRONTEND}" ] ; then
  # Probe the session directory in a subshell so the working directory of
  # this script is left alone. The failure handling lives OUTSIDE the
  # subshell: an "exit" inside ( ... ) would only leave the subshell and the
  # submission would carry on regardless.
  if ! ( cd "$joboption_directory" ) ; then
    echo "Can't change to session directory: $joboption_directory" 1>&2
    rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
    echo "Submission: Configuration error.">>"$failures_file"
    exit 1
  fi
  {
    echo "#PBS -W stagein=$RUNTIME_LOCAL_SCRATCH_DIR@$gate_host:$joboption_directory"
    # the job directory is staged back into the parent of the session directory
    echo "#PBS -W stageout=$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid@$gate_host:$(dirname "$joboption_directory")"
    echo "#PBS -W stageout=$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid.diag@$gate_host:$joboption_directory.diag"
  } >> "$LRMS_JOB_SCRIPT"
fi

{
  echo ""
  echo "# Overide umask of execution node (sometime values are really strange)"
  echo "umask 077"
} >> "$LRMS_JOB_SCRIPT"

##############################################################
# Add environment variables
##############################################################
add_user_env

##############################################################
# Check for existence of executable,
# there is no sense to check for executable if files are
# downloaded directly to computing node
##############################################################
# Only the presence of the first job argument is verified here.
[ -n "${joboption_arg_0}" ] || {
  echo 'Executable is not specified' 1>&2
  rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
  echo "Submission: Job description error.">>"$failures_file"
  exit 1
}

#######################################################################
# copy information useful for transferring files to/from node directly
#######################################################################
case "$joboption_localtransfer" in
  yes) setup_local_transfer ;;
esac

######################################################################
# Adjust working directory for tweaky nodes
# RUNTIME_GRIDAREA_DIR should be defined by external means on nodes
######################################################################

# session_relative_path PATH DIR
#   Prints PATH with a leading DIR and any slashes directly following it
#   removed; prints PATH unchanged when it does not start with DIR.
#   DIR is compared literally. (It used to be pasted into a sed regex with
#   '#' as delimiter, so '.', '[', '*' or '#' in the session directory could
#   mis-match or break the expression.)
#   As before, an empty DIR just strips leading slashes from PATH.
session_relative_path () {
  local rest=${1#"$2"}
  if [ -n "$2" ] && [ "$rest" = "$1" ] ; then
    # PATH is not below DIR
    printf '%s\n' "$1"
    return 0
  fi
  # drop the separator(s) between DIR and the relative part
  while [ "${rest#/}" != "$rest" ] ; do
    rest=${rest#/}
  done
  printf '%s\n' "$rest"
}

if [ -n "${RUNTIME_NODE_SEES_FRONTEND}" ] ; then
  setup_runtime_env
else
  # The job runs in the scratch directory: point the job directory and the
  # diag file there.
  echo "RUNTIME_JOB_DIR=$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid" >> "$LRMS_JOB_SCRIPT"
  echo "RUNTIME_JOB_DIAG=$RUNTIME_LOCAL_SCRATCH_DIR/${joboption_gridid}.diag" >> "$LRMS_JOB_SCRIPT"
  RUNTIME_STDIN_REL=$(session_relative_path "${joboption_stdin}" "${joboption_directory}")
  RUNTIME_STDOUT_REL=$(session_relative_path "${joboption_stdout}" "${joboption_directory}")
  RUNTIME_STDERR_REL=$(session_relative_path "${joboption_stderr}" "${joboption_directory}")
  # A stream whose path was not changed by the stripping lies outside the
  # session directory and is used as is; otherwise it is re-rooted under the
  # scratch job directory.
  if [ "$RUNTIME_STDIN_REL" = "${joboption_stdin}" ] ; then
    echo "RUNTIME_JOB_STDIN=\"${joboption_stdin}\"" >> "$LRMS_JOB_SCRIPT"
  else
    echo "RUNTIME_JOB_STDIN=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDIN_REL\"" >> "$LRMS_JOB_SCRIPT"
  fi
  if [ "$RUNTIME_STDOUT_REL" = "${joboption_stdout}" ] ; then
    echo "RUNTIME_JOB_STDOUT=\"${joboption_stdout}\"" >> "$LRMS_JOB_SCRIPT"
  else
    echo "RUNTIME_JOB_STDOUT=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDOUT_REL\"" >> "$LRMS_JOB_SCRIPT"
  fi
  if [ "$RUNTIME_STDERR_REL" = "${joboption_stderr}" ] ; then
    echo "RUNTIME_JOB_STDERR=\"${joboption_stderr}\"" >> "$LRMS_JOB_SCRIPT"
  else
    echo "RUNTIME_JOB_STDERR=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDERR_REL\"" >> "$LRMS_JOB_SCRIPT"
  fi
fi

##############################################################
# Add std... to job arguments
##############################################################
include_std_streams

##############################################################
#  Move files to local working directory (job is done on node only)
#  RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
##############################################################
move_files_to_node

# The generated script tracks failures in RESULT; it starts out clean.
{
  echo ""
  echo "RESULT=0"
  echo ""
} >> $LRMS_JOB_SCRIPT

#####################################################
#  Download input files
####################################################
download_input_files

#####################################################
#  Go to working dir and start job
#  (also opens the first "skip execution if something already
#  failed" guard of the generated script)
####################################################
cat >> $LRMS_JOB_SCRIPT <<'EOSCR'

# Changing to session directory
cd $RUNTIME_JOB_DIR
export HOME=$RUNTIME_JOB_DIR
if [ "$RESULT" = '0' ] ; then
EOSCR

##############################################################
#  Runtime configuration at computing node
##############################################################
RTE_stage1


##############################################################
#  Diagnostics
##############################################################
# Literal part: the generated script records the runtime environments and,
# when PBS_NODEFILE is readable, one "nodename=" line per entry of it.
cat >> $LRMS_JOB_SCRIPT <<'EOSCR'
echo "runtimeenvironments=$runtimeenvironments" >> "$RUNTIME_JOB_DIAG"
if [ ! "X$PBS_NODEFILE" = 'X' ] ; then
  if [ -r "$PBS_NODEFILE" ] ; then
    cat "$PBS_NODEFILE" | sed 's/\(.*\)/nodename=\1/' >> "$RUNTIME_JOB_DIAG"
  else
    PBS_NODEFILE=
  fi
fi
EOSCR
# Fallback: without a usable node file the generated script runs the
# command held in NODENAME (expanded now, at submit time) instead.
if [ -n "$NODENAME" ] ; then
  cat >> $LRMS_JOB_SCRIPT <<EOSCR
if [ "X\$PBS_NODEFILE" = X ] ; then
  nodename=\`$NODENAME\`
  echo "nodename=\$nodename" >> "\$RUNTIME_JOB_DIAG"
fi
EOSCR
fi

##############################################################
#  Check intermediate result again
##############################################################
printf '%s\n' "if [ \"\$RESULT\" = '0' ] ; then" >> $LRMS_JOB_SCRIPT

##############################################################
#  Execution
##############################################################
cd_and_run

##############################################################
#  End of RESULT checks
#  (closes the two "if RESULT" guards opened in the generated script)
##############################################################
printf 'fi\nfi\n' >> $LRMS_JOB_SCRIPT

##############################################################
#  Runtime (post)configuration at computing node
##############################################################
configure_runtime

#####################################################
#  Upload output files
####################################################
# With local transfer the upload is delegated to upload_output_files.
# Otherwise the generated script cleans the job directory, keeping only the
# requested output files. It works in four steps:
#   1. remove symlinks and make every file and directory user-writable,
#   2. take write permission away from everything listed as output,
#   3. delete every regular file that is still user-writable,
#   4. give write permission back to what is left.
if [ "$joboption_localtransfer" = 'yes' ] ; then
  upload_output_files
else
# There is no sense to keep trash till GM runs uploader
  echo 'if [ ! -z  "$RUNTIME_LOCAL_SCRATCH_DIR" ] ; then' >> "$LRMS_JOB_SCRIPT"
# Delete all files except listed in job.#.output
  echo '  find ./ -type l -exec rm -f "{}" ";"' >> "$LRMS_JOB_SCRIPT"
  echo '  find ./ -type f -exec chmod u+w "{}" ";"' >> "$LRMS_JOB_SCRIPT"
  echo '  find ./ -type d -exec chmod u+w "{}" ";"' >> "$LRMS_JOB_SCRIPT"

  if [ -f "$joboption_controldir/job.$joboption_gridid.output" ] ; then
    # Leading slashes are removed from every entry, if any.
    # Backslashes and spaces are escaped with a backslash in job.*.output.
    # The shell built-in read undoes this escaping (hence no "read -r").
    sed 's/^\/*//' "$joboption_controldir/job.$joboption_gridid.output" | \
    while read name rest; do

      # make it safe for shell by replacing single quotes with '\''
      name=`printf "%s" "$name"|sed "s/'/'\\\\\\''/g"`;

      # protect from deleting output files including those in the dynamic list
      if [ "${name#@}" != "$name" ]; then     # Does $name start with a @ ?

        # "@file" names a list of further output files; the generated script
        # protects the list itself and every entry read from it.
        dynlist=${name#@}
        echo "  dynlist='$dynlist'" >> "$LRMS_JOB_SCRIPT"
        cat >> "$LRMS_JOB_SCRIPT" <<'EOSCR'
  chmod -R u-w "./$dynlist" 2>/dev/null
  cat "./$dynlist" | while read name rest; do
    chmod -R u-w "./$name" 2>/dev/null
  done
EOSCR
      else

        echo "  chmod -R u-w \"\$RUNTIME_JOB_DIR\"/'$name' 2>/dev/null" >> "$LRMS_JOB_SCRIPT"
      fi
    done
  fi

  # "-perm /200" (any of the given bits set) replaces the former "+200",
  # which current GNU find rejects as an invalid mode.
  echo '  find ./ -type f -perm /200 -exec rm -f "{}" ";"' >> "$LRMS_JOB_SCRIPT"
  echo '  find ./ -type f -exec chmod u+w "{}" ";"' >> "$LRMS_JOB_SCRIPT"
  echo '  find ./ -type d -exec chmod u+w "{}" ";"' >> "$LRMS_JOB_SCRIPT"
  echo 'fi' >> "$LRMS_JOB_SCRIPT"
fi
echo "" >> "$LRMS_JOB_SCRIPT"

##############################################################
#  Move files back to session directory (job is done on node only)
#  RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
# !!!!!!!!!!!!!!!!!!! would be better to know the names of files !!!!!!!!!!!
##############################################################
move_files_to_frontend

#######################################
#  Submit the job
#######################################
echo "PBS job script built" 1>&2
# Execute qsub command
cd "$joboption_directory"
# Dump the complete job script to stderr for the log.
{
  echo "PBS script follows:"
  echo "-------------------------------------------------------------------"
  cat "$LRMS_JOB_SCRIPT"
  echo "-------------------------------------------------------------------"
  echo ""
} 1>&2
# Up to 10 attempts, 2 seconds apart. Exit code 198 or a "maximum number of
# jobs" message is answered by waiting a minute and restarting the attempt
# counter, so that case is retried without limit.
PBS_RESULT=1
PBS_TRIES=0
while [ "$PBS_TRIES" -lt '10' ] ; do
  ${PBS_QSUB} < $LRMS_JOB_SCRIPT 1>$LRMS_JOB_OUT 2>$LRMS_JOB_ERR
  PBS_RESULT="$?"
  [ "$PBS_RESULT" -eq '0' ] && break
  if [ "$PBS_RESULT" -eq '198' ] || \
     grep 'maximum number of jobs' "$LRMS_JOB_OUT" "$LRMS_JOB_ERR" ; then
    echo "Waiting for queue to decrease" 1>&2
    sleep 60
    PBS_TRIES=0
    continue
  fi
  PBS_TRIES=$(( PBS_TRIES + 1 ))
  sleep 2
done
# Evaluate the outcome of the submission. The only successful path is:
# qsub returned 0 AND its output is a single line starting with digits.
# In that case the job id is recorded in $arg_file, the temporary files are
# removed and the script exits 0. Every other path records a reason in the
# failures file, dumps the qsub output to stderr and exits 1.
if [ "$PBS_RESULT" -eq '0' ] ; then
   job_id=$(cat "$LRMS_JOB_OUT")
   # This should be on the format 1414162.$hostname
   if [ -z "${job_id}" ]; then
      echo "job *NOT* submitted successfully!" 1>&2
      echo "failed getting the pbs jobid for the job!" 1>&2
      echo "Submission: Local submission client behaved unexpectedly.">>"$failures_file"
   elif [ "$(echo "${job_id}" | grep -Ec "^[0-9]+")" != "1" ]; then
      # exactly one output line must start with a number
      echo "job *NOT* submitted successfully!" 1>&2
      echo "badly formatted pbs jobid for the job: $job_id !" 1>&2
      echo "Submission: Local submission client behaved unexpectedly.">>"$failures_file"
   else
      echo "joboption_jobid=$job_id" >> "$arg_file"
      echo "job submitted successfully!" 1>&2
      echo "local job id: $job_id" 1>&2
      # Remove temporary job script file
      rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
      echo "----- exiting submit_pbs_job -----" 1>&2
      echo "" 1>&2
      exit 0
   fi
else
  echo "job *NOT* submitted successfully!" 1>&2
  echo "got error code from qsub: $PBS_RESULT !" 1>&2
  echo "Submission: Local submission client failed.">>"$failures_file"
fi
# Failure epilogue: all diagnostics go to stderr, including the
# "Error output is:" heading, which used to be printed on stdout.
echo "Output is:" 1>&2
cat "$LRMS_JOB_OUT" 1>&2
echo "Error output is:" 1>&2
cat "$LRMS_JOB_ERR" 1>&2
rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
echo "----- exiting submit_pbs_job -----" 1>&2
echo "" 1>&2
exit 1
