#!/usr/bin/env bash
# Do "ps aux" on a set of nodes belonging to a single job, but exclude system processes.
export usage="$0 [-c columns | -h] jobid"
# Author: [email protected]
# Homepage: https://github.com/OleHolmNielsen/Slurm_tools/
# Requires ClusterShell with configuration for parallel commands on Slurm jobs,
# see https://wiki.fysik.dtu.dk/niflheim/SLURM#clustershell
# and https://clustershell.readthedocs.io/en/latest/intro.html
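#
# Example invocations (the jobids shown are placeholders for illustration):
#   psjob 123456          Show processes of running job 123456
#   psjob -c 200 123456   Same, but with a 200-column wide ps output
#   psjob 123456_7        Show processes of task 7 of job array 123456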
# Default width of the ps output in columns:
export columns=100
# Set enable_gpustat=1 if the gpustat command has been installed on the GPU nodes (0 otherwise)
enable_gpustat=1
# Parse command options
while getopts "c:h" options; do
    case $options in
        c ) export columns=$OPTARG
            if ! [[ $columns =~ ^[0-9]+$ ]]
            then
                echo "ERROR: Number of columns ($columns) must be a positive integer"
                exit 1
            fi
            if [[ $columns -lt 5 ]] || [[ $columns -gt 1024 ]]
            then
                echo "ERROR: Unreasonable number of columns: $columns (must be between 5 and 1024)"
                exit 1
            fi
            ;;
        h | * ) echo "Usage: $usage"
            exit 1;;
    esac
done
shift $((OPTIND-1))
CLUSH="/usr/bin/clush"
PS="/bin/ps"
PSFLAGS="-o pid,nlwp,state,user,start,cputime,%cpu,rssize,command --columns $columns"
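# For reference, these ps flags produce a header like:
#   PID NLWP S USER     STARTED     TIME %CPU   RSS COMMAND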
if test $# -ne 1
then
    echo "ERROR: No jobid given"
    echo "Usage: $usage"
    exit 1
fi
# Check that jobid is a number (possibly with _ separating jobid and arraytaskid)
if ! [[ ${1} =~ ^[0-9_]+$ ]]
then
    echo "ERROR: <jobid> must be a number or jobid_arraytaskid"
    echo "Usage: $usage"
    exit 1
fi
jobid=$1
# Check if this jobid can be inquired successfully.
JOBSTATE="`squeue -h -O State:. -j $jobid`"
if test "$?" != "0"
then
echo Error inquiring about job $jobid
exit 1
fi
# Detect job arrays by counting the number of words in JOBSTATE
words=( $JOBSTATE )
if [[ ${#words[@]} -gt 1 ]]
then
    echo "ERROR: The job $jobid is a job array with multiple jobs. Please select only one of the array jobs:"
    squeue -O JobArrayID,ArrayJobID,ArrayTaskID,JobID,StartTime,TimeUsed,TimeLimit -j $jobid
    exit 1
fi
if test "$JOBSTATE" != "RUNNING"
then
echo The job $jobid is not running, it has state=$JOBSTATE
exit 1
fi
# For an array job, get the individual jobid corresponding to the array job:
realjobid=`squeue -h -O JobID: -j $jobid`
# Print some job information
if test "$realjobid" = "$jobid"
then
arrayprint=""
else
# For array jobs
arrayprint=",ArrayJobID:13,ArrayTaskID:14"
fi
squeue -O JobID:10,Partition:14,NumNodes:6,NumTasks:6,UserName:10$arrayprint,StartTime,TimeUsed:14,TimeLimit:14,tres-alloc: -j $jobid
# Print the NodeList
NODELIST="`squeue -h -O NodeList: -j $jobid`"
echo NODELIST: $NODELIST
bar="===================================================="
echo "$bar"
echo "Process list from 'ps' on each node in the job:"
# Execute parallel shell on the job nodes:
# The "scontrol listpids" lists all processes belonging to a Slurm job $realjobid
# Count also the number of processes (numprocs) and threads (numthreads in $2=nlwp) when $1 is a number
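# For illustration, "scontrol listpids <jobid>" prints a header plus one row
# per PID, e.g. (values made up):
#   PID      JOBID    STEPID   LOCALID GLOBALID
#   12345    1000001  batch    0       0
# so the filter int($1)>0 keeps the PID rows and drops the header.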
$CLUSH -bw $NODELIST "$PS $PSFLAGS \$(scontrol listpids $realjobid | awk 'int(\$1)>0 {print \$1}') | grep -v ' root ' | awk '{print \$0; if(\$1~/^[0-9]+\$/) {numprocs++; numthreads+=\$2}} END {printf(\"Total: %d processes and %d threads\n\", numprocs, numthreads)}'; echo -n Uptime:; uptime"
# Check for job nodes with GPU gres
gpunodes=`mktemp`
sinfo -h -N -n $NODELIST -O NodeHost,Gres | uniq | grep -i GPU > $gpunodes
if [[ -s $gpunodes ]]
then
    echo "$bar"
    echo "Nodes in this job with GPU Generic Resources (Gres):"
    cat $gpunodes
fi
rm -f $gpunodes
# Check for GPU jobs
# Using gpustat tool from https://github.com/wookayin/gpustat
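# gpustat flags used below: -u (show user), -p (show PID), -c (show command name)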
if [[ $enable_gpustat -gt 0 && "`squeue -h -O tres-alloc: -j $jobid`" =~ "gres/gpu" ]]
then
    echo
    echo "Running GPU tasks:"
    echo "Node: ID GPU-type | Temp GPU% | Mem / Tot | user:process/PID(Mem)"
    # color="--color"
    GPUNAME_WIDTH="--gpuname-width 24"
    color=""
    # The "/bin/true" catches the case of grep returning an error code
    # Sort the output on node names using "sort -k 1"
    $CLUSH -w $NODELIST "gpustat -upc $GPUNAME_WIDTH $color | grep \$(scontrol listpids $realjobid | awk 'int(\$1)>0 {print \"-e \" \$1}') || /bin/true" | sort -k 1
fi
# If using JobContainerType=job_container/tmpfs then print also disk space usage
# See also https://support.schedmd.com/show_bug.cgi?id=11183
# Viewing mounts on a node from outside the job: findmnt -l -o target,source,fstype,propagation
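# For illustration, "scontrol show config" prints a line like:
#   JobContainerType        = job_container/tmpfs
# so awk's $3 is the configured value.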
JobContainerType=`scontrol show config | grep JobContainerType | awk '{print $3}'`
if [[ "$JobContainerType" == "job_container/tmpfs" ]]
then
echo "$bar"
# echo "Slurm job_container temporary scratch spaces:"
# grep -i '^BasePath' /etc/slurm/job_container.conf
basepath=`grep -i '^BasePath' /etc/slurm/job_container.conf | awk '{split($1,a,"="); print a[2]}'`
echo "Scratch disk usage for JobID $realjobid:"
echo "Node: Usage Scratch folder"
# Sort output on nodenames using "sort -k 1"
$CLUSH -w $NODELIST "findmnt -l -o target -t xfs | grep $realjobid | xargs du -sh" | sort -k 1
echo
echo "Scratch disks on JobID $realjobid compute nodes:"
echo "Node: Size Used Avail Use% Mounted on"
# Sort output on nodenames (key 1)
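    # Note: clush prefixes each output line with "nodename:", so in the awk
    # below $1 is the node name and the df fields proper start at $2.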
$CLUSH -w $NODELIST "df -Ph $basepath | tail -n+2" | awk '{printf("%-10s %6s %6s %6s %6s %-20s\n", $1, $3, $4, $5, $6, $7)}' | sort -k 1
fi
echo "$bar"
# $CLUSH -bw $NODELIST "$PS $PSFLAGS \$(scontrol listpids $realjobid | tail -n+3 | awk '{print \$1}') | grep -v ' root '"
# You could also use: $CLUSH -bw@sj:$realjobid ...