Skip to content

Commit

Permalink
ensure job is killed when exception in task status check
Browse files Browse the repository at this point in the history
Signed-off-by: jorgee <[email protected]>
  • Loading branch information
jorgee committed Nov 29, 2024
1 parent ee25217 commit 712deb6
Showing 1 changed file with 16 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@

package nextflow.processor

import nextflow.cloud.CloudSpotTerminationException
import nextflow.exception.FailedGuardException
import nextflow.exception.ProcessEvalException
import nextflow.exception.ProcessException
import nextflow.exception.ProcessRetryableException

import static nextflow.processor.TaskProcessor.*

import java.util.concurrent.ExecutorService
Expand Down Expand Up @@ -573,6 +579,16 @@ class TaskPollingMonitor implements TaskMonitor {
checkTaskStatus(handler)
}
catch (Throwable error) {
// At this point NF assumes the job is not running, but an error raised while monitoring the task could leave the job still running (#5516).
// In that case, NF must make sure the job is killed.
if( error !instanceof ProcessException && error !instanceof ProcessRetryableException && error !instanceof CloudSpotTerminationException
&& error !instanceof ProcessEvalException && error !instanceof FailedGuardException) {
try {
handler.kill()
} catch( Throwable t ) {
log.debug("Unable to cancel task ${handler.task.lazyName()} after error", t)
}
}
handleException(handler, error)
}
}
Expand Down

0 comments on commit 712deb6

Please sign in to comment.