From f503d3d3f0d15743b73c3347a1f42f578ee6a481 Mon Sep 17 00:00:00 2001 From: Benjamin Tovar Date: Wed, 4 Dec 2024 10:08:42 -0500 Subject: [PATCH] Fallback resources: allocated to measured (#3998) * add merge default to rmsummary * fallback to committed resources when monitoring not enabled --- dttools/src/rmsummary.c | 15 +++++++++++++++ dttools/src/rmsummary.h | 1 + taskvine/src/manager/vine_manager.c | 17 ++++++++++++----- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/dttools/src/rmsummary.c b/dttools/src/rmsummary.c index 0ec0657cda..c7eca1dd01 100644 --- a/dttools/src/rmsummary.c +++ b/dttools/src/rmsummary.c @@ -742,6 +742,21 @@ void rmsummary_merge_override_basic(struct rmsummary *dest, const struct rmsumma RM_BIN_OP_BASIC(dest, src, override_field); } +/* Copy the value for all the fields in src to dest when dest < 0 */ +static inline double default_field(double d, double s) +{ + return (d > -1) ? d : s; +} + +void rmsummary_merge_default(struct rmsummary *dest, const struct rmsummary *src) +{ + if (!src) { + return; + } + + RM_BIN_OP(dest, src, default_field); +} + struct rmsummary *rmsummary_copy(const struct rmsummary *src, int deep_copy) { struct rmsummary *dest = rmsummary_create(-1); diff --git a/dttools/src/rmsummary.h b/dttools/src/rmsummary.h index ca8089a3f8..4f7ea8486b 100644 --- a/dttools/src/rmsummary.h +++ b/dttools/src/rmsummary.h @@ -111,6 +111,7 @@ void rmsummary_merge_max_w_time(struct rmsummary *dest, const struct rmsummary * struct rmsummary *rmsummary_copy(const struct rmsummary *src, int deep_copy); void rmsummary_merge_override(struct rmsummary *dest, const struct rmsummary *src); void rmsummary_merge_override_basic(struct rmsummary *dest, const struct rmsummary *src); +void rmsummary_merge_default(struct rmsummary *dest, const struct rmsummary *src); void rmsummary_merge_max(struct rmsummary *dest, const struct rmsummary *src); void rmsummary_merge_min(struct rmsummary *dest, const struct rmsummary *src); void 
rmsummary_add(struct rmsummary *dest, const struct rmsummary *src); diff --git a/taskvine/src/manager/vine_manager.c b/taskvine/src/manager/vine_manager.c index 4ecdc63b7d..168d822186 100644 --- a/taskvine/src/manager/vine_manager.c +++ b/taskvine/src/manager/vine_manager.c @@ -566,12 +566,17 @@ static vine_result_code_t get_completion_result(struct vine_manager *q, struct v t->time_workers_execute_last = observed_execution_time > execution_time ? execution_time : observed_execution_time; t->time_workers_execute_last_start = start_time; t->time_workers_execute_last_end = end_time; - t->resources_measured->wall_time = t->time_workers_execute_last_end - t->time_workers_execute_last_start; t->time_workers_execute_all += t->time_workers_execute_last; t->output_length = output_length; t->result = task_status; t->exit_code = exit_status; + /* fill resources measured with whatever vine reported/committed, as a fallback when task ran without monitoring enabled */ + t->resources_measured->start = ((double)start_time) / ONE_SECOND; + t->resources_measured->end = ((double)end_time) / ONE_SECOND; + t->resources_measured->wall_time = ((double)t->time_workers_execute_last) / ONE_SECOND; + rmsummary_merge_override_basic(t->resources_measured, t->resources_allocated); + /* If output is less than 1KB stdout is sent along with completion msg. retrieve it from the link. 
*/ if (bytes_sent) { get_stdout(q, w, t, bytes_sent); @@ -1168,12 +1173,14 @@ static void read_measured_resources(struct vine_manager *q, struct vine_task *t) { char *summary = monitor_file_name(q, t, ".summary", 0); - if (t->resources_measured) { - rmsummary_delete(t->resources_measured); - } - + struct rmsummary *tmp = t->resources_measured; t->resources_measured = rmsummary_parse_file_single(summary); + /* read the fallback values set by get_completion_result, if any */ + /* if tmp is null that's ok, both delete and merge_default check for it */ + rmsummary_merge_default(t->resources_measured, tmp); + rmsummary_delete(tmp); + if (t->resources_measured) { t->exit_code = t->resources_measured->exit_status;