Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fallback resources: allocated to measured #3998

Merged
merged 2 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions dttools/src/rmsummary.c
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,21 @@ void rmsummary_merge_override_basic(struct rmsummary *dest, const struct rmsumma
RM_BIN_OP_BASIC(dest, src, override_field);
}

/*
 * Per-field combiner used by rmsummary_merge_default.
 *
 * d is the value currently in the destination, s the value in the source.
 * A destination value strictly greater than -1 is kept as is; any other
 * destination value (including -1 itself) yields the source value.
 */
static inline double default_field(double d, double s)
{
	if (d > -1) {
		return d;
	}

	return s;
}

/*
 * Use src as a source of default values for dest.
 *
 * The fields of dest and src are combined through RM_BIN_OP with
 * default_field, which keeps a dest value greater than -1 and otherwise takes
 * the src value. (RM_BIN_OP is defined elsewhere in this file; it is expected
 * to apply the function field by field and store the result in dest.)
 *
 * Does nothing when either pointer is NULL. dest may legitimately be NULL when
 * the caller passes a summary that failed to load, so it must not be
 * dereferenced here.
 */
void rmsummary_merge_default(struct rmsummary *dest, const struct rmsummary *src)
{
	if (!dest || !src) {
		return;
	}

	RM_BIN_OP(dest, src, default_field);
}

struct rmsummary *rmsummary_copy(const struct rmsummary *src, int deep_copy)
{
struct rmsummary *dest = rmsummary_create(-1);
Expand Down
1 change: 1 addition & 0 deletions dttools/src/rmsummary.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ void rmsummary_merge_max_w_time(struct rmsummary *dest, const struct rmsummary *
struct rmsummary *rmsummary_copy(const struct rmsummary *src, int deep_copy);
void rmsummary_merge_override(struct rmsummary *dest, const struct rmsummary *src);
void rmsummary_merge_override_basic(struct rmsummary *dest, const struct rmsummary *src);
void rmsummary_merge_default(struct rmsummary *dest, const struct rmsummary *src);
void rmsummary_merge_max(struct rmsummary *dest, const struct rmsummary *src);
void rmsummary_merge_min(struct rmsummary *dest, const struct rmsummary *src);
void rmsummary_add(struct rmsummary *dest, const struct rmsummary *src);
Expand Down
17 changes: 12 additions & 5 deletions taskvine/src/manager/vine_manager.c
Original file line number Diff line number Diff line change
Expand Up @@ -566,12 +566,17 @@ static vine_result_code_t get_completion_result(struct vine_manager *q, struct v
t->time_workers_execute_last = observed_execution_time > execution_time ? execution_time : observed_execution_time;
t->time_workers_execute_last_start = start_time;
t->time_workers_execute_last_end = end_time;
t->resources_measured->wall_time = t->time_workers_execute_last_end - t->time_workers_execute_last_start;
t->time_workers_execute_all += t->time_workers_execute_last;
t->output_length = output_length;
t->result = task_status;
t->exit_code = exit_status;

/* fill resources measured with whatever vine reported/committed, as a fallback when task ran without monitoring enabled */
t->resources_measured->start = ((double)start_time) / ONE_SECOND;
t->resources_measured->end = ((double)end_time) / ONE_SECOND;
t->resources_measured->wall_time = ((double)t->time_workers_execute_last) / ONE_SECOND;
rmsummary_merge_override_basic(t->resources_measured, t->resources_allocated);

/* If output is less than 1KB stdout is sent along with completion msg. retrieve it from the link. */
if (bytes_sent) {
get_stdout(q, w, t, bytes_sent);
Expand Down Expand Up @@ -1168,12 +1173,14 @@ static void read_measured_resources(struct vine_manager *q, struct vine_task *t)
{
char *summary = monitor_file_name(q, t, ".summary", 0);

if (t->resources_measured) {
rmsummary_delete(t->resources_measured);
}

struct rmsummary *tmp = t->resources_measured;
t->resources_measured = rmsummary_parse_file_single(summary);

/* read the fallback values set by get_completion_result, if any */
/* if tmp is null that's ok, both delete and merge_default check for it */
rmsummary_merge_default(t->resources_measured, tmp);
rmsummary_delete(tmp);

if (t->resources_measured) {
t->exit_code = t->resources_measured->exit_status;

Expand Down
Loading