From 4e61ed33d2f4874d753a68f6b75bb9629116bef1 Mon Sep 17 00:00:00 2001 From: Benjamin Tovar Date: Tue, 26 Nov 2024 12:37:22 -0500 Subject: [PATCH] correctly consider sampling of tasks --- taskvine/src/manager/vine_manager.c | 54 ++++++++++++++--------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/taskvine/src/manager/vine_manager.c b/taskvine/src/manager/vine_manager.c index 3b35778d33..e3d2637752 100644 --- a/taskvine/src/manager/vine_manager.c +++ b/taskvine/src/manager/vine_manager.c @@ -5227,35 +5227,15 @@ int vine_hungry(struct vine_manager *q) int64_t avg_commited_tasks_disk = DIV_INT_ROUND_UP(qstats.committed_disk, tasks_running); int64_t avg_commited_tasks_gpus = DIV_INT_ROUND_UP(qstats.committed_gpus, tasks_running); - // get required resources (cores, memory, disk, gpus) of one (all?) waiting tasks - // seems to iterate through all tasks counted in the queue. - int64_t ready_task_cores = 0; - int64_t ready_task_memory = 0; - int64_t ready_task_disk = 0; - int64_t ready_task_gpus = 0; - - int t_idx; - struct vine_task *t; - int iter_depth = MIN(q->attempt_schedule_depth, tasks_waiting); - int sampled_tasks_waiting = 0; - PRIORITY_QUEUE_BASE_ITERATE(q->ready_tasks, t_idx, t, sampled_tasks_waiting, iter_depth) - { - /* unset resources are marked with -1, so we added what we know about currently running tasks */ - ready_task_cores += t->resources_requested->cores > 0 ? t->resources_requested->cores : avg_commited_tasks_cores; - ready_task_memory += t->resources_requested->memory > 0 ? t->resources_requested->memory : avg_commited_tasks_memory; - ready_task_disk += t->resources_requested->disk > 0 ? t->resources_requested->disk : avg_commited_tasks_disk; - ready_task_gpus += t->resources_requested->gpus > 0 ? 
t->resources_requested->gpus : avg_commited_tasks_gpus; - } - // get total available resources consumption (cores, memory, disk, gpus) of all workers of this manager // available = factor*total (all) - committed (actual in use) - int64_t workers_total_avail_cores = q->hungry_minimum_factor * qstats.total_cores - qstats.committed_cores - ready_task_cores; - int64_t workers_total_avail_memory = q->hungry_minimum_factor * qstats.total_memory - qstats.committed_memory - ready_task_memory; - int64_t workers_total_avail_disk = q->hungry_minimum_factor * qstats.total_disk - qstats.committed_disk - ready_task_disk; - int64_t workers_total_avail_gpus = q->hungry_minimum_factor * qstats.total_gpus - qstats.committed_gpus - ready_task_gpus; + int64_t workers_total_avail_cores = q->hungry_minimum_factor * qstats.total_cores - qstats.committed_cores; + int64_t workers_total_avail_memory = q->hungry_minimum_factor * qstats.total_memory - qstats.committed_memory; + int64_t workers_total_avail_disk = q->hungry_minimum_factor * qstats.total_disk - qstats.committed_disk; + int64_t workers_total_avail_gpus = q->hungry_minimum_factor * qstats.total_gpus - qstats.committed_gpus; int64_t tasks_needed = 0; - if (sampled_tasks_waiting < 1) { + if (tasks_waiting < 1) { tasks_needed = DIV_INT_ROUND_UP(workers_total_avail_cores, avg_commited_tasks_cores); if (avg_commited_tasks_memory > 0) { tasks_needed = MIN(tasks_needed, DIV_INT_ROUND_UP(workers_total_avail_memory, avg_commited_tasks_memory)); @@ -5272,7 +5252,27 @@ int vine_hungry(struct vine_manager *q) return MAX(tasks_needed, hungry_minimum); } - // from here on we can assume that sampled_tasks_waiting > 0. + // from here on we can assume that tasks_waiting > 0. + + // sum the requested resources (cores, memory, disk, gpus) over a sample of the waiting tasks. + // the iteration is bounded by iter_depth = MIN(attempt_schedule_depth, tasks_waiting). 
+ int64_t ready_task_cores = 0; + int64_t ready_task_memory = 0; + int64_t ready_task_disk = 0; + int64_t ready_task_gpus = 0; + + int t_idx; + struct vine_task *t; + int iter_depth = MIN(q->attempt_schedule_depth, tasks_waiting); + int sampled_tasks_waiting = 0; + PRIORITY_QUEUE_BASE_ITERATE(q->ready_tasks, t_idx, t, sampled_tasks_waiting, iter_depth) + { + /* unset resources are marked with -1, so we added what we know about currently running tasks */ + ready_task_cores += t->resources_requested->cores > 0 ? t->resources_requested->cores : avg_commited_tasks_cores; + ready_task_memory += t->resources_requested->memory > 0 ? t->resources_requested->memory : avg_commited_tasks_memory; + ready_task_disk += t->resources_requested->disk > 0 ? t->resources_requested->disk : avg_commited_tasks_disk; + ready_task_gpus += t->resources_requested->gpus > 0 ? t->resources_requested->gpus : avg_commited_tasks_gpus; + } int64_t avg_ready_tasks_cores = DIV_INT_ROUND_UP(ready_task_cores, sampled_tasks_waiting); int64_t avg_ready_tasks_memory = DIV_INT_ROUND_UP(ready_task_memory, sampled_tasks_waiting); @@ -5294,7 +5294,7 @@ int vine_hungry(struct vine_manager *q) tasks_needed = MIN(tasks_needed, DIV_INT_ROUND_UP(workers_total_avail_gpus, avg_ready_tasks_gpus)); } - tasks_needed = MAX(0, MAX(tasks_needed, hungry_minimum - tasks_waiting)); + tasks_needed = MAX(0, MAX(tasks_needed, hungry_minimum) - tasks_waiting); return tasks_needed; }