Skip to content

Commit

Permalink
correctly consider sampling of tasks
Browse files Browse the repository at this point in the history
  • Loading branch information
btovar committed Nov 26, 2024
1 parent 3814fec commit 4e61ed3
Showing 1 changed file with 27 additions and 27 deletions.
54 changes: 27 additions & 27 deletions taskvine/src/manager/vine_manager.c
Original file line number Diff line number Diff line change
Expand Up @@ -5227,35 +5227,15 @@ int vine_hungry(struct vine_manager *q)
int64_t avg_commited_tasks_disk = DIV_INT_ROUND_UP(qstats.committed_disk, tasks_running);
int64_t avg_commited_tasks_gpus = DIV_INT_ROUND_UP(qstats.committed_gpus, tasks_running);

// accumulate the resources (cores, memory, disk, gpus) requested by a sample of the waiting tasks.
// the sample is at most MIN(attempt_schedule_depth, tasks_waiting) tasks from the ready queue, not the whole queue.
int64_t ready_task_cores = 0;
int64_t ready_task_memory = 0;
int64_t ready_task_disk = 0;
int64_t ready_task_gpus = 0;

int t_idx;
struct vine_task *t;
int iter_depth = MIN(q->attempt_schedule_depth, tasks_waiting);
int sampled_tasks_waiting = 0;
PRIORITY_QUEUE_BASE_ITERATE(q->ready_tasks, t_idx, t, sampled_tasks_waiting, iter_depth)
{
/* unset resources are marked with -1, so for those we add the average committed by currently running tasks instead */
ready_task_cores += t->resources_requested->cores > 0 ? t->resources_requested->cores : avg_commited_tasks_cores;
ready_task_memory += t->resources_requested->memory > 0 ? t->resources_requested->memory : avg_commited_tasks_memory;
ready_task_disk += t->resources_requested->disk > 0 ? t->resources_requested->disk : avg_commited_tasks_disk;
ready_task_gpus += t->resources_requested->gpus > 0 ? t->resources_requested->gpus : avg_commited_tasks_gpus;
}

// get the total available resources (cores, memory, disk, gpus) across all workers of this manager
// available = hungry_minimum_factor * total (all workers) - committed (actually in use)
int64_t workers_total_avail_cores = q->hungry_minimum_factor * qstats.total_cores - qstats.committed_cores - ready_task_cores;
int64_t workers_total_avail_memory = q->hungry_minimum_factor * qstats.total_memory - qstats.committed_memory - ready_task_memory;
int64_t workers_total_avail_disk = q->hungry_minimum_factor * qstats.total_disk - qstats.committed_disk - ready_task_disk;
int64_t workers_total_avail_gpus = q->hungry_minimum_factor * qstats.total_gpus - qstats.committed_gpus - ready_task_gpus;
int64_t workers_total_avail_cores = q->hungry_minimum_factor * qstats.total_cores - qstats.committed_cores;
int64_t workers_total_avail_memory = q->hungry_minimum_factor * qstats.total_memory - qstats.committed_memory;
int64_t workers_total_avail_disk = q->hungry_minimum_factor * qstats.total_disk - qstats.committed_disk;
int64_t workers_total_avail_gpus = q->hungry_minimum_factor * qstats.total_gpus - qstats.committed_gpus;

int64_t tasks_needed = 0;
if (sampled_tasks_waiting < 1) {
if (tasks_waiting < 1) {
tasks_needed = DIV_INT_ROUND_UP(workers_total_avail_cores, avg_commited_tasks_cores);
if (avg_commited_tasks_memory > 0) {
tasks_needed = MIN(tasks_needed, DIV_INT_ROUND_UP(workers_total_avail_memory, avg_commited_tasks_memory));
Expand All @@ -5272,7 +5252,27 @@ int vine_hungry(struct vine_manager *q)
return MAX(tasks_needed, hungry_minimum);
}

// from here on we can assume that sampled_tasks_waiting > 0.
// from here on we can assume that tasks_waiting > 0.

// accumulate the resources (cores, memory, disk, gpus) requested by a sample of the waiting tasks.
// the sample is at most MIN(attempt_schedule_depth, tasks_waiting) tasks from the ready queue, not the whole queue.
int64_t ready_task_cores = 0;
int64_t ready_task_memory = 0;
int64_t ready_task_disk = 0;
int64_t ready_task_gpus = 0;

int t_idx;
struct vine_task *t;
int iter_depth = MIN(q->attempt_schedule_depth, tasks_waiting);
int sampled_tasks_waiting = 0;
PRIORITY_QUEUE_BASE_ITERATE(q->ready_tasks, t_idx, t, sampled_tasks_waiting, iter_depth)
{
/* unset resources are marked with -1, so for those we add the average committed by currently running tasks instead */
ready_task_cores += t->resources_requested->cores > 0 ? t->resources_requested->cores : avg_commited_tasks_cores;
ready_task_memory += t->resources_requested->memory > 0 ? t->resources_requested->memory : avg_commited_tasks_memory;
ready_task_disk += t->resources_requested->disk > 0 ? t->resources_requested->disk : avg_commited_tasks_disk;
ready_task_gpus += t->resources_requested->gpus > 0 ? t->resources_requested->gpus : avg_commited_tasks_gpus;
}

int64_t avg_ready_tasks_cores = DIV_INT_ROUND_UP(ready_task_cores, sampled_tasks_waiting);
int64_t avg_ready_tasks_memory = DIV_INT_ROUND_UP(ready_task_memory, sampled_tasks_waiting);
Expand All @@ -5294,7 +5294,7 @@ int vine_hungry(struct vine_manager *q)
tasks_needed = MIN(tasks_needed, DIV_INT_ROUND_UP(workers_total_avail_gpus, avg_ready_tasks_gpus));
}

tasks_needed = MAX(0, MAX(tasks_needed, hungry_minimum - tasks_waiting));
tasks_needed = MAX(0, MAX(tasks_needed, hungry_minimum) - tasks_waiting);

return tasks_needed;
}
Expand Down

0 comments on commit 4e61ed3

Please sign in to comment.