Skip to content

Commit

Permalink
Merge pull request #6558 from grondo/issue#6474
Browse files Browse the repository at this point in the history
add `FLUX_ENCLOSING_ID` to initial program environment for instances with a `jobid` broker attribute
  • Loading branch information
mergify[bot] authored Jan 17, 2025
2 parents 8259a8f + a12f5e5 commit 122bd11
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 11 deletions.
19 changes: 18 additions & 1 deletion doc/man7/flux-environment.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@ The following are set in the environment of each task spawned by
NUMERIC_JOB_ID=$(flux job id $FLUX_JOB_ID)
.. envvar:: FLUX_ENCLOSING_ID

The jobid of the enclosing Flux instance, if it has one. The enclosing
Flux instance is the one that ran the job identified by
:envvar:`FLUX_JOB_ID`. Depending on how the enclosing Flux instance was
started, it may or may not have a jobid. If the enclosing instance was
not launched by Flux, it has no jobid and :envvar:`FLUX_ENCLOSING_ID`
is not set.

Example 1: A batch job that runs one MPI job is submitted to a Flux system
instance. In the environment of the MPI job, :envvar:`FLUX_ENCLOSING_ID`
refers to the batch jobid in the system instance.

Example 2: An MPI job is submitted directly to a Flux system
instance. Since the Flux system instance was not launched by Flux,
:envvar:`FLUX_ENCLOSING_ID` is not set in the environment of the MPI job.

.. envvar:: FLUX_JOB_SIZE

The number of tasks in the current job.
Expand Down Expand Up @@ -170,8 +186,9 @@ environment in other workload managers, for example:
BATCH_NCORES=$(flux resource list -n -o {ncores})
BATCH_NGPUS=$(flux resource list -n -o {ngpus})
BATCH_HOSTLIST=$(flux getattr hostlist)
BATCH_JOBID=$(flux getattr jobid)
Additionally, :envvar:`FLUX_ENCLOSING_ID` is set to the jobid of the
enclosing instance, if it has one.

PMI CLIENT
==========
Expand Down
9 changes: 8 additions & 1 deletion src/broker/broker.c
Original file line number Diff line number Diff line change
Expand Up @@ -700,9 +700,13 @@ static int create_runat_rc2 (struct runat *r, const char *argz, size_t argz_len)

static int create_runat_phases (broker_ctx_t *ctx)
{
const char *jobid = NULL;
const char *rc1, *rc3, *local_uri;
bool rc2_none = false;

/* jobid may be NULL */
(void) attr_get (ctx->attrs, "jobid", &jobid, NULL);

if (attr_get (ctx->attrs, "local-uri", &local_uri, NULL) < 0) {
log_err ("local-uri is not set");
return -1;
Expand All @@ -718,7 +722,10 @@ static int create_runat_phases (broker_ctx_t *ctx)
if (attr_get (ctx->attrs, "broker.rc2_none", NULL, NULL) == 0)
rc2_none = true;

if (!(ctx->runat = runat_create (ctx->h, local_uri, ctx->sd_notify))) {
if (!(ctx->runat = runat_create (ctx->h,
local_uri,
jobid,
ctx->sd_notify))) {
log_err ("runat_create");
return -1;
}
Expand Down
26 changes: 21 additions & 5 deletions src/broker/runat.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ struct runat_entry {

struct runat {
flux_t *h;
const char *jobid;
const char *local_uri;
zhashx_t *entries;
flux_msg_handler_t **handlers;
Expand All @@ -76,6 +77,7 @@ static const int abort_signal = SIGHUP;

static const char *env_blocklist[] = {
"FLUX_JOB_ID",
"FLUX_ENCLOSING_ID",
"FLUX_JOB_SIZE",
"FLUX_JOB_NNODES",
"FLUX_JOB_TMPDIR",
Expand Down Expand Up @@ -363,11 +365,13 @@ static struct runat_command *runat_command_create (char **env, int flags)
}

/* Unset blocklisted variables in command environment.
* Set FLUX_ENCLOSING_ID if jobid is non-NULL.
* Set FLUX_URI if local_uri is non-NULL.
*/
static int runat_command_modenv (struct runat_command *cmd,
const char **blocklist,
const char *local_uri)
const char *local_uri,
const char *jobid)
{
if (blocklist) {
int i;
Expand All @@ -378,6 +382,14 @@ static int runat_command_modenv (struct runat_command *cmd,
if (flux_cmd_setenvf (cmd->cmd, 1, "FLUX_URI", "%s", local_uri) < 0)
return -1;
}
if (jobid) {
if (flux_cmd_setenvf (cmd->cmd,
1,
"FLUX_ENCLOSING_ID",
"%s",
jobid) < 0)
return -1;
}
return 0;
}

Expand Down Expand Up @@ -493,7 +505,7 @@ int runat_push_shell_command (struct runat *r,
return -1;
if (runat_command_set_cmdline (cmd, NULL, cmdline) < 0)
goto error;
if (runat_command_modenv (cmd, env_blocklist, r->local_uri) < 0)
if (runat_command_modenv (cmd, env_blocklist, r->local_uri, r->jobid) < 0)
goto error;
if (runat_push (r, name, cmd, false) < 0)
goto error;
Expand All @@ -518,7 +530,7 @@ int runat_push_shell (struct runat *r,
return -1;
if (runat_command_set_cmdline (cmd, shell, NULL) < 0)
goto error;
if (runat_command_modenv (cmd, env_blocklist, r->local_uri) < 0)
if (runat_command_modenv (cmd, env_blocklist, r->local_uri, r->jobid) < 0)
goto error;
if (runat_push (r, name, cmd, true) < 0)
goto error;
Expand All @@ -544,7 +556,7 @@ int runat_push_command (struct runat *r,
return -1;
if (runat_command_set_argz (cmd, argz, argz_len) < 0)
goto error;
if (runat_command_modenv (cmd, env_blocklist, r->local_uri) < 0)
if (runat_command_modenv (cmd, env_blocklist, r->local_uri, r->jobid) < 0)
goto error;
if (runat_push (r, name, cmd, false) < 0)
goto error;
Expand Down Expand Up @@ -693,7 +705,10 @@ static const struct flux_msg_handler_spec htab[] = {
FLUX_MSGHANDLER_TABLE_END,
};

struct runat *runat_create (flux_t *h, const char *local_uri, bool sdnotify)
struct runat *runat_create (flux_t *h,
const char *local_uri,
const char *jobid,
bool sdnotify)
{
struct runat *r;

Expand All @@ -705,6 +720,7 @@ struct runat *runat_create (flux_t *h, const char *local_uri, bool sdnotify)
goto error;
zhashx_set_destructor (r->entries, runat_entry_destroy_wrapper);
r->h = h;
r->jobid = jobid;
r->local_uri = local_uri;
r->sd_notify = sdnotify;
if (isatty (STDIN_FILENO)
Expand Down
6 changes: 5 additions & 1 deletion src/broker/runat.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@ typedef void (*runat_completion_f)(struct runat *r,
const char *name,
void *arg);

struct runat *runat_create (flux_t *h, const char *local_uri, bool sdnotify);
struct runat *runat_create (flux_t *h,
const char *local_uri,
const char *jobid,
bool sdnotify);

void runat_destroy (struct runat *r);

/* Push command, to be run under shell -c, onto named list.
Expand Down
4 changes: 2 additions & 2 deletions src/broker/test/runat.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ void basic (flux_t *h)

ctx.h = h;

r = runat_create (h, "local://notreally", false);
r = runat_create (h, "local://notreally", "f1234", false);
ok (r != NULL,
"runat_create works");

Expand Down Expand Up @@ -268,7 +268,7 @@ void badinput (flux_t *h)
struct runat *r;
int rc;

if (!(r = runat_create (h, NULL, false)))
if (!(r = runat_create (h, NULL, NULL, false)))
BAIL_OUT ("runat_create failed");

ok (runat_is_defined (NULL, "foo") == false,
Expand Down
10 changes: 9 additions & 1 deletion t/t0014-runlevel.t
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ test_expect_success 'flux admin cleanup-push (stdin) retains cmd block order' '
'

test_expect_success 'capture the environment for all three rc scripts' '
SLURM_FOO=42 flux start \
SLURM_FOO=42 FLUX_ENCLOSING_ID=66 flux start \
-Slog-stderr-level=6 \
-Sbroker.rc1_path="bash -c printenv >rc1.env" \
-Sbroker.rc3_path="bash -c printenv >rc3.env" \
Expand Down Expand Up @@ -161,6 +161,10 @@ test_expect_success 'job environment is not set in rc scripts' '
var_is_unset FLUX_KVS_NAMESPACE *.env
'

# The capture step for this instance launches flux start with
# FLUX_ENCLOSING_ID=66 in its environment. This check confirms that the
# inherited value is absent from every captured rc environment (*.env).
# NOTE(review): var_is_unset is a helper defined elsewhere in this script;
# presumably it fails if the variable appears in any listed file - confirm.
test_expect_success 'FLUX_ENCLOSING_ID not set if instance is not a job' '
var_is_unset FLUX_ENCLOSING_ID *.env
'

test_expect_success 'capture the environment for instance run as a job' '
flux start flux run flux start \
-Slog-stderr-level=6 \
Expand All @@ -179,4 +183,8 @@ test_expect_success 'job environment is not set in rcs of subinstance' '
var_is_unset FLUX_KVS_NAMESPACE *.env2
'

# Counterpart to the "not a job" check: the *.env2 files are presumably the
# rc environments captured from the instance started as a job
# (flux start flux run flux start ...) - confirm against the capture step.
# NOTE(review): var_is_set is a helper defined elsewhere in this script;
# presumably it requires the variable in every listed file - confirm.
test_expect_success 'FLUX_ENCLOSING_ID is set if instance is a job' '
var_is_set FLUX_ENCLOSING_ID *.env2
'

test_done

0 comments on commit 122bd11

Please sign in to comment.