From 018830312d236eba90a32a2d11948d5b79a88240 Mon Sep 17 00:00:00 2001 From: Reid Mello <30907815+rjmello@users.noreply.github.com> Date: Mon, 14 Oct 2024 20:22:52 -0400 Subject: [PATCH] Ensure k8s pod names/labels are RFC 1123 compliant - Modified Kubernetes pod names and labels to conform to RFC 1123 for DNS subdomain names and labels, ensuring compliance with Kubernetes naming conventions. - Replaced the trailing timestamp in the job name with an eight-character hex string (job ID) to improve collision avoidance. - Replaced `app` pod label with `parsl-job-id`. - Updated container name to use job ID. --- parsl/providers/kubernetes/kube.py | 49 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/parsl/providers/kubernetes/kube.py b/parsl/providers/kubernetes/kube.py index 40b5b430a5..5a69d160fb 100644 --- a/parsl/providers/kubernetes/kube.py +++ b/parsl/providers/kubernetes/kube.py @@ -1,10 +1,5 @@ import logging -import time - -from parsl.providers.kubernetes.template import template_string - -logger = logging.getLogger(__name__) - +import uuid from typing import Any, Dict, List, Optional, Tuple import typeguard @@ -12,7 +7,8 @@ from parsl.errors import OptionalModuleMissing from parsl.jobs.states import JobState, JobStatus from parsl.providers.base import ExecutionProvider -from parsl.utils import RepresentationMixin +from parsl.providers.kubernetes.template import template_string +from parsl.utils import RepresentationMixin, sanitize_dns_subdomain_rfc1123 try: from kubernetes import client, config @@ -20,6 +16,8 @@ except (ImportError, NameError, FileNotFoundError): _kubernetes_enabled = False +logger = logging.getLogger(__name__) + translate_table = { 'Running': JobState.RUNNING, 'Pending': JobState.PENDING, @@ -161,7 +159,7 @@ def __init__(self, self.resources: Dict[object, Dict[str, Any]] self.resources = {} - def submit(self, cmd_string, tasks_per_node, job_name="parsl"): + def submit(self, cmd_string: str, tasks_per_node: int, job_name: str = "parsl.kube"): """ Submit a job Args: - cmd_string :(String) - Name of the container to initiate @@ -173,15 +171,18 @@ def submit(self, cmd_string, tasks_per_node, job_name="parsl"): Returns: - job_id: (string) Identifier for the job """ + job_id = uuid.uuid4().hex[:8] - cur_timestamp = str(time.time() * 1000).split(".")[0] - job_name = "{0}-{1}".format(job_name, cur_timestamp) - - if not self.pod_name: - pod_name = '{}'.format(job_name) - else: - pod_name = '{}-{}'.format(self.pod_name, - cur_timestamp) + pod_name = self.pod_name or job_name + try: + pod_name = sanitize_dns_subdomain_rfc1123(pod_name) + except ValueError: + logger.warning( + f"Invalid pod name '{pod_name}' for job '{job_id}', falling back to 'parsl.kube'" + ) + pod_name = "parsl.kube" + pod_name = pod_name[:253 - 1 - len(job_id)].rstrip(".") # Leave room for the job ID + pod_name = f"{pod_name}.{job_id}" formatted_cmd = template_string.format(command=cmd_string, worker_init=self.worker_init) @@ -189,7 +190,7 @@ def submit(self, cmd_string, tasks_per_node, job_name="parsl"): logger.debug("Pod name: %s", pod_name) self._create_pod(image=self.image, pod_name=pod_name, - job_name=job_name, + job_id=job_id, cmd_string=formatted_cmd, volumes=self.persistent_volumes, service_account_name=self.service_account_name, @@ -257,10 +258,10 @@ def _status(self): self.resources[jid]['status'] = JobStatus(status) def _create_pod(self, - image, - pod_name, - job_name, - port=80, + image: str, + pod_name: str, + job_id: str, + port: int = 80, cmd_string=None, volumes=[], service_account_name=None, @@ -269,7 +270,7 @@ def _create_pod(self, Args: - image (string) : Docker image to launch - pod_name (string) : Name of the pod - - job_name (string) : App label + - job_id (string) : Job ID KWargs: - port (integer) : Container port Returns: @@ -299,7 +300,7 @@ def _create_pod(self, ) # Configure Pod template container container = client.V1Container( - name=pod_name, + name=job_id, image=image, resources=resources, ports=[client.V1ContainerPort(container_port=port)], @@ -322,7 +323,7 @@ def _create_pod(self, claim_name=volume[0]))) metadata = client.V1ObjectMeta(name=pod_name, - labels={"app": job_name}, + labels={"parsl-job-id": job_id}, annotations=annotations) spec = client.V1PodSpec(containers=[container], image_pull_secrets=[secret],