diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py index b8b96c6306a3bb..371bbd4591e362 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py @@ -1,3 +1,4 @@ +import logging from dataclasses import dataclass from typing import TYPE_CHECKING, Iterable, List @@ -28,6 +29,8 @@ FeatureGroupSummaryTypeDef, ) +logger = logging.getLogger(__name__) + @dataclass class FeatureGroupProcessor: @@ -41,10 +44,13 @@ def get_all_feature_groups(self) -> List["FeatureGroupSummaryTypeDef"]: """ feature_groups = [] - + logger.debug("Attempting to get all feature groups") # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.list_feature_groups paginator = self.sagemaker_client.get_paginator("list_feature_groups") for page in paginator.paginate(): + logger.debug( + "Retrieved %s feature groups", len(page["FeatureGroupSummaries"]) + ) feature_groups += page["FeatureGroupSummaries"] return feature_groups @@ -55,7 +61,7 @@ def get_feature_group_details( """ Get details of a feature group (including list of component features). """ - + logger.debug("Attempting to describe feature group: %s", feature_group_name) # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.describe_feature_group feature_group = self.sagemaker_client.describe_feature_group( FeatureGroupName=feature_group_name @@ -66,12 +72,19 @@ def get_feature_group_details( # paginate over feature group features while next_token: + logger.debug( + "Iterating over another token to retrieve full feature group description for: %s", + feature_group_name, + ) next_features = self.sagemaker_client.describe_feature_group( FeatureGroupName=feature_group_name, NextToken=next_token ) feature_group["FeatureDefinitions"] += next_features["FeatureDefinitions"] next_token = feature_group.get("NextToken", "") + logger.debug( + "Retrieved full description for feature group: %s", feature_group_name + ) return feature_group def get_feature_group_wu( diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py index 73a83295ec8cba..2fb48cd211c07a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py @@ -1,3 +1,4 @@ +import logging from collections import defaultdict from dataclasses import dataclass, field from enum import Enum @@ -49,6 +50,8 @@ if TYPE_CHECKING: from mypy_boto3_sagemaker import SageMakerClient +logger = logging.getLogger(__name__) + JobInfo = TypeVar( "JobInfo", AutoMlJobInfo, @@ -171,9 +174,11 @@ class JobProcessor: def get_jobs(self, job_type: JobType, job_spec: JobInfo) -> List[Any]: jobs = [] + logger.debug("Attempting to retrieve all jobs for type %s", job_type) paginator = self.sagemaker_client().get_paginator(job_spec.list_command) for page in paginator.paginate(): page_jobs: List[Any] = page[job_spec.list_key] + logger.debug("Retrieved %s jobs", len(page_jobs)) for job in page_jobs: job_name = ( @@ -269,6 +274,11 @@ def get_job_details(self, job_name: str, job_type: JobType) -> Dict[str, Any]: describe_command = job_type_to_info[job_type].describe_command describe_name_key = job_type_to_info[job_type].describe_name_key + logger.debug( + "Retrieving description for job: %s using command: %s", + job_name, + describe_command, + ) return getattr(self.sagemaker_client(), describe_command)( **{describe_name_key: job_name} ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/lineage.py index b677dccad24ac4..650520bffb5666 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/lineage.py @@ -1,3 +1,4 @@ +import logging from collections import defaultdict from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, DefaultDict, Dict, List, Set @@ -15,6 +16,8 @@ ContextSummaryTypeDef, ) +logger = logging.getLogger(__name__) + @dataclass class LineageInfo: @@ -55,10 +58,11 @@ def get_all_actions(self) -> List["ActionSummaryTypeDef"]: """ actions = [] - + logger.debug("Attempting to get all actions") # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.list_actions paginator = self.sagemaker_client.get_paginator("list_actions") for page in paginator.paginate(): + logger.debug("Retrieved %s actions", len(page["ActionSummaries"])) actions += page["ActionSummaries"] return actions @@ -69,10 +73,11 @@ def get_all_artifacts(self) -> List["ArtifactSummaryTypeDef"]: """ artifacts = [] - + logger.debug("Attempting to get all artifacts") # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.list_artifacts paginator = self.sagemaker_client.get_paginator("list_artifacts") for page in paginator.paginate(): + logger.debug("Retrieved %s artifacts", len(page["ArtifactSummaries"])) artifacts += page["ArtifactSummaries"] return artifacts @@ -83,10 +88,11 @@ def get_all_contexts(self) -> List["ContextSummaryTypeDef"]: """ contexts = [] - + logger.debug("Attempting to get all contexts") # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.list_contexts paginator = self.sagemaker_client.get_paginator("list_contexts") for page in paginator.paginate(): + logger.debug("Retrieved %s contexts", len(page["ContextSummaries"])) contexts += page["ContextSummaries"] return contexts @@ -97,10 +103,11 @@ def get_incoming_edges(self, node_arn: str) -> List["AssociationSummaryTypeDef"] """ edges = [] - + logger.debug("Attempting to get all incoming edges") # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.list_associations paginator = self.sagemaker_client.get_paginator("list_associations") for page in paginator.paginate(DestinationArn=node_arn): + logger.debug("Retrieved %s edges", len(page["AssociationSummaries"])) edges += page["AssociationSummaries"] return edges @@ -110,10 +117,11 @@ def get_outgoing_edges(self, node_arn: str) -> List["AssociationSummaryTypeDef"] Get all outgoing edges for a node in the lineage graph. """ edges = [] - + logger.debug("Attempting to get all outgoing edges") # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.list_associations paginator = self.sagemaker_client.get_paginator("list_associations") for page in paginator.paginate(SourceArn=node_arn): + logger.debug("Retrieved %s edges", len(page["AssociationSummaries"])) edges += page["AssociationSummaries"] return edges diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py index eef2b26ee08f2e..585f3d8a5218f5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py @@ -1,3 +1,4 @@ +import logging from collections import defaultdict from dataclasses import dataclass, field from datetime import datetime @@ -53,6 +54,8 @@ ModelSummaryTypeDef, ) +logger = logging.getLogger(__name__) + ENDPOINT_STATUS_MAP: Dict[str, str] = { "OutOfService": DeploymentStatusClass.OUT_OF_SERVICE, "Creating": DeploymentStatusClass.CREATING, @@ -97,10 +100,11 @@ def get_all_models(self) -> List["ModelSummaryTypeDef"]: """ models = [] - + logger.debug("Attempting to retrieve all models") # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.list_models paginator = self.sagemaker_client.get_paginator("list_models") for page in paginator.paginate(): + logger.debug("Retrieved %s models", len(page["Models"])) models += page["Models"] return models @@ -118,10 +122,13 @@ def get_all_groups(self) -> List["ModelPackageGroupSummaryTypeDef"]: List all model groups in SageMaker. """ groups = [] - + logger.debug("Attempting to retrieve all model groups") # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.list_model_package_groups paginator = self.sagemaker_client.get_paginator("list_model_package_groups") for page in paginator.paginate(): + logger.debug( + "Retrieved %s model groups", len(page["ModelPackageGroupSummaryList"]) + ) groups += page["ModelPackageGroupSummaryList"] return groups @@ -140,11 +147,11 @@ def get_group_details( def get_all_endpoints(self) -> List["EndpointSummaryTypeDef"]: endpoints = [] - + logger.debug("Attempting to retrieve all endpoints") # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.list_endpoints paginator = self.sagemaker_client.get_paginator("list_endpoints") - for page in paginator.paginate(): + logger.debug("Retrieved %s endpoints", len(page["Endpoints"])) endpoints += page["Endpoints"] return endpoints