diff --git a/datahub-web-react/src/images/verticalogo copy.png b/datahub-web-react/src/images/verticalogo copy.png deleted file mode 100644 index 5da38f4e67c7d4..00000000000000 Binary files a/datahub-web-react/src/images/verticalogo copy.png and /dev/null differ diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 30bab24267d706..0a8722ac64ef6d 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -98,6 +98,10 @@ module.exports = { to: "/champions", label: "Champions", }, + { + label: "Share Your Journey", + href: "/customer-stories-survey", + }, ], }, { diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 9d6d2a59978f5b..0010fc5ad13b51 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -31,6 +31,7 @@ module.exports = { label: "Demo", href: "https://demo.datahubproject.io/", }, + "docs/what-is-datahub/customer-stories", "docs/what-is-datahub/datahub-concepts", ], }, diff --git a/docs-website/src/pages/customer-stories-survey/customerstoriessurvey.scss b/docs-website/src/pages/customer-stories-survey/customerstoriessurvey.scss new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/docs-website/src/pages/customer-stories-survey/index.js b/docs-website/src/pages/customer-stories-survey/index.js new file mode 100644 index 00000000000000..63a3ecd77e9687 --- /dev/null +++ b/docs-website/src/pages/customer-stories-survey/index.js @@ -0,0 +1,48 @@ +import React, { useEffect } from 'react'; +import Layout from '@theme/Layout'; +import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; + +function CustomerStoriesSurvey() { + const { siteConfig = {} } = useDocusaurusContext(); + + useEffect(() => { + const script = document.createElement('script'); + script.src = "//js.hsforms.net/forms/embed/v2.js"; + script.async = true; + script.type = 'text/javascript'; + document.body.appendChild(script); + + script.onload = () => { + if (window.hbspt) 
{ + window.hbspt.forms.create({ + region: "na1", + portalId: "14552909", + formId: "087ef03d-e47e-4814-b458-b30e3e02b623", + target: '#hubspotForm' // Targeting the div with the specific ID + }); + } + }; + + return () => { + document.body.removeChild(script); + }; + }, []); + + return ( + +
+
+
+

We Want To Hear Your Journey!

+
Share Your DataHub Journey with Our Community.
+
+
+
+
+
+ ); +} + +export default CustomerStoriesSurvey; diff --git a/docs-website/src/pages/docs/_components/CustomerCard/customercard.module.scss b/docs-website/src/pages/docs/_components/CustomerCard/customercard.module.scss new file mode 100644 index 00000000000000..349f705d25b10d --- /dev/null +++ b/docs-website/src/pages/docs/_components/CustomerCard/customercard.module.scss @@ -0,0 +1,56 @@ +.card { + color: var(--ifm-hero-text-color); + padding: 0; + margin: 0rem 3rem 2rem 0rem; + text-decoration: none !important; + + .card_button { + padding: 0rem 0rem 0rem 1rem; + text-align: right; + } + + .card_img { + justify-content: center; + display: flex; + height: 250px; + margin: 0; + position: relative; + text-align: center; + } + + .card_body { + padding: 2rem 3rem 2rem 3rem; + + .card_description { + min-height: 20rem; + } + } + + .card_overlay_text { + position: absolute; + text-align: left; + width: 80%; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + color: white; + + .card_customer { + font-size: 3.2rem; + font-weight: 800; + line-height: 1.2; + + } + .card_title { + font-size: 1.2rem; + font-weight: 600; + } + + } + + img { + object-fit: cover; + filter: brightness(50%); + } + +} diff --git a/docs-website/src/pages/docs/_components/CustomerCard/index.jsx b/docs-website/src/pages/docs/_components/CustomerCard/index.jsx new file mode 100644 index 00000000000000..36c83226e1f732 --- /dev/null +++ b/docs-website/src/pages/docs/_components/CustomerCard/index.jsx @@ -0,0 +1,30 @@ +import React from "react"; +import clsx from "clsx"; +import styles from "./customercard.module.scss"; +import Link from "@docusaurus/Link"; + +const CustomerCard = ({ customer, title, imgUrl, description, to,}) => { + return ( +
+
+
+ {customer} +
+
{customer}
+
{title}
+
+
+
+

{description}

+
+ + Discover {customer}'s Story + +
+
+
+
+ ); +}; + +export default CustomerCard; diff --git a/docs-website/src/pages/docs/_components/CustomerCardSection/customercardsection.module.scss b/docs-website/src/pages/docs/_components/CustomerCardSection/customercardsection.module.scss new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/docs-website/src/pages/docs/_components/CustomerCardSection/index.jsx b/docs-website/src/pages/docs/_components/CustomerCardSection/index.jsx new file mode 100644 index 00000000000000..ca34d89df8701d --- /dev/null +++ b/docs-website/src/pages/docs/_components/CustomerCardSection/index.jsx @@ -0,0 +1,96 @@ +import React from "react"; +import CustomerCard from '../CustomerCard' + +const customerCardContent = [ + { + customer: "Netflix", + imgUrl: "/img/assets/netflix.jpg", + title: "How they are contributing to DataHub to make it more extensible", + description: ( + <> + + "DataHub gave us the extensibility features we needed to define new + entity types easily and augment existing ones. + DataHub performed exceptionally well in managing our traffic load and data + volume. It offers a great developer experience, a well-documented + taxonomy, and — very importantly — solid community support." +
+
+ — Ajoy Majumdar, Software Architect at Netflix +
+
+ + ), + to: "https://youtu.be/ejcO7hdX0lk?si=8iPjrPeBZq5KNdb-", + }, + { + customer: "Visa", + imgUrl: "/img/assets/travel.jpg", + title: "How to use DataHub to scale your Data Governance", + description: ( + <> + + "We found DataHub to provide excellent coverage for our needs. What we + appreciate most about DataHub is its powerful API platform." +
+
+ — Jean-Pierre Dijcks, Sr. Dir. Product Management at VISA +
+
+ + ), + to: "https://youtu.be/B6CplqnIkFw?si=jrrr04cV5rdxO6Ra", + }, + { + customer: "MediaMarkt Saturn", + imgUrl: "/img/assets/business.jpg", + title: "Building Data Access Management within DataHub", + description: ( + <> + Europe’s #1 consumer electronics retailer implemented DataHub for three reasons: +
+
+ 1. DataHub provides an extremely flexible and customizable metadata platform at scale +
+ 2. Open-source means lower cost to implement and removes the headache of license management +
+ 3. Community-driven project which continually evolves with industry trends and best practices + + ), + to: "https://www.acryldata.io/blog/data-contracts-in-datahub-combining-verifiability-with-holistic-data-management?utm_source=datahub&utm_medium=referral&utm_content=blog", + }, + { + customer: "Airtel", + imgUrl: "/img/assets/phonecall.jpg", + title: "DataHub is the Bedrock of Data Mesh at Airtel", + description: ( + <> + Airtel is a leading global telecommunication provider. DataHub is the + bedrock of Data Mesh at Airtel by providing the requisite governance and + metadata management functionality to ensure their Data Products + are discoverable, addressable, trustworthy, self-describing, and secure. 
+
+ Get a closer look at how the Airtel team has successfully integrated + DataHub to take their data mesh implementation to the next level. + + ), + to: "https://youtu.be/wsCFnElN_Wo?si=i-bNAQAsbHJq5O9-", + }, +]; + +const CustomerCardSection = () => { + return ( +
+
+
+ {customerCardContent.map((props, idx) => ( + + ))} +
+
+
+ ); +}; + +export default CustomerCardSection; diff --git a/docs-website/static/img/assets/business.jpg b/docs-website/static/img/assets/business.jpg new file mode 100644 index 00000000000000..f5a91928ee2ad8 Binary files /dev/null and b/docs-website/static/img/assets/business.jpg differ diff --git a/docs-website/static/img/assets/netflix.jpg b/docs-website/static/img/assets/netflix.jpg new file mode 100644 index 00000000000000..8b555f5b63187f Binary files /dev/null and b/docs-website/static/img/assets/netflix.jpg differ diff --git a/docs-website/static/img/assets/phonecall.jpg b/docs-website/static/img/assets/phonecall.jpg new file mode 100644 index 00000000000000..87e48f28213827 Binary files /dev/null and b/docs-website/static/img/assets/phonecall.jpg differ diff --git a/docs-website/static/img/assets/travel.jpg b/docs-website/static/img/assets/travel.jpg new file mode 100644 index 00000000000000..de2697f5631217 Binary files /dev/null and b/docs-website/static/img/assets/travel.jpg differ diff --git a/docs/what-is-datahub/customer-stories.md b/docs/what-is-datahub/customer-stories.md new file mode 100644 index 00000000000000..2745a8aca0d2f2 --- /dev/null +++ b/docs/what-is-datahub/customer-stories.md @@ -0,0 +1,17 @@ +import CustomerCardSection from '@site/src/pages/docs/_components/CustomerCardSection'; + +# Customer Stories + +Meet the DataHub users who have shared their stories with us. + +:::note Share Your DataHub Journey +We're excited to hear about your experience with DataHub. Share your story with us! 
+ +Share Your Story + +::: + + diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/__init__.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/__init__.py index 1ecfc362ceb4e6..1c7d60666a085f 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/__init__.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/__init__.py @@ -1,6 +1,6 @@ # Published at https://pypi.org/project/acryl-datahub/. __package_name__ = "acryl-datahub-dagster-plugin" -__version__ = "0.0.0.dev0" +__version__ = "1!0.0.0.dev0" def is_dev_mode() -> bool: diff --git a/metadata-ingestion/docs/sources/oracle/oracle_pre.md b/metadata-ingestion/docs/sources/oracle/oracle_pre.md new file mode 100644 index 00000000000000..aa4cf64efac736 --- /dev/null +++ b/metadata-ingestion/docs/sources/oracle/oracle_pre.md @@ -0,0 +1,24 @@ +### Prerequisites + +#### Data Dictionary Mode/Views + +The Oracle ingestion source supports two modes for extracting metadata information (see `data_dictionary_mode` option): `ALL` and `DBA`. In the `ALL` mode, the SQLAlchemy backend queries `ALL_` data dictionary views to extract metadata information. In the `DBA` mode, the Oracle ingestion source directly queries `DBA_` data dictionary views to extract metadata information. `ALL_` views only provide information accessible to the user used for ingestion while `DBA_` views provide information for the entire database (that is, all schema objects in the database). + +The following table contains a brief description of what each data dictionary view is used for: + +| Data Dictionary View | What's it used for? 
| +| --- | --- | +| `ALL_TABLES` or `DBA_TABLES` | Get list of all relational tables in the database | +| `ALL_VIEWS` or `DBA_VIEWS` | Get list of all views in the database | +| `ALL_TAB_COMMENTS` or `DBA_TAB_COMMENTS` | Get comments on tables and views | +| `ALL_TAB_COLS` or `DBA_TAB_COLS` | Get description of the columns of tables and views | +| `ALL_COL_COMMENTS` or `DBA_COL_COMMENTS` | Get comments on the columns of tables and views | +| `ALL_TAB_IDENTITY_COLS` or `DBA_TAB_IDENTITY_COLS` | Get table identity columns | +| `ALL_CONSTRAINTS` or `DBA_CONSTRAINTS` | Get constraint definitions on tables | +| `ALL_CONS_COLUMNS` or `DBA_CONS_COLUMNS` | Get list of columns that are specified in constraints | +| `ALL_USERS` or `DBA_USERS` | Get all schema names | + +#### Data Dictionary Views accessible information and required privileges + +- `ALL_` views display all the information accessible to the user used for ingestion, including information from the user's schema as well as information from objects in other schemas, if the user has access to those objects by way of grants of privileges or roles. +- `DBA_` views display all relevant information in the entire database. They can be queried only by users with the `SYSDBA` system privilege or `SELECT ANY DICTIONARY` privilege, or `SELECT_CATALOG_ROLE` role, or by users with direct privileges granted to them. diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md index 00e4995218a710..772a638b6a9487 100644 --- a/metadata-ingestion/docs/transformer/dataset_transformer.md +++ b/metadata-ingestion/docs/transformer/dataset_transformer.md @@ -817,8 +817,6 @@ overwrite the previous value. properties: prop1: value1 prop2: value2 - - ``` - Add dataset-properties, however overwrite the dataset-properties available for the dataset on DataHub GMS ```yaml @@ -829,8 +827,6 @@ overwrite the previous value. 
properties: prop1: value1 prop2: value2 - - ``` - Add dataset-properties, however keep the dataset-properties available for the dataset on DataHub GMS ```yaml @@ -841,7 +837,6 @@ overwrite the previous value. properties: prop1: value1 prop2: value2 - ``` ## Add Dataset datasetProperties @@ -973,7 +968,7 @@ transformers: `simple_add_dataset_domain` can be configured in below different way - Add domains, however replace existing domains sent by ingestion source -```yaml + ```yaml transformers: - type: "simple_add_dataset_domain" config: @@ -981,9 +976,9 @@ transformers: domains: - "urn:li:domain:engineering" - "urn:li:domain:hr" - ``` + ``` - Add domains, however overwrite the domains available for the dataset on DataHub GMS -```yaml + ```yaml transformers: - type: "simple_add_dataset_domain" config: @@ -991,9 +986,9 @@ transformers: domains: - "urn:li:domain:engineering" - "urn:li:domain:hr" - ``` + ``` - Add domains, however keep the domains available for the dataset on DataHub GMS -```yaml + ```yaml transformers: - type: "simple_add_dataset_domain" config: @@ -1001,7 +996,7 @@ transformers: domains: - "urn:li:domain:engineering" - "urn:li:domain:hr" - ``` + ``` ## Pattern Add Dataset domains ### Config Details @@ -1019,20 +1014,20 @@ Here we can set domain list to either urn (i.e. 
urn:li:domain:hr) or simple doma in both of the cases domain should be provisioned on DataHub GMS ```yaml - transformers: - - type: "pattern_add_dataset_domain" - config: - semantics: OVERWRITE - domain_pattern: - rules: - 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.n.*': ["hr"] - 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.t.*': ["urn:li:domain:finance"] + transformers: + - type: "pattern_add_dataset_domain" + config: + semantics: OVERWRITE + domain_pattern: + rules: + 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.n.*': ["hr"] + 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.t.*': ["urn:li:domain:finance"] ``` `pattern_add_dataset_domain` can be configured in below different way - Add domains, however replace existing domains sent by ingestion source -```yaml + ```yaml transformers: - type: "pattern_add_dataset_domain" config: @@ -1041,29 +1036,29 @@ in both of the cases domain should be provisioned on DataHub GMS rules: 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.n.*': ["hr"] 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.t.*': ["urn:li:domain:finance"] - ``` + ``` - Add domains, however overwrite the domains available for the dataset on DataHub GMS -```yaml - transformers: - - type: "pattern_add_dataset_domain" - config: - semantics: OVERWRITE # OVERWRITE is default behaviour - domain_pattern: - rules: - 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.n.*': ["hr"] - 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.t.*': ["urn:li:domain:finance"] - ``` + ```yaml + transformers: + - type: "pattern_add_dataset_domain" + config: + semantics: OVERWRITE # OVERWRITE is default behaviour + domain_pattern: + rules: + 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.n.*': ["hr"] + 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.t.*': ["urn:li:domain:finance"] + ``` - Add domains, however 
keep the domains available for the dataset on DataHub GMS -```yaml - transformers: - - type: "pattern_add_dataset_domain" - config: - semantics: PATCH - domain_pattern: - rules: - 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.n.*': ["hr"] - 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.t.*': ["urn:li:domain:finance"] - ``` + ```yaml + transformers: + - type: "pattern_add_dataset_domain" + config: + semantics: PATCH + domain_pattern: + rules: + 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.n.*': ["hr"] + 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.t.*': ["urn:li:domain:finance"] + ``` @@ -1099,7 +1094,7 @@ transformers: `domain_mapping_based_on_tags` can be configured in below different way - Add domains based on tags, however overwrite the domains available for the dataset on DataHub GMS -```yaml + ```yaml transformers: - type: "domain_mapping_based_on_tags" config: @@ -1107,9 +1102,9 @@ transformers: domain_mapping: 'example1': "urn:li:domain:engineering" 'example2': "urn:li:domain:hr" - ``` + ``` - Add domains based on tags, however keep the domains available for the dataset on DataHub GMS -```yaml + ```yaml transformers: - type: "domain_mapping_based_on_tags" config: @@ -1117,7 +1112,7 @@ transformers: domain_mapping: 'example1': "urn:li:domain:engineering" 'example2': "urn:li:domain:hr" - ``` + ``` ## Simple Add Dataset dataProduct ### Config Details @@ -1313,18 +1308,18 @@ Let's begin by adding a `create()` method for parsing our configuration dictiona @classmethod def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddCustomOwnership": - config = AddCustomOwnershipConfig.parse_obj(config_dict) - return cls(config, ctx) + config = AddCustomOwnershipConfig.parse_obj(config_dict) + return cls(config, ctx) ``` Next we need to tell the helper classes which entity types and aspect we are interested in transforming. 
In this case, we want to only process `dataset` entities and transform the `ownership` aspect. ```python def entity_types(self) -> List[str]: - return ["dataset"] + return ["dataset"] - def aspect_name(self) -> str: - return "ownership" +def aspect_name(self) -> str: + return "ownership" ``` Finally we need to implement the `transform_aspect()` method that does the work of adding our custom ownership classes. This method will be called be the framework with an optional aspect value filled out if the upstream source produced a value for this aspect. The framework takes care of pre-processing both MCE-s and MCP-s so that the `transform_aspect()` function is only called one per entity. Our job is merely to inspect the incoming aspect (or absence) and produce a transformed value for this aspect. Returning `None` from this method will effectively suppress this aspect from being emitted. @@ -1332,24 +1327,24 @@ Finally we need to implement the `transform_aspect()` method that does the work ```python # add this as a function of AddCustomOwnership - def transform_aspect( # type: ignore - self, entity_urn: str, aspect_name: str, aspect: Optional[OwnershipClass] - ) -> Optional[OwnershipClass]: +def transform_aspect( # type: ignore + self, entity_urn: str, aspect_name: str, aspect: Optional[OwnershipClass] +) -> Optional[OwnershipClass]: - owners_to_add = self.owners - assert aspect is None or isinstance(aspect, OwnershipClass) + owners_to_add = self.owners + assert aspect is None or isinstance(aspect, OwnershipClass) - if owners_to_add: - ownership = ( - aspect - if aspect - else OwnershipClass( - owners=[], - ) - ) - ownership.owners.extend(owners_to_add) + if owners_to_add: + ownership = ( + aspect + if aspect + else OwnershipClass( + owners=[], + ) + ) + ownership.owners.extend(owners_to_add) - return ownership + return ownership ``` ### More Sophistication: Making calls to DataHub during Transformation @@ -1383,27 +1378,27 @@ e.g. 
Here is how the AddDatasetOwnership transformer can now support PATCH seman ```python def transform_one(self, mce: MetadataChangeEventClass) -> MetadataChangeEventClass: - if not isinstance(mce.proposedSnapshot, DatasetSnapshotClass): - return mce - owners_to_add = self.config.get_owners_to_add(mce.proposedSnapshot) - if owners_to_add: - ownership = builder.get_or_add_aspect( - mce, - OwnershipClass( - owners=[], - ), - ) - ownership.owners.extend(owners_to_add) - - if self.config.semantics == Semantics.PATCH: - assert self.ctx.graph - patch_ownership = AddDatasetOwnership.get_ownership_to_set( - self.ctx.graph, mce.proposedSnapshot.urn, ownership - ) - builder.set_aspect( - mce, aspect=patch_ownership, aspect_type=OwnershipClass - ) + if not isinstance(mce.proposedSnapshot, DatasetSnapshotClass): return mce + owners_to_add = self.config.get_owners_to_add(mce.proposedSnapshot) + if owners_to_add: + ownership = builder.get_or_add_aspect( + mce, + OwnershipClass( + owners=[], + ), + ) + ownership.owners.extend(owners_to_add) + + if self.config.semantics == Semantics.PATCH: + assert self.ctx.graph + patch_ownership = AddDatasetOwnership.get_ownership_to_set( + self.ctx.graph, mce.proposedSnapshot.urn, ownership + ) + builder.set_aspect( + mce, aspect=patch_ownership, aspect_type=OwnershipClass + ) + return mce ``` ### Installing the package diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index cd8c9d4541c1d6..7f51b39c2731b1 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -198,6 +198,7 @@ "pandas", "cryptography", "msal", + "cachetools", } | classification_lib trino = { @@ -403,6 +404,7 @@ "sagemaker": aws_common, "salesforce": {"simple-salesforce"}, "snowflake": snowflake_common | usage_common | sqlglot_lib, + "snowflake-summary": snowflake_common | usage_common | sqlglot_lib, "sqlalchemy": sql_common, "sql-queries": usage_common | sqlglot_lib, "slack": slack, @@ -424,7 +426,7 @@ "nifi": {"requests", "packaging", 
"requests-gssapi"}, "powerbi": microsoft_common | {"lark[regex]==1.1.4", "sqlparse"} | sqlglot_lib, "powerbi-report-server": powerbi_report_server, - "vertica": sql_common | {"vertica-sqlalchemy-dialect[vertica-python]==0.0.8.1"}, + "vertica": sql_common | {"vertica-sqlalchemy-dialect[vertica-python]==0.0.8.2"}, "unity-catalog": databricks | sql_common | sqllineage_lib, # databricks is alias for unity-catalog and needs to be kept in sync "databricks": databricks | sql_common | sqllineage_lib, diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py index 8baa8481ea4f73..e6257796aa4c4e 100644 --- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py @@ -252,6 +252,16 @@ def emit_mcp( self._emit_generic(url, payload) + def emit_mcps( + self, mcps: List[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]] + ) -> None: + url = f"{self._gms_server}/aspects?action=ingestProposalBatch" + + mcp_objs = [pre_json_transform(mcp.to_obj()) for mcp in mcps] + payload = json.dumps({"proposals": mcp_objs}) + + self._emit_generic(url, payload) + @deprecated def emit_usage(self, usageStats: UsageAggregation) -> None: url = f"{self._gms_server}/usageStats?action=batchIngest" diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 7621c6d363e3d2..310a18cc0c9c63 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -1445,6 +1445,44 @@ def run_assertions_for_asset( return res["runAssertionsForAsset"] + def get_entities_v2( + self, + entity_name: str, + urns: List[str], + aspects: List[str] = [], + with_system_metadata: bool = False, + ) -> Dict[str, Any]: + payload = { + "urns": urns, + "aspectNames": aspects, + "withSystemMetadata": with_system_metadata, + } + headers: Dict[str, Any] = { + 
"Accept": "application/json", + "Content-Type": "application/json", + } + url = f"{self.config.server}/openapi/v2/entity/batch/{entity_name}" + response = self._session.post(url, data=json.dumps(payload), headers=headers) + response.raise_for_status() + + json_resp = response.json() + entities = json_resp.get("entities", []) + aspects_set = set(aspects) + retval: Dict[str, Any] = {} + + for entity in entities: + entity_aspects = entity.get("aspects", {}) + entity_urn = entity.get("urn", None) + + if entity_urn is None: + continue + for aspect_key, aspect_value in entity_aspects.items(): + # Include all aspects if aspect filter is empty + if len(aspects) == 0 or aspect_key in aspects_set: + retval.setdefault(entity_urn, {}) + retval[entity_urn][aspect_key] = aspect_value + return retval + def close(self) -> None: self._make_schema_resolver.cache_clear() super().close() diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py index 8572b2378a3bb0..dab8e99b797fe9 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py @@ -3,6 +3,8 @@ import dataclasses import functools import logging +import os +import threading import uuid from enum import auto from typing import Optional, Union @@ -14,7 +16,7 @@ OperationalError, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.emitter.rest_emitter import DataHubRestEmitter from datahub.ingestion.api.common import RecordEnvelope, WorkUnit from datahub.ingestion.api.sink import ( NoopWriteCallback, @@ -34,6 +36,10 @@ logger = logging.getLogger(__name__) +DEFAULT_REST_SINK_MAX_THREADS = int( + os.getenv("DATAHUB_REST_SINK_DEFAULT_MAX_THREADS", 15) +) + class SyncOrAsync(ConfigEnum): SYNC = auto() @@ -44,7 +50,7 @@ class DatahubRestSinkConfig(DatahubClientConfig): mode: SyncOrAsync 
= SyncOrAsync.ASYNC # These only apply in async mode. - max_threads: int = 15 + max_threads: int = DEFAULT_REST_SINK_MAX_THREADS max_pending_requests: int = 2000 @@ -82,22 +88,12 @@ def _get_partition_key(record_envelope: RecordEnvelope) -> str: class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]): - emitter: DatahubRestEmitter + _emitter_thread_local: threading.local treat_errors_as_warnings: bool = False def __post_init__(self) -> None: - self.emitter = DatahubRestEmitter( - self.config.server, - self.config.token, - connect_timeout_sec=self.config.timeout_sec, # reuse timeout_sec for connect timeout - read_timeout_sec=self.config.timeout_sec, - retry_status_codes=self.config.retry_status_codes, - retry_max_times=self.config.retry_max_times, - extra_headers=self.config.extra_headers, - ca_certificate_path=self.config.ca_certificate_path, - client_certificate_path=self.config.client_certificate_path, - disable_ssl_verification=self.config.disable_ssl_verification, - ) + self._emitter_thread_local = threading.local() + try: gms_config = self.emitter.get_server_config() except Exception as exc: @@ -120,6 +116,32 @@ def __post_init__(self) -> None: max_pending=self.config.max_pending_requests, ) + @classmethod + def _make_emitter(cls, config: DatahubRestSinkConfig) -> DataHubRestEmitter: + return DataHubRestEmitter( + config.server, + config.token, + connect_timeout_sec=config.timeout_sec, # reuse timeout_sec for connect timeout + read_timeout_sec=config.timeout_sec, + retry_status_codes=config.retry_status_codes, + retry_max_times=config.retry_max_times, + extra_headers=config.extra_headers, + ca_certificate_path=config.ca_certificate_path, + client_certificate_path=config.client_certificate_path, + disable_ssl_verification=config.disable_ssl_verification, + ) + + @property + def emitter(self) -> DataHubRestEmitter: + # While this is a property, it actually uses one emitter per thread. 
+ # Since emitter is one-to-one with request sessions, using a separate + # emitter per thread should improve correctness and performance. + # https://github.com/psf/requests/issues/1871#issuecomment-32751346 + thread_local = self._emitter_thread_local + if not hasattr(thread_local, "emitter"): + thread_local.emitter = DatahubRestSink._make_emitter(self.config) + return thread_local.emitter + def handle_work_unit_start(self, workunit: WorkUnit) -> None: if isinstance(workunit, MetadataWorkUnit): self.treat_errors_as_warnings = workunit.treat_errors_as_warnings diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index b47f7450575e52..f9004b9ba9f86b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -1064,11 +1064,19 @@ def gen_view_dataset_workunits( project_id: str, dataset_name: str, ) -> Iterable[MetadataWorkUnit]: + tags_to_add = None + if table.labels and self.config.capture_view_label_as_tag: + tags_to_add = [ + make_tag_urn(f"{k}:{v}") + for k, v in table.labels.items() + if is_tag_allowed(self.config.capture_view_label_as_tag, k) + ] yield from self.gen_dataset_workunits( table=table, columns=columns, project_id=project_id, dataset_name=dataset_name, + tags_to_add=tags_to_add, sub_types=[DatasetSubTypes.VIEW], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index b4bfa3040d72ac..2c7a53ef2bdca6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -127,6 +127,11 @@ class BigQueryV2Config( description="Capture BigQuery table labels as DataHub tag", ) + capture_view_label_as_tag: Union[bool, 
AllowDenyPattern] = Field( + default=False, + description="Capture BigQuery view labels as DataHub tag", + ) + capture_dataset_label_as_tag: Union[bool, AllowDenyPattern] = Field( default=False, description="Capture BigQuery dataset labels as DataHub tag", diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_helper.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_helper.py index 6142c96a5faa1d..bbdf32da13621d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_helper.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_helper.py @@ -1,4 +1,5 @@ -from typing import Optional +import re +from typing import Dict, Optional def unquote_and_decode_unicode_escape_seq( @@ -17,3 +18,10 @@ def unquote_and_decode_unicode_escape_seq( cleaned_string = string.encode().decode("unicode-escape") return cleaned_string + + +def parse_labels(labels_str: str) -> Dict[str, str]: + pattern = r'STRUCT\("([^"]+)", "([^"]+)"\)' + + # Map of BigQuery label keys to label values + return dict(re.findall(pattern, labels_str)) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 54eca61dfe1c9a..8a1bf9e5f3d1d6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -20,6 +20,8 @@ @dataclass class BigQuerySchemaApiPerfReport(Report): + num_list_projects: int = 0 + num_list_projects_retry_request: int = 0 list_projects: PerfTimer = field(default_factory=PerfTimer) list_datasets: PerfTimer = field(default_factory=PerfTimer) get_columns_for_dataset: PerfTimer = field(default_factory=PerfTimer) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py 
b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index e610d8604a61a5..f691bd44c32195 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -4,6 +4,7 @@ from datetime import datetime, timezone from typing import Any, Dict, Iterable, Iterator, List, Optional +from google.api_core import retry from google.cloud import bigquery, datacatalog_v1 from google.cloud.bigquery.table import ( RowIterator, @@ -13,6 +14,7 @@ ) from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier +from datahub.ingestion.source.bigquery_v2.bigquery_helper import parse_labels from datahub.ingestion.source.bigquery_v2.bigquery_report import ( BigQuerySchemaApiPerfReport, BigQueryV2Report, @@ -53,9 +55,7 @@ def from_time_partitioning( cls, time_partitioning: TimePartitioning ) -> "PartitionInfo": return cls( - field=time_partitioning.field - if time_partitioning.field - else "_PARTITIONTIME", + field=time_partitioning.field or "_PARTITIONTIME", type=time_partitioning.type_, expiration_ms=time_partitioning.expiration_ms, require_partition_filter=time_partitioning.require_partition_filter, @@ -106,6 +106,7 @@ class BigqueryTable(BaseTable): class BigqueryView(BaseView): columns: List[BigqueryColumn] = field(default_factory=list) materialized: bool = False + labels: Optional[Dict[str, str]] = None @dataclass @@ -154,14 +155,33 @@ def get_query_result(self, query: str) -> RowIterator: return resp.result() def get_projects(self) -> List[BigqueryProject]: + def _should_retry(exc: BaseException) -> bool: + logger.debug( + f"Exception occured for project.list api. Reason: {exc}. Retrying api request..." 
+ ) + self.report.num_list_projects_retry_request += 1 + return True + with self.report.list_projects: try: - projects = self.bq_client.list_projects() - - return [ + # Bigquery API has limit in calling project.list request i.e. 2 request per second. + # https://cloud.google.com/bigquery/quotas#api_request_quotas + # Whenever this limit reached an exception occur with msg + # 'Quota exceeded: Your user exceeded quota for concurrent project.lists requests.' + # Hence, added the api request retry of 15 min. + # We already tried adding rate_limit externally, proving max_result and page_size + # to restrict the request calls inside list_project but issue still occured. + projects_iterator = self.bq_client.list_projects( + retry=retry.Retry( + predicate=_should_retry, initial=10, maximum=180, timeout=900 + ) + ) + projects: List[BigqueryProject] = [ BigqueryProject(id=p.project_id, name=p.friendly_name) - for p in projects + for p in projects_iterator ] + self.report.num_list_projects = len(projects) + return projects except Exception as e: logger.error(f"Error getting projects. 
{e}", exc_info=True) return [] @@ -225,9 +245,11 @@ def get_tables_for_dataset( BigqueryQuery.tables_for_dataset.format( project_id=project_id, dataset_name=dataset_name, - table_filter=f" and t.table_name in ({filter_clause})" - if filter_clause - else "", + table_filter=( + f" and t.table_name in ({filter_clause})" + if filter_clause + else "" + ), ), ) else: @@ -237,9 +259,11 @@ def get_tables_for_dataset( BigqueryQuery.tables_for_dataset_without_partition_data.format( project_id=project_id, dataset_name=dataset_name, - table_filter=f" and t.table_name in ({filter_clause})" - if filter_clause - else "", + table_filter=( + f" and t.table_name in ({filter_clause})" + if filter_clause + else "" + ), ), ) @@ -277,20 +301,22 @@ def _make_bigquery_table( return BigqueryTable( name=table.table_name, created=table.created, - last_altered=datetime.fromtimestamp( - table.get("last_altered") / 1000, tz=timezone.utc - ) - if table.get("last_altered") is not None - else None, + last_altered=( + datetime.fromtimestamp( + table.get("last_altered") / 1000, tz=timezone.utc + ) + if table.get("last_altered") is not None + else None + ), size_in_bytes=table.get("bytes"), rows_count=table.get("row_count"), comment=table.comment, ddl=table.ddl, expires=expiration, labels=table_basic.labels if table_basic else None, - partition_info=PartitionInfo.from_table_info(table_basic) - if table_basic - else None, + partition_info=( + PartitionInfo.from_table_info(table_basic) if table_basic else None + ), clustering_fields=table_basic.clustering_fields if table_basic else None, max_partition_id=table.get("max_partition_id"), max_shard_id=shard, @@ -341,16 +367,17 @@ def _make_bigquery_view(view: bigquery.Row) -> BigqueryView: return BigqueryView( name=view.table_name, created=view.created, - last_altered=datetime.fromtimestamp( - view.get("last_altered") / 1000, tz=timezone.utc - ) - if view.get("last_altered") is not None - else None, + last_altered=( + 
datetime.fromtimestamp(view.get("last_altered") / 1000, tz=timezone.utc) + if view.get("last_altered") is not None + else None + ), comment=view.comment, view_definition=view.view_definition, materialized=view.table_type == BigqueryTableType.MATERIALIZED_VIEW, size_in_bytes=view.get("size_bytes"), rows_count=view.get("row_count"), + labels=parse_labels(view.labels) if hasattr(view, "labels") else None, ) def get_policy_tags_for_column( @@ -421,14 +448,16 @@ def get_columns_for_dataset( with self.report.get_columns_for_dataset: try: cur = self.get_query_result( - BigqueryQuery.columns_for_dataset.format( - project_id=project_id, dataset_name=dataset_name - ) - if not run_optimized_column_query - else BigqueryQuery.optimized_columns_for_dataset.format( - project_id=project_id, - dataset_name=dataset_name, - column_limit=column_limit, + ( + BigqueryQuery.columns_for_dataset.format( + project_id=project_id, dataset_name=dataset_name + ) + if not run_optimized_column_query + else BigqueryQuery.optimized_columns_for_dataset.format( + project_id=project_id, + dataset_name=dataset_name, + column_limit=column_limit, + ) ), ) except Exception as e: @@ -558,11 +587,13 @@ def _make_bigquery_table_snapshot(snapshot: bigquery.Row) -> BigqueryTableSnapsh return BigqueryTableSnapshot( name=snapshot.table_name, created=snapshot.created, - last_altered=datetime.fromtimestamp( - snapshot.get("last_altered") / 1000, tz=timezone.utc - ) - if snapshot.get("last_altered") is not None - else None, + last_altered=( + datetime.fromtimestamp( + snapshot.get("last_altered") / 1000, tz=timezone.utc + ) + if snapshot.get("last_altered") is not None + else None + ), comment=snapshot.comment, ddl=snapshot.ddl, snapshot_time=snapshot.snapshot_time, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py index 3545cc77438388..6155619d144c40 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py @@ -117,7 +117,8 @@ class BigqueryQuery: t.table_type as table_type, t.creation_time as created, ts.last_modified_time as last_altered, - tos.OPTION_VALUE as comment, + tos_description.OPTION_VALUE as comment, + tos_labels.OPTION_VALUE as labels, t.is_insertable_into, t.ddl as view_definition, ts.row_count, @@ -125,9 +126,12 @@ class BigqueryQuery: FROM `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t join `{{project_id}}`.`{{dataset_name}}`.__TABLES__ as ts on ts.table_id = t.TABLE_NAME - left join `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLE_OPTIONS as tos on t.table_schema = tos.table_schema - and t.TABLE_NAME = tos.TABLE_NAME - and tos.OPTION_NAME = "description" + left join `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLE_OPTIONS as tos_description on t.table_schema = tos_description.table_schema + and t.TABLE_NAME = tos_description.TABLE_NAME + and tos_description.OPTION_NAME = "description" + left join `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLE_OPTIONS as tos_labels on t.table_schema = tos_labels.table_schema + and t.TABLE_NAME = tos_labels.TABLE_NAME + and tos_labels.OPTION_NAME = "labels" WHERE table_type in ('{BigqueryTableType.VIEW}', '{BigqueryTableType.MATERIALIZED_VIEW}') order by @@ -142,14 +146,18 @@ class BigqueryQuery: t.table_name as table_name, t.table_type as table_type, t.creation_time as created, - tos.OPTION_VALUE as comment, + tos_description.OPTION_VALUE as comment, + tos_labels.OPTION_VALUE as labels, t.is_insertable_into, t.ddl as view_definition FROM `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t - left join `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLE_OPTIONS as tos on t.table_schema = tos.table_schema - and t.TABLE_NAME = tos.TABLE_NAME - and tos.OPTION_NAME = "description" + left join 
`{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLE_OPTIONS as tos_description on t.table_schema = tos_description.table_schema + and t.TABLE_NAME = tos_description.TABLE_NAME + and tos_description.OPTION_NAME = "description" + left join `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLE_OPTIONS as tos_labels on t.table_schema = tos_labels.table_schema + and t.TABLE_NAME = tos_labels.TABLE_NAME + and tos_labels.OPTION_NAME = "labels" WHERE table_type in ('{BigqueryTableType.VIEW}', '{BigqueryTableType.MATERIALIZED_VIEW}') order by diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py index f7b8bb09724a16..c82b1c030e2430 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redash.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py @@ -10,7 +10,7 @@ from pydantic.fields import Field from redash_toolbelt import Redash from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry +from urllib3.util.retry import Retry import datahub.emitter.mce_builder as builder from datahub.configuration.common import AllowDenyPattern, ConfigModel diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/config.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/config.py index 06c381add05b9c..6a47884e1b139a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/config.py @@ -1,5 +1,5 @@ import logging -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Dict, Optional import pydantic @@ -26,7 +26,10 @@ class Constant: """ # Rest API response key constants + REFRESH_TOKEN = "refresh_token" + ACCESS_TOKEN = "access_token" ENTRIES = "entries" + MEMBERID = "memberId" FIRSTNAME = "firstName" LASTNAME = "lastName" EDGES = "edges" @@ -52,6 +55,11 @@ class Constant: @dataclass class 
SigmaSourceReport(StaleEntityRemovalSourceReport): number_of_workspaces: int = 0 + non_accessible_workspaces_count: int = 0 + shared_entities_count: int = 0 + number_of_datasets: int = 0 + number_of_workbooks: int = 0 + number_of_files_metadata: Dict[str, int] = field(default_factory=dict) def report_number_of_workspaces(self, number_of_workspaces: int) -> None: self.number_of_workspaces = number_of_workspaces @@ -75,15 +83,29 @@ class SigmaSourceConfig( workspace_pattern: AllowDenyPattern = pydantic.Field( default=AllowDenyPattern.allow_all(), description="Regex patterns to filter Sigma workspaces in ingestion." - "Mention 'User Folder' if entities of 'My documents' need to ingest.", + "Mention 'My documents' if personal entities also need to ingest.", ) ingest_owner: Optional[bool] = pydantic.Field( default=True, - description="Ingest Owner from source. This will override Owner info entered from UI", + description="Ingest Owner from source. This will override Owner info entered from UI.", + ) + ingest_shared_entities: Optional[bool] = pydantic.Field( + default=False, + description="Whether to ingest the shared entities or not.", + ) + extract_lineage: Optional[bool] = pydantic.Field( + default=True, + description="Whether to extract lineage of workbook's elements and datasets or not.", + ) + workbook_lineage_pattern: AllowDenyPattern = pydantic.Field( + default=AllowDenyPattern.allow_all(), + description="Regex patterns to filter workbook's elements and datasets lineage in ingestion." 
+ "Requires extract_lineage to be enabled.", ) chart_sources_platform_mapping: Dict[str, PlatformDetail] = pydantic.Field( default={}, description="A mapping of the sigma workspace/workbook/chart folder path to all chart's data sources platform details present inside that folder path.", ) - - stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None + stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = pydantic.Field( + default=None, description="Sigma Stateful Ingestion Config." + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/data_classes.py index 9863adc4a854a8..922b0be3b4a93c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/data_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/data_classes.py @@ -21,46 +21,56 @@ class Workspace(BaseModel): createdAt: datetime updatedAt: datetime + @root_validator(pre=True) + def update_values(cls, values: Dict) -> Dict: + # Update name if presonal workspace + if values["name"] == "User Folder": + values["name"] = "My documents" + return values + class SigmaDataset(BaseModel): datasetId: str - workspaceId: str name: str description: str createdBy: str createdAt: datetime updatedAt: datetime url: str - path: str + workspaceId: Optional[str] = None + path: Optional[str] = None badge: Optional[str] = None - @root_validator(pre=True) - def update_values(cls, values: Dict) -> Dict: + def get_urn_part(self): # As element lineage api provide this id as source dataset id - values["datasetId"] = values["url"].split("/")[-1] - return values + return self.url.split("/")[-1] class Element(BaseModel): elementId: str - type: str name: str url: str + type: Optional[str] = None vizualizationType: Optional[str] = None query: Optional[str] = None columns: List[str] = [] upstream_sources: Dict[str, str] = {} + def get_urn_part(self): + return self.elementId + class Page(BaseModel): 
pageId: str name: str elements: List[Element] = [] + def get_urn_part(self): + return self.pageId + class Workbook(BaseModel): workbookId: str - workspaceId: str name: str createdBy: str updatedBy: str @@ -69,5 +79,16 @@ class Workbook(BaseModel): url: str path: str latestVersion: int + workspaceId: Optional[str] = None pages: List[Page] = [] badge: Optional[str] = None + + +class File(BaseModel): + id: str + name: str + parentId: str + path: str + type: str + badge: Optional[str] = None + workspaceId: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py index 97da198aaa435a..74d7abb121a3eb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py @@ -110,7 +110,7 @@ def __init__(self, config: SigmaSourceConfig, ctx: PipelineContext): self.reporter = SigmaSourceReport() self.dataset_upstream_urn_mapping: Dict[str, List[str]] = {} try: - self.sigma_api = SigmaAPI(self.config) + self.sigma_api = SigmaAPI(self.config, self.reporter) except Exception as e: raise ConfigurationError(f"Unable to connect sigma API. 
Exception: {e}") @@ -118,7 +118,10 @@ def __init__(self, config: SigmaSourceConfig, ctx: PipelineContext): def test_connection(config_dict: dict) -> TestConnectionReport: test_report = TestConnectionReport() try: - SigmaAPI(SigmaSourceConfig.parse_obj_allow_extras(config_dict)) + SigmaAPI( + SigmaSourceConfig.parse_obj_allow_extras(config_dict), + SigmaSourceReport(), + ) test_report.basic_connectivity = CapabilityReport(capable=True) except Exception as e: test_report.basic_connectivity = CapabilityReport( @@ -175,9 +178,6 @@ def _gen_workspace_workunit( last_modified=int(workspace.updatedAt.timestamp() * 1000), ) - def _get_sigma_dataset_identifier(self, dataset: SigmaDataset) -> str: - return dataset.datasetId - def _gen_sigma_dataset_urn(self, dataset_identifier: str) -> str: return builder.make_dataset_urn_with_platform_instance( name=dataset_identifier, @@ -201,9 +201,11 @@ def _gen_dataset_properties( externalUrl=dataset.url, created=TimeStamp(time=int(dataset.createdAt.timestamp() * 1000)), lastModified=TimeStamp(time=int(dataset.updatedAt.timestamp() * 1000)), + customProperties={"datasetId": dataset.datasetId}, tags=[dataset.badge] if dataset.badge else None, ) - dataset_properties.customProperties.update({"path": dataset.path}) + if dataset.path: + dataset_properties.customProperties["path"] = dataset.path return MetadataChangeProposalWrapper( entityUrn=dataset_urn, aspect=dataset_properties ).as_workunit() @@ -262,18 +264,18 @@ def _gen_entity_browsepath_aspect( def _gen_dataset_workunit( self, dataset: SigmaDataset ) -> Iterable[MetadataWorkUnit]: - dataset_identifier = self._get_sigma_dataset_identifier(dataset) - dataset_urn = self._gen_sigma_dataset_urn(dataset_identifier) + dataset_urn = self._gen_sigma_dataset_urn(dataset.get_urn_part()) yield self._gen_entity_status_aspect(dataset_urn) yield self._gen_dataset_properties(dataset_urn, dataset) - yield from add_entity_to_container( - container_key=self._gen_workspace_key(dataset.workspaceId), - 
entity_type="dataset", - entity_urn=dataset_urn, - ) + if dataset.workspaceId: + yield from add_entity_to_container( + container_key=self._gen_workspace_key(dataset.workspaceId), + entity_type="dataset", + entity_urn=dataset_urn, + ) dpi_aspect = self._gen_dataplatform_instance_aspect(dataset_urn) if dpi_aspect: @@ -288,15 +290,16 @@ def _gen_dataset_workunit( aspect=SubTypes(typeNames=[DatasetSubTypes.SIGMA_DATASET]), ).as_workunit() - paths = dataset.path.split("/")[1:] - if len(paths) > 0: - yield self._gen_entity_browsepath_aspect( - entity_urn=dataset_urn, - parent_entity_urn=builder.make_container_urn( - self._gen_workspace_key(dataset.workspaceId) - ), - paths=paths, - ) + if dataset.path and dataset.workspaceId: + paths = dataset.path.split("/")[1:] + if len(paths) > 0: + yield self._gen_entity_browsepath_aspect( + entity_urn=dataset_urn, + parent_entity_urn=builder.make_container_urn( + self._gen_workspace_key(dataset.workspaceId) + ), + paths=paths, + ) if dataset.badge: yield MetadataChangeProposalWrapper( @@ -322,7 +325,7 @@ def _gen_dashboard_urn(self, dashboard_identifier: str) -> str: ) def _gen_dashboard_info_workunit(self, page: Page) -> MetadataWorkUnit: - dashboard_urn = self._gen_dashboard_urn(page.pageId) + dashboard_urn = self._gen_dashboard_urn(page.get_urn_part()) dashboard_info_cls = DashboardInfoClass( title=page.name, description="", @@ -330,7 +333,7 @@ def _gen_dashboard_info_workunit(self, page: Page) -> MetadataWorkUnit: builder.make_chart_urn( platform=self.platform, platform_instance=self.config.platform_instance, - name=element.elementId, + name=element.get_urn_part(), ) for element in page.elements ], @@ -424,12 +427,12 @@ def _gen_elements_workunit( chart_urn = builder.make_chart_urn( platform=self.platform, platform_instance=self.config.platform_instance, - name=element.elementId, + name=element.get_urn_part(), ) custom_properties = { "VizualizationType": str(element.vizualizationType), - "type": str(element.type), + "type": 
str(element.type) if element.type else "Unknown", } yield self._gen_entity_status_aspect(chart_urn) @@ -490,7 +493,7 @@ def _gen_pages_workunit(self, workbook: Workbook) -> Iterable[MetadataWorkUnit]: Map Sigma workbook page to Datahub dashboard """ for page in workbook.pages: - dashboard_urn = self._gen_dashboard_urn(page.pageId) + dashboard_urn = self._gen_dashboard_urn(page.get_urn_part()) yield self._gen_entity_status_aspect(dashboard_urn) @@ -513,7 +516,7 @@ def _gen_pages_workunit(self, workbook: Workbook) -> Iterable[MetadataWorkUnit]: ) yield MetadataChangeProposalWrapper( - entityUrn=self._gen_dashboard_urn(page.pageId), + entityUrn=dashboard_urn, aspect=InputFieldsClass(fields=all_input_fields), ).as_workunit() @@ -522,11 +525,14 @@ def _gen_workbook_workunit(self, workbook: Workbook) -> Iterable[MetadataWorkUni Map Sigma Workbook to Datahub container """ owner_username = self.sigma_api.get_user_name(workbook.createdBy) + workbook_key = self._gen_workbook_key(workbook.workbookId) yield from gen_containers( - container_key=self._gen_workbook_key(workbook.workbookId), + container_key=workbook_key, name=workbook.name, sub_types=[BIContainerSubTypes.SIGMA_WORKBOOK], - parent_container_key=self._gen_workspace_key(workbook.workspaceId), + parent_container_key=self._gen_workspace_key(workbook.workspaceId) + if workbook.workspaceId + else None, extra_properties={ "path": workbook.path, "latestVersion": str(workbook.latestVersion), @@ -541,11 +547,9 @@ def _gen_workbook_workunit(self, workbook: Workbook) -> Iterable[MetadataWorkUni ) paths = workbook.path.split("/")[1:] - if len(paths) > 0: + if len(paths) > 0 and workbook.workspaceId: yield self._gen_entity_browsepath_aspect( - entity_urn=builder.make_container_urn( - self._gen_workbook_key(workbook.workbookId), - ), + entity_urn=builder.make_container_urn(workbook_key), parent_entity_urn=builder.make_container_urn( self._gen_workspace_key(workbook.workspaceId) ), @@ -578,12 +582,13 @@ def 
get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: Datahub Ingestion framework invoke this method """ logger.info("Sigma plugin execution is started") - entities = self.sigma_api.get_sigma_entities() - for entity in entities: - if isinstance(entity, Workbook): - yield from self._gen_workbook_workunit(entity) - elif isinstance(entity, SigmaDataset): - yield from self._gen_dataset_workunit(entity) + self.sigma_api.fill_workspaces() + + for dataset in self.sigma_api.get_sigma_datasets(): + yield from self._gen_dataset_workunit(dataset) + for workbook in self.sigma_api.get_sigma_workbooks(): + yield from self._gen_workbook_workunit(workbook) + for workspace in self._get_allowed_workspaces(): yield from self._gen_workspace_workunit(workspace) yield from self._gen_sigma_dataset_upstream_lineage_workunit() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py index c2c28419ebcfd3..66d4678e521328 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py @@ -1,12 +1,18 @@ +import functools import logging import sys -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional import requests -from datahub.ingestion.source.sigma.config import Constant, SigmaSourceConfig +from datahub.ingestion.source.sigma.config import ( + Constant, + SigmaSourceConfig, + SigmaSourceReport, +) from datahub.ingestion.source.sigma.data_classes import ( Element, + File, Page, SigmaDataset, Workbook, @@ -18,11 +24,13 @@ class SigmaAPI: - def __init__(self, config: SigmaSourceConfig) -> None: + def __init__(self, config: SigmaSourceConfig, report: SigmaSourceReport) -> None: self.config = config + self.report = report self.workspaces: Dict[str, Workspace] = {} self.users: Dict[str, str] = {} self.session = requests.Session() + self.refresh_token: Optional[str] = 
None # Test connection by generating access token logger.info(f"Trying to connect to {self.config.api_url}") self._generate_token() @@ -35,84 +43,235 @@ def _generate_token(self): } response = self.session.post(f"{self.config.api_url}/auth/token", data=data) response.raise_for_status() + response_dict = response.json() + self.refresh_token = response_dict[Constant.REFRESH_TOKEN] self.session.headers.update( { - "Authorization": f"Bearer {response.json()['access_token']}", + "Authorization": f"Bearer {response_dict[Constant.ACCESS_TOKEN]}", "Content-Type": "application/json", } ) def _log_http_error(self, message: str) -> Any: - logger.warning(message) _, e, _ = sys.exc_info() if isinstance(e, requests.exceptions.HTTPError): logger.warning(f"HTTP status-code = {e.response.status_code}") logger.debug(msg=message, exc_info=e) return e - def get_workspace(self, workspace_id: str) -> Optional[Workspace]: - workspace: Optional[Workspace] = None + def _refresh_access_token(self): try: - response = self.session.get( - f"{self.config.api_url}/workspaces/{workspace_id}" + data = { + "grant_type": Constant.REFRESH_TOKEN, + "refresh_token": self.refresh_token, + "client_id": self.config.client_id, + "client_secret": self.config.client_secret, + } + post_response = self.session.post( + f"{self.config.api_url}/auth/token", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data=data, + ) + post_response.raise_for_status() + response_dict = post_response.json() + self.refresh_token = response_dict[Constant.REFRESH_TOKEN] + self.session.headers.update( + { + "Authorization": f"Bearer {response_dict[Constant.ACCESS_TOKEN]}", + "Content-Type": "application/json", + } ) - response.raise_for_status() - workspace_dict = response.json() - workspace = Workspace.parse_obj(workspace_dict) except Exception as e: self._log_http_error( - message=f"Unable to fetch workspace {workspace_id}. Exception: {e}" + message=f"Unable to refresh access token. 
Exception: {e}" ) - return workspace - def get_user_name(self, user_id: str) -> Optional[str]: + def _get_api_call(self, url: str) -> requests.Response: + get_response = self.session.get(url) + if get_response.status_code == 401 and self.refresh_token: + logger.debug("Access token might expired. Refreshing access token.") + self._refresh_access_token() + get_response = self.session.get(url) + return get_response + + def get_workspace(self, workspace_id: str) -> Optional[Workspace]: + logger.debug(f"Fetching workspace metadata with id '{workspace_id}'") try: - if user_id in self.users: - # To avoid fetching same user details again - return self.users[user_id] + if workspace_id in self.workspaces: + return self.workspaces[workspace_id] else: - response = self.session.get(f"{self.config.api_url}/members/{user_id}") - response.raise_for_status() - user_dict = response.json() - user_name = ( - f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}" + response = self._get_api_call( + f"{self.config.api_url}/workspaces/{workspace_id}" ) - self.users[user_id] = user_name - return user_name + if response.status_code == 403: + logger.debug(f"Workspace {workspace_id} not accessible.") + self.report.non_accessible_workspaces_count += 1 + return None + response.raise_for_status() + workspace = Workspace.parse_obj(response.json()) + self.workspaces[workspace.workspaceId] = workspace + return workspace except Exception as e: self._log_http_error( - message=f"Unable to fetch user with id {user_id}. Exception: {e}" + message=f"Unable to fetch workspace '{workspace_id}'. 
Exception: {e}" ) return None - def get_sigma_dataset( - self, dataset_id: str, workspace_id: str, path: str - ) -> Optional[SigmaDataset]: - dataset: Optional[SigmaDataset] = None + def fill_workspaces(self) -> None: + logger.debug("Fetching all accessible workspaces metadata.") + workspace_url = url = f"{self.config.api_url}/workspaces?limit=50" try: - response = self.session.get(f"{self.config.api_url}/datasets/{dataset_id}") + while True: + response = self._get_api_call(url) + response.raise_for_status() + response_dict = response.json() + for workspace_dict in response_dict[Constant.ENTRIES]: + self.workspaces[ + workspace_dict[Constant.WORKSPACEID] + ] = Workspace.parse_obj(workspace_dict) + if response_dict[Constant.NEXTPAGE]: + url = f"{workspace_url}&page={response_dict[Constant.NEXTPAGE]}" + else: + break + except Exception as e: + self._log_http_error(message=f"Unable to fetch workspaces. Exception: {e}") + + @functools.lru_cache() + def _get_users(self) -> Dict[str, str]: + logger.debug("Fetching all accessible users metadata.") + try: + users: Dict[str, str] = {} + response = self._get_api_call(f"{self.config.api_url}/members") response.raise_for_status() - dataset_dict = response.json() - dataset_dict[Constant.WORKSPACEID] = workspace_id - dataset_dict[Constant.PATH] = path - dataset = SigmaDataset.parse_obj(dataset_dict) + for user_dict in response.json(): + users[ + user_dict[Constant.MEMBERID] + ] = f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}" + return users + except Exception as e: + self._log_http_error( + message=f"Unable to fetch users details. 
Exception: {e}" + ) + return {} + + def get_user_name(self, user_id: str) -> Optional[str]: + return self._get_users().get(user_id) + + @functools.lru_cache() + def get_workspace_id_from_file_path( + self, parent_id: str, path: str + ) -> Optional[str]: + try: + path_list = path.split("/") + while len(path_list) != 1: # means current parent id is folder's id + response = self._get_api_call( + f"{self.config.api_url}/files/{parent_id}" + ) + response.raise_for_status() + parent_id = response.json()[Constant.PARENTID] + path_list.pop() + return parent_id + except Exception as e: + logger.error( + f"Unable to find workspace id using file path '{path}'. Exception: {e}" + ) + return None + + @functools.lru_cache + def _get_files_metadata(self, file_type: str) -> Dict[str, File]: + logger.debug(f"Fetching file metadata with type {file_type}.") + file_url = url = f"{self.config.api_url}/files?typeFilters={file_type}" + try: + files_metadata: Dict[str, File] = {} + while True: + response = self._get_api_call(url) + response.raise_for_status() + response_dict = response.json() + for file_dict in response_dict[Constant.ENTRIES]: + file = File.parse_obj(file_dict) + file.workspaceId = self.get_workspace_id_from_file_path( + file.parentId, file.path + ) + files_metadata[file_dict[Constant.ID]] = file + if response_dict[Constant.NEXTPAGE]: + url = f"{file_url}&page={response_dict[Constant.NEXTPAGE]}" + else: + break + self.report.number_of_files_metadata[file_type] = len(files_metadata) + return files_metadata + except Exception as e: + self._log_http_error( + message=f"Unable to fetch files metadata. 
Exception: {e}" + ) + return {} + + def get_sigma_datasets(self) -> List[SigmaDataset]: + logger.debug("Fetching all accessible datasets metadata.") + dataset_url = url = f"{self.config.api_url}/datasets" + dataset_files_metadata = self._get_files_metadata(file_type=Constant.DATASET) + try: + datasets: List[SigmaDataset] = [] + while True: + response = self._get_api_call(url) + response.raise_for_status() + response_dict = response.json() + for dataset_dict in response_dict[Constant.ENTRIES]: + dataset = SigmaDataset.parse_obj(dataset_dict) + + if dataset.datasetId in dataset_files_metadata: + dataset.path = dataset_files_metadata[dataset.datasetId].path + dataset.badge = dataset_files_metadata[dataset.datasetId].badge + + workspace_id = dataset_files_metadata[ + dataset.datasetId + ].workspaceId + if workspace_id: + dataset.workspaceId = workspace_id + workspace = self.get_workspace(dataset.workspaceId) + if workspace: + if self.config.workspace_pattern.allowed( + workspace.name + ): + datasets.append(dataset) + elif self.config.ingest_shared_entities: + # If no workspace for dataset we can consider it as shared entity + self.report.shared_entities_count += 1 + datasets.append(dataset) + + if response_dict[Constant.NEXTPAGE]: + url = f"{dataset_url}?page={response_dict[Constant.NEXTPAGE]}" + else: + break + self.report.number_of_datasets = len(datasets) + return datasets except Exception as e: self._log_http_error( - message=f"Unable to fetch sigma dataset {dataset_id}. Exception: {e}" + message=f"Unable to fetch sigma datasets. 
Exception: {e}" ) - return dataset + return [] def _get_element_upstream_sources( - self, element_id: str, workbook_id: str + self, element: Element, workbook: Workbook ) -> Dict[str, str]: """ Returns upstream dataset sources with keys as id and values as name of that dataset """ - upstream_sources: Dict[str, str] = {} try: - response = self.session.get( - f"{self.config.api_url}/workbooks/{workbook_id}/lineage/elements/{element_id}" + upstream_sources: Dict[str, str] = {} + response = self._get_api_call( + f"{self.config.api_url}/workbooks/{workbook.workbookId}/lineage/elements/{element.elementId}" ) + if response.status_code == 500: + logger.debug( + f"Lineage metadata not present for element {element.name} of workbook '{workbook.name}'" + ) + return upstream_sources + if response.status_code == 403: + logger.debug( + f"Lineage metadata not accessible for element {element.name} of workbook '{workbook.name}'" + ) + return upstream_sources + response.raise_for_status() response_dict = response.json() for edge in response_dict[Constant.EDGES]: @@ -123,34 +282,39 @@ def _get_element_upstream_sources( upstream_sources[edge[Constant.SOURCE]] = response_dict[ Constant.DEPENDENCIES ][edge[Constant.SOURCE]][Constant.NAME] + return upstream_sources except Exception as e: self._log_http_error( - message=f"Unable to fetch lineage of element {element_id}. Exception: {e}" + message=f"Unable to fetch lineage for element {element.name} of workbook '{workbook.name}'. 
Exception: {e}" ) - return upstream_sources + return {} def _get_element_sql_query( - self, element_id: str, workbook_id: str + self, element: Element, workbook: Workbook ) -> Optional[str]: - query: Optional[str] = None try: - response = self.session.get( - f"{self.config.api_url}/workbooks/{workbook_id}/elements/{element_id}/query" + response = self._get_api_call( + f"{self.config.api_url}/workbooks/{workbook.workbookId}/elements/{element.elementId}/query" ) + if response.status_code == 404: + logger.debug( + f"Query not present for element {element.name} of workbook '{workbook.name}'" + ) + return None response.raise_for_status() response_dict = response.json() if "sql" in response_dict: - query = response_dict["sql"] + return response_dict["sql"] except Exception as e: self._log_http_error( - message=f"Unable to fetch sql query for a element {element_id}. Exception: {e}" + message=f"Unable to fetch sql query for element {element.name} of workbook '{workbook.name}'. Exception: {e}" ) - return query + return None def get_page_elements(self, workbook: Workbook, page: Page) -> List[Element]: - elements: List[Element] = [] try: - response = self.session.get( + elements: List[Element] = [] + response = self._get_api_call( f"{self.config.api_url}/workbooks/{workbook.workbookId}/pages/{page.pageId}/elements" ) response.raise_for_status() @@ -161,23 +325,26 @@ def get_page_elements(self, workbook: Workbook, page: Page) -> List[Element]: Constant.URL ] = f"{workbook.url}?:nodeId={element_dict[Constant.ELEMENTID]}&:fullScreen=true" element = Element.parse_obj(element_dict) - element.upstream_sources = self._get_element_upstream_sources( - element.elementId, workbook.workbookId - ) - element.query = self._get_element_sql_query( - element.elementId, workbook.workbookId - ) + if ( + self.config.extract_lineage + and self.config.workbook_lineage_pattern.allowed(workbook.name) + ): + element.upstream_sources = self._get_element_upstream_sources( + element, workbook + ) + 
element.query = self._get_element_sql_query(element, workbook) elements.append(element) + return elements except Exception as e: self._log_http_error( - message=f"Unable to fetch elements of page {page.pageId}, workbook {workbook.workbookId}. Exception: {e}" + message=f"Unable to fetch elements of page '{page.name}', workbook '{workbook.name}'. Exception: {e}" ) - return elements + return [] def get_workbook_pages(self, workbook: Workbook) -> List[Page]: - pages: List[Page] = [] try: - response = self.session.get( + pages: List[Page] = [] + response = self._get_api_call( f"{self.config.api_url}/workbooks/{workbook.workbookId}/pages" ) response.raise_for_status() @@ -185,75 +352,57 @@ def get_workbook_pages(self, workbook: Workbook) -> List[Page]: page = Page.parse_obj(page_dict) page.elements = self.get_page_elements(workbook, page) pages.append(page) + return pages except Exception as e: self._log_http_error( - message=f"Unable to fetch pages of workbook {workbook.workbookId}. Exception: {e}" + message=f"Unable to fetch pages of workbook '{workbook.name}'. Exception: {e}" ) - return pages + return [] - def get_workbook(self, workbook_id: str, workspace_id: str) -> Optional[Workbook]: - workbook: Optional[Workbook] = None + def get_sigma_workbooks(self) -> List[Workbook]: + logger.debug("Fetching all accessible workbooks metadata.") + workbook_url = url = f"{self.config.api_url}/workbooks" + workbook_files_metadata = self._get_files_metadata(file_type=Constant.WORKBOOK) try: - response = self.session.get( - f"{self.config.api_url}/workbooks/{workbook_id}" - ) - response.raise_for_status() - workbook_dict = response.json() - workbook_dict[Constant.WORKSPACEID] = workspace_id - workbook = Workbook.parse_obj(workbook_dict) - workbook.pages = self.get_workbook_pages(workbook) - except Exception as e: - self._log_http_error( - message=f"Unable to fetch workbook {workbook_id}. 
Exception: {e}" - ) - return workbook + workbooks: List[Workbook] = [] + while True: + response = self._get_api_call(url) + response.raise_for_status() + response_dict = response.json() + for workbook_dict in response_dict[Constant.ENTRIES]: + workbook = Workbook.parse_obj(workbook_dict) - def get_workspace_id(self, parent_id: str, path: str) -> str: - path_list = path.split("/") - while len(path_list) != 1: # means current parent id is folder's id - response = self.session.get(f"{self.config.api_url}/files/{parent_id}") - parent_id = response.json()[Constant.PARENTID] - path_list.pop() - return parent_id + if workbook.workbookId in workbook_files_metadata: + workbook.badge = workbook_files_metadata[ + workbook.workbookId + ].badge - def get_sigma_entities(self) -> List[Union[Workbook, SigmaDataset]]: - entities: List[Union[Workbook, SigmaDataset]] = [] - url = f"{self.config.api_url}/files" - while True: - response = self.session.get(url) - response.raise_for_status() - response_dict = response.json() - for entity in response_dict[Constant.ENTRIES]: - workspace_id = self.get_workspace_id( - entity[Constant.PARENTID], entity[Constant.PATH] - ) - if workspace_id not in self.workspaces: - workspace = self.get_workspace(workspace_id) - if workspace: - self.workspaces[workspace.workspaceId] = workspace + workspace_id = workbook_files_metadata[ + workbook.workbookId + ].workspaceId + if workspace_id: + workbook.workspaceId = workspace_id + workspace = self.get_workspace(workbook.workspaceId) + if workspace: + if self.config.workspace_pattern.allowed( + workspace.name + ): + workbook.pages = self.get_workbook_pages(workbook) + workbooks.append(workbook) + elif self.config.ingest_shared_entities: + # If no workspace for workbook we can consider it as shared entity + self.report.shared_entities_count += 1 + workbook.pages = self.get_workbook_pages(workbook) + workbooks.append(workbook) - if self.workspaces.get( - workspace_id - ) and self.config.workspace_pattern.allowed( 
- self.workspaces[workspace_id].name - ): - type = entity[Constant.TYPE] - if type == Constant.DATASET: - dataset = self.get_sigma_dataset( - entity[Constant.ID], - workspace_id, - entity[Constant.PATH], - ) - if dataset: - dataset.badge = entity[Constant.BADGE] - entities.append(dataset) - elif type == Constant.WORKBOOK: - workbook = self.get_workbook(entity[Constant.ID], workspace_id) - if workbook: - workbook.badge = entity[Constant.BADGE] - entities.append(workbook) - if response_dict[Constant.NEXTPAGE]: - url = f"{url}?page={response_dict[Constant.NEXTPAGE]}" - else: - break - return entities + if response_dict[Constant.NEXTPAGE]: + url = f"{workbook_url}?page={response_dict[Constant.NEXTPAGE]}" + else: + break + self.report.number_of_workbooks = len(workbooks) + return workbooks + except Exception as e: + self._log_http_error( + message=f"Unable to fetch sigma workbooks. Exception: {e}" + ) + return [] diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py index d79ed384d755b0..db2095da01134d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from datetime import datetime -from typing import Dict, List, MutableSet, Optional +from typing import TYPE_CHECKING, Dict, List, MutableSet, Optional from datahub.ingestion.api.report import Report from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin @@ -14,6 +14,11 @@ from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport from datahub.utilities.perf_timer import PerfTimer +if TYPE_CHECKING: + from datahub.ingestion.source.snowflake.snowflake_schema import ( + SnowflakeDataDictionary, + ) + @dataclass class SnowflakeUsageAggregationReport(Report): @@ -106,11 +111,7 @@ class 
SnowflakeV2Report( num_tables_with_known_upstreams: int = 0 num_upstream_lineage_edge_parsing_failed: int = 0 - # Reports how many times we reset in-memory `functools.lru_cache` caches of data, - # which occurs when we occur a different database / schema. - # Should not be more than the number of databases / schemas scanned. - # Maps (function name) -> (stat_name) -> (stat_value) - lru_cache_info: Dict[str, Dict[str, int]] = field(default_factory=dict) + data_dictionary_cache: Optional["SnowflakeDataDictionary"] = None # These will be non-zero if snowflake information_schema queries fail with error - # "Information schema query returned too much data. Please repeat query with more selective predicates."" diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index 292c57494632c5..3e26d2acd78e1c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -1,19 +1,23 @@ import logging +import os from collections import defaultdict from dataclasses import dataclass, field from datetime import datetime -from functools import lru_cache -from typing import Dict, List, Optional +from typing import Callable, Dict, List, Optional from snowflake.connector import SnowflakeConnection +from datahub.ingestion.api.report import SupportsAsObj from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery from datahub.ingestion.source.snowflake.snowflake_utils import SnowflakeQueryMixin from datahub.ingestion.source.sql.sql_generic import BaseColumn, BaseTable, BaseView +from datahub.utilities.serialized_lru_cache import serialized_lru_cache logger: logging.Logger = logging.getLogger(__name__) +SCHEMA_PARALLELISM = 
int(os.getenv("DATAHUB_SNOWFLAKE_SCHEMA_PARALLELISM", 20)) + @dataclass class SnowflakePK: @@ -176,7 +180,7 @@ def get_column_tags_for_table( ) -class SnowflakeDataDictionary(SnowflakeQueryMixin): +class SnowflakeDataDictionary(SnowflakeQueryMixin, SupportsAsObj): def __init__(self) -> None: self.logger = logger self.connection: Optional[SnowflakeConnection] = None @@ -189,6 +193,26 @@ def get_connection(self) -> SnowflakeConnection: assert self.connection is not None return self.connection + def as_obj(self) -> Dict[str, Dict[str, int]]: + # TODO: Move this into a proper report type that gets computed. + + # Reports how many times we reset in-memory `functools.lru_cache` caches of data, + # which occurs when we occur a different database / schema. + # Should not be more than the number of databases / schemas scanned. + # Maps (function name) -> (stat_name) -> (stat_value) + lru_cache_functions: List[Callable] = [ + self.get_tables_for_database, + self.get_views_for_database, + self.get_columns_for_schema, + self.get_pk_constraints_for_schema, + self.get_fk_constraints_for_schema, + ] + + report = {} + for func in lru_cache_functions: + report[func.__name__] = func.cache_info()._asdict() # type: ignore + return report + def show_databases(self) -> List[SnowflakeDatabase]: databases: List[SnowflakeDatabase] = [] @@ -241,7 +265,7 @@ def get_schemas_for_database(self, db_name: str) -> List[SnowflakeSchema]: snowflake_schemas.append(snowflake_schema) return snowflake_schemas - @lru_cache(maxsize=1) + @serialized_lru_cache(maxsize=1) def get_tables_for_database( self, db_name: str ) -> Optional[Dict[str, List[SnowflakeTable]]]: @@ -299,7 +323,7 @@ def get_tables_for_schema( ) return tables - @lru_cache(maxsize=1) + @serialized_lru_cache(maxsize=1) def get_views_for_database( self, db_name: str ) -> Optional[Dict[str, List[SnowflakeView]]]: @@ -349,7 +373,7 @@ def get_views_for_schema( ) return views - @lru_cache(maxsize=1) + 
@serialized_lru_cache(maxsize=SCHEMA_PARALLELISM) def get_columns_for_schema( self, schema_name: str, db_name: str ) -> Optional[Dict[str, List[SnowflakeColumn]]]: @@ -405,7 +429,7 @@ def get_columns_for_table( ) return columns - @lru_cache(maxsize=1) + @serialized_lru_cache(maxsize=SCHEMA_PARALLELISM) def get_pk_constraints_for_schema( self, schema_name: str, db_name: str ) -> Dict[str, SnowflakePK]: @@ -422,7 +446,7 @@ def get_pk_constraints_for_schema( constraints[row["table_name"]].column_names.append(row["column_name"]) return constraints - @lru_cache(maxsize=1) + @serialized_lru_cache(maxsize=SCHEMA_PARALLELISM) def get_fk_constraints_for_schema( self, schema_name: str, db_name: str ) -> Dict[str, List[SnowflakeFK]]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py new file mode 100644 index 00000000000000..5a4e37078dd75f --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -0,0 +1,1077 @@ +import concurrent.futures +import logging +import queue +from typing import Dict, Iterable, List, Optional, Union + +from snowflake.connector import SnowflakeConnection + +from datahub.configuration.pattern_utils import is_schema_allowed +from datahub.emitter.mce_builder import ( + make_data_platform_urn, + make_dataset_urn_with_platform_instance, + make_schema_field_urn, + make_tag_urn, +) +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.glossary.classification_mixin import ( + ClassificationHandler, + classification_workunit_processor, +) +from datahub.ingestion.source.common.subtypes import ( + DatasetContainerSubTypes, + DatasetSubTypes, +) +from datahub.ingestion.source.snowflake.constants import ( + GENERIC_PERMISSION_ERROR_KEY, + SNOWFLAKE_DATABASE, + SnowflakeObjectDomain, +) +from 
datahub.ingestion.source.snowflake.snowflake_config import ( + SnowflakeV2Config, + TagOption, +) +from datahub.ingestion.source.snowflake.snowflake_data_reader import SnowflakeDataReader +from datahub.ingestion.source.snowflake.snowflake_profiler import SnowflakeProfiler +from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report +from datahub.ingestion.source.snowflake.snowflake_schema import ( + SCHEMA_PARALLELISM, + SnowflakeColumn, + SnowflakeDatabase, + SnowflakeDataDictionary, + SnowflakeFK, + SnowflakePK, + SnowflakeSchema, + SnowflakeTable, + SnowflakeTag, + SnowflakeView, +) +from datahub.ingestion.source.snowflake.snowflake_tag import SnowflakeTagExtractor +from datahub.ingestion.source.snowflake.snowflake_utils import ( + SnowflakeCommonMixin, + SnowflakeCommonProtocol, + SnowflakeConnectionMixin, + SnowflakePermissionError, + SnowflakeQueryMixin, +) +from datahub.ingestion.source.sql.sql_utils import ( + add_table_to_schema_container, + gen_database_container, + gen_database_key, + gen_schema_container, + gen_schema_key, + get_dataplatform_instance_aspect, + get_domain_wu, +) +from datahub.ingestion.source_report.ingestion_stage import ( + METADATA_EXTRACTION, + PROFILING, +) +from datahub.metadata.com.linkedin.pegasus2avro.common import ( + GlobalTags, + Status, + SubTypes, + TagAssociation, + TimeStamp, +) +from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( + DatasetProperties, + ViewProperties, +) +from datahub.metadata.com.linkedin.pegasus2avro.schema import ( + ArrayType, + BooleanType, + BytesType, + DateType, + ForeignKeyConstraint, + MySqlDDL, + NullType, + NumberType, + RecordType, + SchemaField, + SchemaFieldDataType, + SchemaMetadata, + StringType, + TimeType, +) +from datahub.metadata.com.linkedin.pegasus2avro.tag import TagProperties +from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator +from datahub.utilities.registries.domain_registry import DomainRegistry + +logger = 
logging.getLogger(__name__) + +# https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html +SNOWFLAKE_FIELD_TYPE_MAPPINGS = { + "DATE": DateType, + "BIGINT": NumberType, + "BINARY": BytesType, + # 'BIT': BIT, + "BOOLEAN": BooleanType, + "CHAR": NullType, + "CHARACTER": NullType, + "DATETIME": TimeType, + "DEC": NumberType, + "DECIMAL": NumberType, + "DOUBLE": NumberType, + "FIXED": NumberType, + "FLOAT": NumberType, + "INT": NumberType, + "INTEGER": NumberType, + "NUMBER": NumberType, + # 'OBJECT': ? + "REAL": NumberType, + "BYTEINT": NumberType, + "SMALLINT": NumberType, + "STRING": StringType, + "TEXT": StringType, + "TIME": TimeType, + "TIMESTAMP": TimeType, + "TIMESTAMP_TZ": TimeType, + "TIMESTAMP_LTZ": TimeType, + "TIMESTAMP_NTZ": TimeType, + "TINYINT": NumberType, + "VARBINARY": BytesType, + "VARCHAR": StringType, + "VARIANT": RecordType, + "OBJECT": NullType, + "ARRAY": ArrayType, + "GEOGRAPHY": NullType, +} + + +class SnowflakeSchemaGenerator( + SnowflakeQueryMixin, + SnowflakeConnectionMixin, + SnowflakeCommonMixin, + SnowflakeCommonProtocol, +): + def __init__( + self, + config: SnowflakeV2Config, + report: SnowflakeV2Report, + connection: SnowflakeConnection, + domain_registry: Optional[DomainRegistry], + profiler: Optional[SnowflakeProfiler], + aggregator: Optional[SqlParsingAggregator], + snowsight_base_url: Optional[str], + ) -> None: + self.config: SnowflakeV2Config = config + self.report: SnowflakeV2Report = report + self.connection: SnowflakeConnection = connection + self.logger = logger + + self.data_dictionary: SnowflakeDataDictionary = SnowflakeDataDictionary() + self.data_dictionary.set_connection(self.connection) + self.report.data_dictionary_cache = self.data_dictionary + + self.domain_registry: Optional[DomainRegistry] = domain_registry + self.classification_handler = ClassificationHandler(self.config, self.report) + self.tag_extractor = SnowflakeTagExtractor( + config, self.data_dictionary, self.report + ) + self.profiler: 
Optional[SnowflakeProfiler] = profiler + self.snowsight_base_url: Optional[str] = snowsight_base_url + + # These are populated as side-effects of get_workunits_internal. + self.databases: List[SnowflakeDatabase] = [] + self.aggregator: Optional[SqlParsingAggregator] = aggregator + + def get_connection(self) -> SnowflakeConnection: + return self.connection + + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + self.databases = [] + for database in self.get_databases() or []: + self.report.report_entity_scanned(database.name, "database") + if not self.config.database_pattern.allowed(database.name): + self.report.report_dropped(f"{database.name}.*") + else: + self.databases.append(database) + + if len(self.databases) == 0: + return + + try: + for snowflake_db in self.databases: + self.report.set_ingestion_stage(snowflake_db.name, METADATA_EXTRACTION) + yield from self._process_database(snowflake_db) + + except SnowflakePermissionError as e: + self.report_error(GENERIC_PERMISSION_ERROR_KEY, str(e)) + return + + def get_databases(self) -> Optional[List[SnowflakeDatabase]]: + try: + # `show databases` is required only to get one of the databases + # whose information_schema can be queried to start with. + databases = self.data_dictionary.show_databases() + except Exception as e: + logger.debug(f"Failed to list databases due to error {e}", exc_info=e) + self.report_error( + "list-databases", + f"Failed to list databases due to error {e}", + ) + return None + else: + ischema_databases: List[ + SnowflakeDatabase + ] = self.get_databases_from_ischema(databases) + + if len(ischema_databases) == 0: + self.report_error( + GENERIC_PERMISSION_ERROR_KEY, + "No databases found. 
Please check permissions.", + ) + return ischema_databases + + def get_databases_from_ischema( + self, databases: List[SnowflakeDatabase] + ) -> List[SnowflakeDatabase]: + ischema_databases: List[SnowflakeDatabase] = [] + for database in databases: + try: + ischema_databases = self.data_dictionary.get_databases(database.name) + break + except Exception: + # query fails if "USAGE" access is not granted for database + # This is okay, because `show databases` query lists all databases irrespective of permission, + # if role has `MANAGE GRANTS` privilege. (not advisable) + logger.debug( + f"Failed to list databases {database.name} information_schema" + ) + # SNOWFLAKE database always shows up even if permissions are missing + if database == SNOWFLAKE_DATABASE: + continue + logger.info( + f"The role {self.report.role} has `MANAGE GRANTS` privilege. This is not advisable and also not required." + ) + + return ischema_databases + + def _process_database( + self, snowflake_db: SnowflakeDatabase + ) -> Iterable[MetadataWorkUnit]: + db_name = snowflake_db.name + + try: + pass + # self.query(SnowflakeQuery.use_database(db_name)) + except Exception as e: + if isinstance(e, SnowflakePermissionError): + # This may happen if REFERENCE_USAGE permissions are set + # We can not run show queries on database in such case. + # This need not be a failure case. + self.report_warning( + "Insufficient privileges to operate on database, skipping. 
Please grant USAGE permissions on database to extract its metadata.", + db_name, + ) + else: + logger.debug( + f"Failed to use database {db_name} due to error {e}", + exc_info=e, + ) + self.report_warning( + "Failed to get schemas for database", + db_name, + ) + return + + if self.config.extract_tags != TagOption.skip: + snowflake_db.tags = self.tag_extractor.get_tags_on_object( + domain="database", db_name=db_name + ) + + if self.config.include_technical_schema: + yield from self.gen_database_containers(snowflake_db) + + self.fetch_schemas_for_database(snowflake_db, db_name) + + if self.config.include_technical_schema and snowflake_db.tags: + for tag in snowflake_db.tags: + yield from self._process_tag(tag) + + # Caches tables for a single database. Consider moving to disk or S3 when possible. + db_tables: Dict[str, List[SnowflakeTable]] = {} + yield from self._process_db_schemas(snowflake_db, db_tables) + + if self.profiler and db_tables: + self.report.set_ingestion_stage(snowflake_db.name, PROFILING) + yield from self.profiler.get_workunits(snowflake_db, db_tables) + + def _process_db_schemas( + self, + snowflake_db: SnowflakeDatabase, + db_tables: Dict[str, List[SnowflakeTable]], + ) -> Iterable[MetadataWorkUnit]: + q: "queue.Queue[MetadataWorkUnit]" = queue.Queue(maxsize=100) + + def _process_schema_worker(snowflake_schema: SnowflakeSchema) -> None: + for wu in self._process_schema( + snowflake_schema, snowflake_db.name, db_tables + ): + q.put(wu) + + with concurrent.futures.ThreadPoolExecutor( + max_workers=SCHEMA_PARALLELISM + ) as executor: + futures = [] + for snowflake_schema in snowflake_db.schemas: + f = executor.submit(_process_schema_worker, snowflake_schema) + futures.append(f) + + # Read from the queue and yield the work units until all futures are done. + while True: + if q.empty(): + while not q.empty(): + yield q.get_nowait() + else: + try: + yield q.get(timeout=0.2) + except queue.Empty: + pass + + # Filter out the done futures. 
+ futures = [f for f in futures if not f.done()] + if not futures: + break + + # Yield the remaining work units. This theoretically should not happen, but adding it just in case. + while not q.empty(): + yield q.get_nowait() + + def fetch_schemas_for_database( + self, snowflake_db: SnowflakeDatabase, db_name: str + ) -> None: + schemas: List[SnowflakeSchema] = [] + try: + for schema in self.data_dictionary.get_schemas_for_database(db_name): + self.report.report_entity_scanned(schema.name, "schema") + if not is_schema_allowed( + self.config.schema_pattern, + schema.name, + db_name, + self.config.match_fully_qualified_names, + ): + self.report.report_dropped(f"{db_name}.{schema.name}.*") + else: + schemas.append(schema) + except Exception as e: + if isinstance(e, SnowflakePermissionError): + error_msg = f"Failed to get schemas for database {db_name}. Please check permissions." + # Ideal implementation would use PEP 678 – Enriching Exceptions with Notes + raise SnowflakePermissionError(error_msg) from e.__cause__ + else: + logger.debug( + f"Failed to get schemas for database {db_name} due to error {e}", + exc_info=e, + ) + self.report_warning( + "Failed to get schemas for database", + db_name, + ) + + if not schemas: + self.report_warning( + "No schemas found in database. 
If schemas exist, please grant USAGE permissions on them.", + db_name, + ) + else: + snowflake_db.schemas = schemas + + def _process_schema( + self, + snowflake_schema: SnowflakeSchema, + db_name: str, + db_tables: Dict[str, List[SnowflakeTable]], + ) -> Iterable[MetadataWorkUnit]: + schema_name = snowflake_schema.name + + if self.config.extract_tags != TagOption.skip: + snowflake_schema.tags = self.tag_extractor.get_tags_on_object( + schema_name=schema_name, db_name=db_name, domain="schema" + ) + + if self.config.include_technical_schema: + yield from self.gen_schema_containers(snowflake_schema, db_name) + + if self.config.include_tables: + tables = self.fetch_tables_for_schema( + snowflake_schema, db_name, schema_name + ) + db_tables[schema_name] = tables + + if self.config.include_technical_schema: + data_reader = self.make_data_reader() + for table in tables: + table_wu_generator = self._process_table( + table, schema_name, db_name + ) + yield from classification_workunit_processor( + table_wu_generator, + self.classification_handler, + data_reader, + [db_name, schema_name, table.name], + ) + + if self.config.include_views: + views = self.fetch_views_for_schema(snowflake_schema, db_name, schema_name) + if ( + self.aggregator + and self.config.include_view_lineage + and self.config.parse_view_ddl + ): + for view in views: + view_identifier = self.get_dataset_identifier( + view.name, schema_name, db_name + ) + if view.view_definition: + self.aggregator.add_view_definition( + view_urn=self.gen_dataset_urn(view_identifier), + view_definition=view.view_definition, + default_db=db_name, + default_schema=schema_name, + ) + + if self.config.include_technical_schema: + for view in views: + yield from self._process_view(view, schema_name, db_name) + + if self.config.include_technical_schema and snowflake_schema.tags: + for tag in snowflake_schema.tags: + yield from self._process_tag(tag) + + if not snowflake_schema.views and not snowflake_schema.tables: + 
self.report_warning( + "No tables/views found in schema. If tables exist, please grant REFERENCES or SELECT permissions on them.", + f"{db_name}.{schema_name}", + ) + + def fetch_views_for_schema( + self, snowflake_schema: SnowflakeSchema, db_name: str, schema_name: str + ) -> List[SnowflakeView]: + try: + views: List[SnowflakeView] = [] + for view in self.get_views_for_schema(schema_name, db_name): + view_name = self.get_dataset_identifier(view.name, schema_name, db_name) + + self.report.report_entity_scanned(view_name, "view") + + if not self.config.view_pattern.allowed(view_name): + self.report.report_dropped(view_name) + else: + views.append(view) + snowflake_schema.views = [view.name for view in views] + return views + except Exception as e: + if isinstance(e, SnowflakePermissionError): + # Ideal implementation would use PEP 678 – Enriching Exceptions with Notes + error_msg = f"Failed to get views for schema {db_name}.{schema_name}. Please check permissions." + + raise SnowflakePermissionError(error_msg) from e.__cause__ + else: + logger.debug( + f"Failed to get views for schema {db_name}.{schema_name} due to error {e}", + exc_info=e, + ) + self.report_warning( + "Failed to get views for schema", + f"{db_name}.{schema_name}", + ) + return [] + + def fetch_tables_for_schema( + self, snowflake_schema: SnowflakeSchema, db_name: str, schema_name: str + ) -> List[SnowflakeTable]: + try: + tables: List[SnowflakeTable] = [] + for table in self.get_tables_for_schema(schema_name, db_name): + table_identifier = self.get_dataset_identifier( + table.name, schema_name, db_name + ) + self.report.report_entity_scanned(table_identifier) + if not self.config.table_pattern.allowed(table_identifier): + self.report.report_dropped(table_identifier) + else: + tables.append(table) + snowflake_schema.tables = [table.name for table in tables] + return tables + except Exception as e: + if isinstance(e, SnowflakePermissionError): + # Ideal implementation would use PEP 678 – Enriching 
Exceptions with Notes + error_msg = f"Failed to get tables for schema {db_name}.{schema_name}. Please check permissions." + raise SnowflakePermissionError(error_msg) from e.__cause__ + else: + logger.debug( + f"Failed to get tables for schema {db_name}.{schema_name} due to error {e}", + exc_info=e, + ) + self.report_warning( + "Failed to get tables for schema", + f"{db_name}.{schema_name}", + ) + return [] + + def make_data_reader(self) -> Optional[SnowflakeDataReader]: + if self.classification_handler.is_classification_enabled() and self.connection: + return SnowflakeDataReader.create( + self.connection, self.snowflake_identifier + ) + + return None + + def _process_table( + self, + table: SnowflakeTable, + schema_name: str, + db_name: str, + ) -> Iterable[MetadataWorkUnit]: + table_identifier = self.get_dataset_identifier(table.name, schema_name, db_name) + + self.fetch_columns_for_table(table, schema_name, db_name, table_identifier) + + self.fetch_pk_for_table(table, schema_name, db_name, table_identifier) + + self.fetch_foreign_keys_for_table(table, schema_name, db_name, table_identifier) + + if self.config.extract_tags != TagOption.skip: + table.tags = self.tag_extractor.get_tags_on_object( + table_name=table.name, + schema_name=schema_name, + db_name=db_name, + domain="table", + ) + + if self.config.include_technical_schema: + if table.tags: + for tag in table.tags: + yield from self._process_tag(tag) + for column_name in table.column_tags: + for tag in table.column_tags[column_name]: + yield from self._process_tag(tag) + + yield from self.gen_dataset_workunits(table, schema_name, db_name) + + def fetch_foreign_keys_for_table( + self, + table: SnowflakeTable, + schema_name: str, + db_name: str, + table_identifier: str, + ) -> None: + try: + table.foreign_keys = self.get_fk_constraints_for_table( + table.name, schema_name, db_name + ) + except Exception as e: + logger.debug( + f"Failed to get foreign key for table {table_identifier} due to error {e}", + 
exc_info=e, + ) + self.report_warning("Failed to get foreign key for table", table_identifier) + + def fetch_pk_for_table( + self, + table: SnowflakeTable, + schema_name: str, + db_name: str, + table_identifier: str, + ) -> None: + try: + table.pk = self.get_pk_constraints_for_table( + table.name, schema_name, db_name + ) + except Exception as e: + logger.debug( + f"Failed to get primary key for table {table_identifier} due to error {e}", + exc_info=e, + ) + self.report_warning("Failed to get primary key for table", table_identifier) + + def fetch_columns_for_table( + self, + table: SnowflakeTable, + schema_name: str, + db_name: str, + table_identifier: str, + ) -> None: + try: + table.columns = self.get_columns_for_table(table.name, schema_name, db_name) + table.column_count = len(table.columns) + if self.config.extract_tags != TagOption.skip: + table.column_tags = self.tag_extractor.get_column_tags_for_table( + table.name, schema_name, db_name + ) + except Exception as e: + logger.debug( + f"Failed to get columns for table {table_identifier} due to error {e}", + exc_info=e, + ) + self.report_warning("Failed to get columns for table", table_identifier) + + def _process_view( + self, + view: SnowflakeView, + schema_name: str, + db_name: str, + ) -> Iterable[MetadataWorkUnit]: + view_name = self.get_dataset_identifier(view.name, schema_name, db_name) + + try: + view.columns = self.get_columns_for_table(view.name, schema_name, db_name) + if self.config.extract_tags != TagOption.skip: + view.column_tags = self.tag_extractor.get_column_tags_for_table( + view.name, schema_name, db_name + ) + except Exception as e: + logger.debug( + f"Failed to get columns for view {view_name} due to error {e}", + exc_info=e, + ) + self.report_warning("Failed to get columns for view", view_name) + + if self.config.extract_tags != TagOption.skip: + view.tags = self.tag_extractor.get_tags_on_object( + table_name=view.name, + schema_name=schema_name, + db_name=db_name, + domain="table", + ) 
+ + if self.config.include_technical_schema: + if view.tags: + for tag in view.tags: + yield from self._process_tag(tag) + for column_name in view.column_tags: + for tag in view.column_tags[column_name]: + yield from self._process_tag(tag) + + yield from self.gen_dataset_workunits(view, schema_name, db_name) + + def _process_tag(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: + tag_identifier = tag.identifier() + + if self.report.is_tag_processed(tag_identifier): + return + + self.report.report_tag_processed(tag_identifier) + + yield from self.gen_tag_workunits(tag) + + def gen_dataset_workunits( + self, + table: Union[SnowflakeTable, SnowflakeView], + schema_name: str, + db_name: str, + ) -> Iterable[MetadataWorkUnit]: + dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name) + dataset_urn = self.gen_dataset_urn(dataset_name) + + status = Status(removed=False) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=status + ).as_workunit() + + schema_metadata = self.gen_schema_metadata(table, schema_name, db_name) + + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=schema_metadata + ).as_workunit() + + dataset_properties = self.get_dataset_properties(table, schema_name, db_name) + + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=dataset_properties + ).as_workunit() + + schema_container_key = gen_schema_key( + db_name=self.snowflake_identifier(db_name), + schema=self.snowflake_identifier(schema_name), + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + + yield from add_table_to_schema_container( + dataset_urn=dataset_urn, + parent_container_key=schema_container_key, + ) + dpi_aspect = get_dataplatform_instance_aspect( + dataset_urn=dataset_urn, + platform=self.platform, + platform_instance=self.config.platform_instance, + ) + if dpi_aspect: + yield dpi_aspect + + subTypes = SubTypes( + typeNames=( + [DatasetSubTypes.VIEW] + if 
isinstance(table, SnowflakeView) + else [DatasetSubTypes.TABLE] + ) + ) + + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=subTypes + ).as_workunit() + + if self.domain_registry: + yield from get_domain_wu( + dataset_name=dataset_name, + entity_urn=dataset_urn, + domain_config=self.config.domain, + domain_registry=self.domain_registry, + ) + + if table.tags: + tag_associations = [ + TagAssociation( + tag=make_tag_urn(self.snowflake_identifier(tag.identifier())) + ) + for tag in table.tags + ] + global_tags = GlobalTags(tag_associations) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=global_tags + ).as_workunit() + + if isinstance(table, SnowflakeView) and table.view_definition is not None: + view_properties_aspect = ViewProperties( + materialized=table.materialized, + viewLanguage="SQL", + viewLogic=table.view_definition, + ) + + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=view_properties_aspect + ).as_workunit() + + def get_dataset_properties( + self, + table: Union[SnowflakeTable, SnowflakeView], + schema_name: str, + db_name: str, + ) -> DatasetProperties: + return DatasetProperties( + name=table.name, + created=( + TimeStamp(time=int(table.created.timestamp() * 1000)) + if table.created is not None + else None + ), + lastModified=( + TimeStamp(time=int(table.last_altered.timestamp() * 1000)) + if table.last_altered is not None + else ( + TimeStamp(time=int(table.created.timestamp() * 1000)) + if table.created is not None + else None + ) + ), + description=table.comment, + qualifiedName=f"{db_name}.{schema_name}.{table.name}", + customProperties={}, + externalUrl=( + self.get_external_url_for_table( + table.name, + schema_name, + db_name, + ( + SnowflakeObjectDomain.TABLE + if isinstance(table, SnowflakeTable) + else SnowflakeObjectDomain.VIEW + ), + ) + if self.config.include_external_url + else None + ), + ) + + def gen_tag_workunits(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: + 
tag_urn = make_tag_urn(self.snowflake_identifier(tag.identifier())) + + tag_properties_aspect = TagProperties( + name=tag.display_name(), + description=f"Represents the Snowflake tag `{tag._id_prefix_as_str()}` with value `{tag.value}`.", + ) + + yield MetadataChangeProposalWrapper( + entityUrn=tag_urn, aspect=tag_properties_aspect + ).as_workunit() + + def gen_schema_metadata( + self, + table: Union[SnowflakeTable, SnowflakeView], + schema_name: str, + db_name: str, + ) -> SchemaMetadata: + dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name) + dataset_urn = self.gen_dataset_urn(dataset_name) + + foreign_keys: Optional[List[ForeignKeyConstraint]] = None + if isinstance(table, SnowflakeTable) and len(table.foreign_keys) > 0: + foreign_keys = self.build_foreign_keys(table, dataset_urn) + + schema_metadata = SchemaMetadata( + schemaName=dataset_name, + platform=make_data_platform_urn(self.platform), + version=0, + hash="", + platformSchema=MySqlDDL(tableSchema=""), + fields=[ + SchemaField( + fieldPath=self.snowflake_identifier(col.name), + type=SchemaFieldDataType( + SNOWFLAKE_FIELD_TYPE_MAPPINGS.get(col.data_type, NullType)() + ), + # NOTE: nativeDataType will not be in sync with older connector + nativeDataType=col.get_precise_native_type(), + description=col.comment, + nullable=col.is_nullable, + isPartOfKey=( + col.name in table.pk.column_names + if isinstance(table, SnowflakeTable) and table.pk is not None + else None + ), + globalTags=( + GlobalTags( + [ + TagAssociation( + make_tag_urn( + self.snowflake_identifier(tag.identifier()) + ) + ) + for tag in table.column_tags[col.name] + ] + ) + if col.name in table.column_tags + else None + ), + ) + for col in table.columns + ], + foreignKeys=foreign_keys, + ) + + if self.aggregator: + self.aggregator.register_schema(urn=dataset_urn, schema=schema_metadata) + + return schema_metadata + + def build_foreign_keys( + self, table: SnowflakeTable, dataset_urn: str + ) -> 
List[ForeignKeyConstraint]: + foreign_keys = [] + for fk in table.foreign_keys: + foreign_dataset = make_dataset_urn_with_platform_instance( + platform=self.platform, + name=self.get_dataset_identifier( + fk.referred_table, fk.referred_schema, fk.referred_database + ), + env=self.config.env, + platform_instance=self.config.platform_instance, + ) + foreign_keys.append( + ForeignKeyConstraint( + name=fk.name, + foreignDataset=foreign_dataset, + foreignFields=[ + make_schema_field_urn( + foreign_dataset, + self.snowflake_identifier(col), + ) + for col in fk.referred_column_names + ], + sourceFields=[ + make_schema_field_urn( + dataset_urn, + self.snowflake_identifier(col), + ) + for col in fk.column_names + ], + ) + ) + return foreign_keys + + def gen_database_containers( + self, database: SnowflakeDatabase + ) -> Iterable[MetadataWorkUnit]: + database_container_key = gen_database_key( + self.snowflake_identifier(database.name), + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + + yield from gen_database_container( + name=database.name, + database=self.snowflake_identifier(database.name), + database_container_key=database_container_key, + sub_types=[DatasetContainerSubTypes.DATABASE], + domain_registry=self.domain_registry, + domain_config=self.config.domain, + external_url=( + self.get_external_url_for_database(database.name) + if self.config.include_external_url + else None + ), + description=database.comment, + created=( + int(database.created.timestamp() * 1000) + if database.created is not None + else None + ), + last_modified=( + int(database.last_altered.timestamp() * 1000) + if database.last_altered is not None + else ( + int(database.created.timestamp() * 1000) + if database.created is not None + else None + ) + ), + tags=( + [self.snowflake_identifier(tag.identifier()) for tag in database.tags] + if database.tags + else None + ), + ) + + def gen_schema_containers( + self, schema: SnowflakeSchema, 
db_name: str + ) -> Iterable[MetadataWorkUnit]: + schema_name = self.snowflake_identifier(schema.name) + database_container_key = gen_database_key( + database=self.snowflake_identifier(db_name), + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + + schema_container_key = gen_schema_key( + db_name=self.snowflake_identifier(db_name), + schema=schema_name, + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + + yield from gen_schema_container( + name=schema.name, + schema=self.snowflake_identifier(schema.name), + database=self.snowflake_identifier(db_name), + database_container_key=database_container_key, + domain_config=self.config.domain, + schema_container_key=schema_container_key, + sub_types=[DatasetContainerSubTypes.SCHEMA], + domain_registry=self.domain_registry, + description=schema.comment, + external_url=( + self.get_external_url_for_schema(schema.name, db_name) + if self.config.include_external_url + else None + ), + created=( + int(schema.created.timestamp() * 1000) + if schema.created is not None + else None + ), + last_modified=( + int(schema.last_altered.timestamp() * 1000) + if schema.last_altered is not None + else ( + int(schema.created.timestamp() * 1000) + if schema.created is not None + else None + ) + ), + tags=( + [self.snowflake_identifier(tag.identifier()) for tag in schema.tags] + if schema.tags + else None + ), + ) + + def get_tables_for_schema( + self, schema_name: str, db_name: str + ) -> List[SnowflakeTable]: + tables = self.data_dictionary.get_tables_for_database(db_name) + + # get all tables for database failed, + # falling back to get tables for schema + if tables is None: + self.report.num_get_tables_for_schema_queries += 1 + return self.data_dictionary.get_tables_for_schema(schema_name, db_name) + + # Some schema may not have any table + return tables.get(schema_name, []) + + def get_views_for_schema( + self, schema_name: str, 
db_name: str + ) -> List[SnowflakeView]: + views = self.data_dictionary.get_views_for_database(db_name) + + # get all views for database failed, + # falling back to get views for schema + if views is None: + self.report.num_get_views_for_schema_queries += 1 + return self.data_dictionary.get_views_for_schema(schema_name, db_name) + + # Some schema may not have any table + return views.get(schema_name, []) + + def get_columns_for_table( + self, table_name: str, schema_name: str, db_name: str + ) -> List[SnowflakeColumn]: + columns = self.data_dictionary.get_columns_for_schema(schema_name, db_name) + + # get all columns for schema failed, + # falling back to get columns for table + if columns is None: + self.report.num_get_columns_for_table_queries += 1 + return self.data_dictionary.get_columns_for_table( + table_name, schema_name, db_name + ) + + # Access to table but none of its columns - is this possible ? + return columns.get(table_name, []) + + def get_pk_constraints_for_table( + self, table_name: str, schema_name: str, db_name: str + ) -> Optional[SnowflakePK]: + constraints = self.data_dictionary.get_pk_constraints_for_schema( + schema_name, db_name + ) + + # Access to table but none of its constraints - is this possible ? + return constraints.get(table_name) + + def get_fk_constraints_for_table( + self, table_name: str, schema_name: str, db_name: str + ) -> List[SnowflakeFK]: + constraints = self.data_dictionary.get_fk_constraints_for_schema( + schema_name, db_name + ) + + # Access to table but none of its constraints - is this possible ? 
+ return constraints.get(table_name, []) + + # domain is either "view" or "table" + def get_external_url_for_table( + self, table_name: str, schema_name: str, db_name: str, domain: str + ) -> Optional[str]: + if self.snowsight_base_url is not None: + return f"{self.snowsight_base_url}#/data/databases/{db_name}/schemas/{schema_name}/{domain}/{table_name}/" + return None + + def get_external_url_for_schema( + self, schema_name: str, db_name: str + ) -> Optional[str]: + if self.snowsight_base_url is not None: + return f"{self.snowsight_base_url}#/data/databases/{db_name}/schemas/{schema_name}/" + return None + + def get_external_url_for_database(self, db_name: str) -> Optional[str]: + if self.snowsight_base_url is not None: + return f"{self.snowsight_base_url}#/data/databases/{db_name}/" + return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_summary.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_summary.py index ef08866ccd3ede..cd6f17092e810a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_summary.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_summary.py @@ -17,12 +17,14 @@ SnowflakeDatabase, SnowflakeDataDictionary, ) +from datahub.ingestion.source.snowflake.snowflake_schema_gen import ( + SnowflakeSchemaGenerator, +) from datahub.ingestion.source.snowflake.snowflake_utils import ( SnowflakeCommonMixin, SnowflakeConnectionMixin, SnowflakeQueryMixin, ) -from datahub.ingestion.source.snowflake.snowflake_v2 import SnowflakeV2Source from datahub.ingestion.source_config.sql.snowflake import BaseSnowflakeConfig from datahub.ingestion.source_report.time_window import BaseTimeWindowReport from datahub.utilities.lossy_collections import LossyList @@ -167,13 +169,13 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: # This is a bit of a hack, but lets us reuse the code from the main ingestion source. 
# Mypy doesn't really know how to deal with it though, which is why we have all these # type ignore comments. - get_databases = SnowflakeV2Source.get_databases - get_databases_from_ischema = SnowflakeV2Source.get_databases_from_ischema - fetch_schemas_for_database = SnowflakeV2Source.fetch_schemas_for_database - fetch_tables_for_schema = SnowflakeV2Source.fetch_tables_for_schema - fetch_views_for_schema = SnowflakeV2Source.fetch_views_for_schema - get_tables_for_schema = SnowflakeV2Source.get_tables_for_schema - get_views_for_schema = SnowflakeV2Source.get_views_for_schema + get_databases = SnowflakeSchemaGenerator.get_databases + get_databases_from_ischema = SnowflakeSchemaGenerator.get_databases_from_ischema + fetch_schemas_for_database = SnowflakeSchemaGenerator.fetch_schemas_for_database + fetch_tables_for_schema = SnowflakeSchemaGenerator.fetch_tables_for_schema + fetch_views_for_schema = SnowflakeSchemaGenerator.fetch_views_for_schema + get_tables_for_schema = SnowflakeSchemaGenerator.get_tables_for_schema + get_views_for_schema = SnowflakeSchemaGenerator.get_views_for_schema def get_report(self) -> SnowflakeSummaryReport: return self.report diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py index adcc4ba09d8c9e..02942556093f9d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py @@ -7,6 +7,7 @@ from datahub.configuration.common import MetaError from datahub.configuration.pattern_utils import is_schema_allowed +from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance from datahub.ingestion.source.snowflake.constants import ( GENERIC_PERMISSION_ERROR_KEY, SNOWFLAKE_REGION_CLOUD_REGION_MAPPING, @@ -48,6 +49,8 @@ def query(self: SnowflakeQueryProtocol, query: str) -> Any: class 
SnowflakeCommonProtocol(SnowflakeLoggingProtocol, Protocol): + platform: str = "snowflake" + config: SnowflakeV2Config report: SnowflakeV2Report @@ -178,6 +181,14 @@ def snowflake_identifier(self: SnowflakeCommonProtocol, identifier: str) -> str: return identifier.lower() return identifier + def gen_dataset_urn(self: SnowflakeCommonProtocol, dataset_identifier: str) -> str: + return make_dataset_urn_with_platform_instance( + platform=self.platform, + name=dataset_identifier, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + @staticmethod def get_quoted_identifier_for_database(db_name): return f'"{db_name}"' diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index f155ac24fea3fc..06d7042e02456c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -5,18 +5,10 @@ import os.path import platform from dataclasses import dataclass -from typing import Callable, Dict, Iterable, List, Optional, Union +from typing import Dict, Iterable, List, Optional, Union from snowflake.connector import SnowflakeConnection -from datahub.configuration.pattern_utils import is_schema_allowed -from datahub.emitter.mce_builder import ( - make_data_platform_urn, - make_dataset_urn_with_platform_instance, - make_schema_field_urn, - make_tag_urn, -) -from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, @@ -36,65 +28,35 @@ TestConnectionReport, ) from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.glossary.classification_mixin import ( - ClassificationHandler, - classification_workunit_processor, -) -from datahub.ingestion.source.common.subtypes import ( - DatasetContainerSubTypes, - 
DatasetSubTypes, -) from datahub.ingestion.source.snowflake.constants import ( GENERIC_PERMISSION_ERROR_KEY, - SNOWFLAKE_DATABASE, SnowflakeEdition, - SnowflakeObjectDomain, ) from datahub.ingestion.source.snowflake.snowflake_assertion import ( SnowflakeAssertionsHandler, ) -from datahub.ingestion.source.snowflake.snowflake_config import ( - SnowflakeV2Config, - TagOption, -) -from datahub.ingestion.source.snowflake.snowflake_data_reader import SnowflakeDataReader +from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_lineage_v2 import ( SnowflakeLineageExtractor, ) from datahub.ingestion.source.snowflake.snowflake_profiler import SnowflakeProfiler from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report from datahub.ingestion.source.snowflake.snowflake_schema import ( - SnowflakeColumn, - SnowflakeDatabase, SnowflakeDataDictionary, - SnowflakeFK, - SnowflakePK, SnowflakeQuery, - SnowflakeSchema, - SnowflakeTable, - SnowflakeTag, - SnowflakeView, +) +from datahub.ingestion.source.snowflake.snowflake_schema_gen import ( + SnowflakeSchemaGenerator, ) from datahub.ingestion.source.snowflake.snowflake_shares import SnowflakeSharesHandler -from datahub.ingestion.source.snowflake.snowflake_tag import SnowflakeTagExtractor from datahub.ingestion.source.snowflake.snowflake_usage_v2 import ( SnowflakeUsageExtractor, ) from datahub.ingestion.source.snowflake.snowflake_utils import ( SnowflakeCommonMixin, SnowflakeConnectionMixin, - SnowflakePermissionError, SnowflakeQueryMixin, ) -from datahub.ingestion.source.sql.sql_utils import ( - add_table_to_schema_container, - gen_database_container, - gen_database_key, - gen_schema_container, - gen_schema_key, - get_dataplatform_instance_aspect, - get_domain_wu, -) from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler from datahub.ingestion.source.state.redundant_run_skip_handler import ( 
RedundantLineageRunSkipHandler, @@ -110,79 +72,12 @@ from datahub.ingestion.source_report.ingestion_stage import ( LINEAGE_EXTRACTION, METADATA_EXTRACTION, - PROFILING, -) -from datahub.metadata.com.linkedin.pegasus2avro.common import ( - GlobalTags, - Status, - SubTypes, - TagAssociation, - TimeStamp, ) -from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( - DatasetProperties, - ViewProperties, -) -from datahub.metadata.com.linkedin.pegasus2avro.schema import ( - ArrayType, - BooleanType, - BytesType, - DateType, - ForeignKeyConstraint, - MySqlDDL, - NullType, - NumberType, - RecordType, - SchemaField, - SchemaFieldDataType, - SchemaMetadata, - StringType, - TimeType, -) -from datahub.metadata.com.linkedin.pegasus2avro.tag import TagProperties from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator from datahub.utilities.registries.domain_registry import DomainRegistry logger: logging.Logger = logging.getLogger(__name__) -# https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html -SNOWFLAKE_FIELD_TYPE_MAPPINGS = { - "DATE": DateType, - "BIGINT": NumberType, - "BINARY": BytesType, - # 'BIT': BIT, - "BOOLEAN": BooleanType, - "CHAR": NullType, - "CHARACTER": NullType, - "DATETIME": TimeType, - "DEC": NumberType, - "DECIMAL": NumberType, - "DOUBLE": NumberType, - "FIXED": NumberType, - "FLOAT": NumberType, - "INT": NumberType, - "INTEGER": NumberType, - "NUMBER": NumberType, - # 'OBJECT': ? 
- "REAL": NumberType, - "BYTEINT": NumberType, - "SMALLINT": NumberType, - "STRING": StringType, - "TEXT": StringType, - "TIME": TimeType, - "TIMESTAMP": TimeType, - "TIMESTAMP_TZ": TimeType, - "TIMESTAMP_LTZ": TimeType, - "TIMESTAMP_NTZ": TimeType, - "TINYINT": NumberType, - "VARBINARY": BytesType, - "VARCHAR": StringType, - "VARIANT": RecordType, - "OBJECT": NullType, - "ARRAY": ArrayType, - "GEOGRAPHY": NullType, -} - @platform_name("Snowflake", doc_order=1) @config_class(SnowflakeV2Config) @@ -235,7 +130,6 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): self.config: SnowflakeV2Config = config self.report: SnowflakeV2Report = SnowflakeV2Report() self.logger = logger - self.snowsight_base_url: Optional[str] = None self.connection: Optional[SnowflakeConnection] = None self.domain_registry: Optional[DomainRegistry] = None @@ -309,10 +203,6 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): redundant_run_skip_handler=redundant_usage_run_skip_handler, ) - self.tag_extractor = SnowflakeTagExtractor( - config, self.data_dictionary, self.report - ) - self.profiling_state_handler: Optional[ProfilingHandler] = None if self.config.enable_stateful_profiling: self.profiling_state_handler = ProfilingHandler( @@ -322,16 +212,13 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): run_id=self.ctx.run_id, ) + # For profiling + self.profiler: Optional[SnowflakeProfiler] = None if config.is_profiling_enabled(): - # For profiling self.profiler = SnowflakeProfiler( config, self.report, self.profiling_state_handler ) - self.classification_handler = ClassificationHandler(self.config, self.report) - - # Caches tables for a single database. Consider moving to disk or S3 when possible. 
- self.db_tables: Dict[str, List[SnowflakeTable]] = {} self.add_config_to_report() @classmethod @@ -543,41 +430,31 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.inspect_session_metadata() + snowsight_base_url = None if self.config.include_external_url: - self.snowsight_base_url = self.get_snowsight_base_url() + snowsight_base_url = self.get_snowsight_base_url() if self.report.default_warehouse is None: self.report_warehouse_failure() return - self.data_dictionary.set_connection(self.connection) - databases: List[SnowflakeDatabase] = [] - - for database in self.get_databases() or []: - self.report.report_entity_scanned(database.name, "database") - if not self.config.database_pattern.allowed(database.name): - self.report.report_dropped(f"{database.name}.*") - else: - databases.append(database) - - if len(databases) == 0: - return + schema_extractor = SnowflakeSchemaGenerator( + config=self.config, + report=self.report, + connection=self.connection, + domain_registry=self.domain_registry, + profiler=self.profiler, + aggregator=self.aggregator, + snowsight_base_url=snowsight_base_url, + ) - for snowflake_db in databases: - try: - self.report.set_ingestion_stage(snowflake_db.name, METADATA_EXTRACTION) - yield from self._process_database(snowflake_db) + self.report.set_ingestion_stage("*", METADATA_EXTRACTION) + yield from schema_extractor.get_workunits_internal() - except SnowflakePermissionError as e: - # FIXME - This may break stateful ingestion if new tables than previous run are emitted above - # and stateful ingestion is enabled - self.report_error(GENERIC_PERMISSION_ERROR_KEY, str(e)) - return + databases = schema_extractor.databases self.connection.close() - self.report_cache_info() - # TODO: The checkpoint state for stale entity detection can be committed here. 
if self.config.shares: @@ -624,17 +501,6 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.config, self.report, self.gen_dataset_urn ).get_assertion_workunits(discovered_datasets) - def report_cache_info(self) -> None: - lru_cache_functions: List[Callable] = [ - self.data_dictionary.get_tables_for_database, - self.data_dictionary.get_views_for_database, - self.data_dictionary.get_columns_for_schema, - self.data_dictionary.get_pk_constraints_for_schema, - self.data_dictionary.get_fk_constraints_for_schema, - ] - for func in lru_cache_functions: - self.report.lru_cache_info[func.__name__] = func.cache_info()._asdict() # type: ignore - def report_warehouse_failure(self) -> None: if self.config.warehouse is not None: self.report_error( @@ -647,828 +513,9 @@ def report_warehouse_failure(self) -> None: "No default warehouse set for user. Either set default warehouse for user or configure warehouse in recipe.", ) - def get_databases(self) -> Optional[List[SnowflakeDatabase]]: - try: - # `show databases` is required only to get one of the databases - # whose information_schema can be queried to start with. - databases = self.data_dictionary.show_databases() - except Exception as e: - logger.debug(f"Failed to list databases due to error {e}", exc_info=e) - self.report_error( - "list-databases", - f"Failed to list databases due to error {e}", - ) - return None - else: - ischema_databases: List[ - SnowflakeDatabase - ] = self.get_databases_from_ischema(databases) - - if len(ischema_databases) == 0: - self.report_error( - GENERIC_PERMISSION_ERROR_KEY, - "No databases found. 
Please check permissions.", - ) - return ischema_databases - - def get_databases_from_ischema( - self, databases: List[SnowflakeDatabase] - ) -> List[SnowflakeDatabase]: - ischema_databases: List[SnowflakeDatabase] = [] - for database in databases: - try: - ischema_databases = self.data_dictionary.get_databases(database.name) - break - except Exception: - # query fails if "USAGE" access is not granted for database - # This is okay, because `show databases` query lists all databases irrespective of permission, - # if role has `MANAGE GRANTS` privilege. (not advisable) - logger.debug( - f"Failed to list databases {database.name} information_schema" - ) - # SNOWFLAKE database always shows up even if permissions are missing - if database == SNOWFLAKE_DATABASE: - continue - logger.info( - f"The role {self.report.role} has `MANAGE GRANTS` privilege. This is not advisable and also not required." - ) - - return ischema_databases - - def _process_database( - self, snowflake_db: SnowflakeDatabase - ) -> Iterable[MetadataWorkUnit]: - db_name = snowflake_db.name - - try: - self.query(SnowflakeQuery.use_database(db_name)) - except Exception as e: - if isinstance(e, SnowflakePermissionError): - # This may happen if REFERENCE_USAGE permissions are set - # We can not run show queries on database in such case. - # This need not be a failure case. - self.report_warning( - "Insufficient privileges to operate on database, skipping. 
Please grant USAGE permissions on database to extract its metadata.", - db_name, - ) - else: - logger.debug( - f"Failed to use database {db_name} due to error {e}", - exc_info=e, - ) - self.report_warning( - "Failed to get schemas for database", - db_name, - ) - return - - if self.config.extract_tags != TagOption.skip: - snowflake_db.tags = self.tag_extractor.get_tags_on_object( - domain="database", db_name=db_name - ) - - if self.config.include_technical_schema: - yield from self.gen_database_containers(snowflake_db) - - self.fetch_schemas_for_database(snowflake_db, db_name) - - if self.config.include_technical_schema and snowflake_db.tags: - for tag in snowflake_db.tags: - yield from self._process_tag(tag) - - self.db_tables = {} - for snowflake_schema in snowflake_db.schemas: - yield from self._process_schema(snowflake_schema, db_name) - - if self.config.is_profiling_enabled() and self.db_tables: - self.report.set_ingestion_stage(snowflake_db.name, PROFILING) - yield from self.profiler.get_workunits(snowflake_db, self.db_tables) - - def fetch_schemas_for_database( - self, snowflake_db: SnowflakeDatabase, db_name: str - ) -> None: - schemas: List[SnowflakeSchema] = [] - try: - for schema in self.data_dictionary.get_schemas_for_database(db_name): - self.report.report_entity_scanned(schema.name, "schema") - if not is_schema_allowed( - self.config.schema_pattern, - schema.name, - db_name, - self.config.match_fully_qualified_names, - ): - self.report.report_dropped(f"{db_name}.{schema.name}.*") - else: - schemas.append(schema) - except Exception as e: - if isinstance(e, SnowflakePermissionError): - error_msg = f"Failed to get schemas for database {db_name}. Please check permissions." 
- # Ideal implementation would use PEP 678 – Enriching Exceptions with Notes - raise SnowflakePermissionError(error_msg) from e.__cause__ - else: - logger.debug( - f"Failed to get schemas for database {db_name} due to error {e}", - exc_info=e, - ) - self.report_warning( - "Failed to get schemas for database", - db_name, - ) - - if not schemas: - self.report_warning( - "No schemas found in database. If schemas exist, please grant USAGE permissions on them.", - db_name, - ) - else: - snowflake_db.schemas = schemas - - def _process_schema( - self, snowflake_schema: SnowflakeSchema, db_name: str - ) -> Iterable[MetadataWorkUnit]: - schema_name = snowflake_schema.name - - if self.config.extract_tags != TagOption.skip: - snowflake_schema.tags = self.tag_extractor.get_tags_on_object( - schema_name=schema_name, db_name=db_name, domain="schema" - ) - - if self.config.include_technical_schema: - yield from self.gen_schema_containers(snowflake_schema, db_name) - - if self.config.include_tables: - tables = self.fetch_tables_for_schema( - snowflake_schema, db_name, schema_name - ) - self.db_tables[schema_name] = tables - - if self.config.include_technical_schema: - data_reader = self.make_data_reader() - for table in tables: - table_wu_generator = self._process_table( - table, schema_name, db_name - ) - yield from classification_workunit_processor( - table_wu_generator, - self.classification_handler, - data_reader, - [db_name, schema_name, table.name], - ) - - if self.config.include_views: - views = self.fetch_views_for_schema(snowflake_schema, db_name, schema_name) - if ( - self.aggregator - and self.config.include_view_lineage - and self.config.parse_view_ddl - ): - for view in views: - view_identifier = self.get_dataset_identifier( - view.name, schema_name, db_name - ) - if view.view_definition: - self.aggregator.add_view_definition( - view_urn=self.gen_dataset_urn(view_identifier), - view_definition=view.view_definition, - default_db=db_name, - default_schema=schema_name, - 
) - - if self.config.include_technical_schema: - for view in views: - yield from self._process_view(view, schema_name, db_name) - - if self.config.include_technical_schema and snowflake_schema.tags: - for tag in snowflake_schema.tags: - yield from self._process_tag(tag) - - if not snowflake_schema.views and not snowflake_schema.tables: - self.report_warning( - "No tables/views found in schema. If tables exist, please grant REFERENCES or SELECT permissions on them.", - f"{db_name}.{schema_name}", - ) - - def fetch_views_for_schema( - self, snowflake_schema: SnowflakeSchema, db_name: str, schema_name: str - ) -> List[SnowflakeView]: - try: - views: List[SnowflakeView] = [] - for view in self.get_views_for_schema(schema_name, db_name): - view_name = self.get_dataset_identifier(view.name, schema_name, db_name) - - self.report.report_entity_scanned(view_name, "view") - - if not self.config.view_pattern.allowed(view_name): - self.report.report_dropped(view_name) - else: - views.append(view) - snowflake_schema.views = [view.name for view in views] - return views - except Exception as e: - if isinstance(e, SnowflakePermissionError): - # Ideal implementation would use PEP 678 – Enriching Exceptions with Notes - error_msg = f"Failed to get views for schema {db_name}.{schema_name}. Please check permissions." 
- - raise SnowflakePermissionError(error_msg) from e.__cause__ - else: - logger.debug( - f"Failed to get views for schema {db_name}.{schema_name} due to error {e}", - exc_info=e, - ) - self.report_warning( - "Failed to get views for schema", - f"{db_name}.{schema_name}", - ) - return [] - - def fetch_tables_for_schema( - self, snowflake_schema: SnowflakeSchema, db_name: str, schema_name: str - ) -> List[SnowflakeTable]: - try: - tables: List[SnowflakeTable] = [] - for table in self.get_tables_for_schema(schema_name, db_name): - table_identifier = self.get_dataset_identifier( - table.name, schema_name, db_name - ) - self.report.report_entity_scanned(table_identifier) - if not self.config.table_pattern.allowed(table_identifier): - self.report.report_dropped(table_identifier) - else: - tables.append(table) - snowflake_schema.tables = [table.name for table in tables] - return tables - except Exception as e: - if isinstance(e, SnowflakePermissionError): - # Ideal implementation would use PEP 678 – Enriching Exceptions with Notes - error_msg = f"Failed to get tables for schema {db_name}.{schema_name}. Please check permissions." 
- raise SnowflakePermissionError(error_msg) from e.__cause__ - else: - logger.debug( - f"Failed to get tables for schema {db_name}.{schema_name} due to error {e}", - exc_info=e, - ) - self.report_warning( - "Failed to get tables for schema", - f"{db_name}.{schema_name}", - ) - return [] - - def make_data_reader(self) -> Optional[SnowflakeDataReader]: - if self.classification_handler.is_classification_enabled() and self.connection: - return SnowflakeDataReader.create( - self.connection, self.snowflake_identifier - ) - - return None - - def _process_table( - self, - table: SnowflakeTable, - schema_name: str, - db_name: str, - ) -> Iterable[MetadataWorkUnit]: - table_identifier = self.get_dataset_identifier(table.name, schema_name, db_name) - - self.fetch_columns_for_table(table, schema_name, db_name, table_identifier) - - self.fetch_pk_for_table(table, schema_name, db_name, table_identifier) - - self.fetch_foreign_keys_for_table(table, schema_name, db_name, table_identifier) - - if self.config.extract_tags != TagOption.skip: - table.tags = self.tag_extractor.get_tags_on_object( - table_name=table.name, - schema_name=schema_name, - db_name=db_name, - domain="table", - ) - - if self.config.include_technical_schema: - if table.tags: - for tag in table.tags: - yield from self._process_tag(tag) - for column_name in table.column_tags: - for tag in table.column_tags[column_name]: - yield from self._process_tag(tag) - - yield from self.gen_dataset_workunits(table, schema_name, db_name) - - def fetch_foreign_keys_for_table( - self, - table: SnowflakeTable, - schema_name: str, - db_name: str, - table_identifier: str, - ) -> None: - try: - table.foreign_keys = self.get_fk_constraints_for_table( - table.name, schema_name, db_name - ) - except Exception as e: - logger.debug( - f"Failed to get foreign key for table {table_identifier} due to error {e}", - exc_info=e, - ) - self.report_warning("Failed to get foreign key for table", table_identifier) - - def fetch_pk_for_table( - 
self, - table: SnowflakeTable, - schema_name: str, - db_name: str, - table_identifier: str, - ) -> None: - try: - table.pk = self.get_pk_constraints_for_table( - table.name, schema_name, db_name - ) - except Exception as e: - logger.debug( - f"Failed to get primary key for table {table_identifier} due to error {e}", - exc_info=e, - ) - self.report_warning("Failed to get primary key for table", table_identifier) - - def fetch_columns_for_table( - self, - table: SnowflakeTable, - schema_name: str, - db_name: str, - table_identifier: str, - ) -> None: - try: - table.columns = self.get_columns_for_table(table.name, schema_name, db_name) - table.column_count = len(table.columns) - if self.config.extract_tags != TagOption.skip: - table.column_tags = self.tag_extractor.get_column_tags_for_table( - table.name, schema_name, db_name - ) - except Exception as e: - logger.debug( - f"Failed to get columns for table {table_identifier} due to error {e}", - exc_info=e, - ) - self.report_warning("Failed to get columns for table", table_identifier) - - def _process_view( - self, - view: SnowflakeView, - schema_name: str, - db_name: str, - ) -> Iterable[MetadataWorkUnit]: - view_name = self.get_dataset_identifier(view.name, schema_name, db_name) - - try: - view.columns = self.get_columns_for_table(view.name, schema_name, db_name) - if self.config.extract_tags != TagOption.skip: - view.column_tags = self.tag_extractor.get_column_tags_for_table( - view.name, schema_name, db_name - ) - except Exception as e: - logger.debug( - f"Failed to get columns for view {view_name} due to error {e}", - exc_info=e, - ) - self.report_warning("Failed to get columns for view", view_name) - - if self.config.extract_tags != TagOption.skip: - view.tags = self.tag_extractor.get_tags_on_object( - table_name=view.name, - schema_name=schema_name, - db_name=db_name, - domain="table", - ) - - if self.config.include_technical_schema: - if view.tags: - for tag in view.tags: - yield from self._process_tag(tag) - 
for column_name in view.column_tags: - for tag in view.column_tags[column_name]: - yield from self._process_tag(tag) - - yield from self.gen_dataset_workunits(view, schema_name, db_name) - - def _process_tag(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: - tag_identifier = tag.identifier() - - if self.report.is_tag_processed(tag_identifier): - return - - self.report.report_tag_processed(tag_identifier) - - yield from self.gen_tag_workunits(tag) - - def gen_dataset_urn(self, dataset_identifier: str) -> str: - return make_dataset_urn_with_platform_instance( - platform=self.platform, - name=dataset_identifier, - platform_instance=self.config.platform_instance, - env=self.config.env, - ) - - def gen_dataset_workunits( - self, - table: Union[SnowflakeTable, SnowflakeView], - schema_name: str, - db_name: str, - ) -> Iterable[MetadataWorkUnit]: - dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name) - dataset_urn = self.gen_dataset_urn(dataset_name) - - status = Status(removed=False) - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=status - ).as_workunit() - - schema_metadata = self.gen_schema_metadata(table, schema_name, db_name) - - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=schema_metadata - ).as_workunit() - - dataset_properties = self.get_dataset_properties(table, schema_name, db_name) - - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=dataset_properties - ).as_workunit() - - schema_container_key = gen_schema_key( - db_name=self.snowflake_identifier(db_name), - schema=self.snowflake_identifier(schema_name), - platform=self.platform, - platform_instance=self.config.platform_instance, - env=self.config.env, - ) - - yield from add_table_to_schema_container( - dataset_urn=dataset_urn, - parent_container_key=schema_container_key, - ) - dpi_aspect = get_dataplatform_instance_aspect( - dataset_urn=dataset_urn, - platform=self.platform, - 
platform_instance=self.config.platform_instance, - ) - if dpi_aspect: - yield dpi_aspect - - subTypes = SubTypes( - typeNames=( - [DatasetSubTypes.VIEW] - if isinstance(table, SnowflakeView) - else [DatasetSubTypes.TABLE] - ) - ) - - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=subTypes - ).as_workunit() - - if self.domain_registry: - yield from get_domain_wu( - dataset_name=dataset_name, - entity_urn=dataset_urn, - domain_config=self.config.domain, - domain_registry=self.domain_registry, - ) - - if table.tags: - tag_associations = [ - TagAssociation( - tag=make_tag_urn(self.snowflake_identifier(tag.identifier())) - ) - for tag in table.tags - ] - global_tags = GlobalTags(tag_associations) - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=global_tags - ).as_workunit() - - if isinstance(table, SnowflakeView) and table.view_definition is not None: - view_properties_aspect = ViewProperties( - materialized=table.materialized, - viewLanguage="SQL", - viewLogic=table.view_definition, - ) - - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=view_properties_aspect - ).as_workunit() - - def get_dataset_properties( - self, - table: Union[SnowflakeTable, SnowflakeView], - schema_name: str, - db_name: str, - ) -> DatasetProperties: - return DatasetProperties( - name=table.name, - created=( - TimeStamp(time=int(table.created.timestamp() * 1000)) - if table.created is not None - else None - ), - lastModified=( - TimeStamp(time=int(table.last_altered.timestamp() * 1000)) - if table.last_altered is not None - else ( - TimeStamp(time=int(table.created.timestamp() * 1000)) - if table.created is not None - else None - ) - ), - description=table.comment, - qualifiedName=f"{db_name}.{schema_name}.{table.name}", - customProperties={}, - externalUrl=( - self.get_external_url_for_table( - table.name, - schema_name, - db_name, - ( - SnowflakeObjectDomain.TABLE - if isinstance(table, SnowflakeTable) - else 
SnowflakeObjectDomain.VIEW - ), - ) - if self.config.include_external_url - else None - ), - ) - - def gen_tag_workunits(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: - tag_urn = make_tag_urn(self.snowflake_identifier(tag.identifier())) - - tag_properties_aspect = TagProperties( - name=tag.display_name(), - description=f"Represents the Snowflake tag `{tag._id_prefix_as_str()}` with value `{tag.value}`.", - ) - - yield MetadataChangeProposalWrapper( - entityUrn=tag_urn, aspect=tag_properties_aspect - ).as_workunit() - - def gen_schema_metadata( - self, - table: Union[SnowflakeTable, SnowflakeView], - schema_name: str, - db_name: str, - ) -> SchemaMetadata: - dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name) - dataset_urn = self.gen_dataset_urn(dataset_name) - - foreign_keys: Optional[List[ForeignKeyConstraint]] = None - if isinstance(table, SnowflakeTable) and len(table.foreign_keys) > 0: - foreign_keys = self.build_foreign_keys(table, dataset_urn) - - schema_metadata = SchemaMetadata( - schemaName=dataset_name, - platform=make_data_platform_urn(self.platform), - version=0, - hash="", - platformSchema=MySqlDDL(tableSchema=""), - fields=[ - SchemaField( - fieldPath=self.snowflake_identifier(col.name), - type=SchemaFieldDataType( - SNOWFLAKE_FIELD_TYPE_MAPPINGS.get(col.data_type, NullType)() - ), - # NOTE: nativeDataType will not be in sync with older connector - nativeDataType=col.get_precise_native_type(), - description=col.comment, - nullable=col.is_nullable, - isPartOfKey=( - col.name in table.pk.column_names - if isinstance(table, SnowflakeTable) and table.pk is not None - else None - ), - globalTags=( - GlobalTags( - [ - TagAssociation( - make_tag_urn( - self.snowflake_identifier(tag.identifier()) - ) - ) - for tag in table.column_tags[col.name] - ] - ) - if col.name in table.column_tags - else None - ), - ) - for col in table.columns - ], - foreignKeys=foreign_keys, - ) - - if self.aggregator: - 
self.aggregator.register_schema(urn=dataset_urn, schema=schema_metadata) - - return schema_metadata - - def build_foreign_keys( - self, table: SnowflakeTable, dataset_urn: str - ) -> List[ForeignKeyConstraint]: - foreign_keys = [] - for fk in table.foreign_keys: - foreign_dataset = make_dataset_urn_with_platform_instance( - platform=self.platform, - name=self.get_dataset_identifier( - fk.referred_table, fk.referred_schema, fk.referred_database - ), - env=self.config.env, - platform_instance=self.config.platform_instance, - ) - foreign_keys.append( - ForeignKeyConstraint( - name=fk.name, - foreignDataset=foreign_dataset, - foreignFields=[ - make_schema_field_urn( - foreign_dataset, - self.snowflake_identifier(col), - ) - for col in fk.referred_column_names - ], - sourceFields=[ - make_schema_field_urn( - dataset_urn, - self.snowflake_identifier(col), - ) - for col in fk.column_names - ], - ) - ) - return foreign_keys - def get_report(self) -> SourceReport: return self.report - def gen_database_containers( - self, database: SnowflakeDatabase - ) -> Iterable[MetadataWorkUnit]: - database_container_key = gen_database_key( - self.snowflake_identifier(database.name), - platform=self.platform, - platform_instance=self.config.platform_instance, - env=self.config.env, - ) - - yield from gen_database_container( - name=database.name, - database=self.snowflake_identifier(database.name), - database_container_key=database_container_key, - sub_types=[DatasetContainerSubTypes.DATABASE], - domain_registry=self.domain_registry, - domain_config=self.config.domain, - external_url=( - self.get_external_url_for_database(database.name) - if self.config.include_external_url - else None - ), - description=database.comment, - created=( - int(database.created.timestamp() * 1000) - if database.created is not None - else None - ), - last_modified=( - int(database.last_altered.timestamp() * 1000) - if database.last_altered is not None - else ( - int(database.created.timestamp() * 1000) - if 
database.created is not None - else None - ) - ), - tags=( - [self.snowflake_identifier(tag.identifier()) for tag in database.tags] - if database.tags - else None - ), - ) - - def gen_schema_containers( - self, schema: SnowflakeSchema, db_name: str - ) -> Iterable[MetadataWorkUnit]: - schema_name = self.snowflake_identifier(schema.name) - database_container_key = gen_database_key( - database=self.snowflake_identifier(db_name), - platform=self.platform, - platform_instance=self.config.platform_instance, - env=self.config.env, - ) - - schema_container_key = gen_schema_key( - db_name=self.snowflake_identifier(db_name), - schema=schema_name, - platform=self.platform, - platform_instance=self.config.platform_instance, - env=self.config.env, - ) - - yield from gen_schema_container( - name=schema.name, - schema=self.snowflake_identifier(schema.name), - database=self.snowflake_identifier(db_name), - database_container_key=database_container_key, - domain_config=self.config.domain, - schema_container_key=schema_container_key, - sub_types=[DatasetContainerSubTypes.SCHEMA], - domain_registry=self.domain_registry, - description=schema.comment, - external_url=( - self.get_external_url_for_schema(schema.name, db_name) - if self.config.include_external_url - else None - ), - created=( - int(schema.created.timestamp() * 1000) - if schema.created is not None - else None - ), - last_modified=( - int(schema.last_altered.timestamp() * 1000) - if schema.last_altered is not None - else ( - int(schema.created.timestamp() * 1000) - if schema.created is not None - else None - ) - ), - tags=( - [self.snowflake_identifier(tag.identifier()) for tag in schema.tags] - if schema.tags - else None - ), - ) - - def get_tables_for_schema( - self, schema_name: str, db_name: str - ) -> List[SnowflakeTable]: - tables = self.data_dictionary.get_tables_for_database(db_name) - - # get all tables for database failed, - # falling back to get tables for schema - if tables is None: - 
self.report.num_get_tables_for_schema_queries += 1 - return self.data_dictionary.get_tables_for_schema(schema_name, db_name) - - # Some schema may not have any table - return tables.get(schema_name, []) - - def get_views_for_schema( - self, schema_name: str, db_name: str - ) -> List[SnowflakeView]: - views = self.data_dictionary.get_views_for_database(db_name) - - # get all views for database failed, - # falling back to get views for schema - if views is None: - self.report.num_get_views_for_schema_queries += 1 - return self.data_dictionary.get_views_for_schema(schema_name, db_name) - - # Some schema may not have any table - return views.get(schema_name, []) - - def get_columns_for_table( - self, table_name: str, schema_name: str, db_name: str - ) -> List[SnowflakeColumn]: - columns = self.data_dictionary.get_columns_for_schema(schema_name, db_name) - - # get all columns for schema failed, - # falling back to get columns for table - if columns is None: - self.report.num_get_columns_for_table_queries += 1 - return self.data_dictionary.get_columns_for_table( - table_name, schema_name, db_name - ) - - # Access to table but none of its columns - is this possible ? - return columns.get(table_name, []) - - def get_pk_constraints_for_table( - self, table_name: str, schema_name: str, db_name: str - ) -> Optional[SnowflakePK]: - constraints = self.data_dictionary.get_pk_constraints_for_schema( - schema_name, db_name - ) - - # Access to table but none of its constraints - is this possible ? - return constraints.get(table_name) - - def get_fk_constraints_for_table( - self, table_name: str, schema_name: str, db_name: str - ) -> List[SnowflakeFK]: - constraints = self.data_dictionary.get_fk_constraints_for_schema( - schema_name, db_name - ) - - # Access to table but none of its constraints - is this possible ? 
- return constraints.get(table_name, []) - def add_config_to_report(self) -> None: self.report.cleaned_account_id = self.config.get_account() self.report.ignore_start_time_lineage = self.config.ignore_start_time_lineage @@ -1517,26 +564,6 @@ def inspect_session_metadata(self) -> None: except Exception: self.report.edition = None - # domain is either "view" or "table" - def get_external_url_for_table( - self, table_name: str, schema_name: str, db_name: str, domain: str - ) -> Optional[str]: - if self.snowsight_base_url is not None: - return f"{self.snowsight_base_url}#/data/databases/{db_name}/schemas/{schema_name}/{domain}/{table_name}/" - return None - - def get_external_url_for_schema( - self, schema_name: str, db_name: str - ) -> Optional[str]: - if self.snowsight_base_url is not None: - return f"{self.snowsight_base_url}#/data/databases/{db_name}/schemas/{schema_name}/" - return None - - def get_external_url_for_database(self, db_name: str) -> Optional[str]: - if self.snowsight_base_url is not None: - return f"{self.snowsight_base_url}#/data/databases/{db_name}/" - return None - def get_snowsight_base_url(self) -> Optional[str]: try: # See https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#finding-the-region-and-locator-for-an-account diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py index 7534f1295c5283..5b1a815e178b18 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py @@ -133,17 +133,8 @@ def create(cls, config_dict: Dict, ctx: PipelineContext) -> "VerticaSource": return cls(config, ctx) def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: + yield from super().get_workunits_internal() sql_config = self.config - if logger.isEnabledFor(logging.DEBUG): - # If debug logging is enabled, we also want to echo each SQL query issued. 
- sql_config.options.setdefault("echo", True) - - # Extra default SQLAlchemy option for better connection pooling and threading. - # https://docs.sqlalchemy.org/en/14/core/pooling.html#sqlalchemy.pool.QueuePool.params.max_overflow - if sql_config.is_profiling_enabled(): - sql_config.options.setdefault( - "max_overflow", sql_config.profiling.max_workers - ) for inspector in self.get_inspectors(): profiler = None @@ -170,11 +161,6 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit ), ) - if sql_config.include_tables: - yield from self.loop_tables(inspector, schema, sql_config) - - if sql_config.include_views: - yield from self.loop_views(inspector, schema, sql_config) if sql_config.include_projections: yield from self.loop_projections(inspector, schema, sql_config) if sql_config.include_models: @@ -190,6 +176,15 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit profile_requests, profiler, platform=self.platform ) + def get_identifier( + self, *, schema: str, entity: str, inspector: VerticaInspector, **kwargs: Any + ) -> str: + regular = f"{schema}.{entity}" + if self.config.database: + return f"{self.config.database}.{regular}" + current_database = self.get_db_name(inspector) + return f"{current_database}.{regular}" + def get_database_properties( self, inspector: VerticaInspector, database: str ) -> Optional[Dict[str, str]]: diff --git a/metadata-ingestion/src/datahub/utilities/logging_manager.py b/metadata-ingestion/src/datahub/utilities/logging_manager.py index 6bfbb9d3733960..e71e6953916fab 100644 --- a/metadata-ingestion/src/datahub/utilities/logging_manager.py +++ b/metadata-ingestion/src/datahub/utilities/logging_manager.py @@ -274,6 +274,7 @@ def configure_logging(debug: bool, log_file: Optional[str] = None) -> Iterator[N # Reduce logging from some particularly chatty libraries. 
logging.getLogger("urllib3").setLevel(logging.ERROR) +logging.getLogger("urllib3.util.retry").setLevel(logging.WARNING) logging.getLogger("snowflake").setLevel(level=logging.WARNING) # logging.getLogger("botocore").setLevel(logging.INFO) # logging.getLogger("google").setLevel(logging.INFO) diff --git a/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py b/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py new file mode 100644 index 00000000000000..23523501ee0b49 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py @@ -0,0 +1,98 @@ +import functools +import threading +from typing import Callable, Dict, Hashable, Tuple, TypeVar + +import cachetools +import cachetools.keys +from typing_extensions import ParamSpec + +_Key = Tuple[Hashable, ...] +_F = ParamSpec("_F") +_T = TypeVar("_T") + + +def serialized_lru_cache( + maxsize: int, +) -> Callable[[Callable[_F, _T]], Callable[_F, _T]]: + """Similar to `lru_cache`, but ensures multiple calls with the same parameters are serialized. + + Calls with different parameters are allowed to proceed in parallel. + + Args: + maxsize (int): Maximum number of entries to keep in the cache. + + Returns: + Callable[[Callable[F, T]], Callable[F, T]]: Decorator for the function to be wrapped. + """ + + UNSET = object() + + def decorator(func: Callable[_F, _T]) -> Callable[_F, _T]: + hits = 0 + misses = 0 + + cache_lock = threading.Lock() + cache: "cachetools.LRUCache[_Key, _T]" = cachetools.LRUCache(maxsize=maxsize) + + key_locks_lock = threading.Lock() + key_locks: Dict[_Key, threading.Lock] = {} + key_waiters: Dict[_Key, int] = {} + + def wrapper(*args: _F.args, **kwargs: _F.kwargs) -> _T: + # We need a type ignore here because there's no way for us to require that + # the args and kwargs are hashable while using ParamSpec. 
+ key: _Key = cachetools.keys.hashkey(*args, **kwargs) # type: ignore + + with cache_lock: + if key in cache: + nonlocal hits + hits += 1 + return cache[key] + + with key_locks_lock: + if key not in key_locks: + key_locks[key] = threading.Lock() + key_waiters[key] = 0 + lock = key_locks[key] + key_waiters[key] += 1 + + try: + with lock: + # Check the cache again, in case the cache was updated by another thread. + result = UNSET + with cache_lock: + if key in cache: + hits += 1 + return cache[key] + + nonlocal misses + misses += 1 + result = func(*args, **kwargs) + + with cache_lock: + cache[key] = result + return result + + finally: + with key_locks_lock: + key_waiters[key] -= 1 + if key_waiters[key] == 0: + del key_locks[key] + del key_waiters[key] + + def cache_info() -> functools._CacheInfo: + return functools._CacheInfo( + hits=hits, + misses=misses, + maxsize=maxsize, + currsize=len(cache), + ) + + # Add some extra attributes to the wrapper function. This makes it mostly compatible + # with functools.lru_cache. 
+ wrapper.cache = cache # type: ignore + wrapper.cache_info = cache_info # type: ignore + + return functools.update_wrapper(wrapper, func) + + return decorator diff --git a/metadata-ingestion/tests/integration/sigma/golden_test_platform_instance_ingest.json b/metadata-ingestion/tests/integration/sigma/golden_test_platform_instance_ingest.json index a81e6e927ef6ac..12bb7734f30a63 100644 --- a/metadata-ingestion/tests/integration/sigma/golden_test_platform_instance_ingest.json +++ b/metadata-ingestion/tests/integration/sigma/golden_test_platform_instance_ingest.json @@ -39,6 +39,7 @@ "aspect": { "json": { "customProperties": { + "datasetId": "8891fd40-5470-4ff2-a74f-6e61ee44d3fc", "path": "Acryl Data" }, "externalUrl": "https://app.sigmacomputing.com/acryldata/b/49HFLTr6xytgrPly3PFsNC", @@ -186,6 +187,7 @@ "aspect": { "json": { "customProperties": { + "datasetId": "bd6b86e8-cd4a-4b25-ab65-f258c2a68a8f", "path": "Acryl Data/New Folder" }, "externalUrl": "https://app.sigmacomputing.com/acryldata/b/5LqGLu14qUnqh3cN6wRJBd", @@ -618,7 +620,7 @@ } }, "systemMetadata": { - "lastObserved": 1713794496054, + "lastObserved": 1718348049212, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1088,7 +1090,7 @@ } }, "systemMetadata": { - "lastObserved": 1713794496115, + "lastObserved": 1718348049268, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1239,7 +1241,7 @@ } }, "systemMetadata": { - "lastObserved": 1713794496188, + "lastObserved": 1718348049351, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1913,29 +1915,38 @@ } }, { - "entityType": "tag", - "entityUrn": "urn:li:tag:Warning", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)", "changeType": "UPSERT", - "aspectName": "tagKey", + "aspectName": "upstreamLineage", "aspect": { "json": { - "name": "Warning" + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + 
"dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev_instance.long_tail_companions.adoption.pets,DEV)", + "type": "COPY" + } + ] } }, "systemMetadata": { - "lastObserved": 1713794496203, + "lastObserved": 1718348049380, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } }, { "entityType": "tag", - "entityUrn": "urn:li:tag:Deprecated", + "entityUrn": "urn:li:tag:Warning", "changeType": "UPSERT", "aspectName": "tagKey", "aspect": { "json": { - "name": "Deprecated" + "name": "Warning" } }, "systemMetadata": { @@ -1945,26 +1956,17 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,cloud_instance.49HFLTr6xytgrPly3PFsNC,PROD)", + "entityType": "tag", + "entityUrn": "urn:li:tag:Deprecated", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "tagKey", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev_instance.long_tail_companions.adoption.pets,DEV)", - "type": "COPY" - } - ] + "name": "Deprecated" } }, "systemMetadata": { - "lastObserved": 1713794496202, + "lastObserved": 1713794496203, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } diff --git a/metadata-ingestion/tests/integration/sigma/golden_test_sigma_extract_lineage.json b/metadata-ingestion/tests/integration/sigma/golden_test_sigma_extract_lineage.json new file mode 100644 index 00000000000000..4a1392accc9ad5 --- /dev/null +++ b/metadata-ingestion/tests/integration/sigma/golden_test_sigma_extract_lineage.json @@ -0,0 +1,1861 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718003275457, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "8891fd40-5470-4ff2-a74f-6e61ee44d3fc", + "path": "Acryl Data" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/b/49HFLTr6xytgrPly3PFsNC", + "name": "PETS", + "qualifiedName": "PETS", + "description": "", + "created": { + "time": 1713188592664 + }, + "lastModified": { + "time": 1713188592664 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1718003275458, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + } + }, + "systemMetadata": { + "lastObserved": 1718003275459, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1718003275460, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Sigma Dataset" + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275461, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275462, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718003275479, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "bd6b86e8-cd4a-4b25-ab65-f258c2a68a8f", + "path": "Acryl Data/New Folder" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/b/5LqGLu14qUnqh3cN6wRJBd", + "name": "PET_PROFILES_JOINED_DYNAMIC", + "qualifiedName": "PET_PROFILES_JOINED_DYNAMIC", + "description": "", + "created": { + "time": 1713189068019 + }, + "lastModified": { + "time": 1713189068019 + }, + "tags": [ + "Deprecated" + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275480, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + } + }, + "systemMetadata": { + "lastObserved": 1718003275481, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1718003275481, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Sigma Dataset" + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275482, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Deprecated" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275482, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + }, + { + "id": "New Folder" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275483, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + 
"customProperties": { + "platform": "sigma", + "workbookId": "9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b", + "path": "Acryl Data", + "latestVersion": "2" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7", + "name": "Acryl Workbook", + "created": { + "time": 1713188691477 + }, + "lastModified": { + "time": 1713189117302 + } + } + }, + "systemMetadata": { + "lastObserved": 1718003275484, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718003275485, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sigma" + } + }, + "systemMetadata": { + "lastObserved": 1718003275485, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Sigma Workbook" + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275486, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1718003275486, + "runId": 
"sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Warning" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275487, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + } + }, + "systemMetadata": { + "lastObserved": 1718003275487, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275488, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718003275489, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "ElementsCount": "2" + }, + "title": "Page 1", + "description": "", + "charts": [ + "urn:li:chart:(sigma,kH0MeihtGs)", + "urn:li:chart:(sigma,Ml9C5ezT5W)" + ], + "datasets": [], + "lastModified": { + "created": { + 
"time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + }, + "systemMetadata": { + "lastObserved": 1718003275489, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + }, + "systemMetadata": { + "lastObserved": 1718003275490, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + }, + { + "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275491, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718003275520, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "VizualizationType": "levelTable", + "type": "table" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=kH0MeihtGs&:fullScreen=true", + "title": "ADOPTIONS", + "description": "", + "lastModified": { + "created": { + "time": 0, + "actor": 
"urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.adoptions,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275521, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + }, + "systemMetadata": { + "lastObserved": 1718003275522, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Pet Fk)", + "schemaField": { + "fieldPath": "Pet Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Human Fk)", + "schemaField": { + "fieldPath": "Human Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + 
"type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275522, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + }, + { + "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275525, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718003275582, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "VizualizationType": 
"bar", + "type": "visualization" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=Ml9C5ezT5W&:fullScreen=true", + "title": "Count of Profile Id by Status", + "description": "", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275583, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + }, + "systemMetadata": { + "lastObserved": 1718003275584, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Profile Id)", + "schemaField": { + "fieldPath": "Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + "type": { + 
"com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Count of Profile Id)", + "schemaField": { + "fieldPath": "Count of Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275584, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + }, + { + "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275586, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + 
"schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Pet Fk)", + "schemaField": { + "fieldPath": "Pet Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Human Fk)", + "schemaField": { + "fieldPath": "Human Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + 
"type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Profile Id)", + "schemaField": { + "fieldPath": "Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Count of Profile Id)", + "schemaField": { + "fieldPath": "Count of Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275587, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "changeType": "UPSERT", + 
"aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718003275592, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "ElementsCount": "1" + }, + "title": "Page 2", + "description": "", + "charts": [ + "urn:li:chart:(sigma,tQJu5N1l81)" + ], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + }, + "systemMetadata": { + "lastObserved": 1718003275593, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + }, + "systemMetadata": { + "lastObserved": 1718003275594, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + }, + { + "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275594, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + 
"lastObserved": 1718003275654, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "VizualizationType": "levelTable", + "type": "table" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=tQJu5N1l81&:fullScreen=true", + "title": "PETS ADOPTIONS JOIN", + "description": "", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.adoptions,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275655, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + }, + "systemMetadata": { + "lastObserved": 1718003275656, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Profile Id)", + "schemaField": { 
+ "fieldPath": "Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pk %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Pk (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pet Fk)", + "schemaField": { + "fieldPath": "Pet Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Human Fk)", + "schemaField": { + "fieldPath": "Human Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": 
{} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Status %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Status (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Created At %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Created At (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Updated At %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Updated At (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275656, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + }, + { + "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275660, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + 
"fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Profile Id)", + "schemaField": { + "fieldPath": "Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pk %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Pk (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pet Fk)", + "schemaField": 
{ + "fieldPath": "Pet Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Human Fk)", + "schemaField": { + "fieldPath": "Human Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Status %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Status (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Created At %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Created At (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Updated At %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Updated At (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275661, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "sigma", + "workspaceId": "3ee61405-3be2-4000-ba72-60d36757b95b" + }, + "name": "Acryl Data", 
+ "created": { + "time": 1710232264826 + }, + "lastModified": { + "time": 1710232264826 + } + } + }, + "systemMetadata": { + "lastObserved": 1718003275665, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718003275666, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sigma" + } + }, + "systemMetadata": { + "lastObserved": 1718003275666, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Sigma Workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275667, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1718003275667, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + 
"systemMetadata": { + "lastObserved": 1718003275668, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pets,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718003275668, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Deprecated", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Deprecated" + } + }, + "systemMetadata": { + "lastObserved": 1718003275669, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Warning", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Warning" + } + }, + "systemMetadata": { + "lastObserved": 1718003275670, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest.json b/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest.json index 50865ab0ff72e3..f800cb19f88115 100644 --- a/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest.json +++ b/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest.json @@ -23,6 +23,7 @@ "aspect": { "json": { "customProperties": { + "datasetId": "8891fd40-5470-4ff2-a74f-6e61ee44d3fc", "path": "Acryl Data" }, "externalUrl": "https://app.sigmacomputing.com/acryldata/b/49HFLTr6xytgrPly3PFsNC", @@ -39,7 +40,7 @@ } }, "systemMetadata": { - "lastObserved": 1713795619219, + "lastObserved": 
1718005767888, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -312,6 +313,7 @@ "aspect": { "json": { "customProperties": { + "datasetId": "bd6b86e8-cd4a-4b25-ab65-f258c2a68a8f", "path": "Acryl Data/New Folder" }, "externalUrl": "https://app.sigmacomputing.com/acryldata/b/5LqGLu14qUnqh3cN6wRJBd", @@ -330,7 +332,7 @@ } }, "systemMetadata": { - "lastObserved": 1713795619223, + "lastObserved": 1718005767900, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -844,7 +846,7 @@ } }, "systemMetadata": { - "lastObserved": 1713795619372, + "lastObserved": 1718348048900, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1172,7 +1174,7 @@ } }, "systemMetadata": { - "lastObserved": 1713795619447, + "lastObserved": 1718348048975, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1612,32 +1614,7 @@ } }, "systemMetadata": { - "lastObserved": 1713795619266, - "runId": "sigma-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pets,PROD)", - "type": "COPY" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1713795619462, + "lastObserved": 1718348048836, "runId": "sigma-test", "lastRunId": "no-run-id-provided" } @@ -1824,6 +1801,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pets,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718348048990, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Deprecated", diff --git a/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest_shared_entities_mces.json b/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest_shared_entities_mces.json new file mode 100644 index 00000000000000..d6b702bdfd6695 --- /dev/null +++ b/metadata-ingestion/tests/integration/sigma/golden_test_sigma_ingest_shared_entities_mces.json @@ -0,0 +1,1821 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718004101661, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "8891fd40-5470-4ff2-a74f-6e61ee44d3fc", + "path": "Acryl Data" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/b/49HFLTr6xytgrPly3PFsNC", + "name": "PETS", + "qualifiedName": "PETS", + "description": "", + "created": { + "time": 1713188592664 + }, + "lastModified": { + "time": 1713188592664 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1718004101662, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + } + }, + "systemMetadata": { + "lastObserved": 1718004101663, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1718004101663, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Sigma Dataset" + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101664, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,49HFLTr6xytgrPly3PFsNC,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101665, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718004101675, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "bd6b86e8-cd4a-4b25-ab65-f258c2a68a8f", + "path": "Acryl Data/New Folder" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/b/5LqGLu14qUnqh3cN6wRJBd", + "name": "PET_PROFILES_JOINED_DYNAMIC", + "qualifiedName": "PET_PROFILES_JOINED_DYNAMIC", + "description": "", + "created": { + "time": 1713189068019 + }, + "lastModified": { + "time": 1713189068019 + }, + "tags": [ + "Deprecated" + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101675, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + } + }, + "systemMetadata": { + "lastObserved": 1718004101676, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1718004101677, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Sigma Dataset" + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101677, + "runId": "sigma-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Deprecated" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101678, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sigma,5LqGLu14qUnqh3cN6wRJBd,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "urn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f" + }, + { + "id": "New Folder" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101678, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "sigma", + "workbookId": "9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b", + "path": "New Acryl Data", + "latestVersion": "2" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7", + "name": "Acryl Workbook", + "created": { + "time": 1713188691477 + }, + "lastModified": { + "time": 1713189117302 + } + } + }, + "systemMetadata": { + "lastObserved": 1718004101680, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718004101680, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + 
"entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sigma" + } + }, + "systemMetadata": { + "lastObserved": 1718004101681, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Sigma Workbook" + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101681, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1718004101682, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Warning" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101683, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b83da80a4d444484521d9f7aca958742" + } + }, + "systemMetadata": { + "lastObserved": 1718004101683, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b83da80a4d444484521d9f7aca958742", + "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101684, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718004101684, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "ElementsCount": "2" + }, + "title": "Page 1", + "description": "", + "charts": [ + "urn:li:chart:(sigma,kH0MeihtGs)", + "urn:li:chart:(sigma,Ml9C5ezT5W)" + ], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + }, + "systemMetadata": { + "lastObserved": 1718004101685, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + }, + "systemMetadata": { + "lastObserved": 1718004101686, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": 
"urn:li:container:b83da80a4d444484521d9f7aca958742", + "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" + }, + { + "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101686, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718004101687, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "VizualizationType": "levelTable", + "type": "table" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=kH0MeihtGs&:fullScreen=true", + "title": "ADOPTIONS", + "description": "", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [] + } + }, + "systemMetadata": { + "lastObserved": 1718004101688, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + }, + "systemMetadata": { + "lastObserved": 1718004101689, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": 
"urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Pet Fk)", + "schemaField": { + "fieldPath": "Pet Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Human Fk)", + "schemaField": { + "fieldPath": "Human Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101689, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", 
+ "entityUrn": "urn:li:chart:(sigma,kH0MeihtGs)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b83da80a4d444484521d9f7aca958742", + "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" + }, + { + "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101692, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718004101693, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "VizualizationType": "bar", + "type": "visualization" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=Ml9C5ezT5W&:fullScreen=true", + "title": "Count of Profile Id by Status", + "description": "", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [] + } + }, + "systemMetadata": { + "lastObserved": 1718004101694, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + }, + "systemMetadata": { + "lastObserved": 1718004101695, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + 
"entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Profile Id)", + "schemaField": { + "fieldPath": "Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Count of Profile Id)", + "schemaField": { + "fieldPath": "Count of Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": 
false, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101695, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,Ml9C5ezT5W)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b83da80a4d444484521d9f7aca958742", + "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" + }, + { + "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101697, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,OSnGLBzL1i)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Pet Fk)", + "schemaField": { + "fieldPath": "Pet Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Human Fk)", + "schemaField": { + "fieldPath": "Human Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": 
false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,kH0MeihtGs),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Profile Id)", + "schemaField": { + "fieldPath": "Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": 
false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,Ml9C5ezT5W),Count of Profile Id)", + "schemaField": { + "fieldPath": "Count of Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101698, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718004101703, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "ElementsCount": "1" + }, + "title": "Page 2", + "description": "", + "charts": [ + "urn:li:chart:(sigma,tQJu5N1l81)" + ], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + }, + "systemMetadata": { + "lastObserved": 1718004101703, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + }, + "systemMetadata": { + "lastObserved": 1718004101704, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b83da80a4d444484521d9f7aca958742", + "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" + }, + { + "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101704, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718004101706, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "VizualizationType": "levelTable", + "type": "table" + }, + "externalUrl": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7?:nodeId=tQJu5N1l81&:fullScreen=true", + "title": "PETS ADOPTIONS JOIN", + "description": "", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [] + } + }, + "systemMetadata": { + "lastObserved": 1718004101707, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + }, + "systemMetadata": { + "lastObserved": 1718004101708, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Profile Id)", + "schemaField": { + "fieldPath": "Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pk 
%28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Pk (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pet Fk)", + "schemaField": { + "fieldPath": "Pet Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Human Fk)", + "schemaField": { + "fieldPath": "Human Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Status %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Status (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Created At %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Created At (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Updated At %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Updated At (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101708, + "runId": "sigma-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(sigma,tQJu5N1l81)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b83da80a4d444484521d9f7aca958742", + "urn": "urn:li:container:b83da80a4d444484521d9f7aca958742" + }, + { + "id": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02", + "urn": "urn:li:container:140db5e9decc9b6ec67fa1e8207b6f02" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101712, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sigma,DFSieiAcgo)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pk)", + "schemaField": { + "fieldPath": "Pk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Profile Id)", + "schemaField": { + "fieldPath": "Profile Id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Status)", + "schemaField": { + "fieldPath": "Status", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Created At)", + "schemaField": { + "fieldPath": "Created At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + 
"isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Updated At)", + "schemaField": { + "fieldPath": "Updated At", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pk %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Pk (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Pet Fk)", + "schemaField": { + "fieldPath": "Pet Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Human Fk)", + "schemaField": { + "fieldPath": "Human Fk", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Status %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Status (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Created At %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Created At (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": 
"urn:li:schemaField:(urn:li:chart:(sigma,tQJu5N1l81),Updated At %28ADOPTIONS%29)", + "schemaField": { + "fieldPath": "Updated At (ADOPTIONS)", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "String", + "recursive": false, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101713, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "sigma", + "workspaceId": "3ee61405-3be2-4000-ba72-60d36757b95b" + }, + "name": "Acryl Data", + "created": { + "time": 1710232264826 + }, + "lastModified": { + "time": 1710232264826 + } + } + }, + "systemMetadata": { + "lastObserved": 1718004101717, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1718004101718, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sigma" + } + }, + "systemMetadata": { + "lastObserved": 1718004101718, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Sigma Workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1718004101719, + "runId": 
"sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:Shubham_Jagtap", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1718004101719, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46c912b7a3f62c8e3269e559648c4b2f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1718004101720, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Deprecated", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Deprecated" + } + }, + "systemMetadata": { + "lastObserved": 1718004101720, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Warning", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Warning" + } + }, + "systemMetadata": { + "lastObserved": 1718004101721, + "runId": "sigma-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sigma/test_sigma.py b/metadata-ingestion/tests/integration/sigma/test_sigma.py index c9a845e59ddf25..f4948de7ea8250 100644 --- a/metadata-ingestion/tests/integration/sigma/test_sigma.py +++ b/metadata-ingestion/tests/integration/sigma/test_sigma.py @@ -13,11 +13,30 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None: "status_code": 200, "json": { "access_token": 
"717de8281754fe8e302b1ee69f1c9553faf0331cabd8712f459c", + "refresh_token": "124de8281754fe8e302b1ee69f1c9553faf0331cabd8712f442v", "token_type": "bearer", "expires_in": 3599, }, }, - "https://aws-api.sigmacomputing.com/v2/files": { + "https://aws-api.sigmacomputing.com/v2/workspaces?limit=50": { + "method": "GET", + "status_code": 200, + "json": { + "entries": [ + { + "workspaceId": "3ee61405-3be2-4000-ba72-60d36757b95b", + "name": "Acryl Data", + "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "createdAt": "2024-03-12T08:31:04.826Z", + "updatedAt": "2024-03-12T08:31:04.826Z", + }, + ], + "total": 1, + "nextPage": None, + }, + }, + "https://aws-api.sigmacomputing.com/v2/files?typeFilters=dataset": { "method": "GET", "status_code": 200, "json": { @@ -54,22 +73,16 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None: "updatedAt": "2024-04-15T13:51:08.019Z", "isArchived": False, }, - { - "id": "1b47afdb-db4e-4a2c-9fa4-fc1332f4a097", - "urlId": "Ptyl1jrKEO18RDX9y1d4P", - "name": "New Folder", - "type": "folder", - "parentId": "3ee61405-3be2-4000-ba72-60d36757b95b", - "parentUrlId": "1UGFyEQCHqwPfQoAec3xJ9", - "permission": "edit", - "path": "Acryl Data", - "badge": None, - "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "createdAt": "2024-04-15T13:35:39.512Z", - "updatedAt": "2024-04-15T13:35:39.512Z", - "isArchived": False, - }, + ], + "total": 2, + "nextPage": None, + }, + }, + "https://aws-api.sigmacomputing.com/v2/files?typeFilters=workbook": { + "method": "GET", + "status_code": 200, + "json": { + "entries": [ { "id": "9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b", "urlId": "4JRFW1HThPI1K3YTjouXI7", @@ -87,35 +100,42 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None: "isArchived": False, }, ], - "total": 4, + "total": 1, "nextPage": None, }, }, - 
"https://aws-api.sigmacomputing.com/v2/workspaces/3ee61405-3be2-4000-ba72-60d36757b95b": { - "method": "GET", - "status_code": 200, - "json": { - "workspaceId": "3ee61405-3be2-4000-ba72-60d36757b95b", - "name": "Acryl Data", - "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "createdAt": "2024-03-12T08:31:04.826Z", - "updatedAt": "2024-03-12T08:31:04.826Z", - }, - }, - "https://aws-api.sigmacomputing.com/v2/datasets/8891fd40-5470-4ff2-a74f-6e61ee44d3fc": { + "https://aws-api.sigmacomputing.com/v2/datasets": { "method": "GET", "status_code": 200, "json": { - "datasetId": "8891fd40-5470-4ff2-a74f-6e61ee44d3fc", - "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "createdAt": "2024-04-15T13:43:12.664Z", - "updatedAt": "2024-04-15T13:43:12.664Z", - "name": "PETS", - "description": "", - "url": "https://app.sigmacomputing.com/acryldata/b/49HFLTr6xytgrPly3PFsNC", - "isArchived": False, + "entries": [ + { + "datasetId": "8891fd40-5470-4ff2-a74f-6e61ee44d3fc", + "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "createdAt": "2024-04-15T13:43:12.664Z", + "updatedAt": "2024-04-15T13:43:12.664Z", + "name": "PETS", + "description": "", + "url": "https://app.sigmacomputing.com/acryldata/b/49HFLTr6xytgrPly3PFsNC", + "isArchived": False, + }, + { + "datasetId": "bd6b86e8-cd4a-4b25-ab65-f258c2a68a8f", + "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "createdAt": "2024-04-15T13:51:08.019Z", + "updatedAt": "2024-04-15T13:51:08.019Z", + "name": "PET_PROFILES_JOINED_DYNAMIC", + "description": "", + "url": "https://app.sigmacomputing.com/acryldata/b/5LqGLu14qUnqh3cN6wRJBd", + "isArchived": False, + "workspaceId": "3ee61405-3be2-4000-ba72-60d36757b95b", + "path": "Acryl Data/New Folder", + }, + ], + "total": 2, + "nextPage": None, }, }, 
"https://aws-api.sigmacomputing.com/v2/files/1b47afdb-db4e-4a2c-9fa4-fc1332f4a097": { @@ -138,39 +158,28 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None: "isArchived": False, }, }, - "https://aws-api.sigmacomputing.com/v2/datasets/bd6b86e8-cd4a-4b25-ab65-f258c2a68a8f": { + "https://aws-api.sigmacomputing.com/v2/workbooks": { "method": "GET", "status_code": 200, "json": { - "datasetId": "bd6b86e8-cd4a-4b25-ab65-f258c2a68a8f", - "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "createdAt": "2024-04-15T13:51:08.019Z", - "updatedAt": "2024-04-15T13:51:08.019Z", - "name": "PET_PROFILES_JOINED_DYNAMIC", - "description": "", - "url": "https://app.sigmacomputing.com/acryldata/b/5LqGLu14qUnqh3cN6wRJBd", - "isArchived": False, - "workspaceId": "3ee61405-3be2-4000-ba72-60d36757b95b", - "path": "Acryl Data/New Folder", - }, - }, - "https://aws-api.sigmacomputing.com/v2/workbooks/9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b": { - "method": "GET", - "status_code": 200, - "json": { - "workbookId": "9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b", - "workbookUrlId": "4JRFW1HThPI1K3YTjouXI7", - "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "createdAt": "2024-04-15T13:44:51.477Z", - "updatedAt": "2024-04-15T13:51:57.302Z", - "name": "Acryl Workbook", - "url": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7", - "path": "Acryl Data", - "latestVersion": 2, - "isArchived": False, - "workspaceId": "3ee61405-3be2-4000-ba72-60d36757b95b", + "entries": [ + { + "workbookId": "9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b", + "workbookUrlId": "4JRFW1HThPI1K3YTjouXI7", + "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "createdAt": "2024-04-15T13:44:51.477Z", + "updatedAt": "2024-04-15T13:51:57.302Z", + "name": "Acryl Workbook", + "url": "https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7", + "path": 
"Acryl Data", + "latestVersion": 2, + "isArchived": False, + "workspaceId": "3ee61405-3be2-4000-ba72-60d36757b95b", + }, + ], + "total": 1, + "nextPage": None, }, }, "https://aws-api.sigmacomputing.com/v2/workbooks/9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b/pages": { @@ -369,26 +378,28 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None: "sql": 'select PK_8 "Pk", PROFILE_ID_9 "Profile Id", STATUS_10 "Status", CAST_TIMESTAMP_TO_DATETIME_11 "Created At", CAST_TIMESTAMP_TO_DATETIME_12 "Updated At", PK_13 "Pk (ADOPTIONS)", PET_FK_14 "Pet Fk", HUMAN_FK_15 "Human Fk", STATUS_16 "Status (ADOPTIONS)", CAST_TIMESTAMP_TO_DATETIME_19 "Created At (ADOPTIONS)", CAST_TIMESTAMP_TO_DATETIME_20 "Updated At (ADOPTIONS)" from (select PK_8, PROFILE_ID_9, STATUS_10, CAST_TIMESTAMP_TO_DATETIME_11, CAST_TIMESTAMP_TO_DATETIME_12, PK_13, PET_FK_14, HUMAN_FK_15, STATUS_16, CREATED_AT_17::timestamp_ltz CAST_TIMESTAMP_TO_DATETIME_19, UPDATED_AT_18::timestamp_ltz CAST_TIMESTAMP_TO_DATETIME_20 from (select Q1.PK_8 PK_8, Q1.PROFILE_ID_9 PROFILE_ID_9, Q1.STATUS_10 STATUS_10, Q1.CAST_TIMESTAMP_TO_DATETIME_11 CAST_TIMESTAMP_TO_DATETIME_11, Q1.CAST_TIMESTAMP_TO_DATETIME_12 CAST_TIMESTAMP_TO_DATETIME_12, Q2.PK PK_13, Q2.PET_FK PET_FK_14, Q2.HUMAN_FK HUMAN_FK_15, Q2.STATUS STATUS_16, Q2.CREATED_AT CREATED_AT_17, Q2.UPDATED_AT UPDATED_AT_18 from (select PK PK_8, PROFILE_ID PROFILE_ID_9, STATUS STATUS_10, CREATED_AT::timestamp_ltz CAST_TIMESTAMP_TO_DATETIME_11, UPDATED_AT::timestamp_ltz CAST_TIMESTAMP_TO_DATETIME_12 from LONG_TAIL_COMPANIONS.ADOPTION.PETS PETS) Q1 inner join LONG_TAIL_COMPANIONS.ADOPTION.ADOPTIONS Q2 on (Q1.PK_8 = Q2.PET_FK) limit 1000) Q4) Q5 limit 1000\n\n-- Sigma Σ {"request-id":"f5a997ef-b80c-47f1-b32e-9cd0f50cd491","email":"Shubham.Jagtap@gslab.com"}', }, }, - "https://aws-api.sigmacomputing.com/v2/members/CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1": { + "https://aws-api.sigmacomputing.com/v2/members": { "method": "GET", "status_code": 200, - "json": { - "organizationId": 
"b94da709-176c-4242-bea6-6760f34c9228", - "memberId": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "memberType": "admin", - "firstName": "Shubham", - "lastName": "Jagtap", - "email": "Shubham.Jagtap@gslab.com", - "profileImgUrl": None, - "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", - "createdAt": "2023-11-28T10:59:20.957Z", - "updatedAt": "2024-03-12T21:21:17.996Z", - "homeFolderId": "9bb94df1-e8af-49eb-9c37-2bd40b0efb2e", - "userKind": "internal", - "isArchived": False, - "isInactive": False, - }, + "json": [ + { + "organizationId": "b94da709-176c-4242-bea6-6760f34c9228", + "memberId": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "memberType": "admin", + "firstName": "Shubham", + "lastName": "Jagtap", + "email": "Shubham.Jagtap@gslab.com", + "profileImgUrl": None, + "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "createdAt": "2023-11-28T10:59:20.957Z", + "updatedAt": "2024-03-12T21:21:17.996Z", + "homeFolderId": "9bb94df1-e8af-49eb-9c37-2bd40b0efb2e", + "userKind": "internal", + "isArchived": False, + "isInactive": False, + }, + ], }, } @@ -491,3 +502,106 @@ def test_platform_instance_ingest(pytestconfig, tmp_path, requests_mock): output_path=output_path, golden_path=f"{test_resources_dir}/{golden_file}", ) + + +@pytest.mark.integration +def test_sigma_ingest_shared_entities(pytestconfig, tmp_path, requests_mock): + + test_resources_dir = pytestconfig.rootpath / "tests/integration/sigma" + + override_data = { + "https://aws-api.sigmacomputing.com/v2/workbooks": { + "method": "GET", + "status_code": 200, + "json": { + "entries": [ + { + "workbookId": "9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b", + "workbookUrlId": "4JRFW1HThPI1K3YTjouXI7", + "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "createdAt": "2024-04-15T13:44:51.477Z", + "updatedAt": "2024-04-15T13:51:57.302Z", + "name": "Acryl Workbook", + "url": 
"https://app.sigmacomputing.com/acryldata/workbook/4JRFW1HThPI1K3YTjouXI7", + "path": "New Acryl Data", + "latestVersion": 2, + "isArchived": False, + "workspaceId": "4pe61405-3be2-4000-ba72-60d36757b95b", + }, + ], + "total": 1, + "nextPage": None, + }, + }, + "https://aws-api.sigmacomputing.com/v2/files?typeFilters=workbook": { + "method": "GET", + "status_code": 200, + "json": { + "entries": [ + { + "id": "9bbbe3b0-c0c8-4fac-b6f1-8dfebfe74f8b", + "urlId": "4JRFW1HThPI1K3YTjouXI7", + "name": "Acryl Workbook", + "type": "workbook", + "parentId": "4pe61405-3be2-4000-ba72-60d36757b95b", + "parentUrlId": "1UGFyEQCHqwPfQoAec3xJ9", + "permission": "edit", + "path": "New Acryl Data", + "badge": "Warning", + "createdBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "updatedBy": "CPbEdA26GNQ2cM2Ra2BeO0fa5Awz1", + "createdAt": "2024-04-15T13:44:51.477Z", + "updatedAt": "2024-04-15T13:51:57.302Z", + "isArchived": False, + }, + ], + "total": 1, + "nextPage": None, + }, + }, + "https://aws-api.sigmacomputing.com/v2/workspaces/4pe61405-3be2-4000-ba72-60d36757b95b": { + "method": "GET", + "status_code": 403, + "json": {}, + }, + } + + register_mock_api(request_mock=requests_mock, override_data=override_data) + + output_path: str = f"{tmp_path}/sigma_ingest_shared_entities_mces.json" + + pipeline = Pipeline.create( + { + "run_id": "sigma-test", + "source": { + "type": "sigma", + "config": { + "client_id": "CLIENTID", + "client_secret": "CLIENTSECRET", + "ingest_shared_entities": True, + "chart_sources_platform_mapping": { + "Acryl Data/Acryl Workbook": { + "data_source_platform": "snowflake" + }, + }, + }, + }, + "sink": { + "type": "file", + "config": { + "filename": output_path, + }, + }, + } + ) + + pipeline.run() + pipeline.raise_from_status() + golden_file = "golden_test_sigma_ingest_shared_entities_mces.json" + + mce_helpers.check_golden_file( + pytestconfig, + output_path=output_path, + golden_path=f"{test_resources_dir}/{golden_file}", + ) diff --git 
a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py index e9f6190c464f94..ca694b02cff010 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -177,11 +177,13 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): ], ) report = cast(SnowflakeV2Report, pipeline.source.get_report()) - assert report.lru_cache_info["get_tables_for_database"]["misses"] == 1 - assert report.lru_cache_info["get_views_for_database"]["misses"] == 1 - assert report.lru_cache_info["get_columns_for_schema"]["misses"] == 1 - assert report.lru_cache_info["get_pk_constraints_for_schema"]["misses"] == 1 - assert report.lru_cache_info["get_fk_constraints_for_schema"]["misses"] == 1 + assert report.data_dictionary_cache is not None + cache_info = report.data_dictionary_cache.as_obj() + assert cache_info["get_tables_for_database"]["misses"] == 1 + assert cache_info["get_views_for_database"]["misses"] == 1 + assert cache_info["get_columns_for_schema"]["misses"] == 1 + assert cache_info["get_pk_constraints_for_schema"]["misses"] == 1 + assert cache_info["get_fk_constraints_for_schema"]["misses"] == 1 @freeze_time(FROZEN_TIME) diff --git a/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json b/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json index ef535158165da5..cd1cd0d7e28a48 100644 --- a/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/vertica/vertica_mces_with_db_golden.json @@ -9,11 +9,7 @@ "customProperties": { "platform": "vertica", "env": "PROD", - "database": "vmart", - "cluster_type": "Enterprise", - "cluster_size": "101 GB", - "subcluster": " ", - "communal_storage_path": "" + "database": "vmart" }, "name": "vmart" } @@ -101,10 +97,7 @@ "platform": "vertica", 
"env": "PROD", "database": "vmart", - "schema": "public", - "projection_count": "12", - "udx_list": "APPROXIMATE_COUNT_DISTINCT_SYNOPSIS_INFO, APPROXIMATE_MEDIAN, APPROXIMATE_PERCENTILE, AcdDataToCount, AcdDataToLongSyn, AcdDataToSyn, AcdSynToCount, AcdSynToSyn, DelimitedExport, DelimitedExportMulti, EmptyMap, Explode, FAvroParser, FCefParser, FCsvParser, FDelimitedPairParser, FDelimitedParser, FIDXParser, FJSONParser, FRegexParser, FlexTokenizer, JsonExport, JsonExportMulti, KafkaAvroParser, KafkaCheckBrokers, KafkaExport, KafkaInsertDelimiters, KafkaInsertLengths, KafkaJsonParser, KafkaListManyTopics, KafkaListTopics, KafkaOffsets, KafkaParser, KafkaSource, KafkaTopicDetails, MSE, MapAggregate, MapAggregate, MapContainsKey, MapContainsKey, MapContainsValue, MapContainsValue, MapDelimitedExtractor, MapItems, MapItems, MapJSONExtractor, MapKeys, MapKeys, MapKeysInfo, MapKeysInfo, MapLookup, MapLookup, MapLookup, MapPut, MapRegexExtractor, MapSize, MapSize, MapToString, MapToString, MapValues, MapValues, MapValuesOrField, MapVersion, MapVersion, OrcExport, OrcExportMulti, PRC, ParquetExport, ParquetExportMulti, PickBestType, PickBestType, PickBestType, ROC, STV_AsGeoJSON, STV_AsGeoJSON, STV_AsGeoJSON, STV_Create_Index, STV_Create_Index, STV_Create_Index, STV_DWithin, STV_DWithin, STV_DWithin, STV_Describe_Index, STV_Drop_Index, STV_Export2Shapefile, STV_Extent, STV_Extent, STV_ForceLHR, STV_Geography, STV_Geography, STV_GeographyPoint, STV_Geometry, STV_Geometry, STV_GeometryPoint, STV_GeometryPoint, STV_GetExportShapefileDirectory, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_IsValidReason, STV_IsValidReason, STV_IsValidReason, STV_LineStringPoint, STV_LineStringPoint, STV_LineStringPoint, STV_MemSize, STV_MemSize, STV_MemSize, STV_NN, STV_NN, STV_NN, STV_PolygonPoint, STV_PolygonPoint, STV_PolygonPoint, STV_Refresh_Index, STV_Refresh_Index, STV_Refresh_Index, STV_Rename_Index, 
STV_Reverse, STV_SetExportShapefileDirectory, STV_ShpCreateTable, STV_ShpParser, STV_ShpSource, ST_Area, ST_Area, ST_Area, ST_AsBinary, ST_AsBinary, ST_AsBinary, ST_AsText, ST_AsText, ST_AsText, ST_Boundary, ST_Buffer, ST_Centroid, ST_Contains, ST_Contains, ST_Contains, ST_ConvexHull, ST_Crosses, ST_Difference, ST_Disjoint, ST_Disjoint, ST_Disjoint, ST_Distance, ST_Distance, ST_Distance, ST_Envelope, ST_Equals, ST_Equals, ST_Equals, ST_GeoHash, ST_GeoHash, ST_GeoHash, ST_GeographyFromText, ST_GeographyFromWKB, ST_GeomFromGeoHash, ST_GeomFromGeoJSON, ST_GeomFromGeoJSON, ST_GeomFromText, ST_GeomFromText, ST_GeomFromWKB, ST_GeomFromWKB, ST_GeometryN, ST_GeometryN, ST_GeometryN, ST_GeometryType, ST_GeometryType, ST_GeometryType, ST_Intersection, ST_Intersects, ST_Intersects, ST_IsEmpty, ST_IsEmpty, ST_IsEmpty, ST_IsSimple, ST_IsSimple, ST_IsSimple, ST_IsValid, ST_IsValid, ST_IsValid, ST_Length, ST_Length, ST_Length, ST_NumGeometries, ST_NumGeometries, ST_NumGeometries, ST_NumPoints, ST_NumPoints, ST_NumPoints, ST_Overlaps, ST_PointFromGeoHash, ST_PointN, ST_PointN, ST_PointN, ST_Relate, ST_SRID, ST_SRID, ST_SRID, ST_Simplify, ST_SimplifyPreserveTopology, ST_SymDifference, ST_Touches, ST_Touches, ST_Touches, ST_Transform, ST_Union, ST_Union, ST_Within, ST_Within, ST_Within, ST_X, ST_X, ST_X, ST_XMax, ST_XMax, ST_XMax, ST_XMin, ST_XMin, ST_XMin, ST_Y, ST_Y, ST_Y, ST_YMax, ST_YMax, ST_YMax, ST_YMin, ST_YMin, ST_YMin, ST_intersects, SetMapKeys, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_NumCol, Unnest, VoltageSecureAccess, VoltageSecureAccess, VoltageSecureConfigure, VoltageSecureConfigureGlobal, VoltageSecureProtect, VoltageSecureProtect, VoltageSecureProtectAllKeys, VoltageSecureRefreshPolicy, VoltageSecureVersion, append_centers, apply_bisecting_kmeans, apply_iforest, apply_inverse_pca, apply_inverse_svd, apply_kmeans, apply_kprototypes, apply_normalize, apply_one_hot_encoder, apply_pca, apply_svd, 
approximate_quantiles, ar_create_blobs, ar_final_newton, ar_save_model, ar_transition_newton, arima_bfgs, arima_line_search, arima_save_model, avg_all_columns_local, bisecting_kmeans_init_model, bk_apply_best_kmeans_results, bk_compute_totss_local, bk_finalize_model, bk_get_rows_in_active_cluster, bk_kmeans_compute_local_centers, bk_kmeans_compute_withinss, bk_kmeans_fast_random_init, bk_kmeans_slow_random_init, bk_kmeanspp_init_cur_cluster, bk_kmeanspp_reset_blob, bk_kmeanspp_select_new_centers, bk_kmeanspp_within_chunk_sum, bk_save_final_model, bk_write_new_cluster_level, blob_to_table, bufUdx, bufUdx, calc_pseudo_centers, calculate_alpha_linear, calculate_hessian_linear1, calculate_hessian_linear2, chi_squared, cleanup_kmeans_files, compute_and_save_global_center, compute_and_save_new_centers, compute_local_totss, compute_local_withinss, compute_new_local_centers, confusion_matrix, coordinate_descent_covariance, corr_matrix, count_rows_in_blob, create_aggregator_blob, error_rate, evaluate_naive_bayes_model, evaluate_reg_model, evaluate_svm_model, export_model_files, finalize_blob_resource_group, get_attr_minmax, get_attr_robust_zscore, get_attr_zscore, get_model_attribute, get_model_summary, get_robust_zscore_median, iforest_create_blobs, iforest_phase0_udf1, iforest_phase0_udf2, iforest_phase1_udf1, iforest_phase1_udf2, iforest_phase1_udf3, iforest_phase1_udf4, iforest_phase2_udf1, iforest_phase2_udf2, iforest_phase2_udf3, iforest_phase2_udf4, iforest_save_model, import_model_files, isOrContains, kmeansAddMetricsToModel, kmeans_init_blobs, kmeans_to_write_final_centers, lift_table, line_search_logistic1, line_search_logistic2, load_rows_into_blocks, map_factor, math_op, matrix_global_xtx, matrix_local_xtx, mode_finder, model_converter, naive_bayes_phase1, naive_bayes_phase1_blob, naive_bayes_phase2, pca_prep1_global, pca_prep1_local, pca_prep2, pmml_parser, predict_arima, predict_autoregressor, predict_linear_reg, predict_logistic_reg, predict_moving_average, 
predict_naive_bayes, predict_naive_bayes_classes, predict_pmml, predict_poisson_reg, predict_rf_classifier, predict_rf_classifier_classes, predict_rf_regressor, predict_svm_classifier, predict_svm_regressor, predict_xgb_classifier, predict_xgb_classifier_classes, predict_xgb_regressor, random_init, random_init_write, read_from_dfblob, read_map_factor, read_ptree, read_tree, reg_final_bfgs, reg_final_newton, reg_transition_bfgs, reg_transition_newton, reg_write_model, remove_blob, reverse_normalize, rf_blob, rf_clean, rf_phase0_udf1, rf_phase0_udf2, rf_phase1_udf1, rf_phase1_udf2, rf_phase1_udf3, rf_phase1_udf4, rf_phase2_udf1, rf_phase2_udf2, rf_phase2_udf3, rf_phase2_udf4, rf_predictor_importance, rf_save_model, rsquared, save_cv_result, save_pca_model, save_svd_model, save_svm_model, select_new_centers, store_minmax_model, store_one_hot_encoder_model, store_robust_zscore_model, store_zscore_model, table_to_blob, table_to_dfblob, tokenize, topk, update_and_return_sum_of_squared_distances, upgrade_model_format, writeInitialKmeansModelToDfs, xgb_create_blobs, xgb_phase0_udf1, xgb_phase0_udf2, xgb_phase1_udf1, xgb_phase1_udf2, xgb_phase1_udf3, xgb_phase2_udf1, xgb_phase2_udf2, xgb_phase2_udf3, xgb_predictor_importance, xgb_prune, xgb_save_model, yule_walker, ", - "udx_language": "ComplexTypesLib -- Functions for Complex Types | DelimitedExportLib -- Delimited data export package | JsonExportLib -- Json data export package | MachineLearningLib -- Machine learning package | OrcExportLib -- Orc export package | ParquetExportLib -- Parquet export package | ApproximateLib -- Approximate package | FlexTableLib -- Flexible Tables Data Load and Query | KafkaLib -- Kafka streaming load and export | PlaceLib -- Geospatial package | VoltageSecureLib -- Voltage SecureData Connector | TransformFunctions -- User-defined Python library | " + "schema": "public" }, "name": "public" } @@ -204,7 +197,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.clicks,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -215,6 +208,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -229,7 +223,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.clicks,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -246,7 +240,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.clicks,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -256,7 +250,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:23:05.308022+00:00", + "create_time": "2024-06-03 12:24:31.057395+00:00", "table_size": "0 KB" }, "name": "clicks", @@ -266,7 +260,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.clicks", + "schemaName": "Vmart.public.clicks", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -337,7 +331,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.clicks,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -355,7 +349,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.clicks,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ 
-380,7 +374,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.customer_dimension,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -391,6 +385,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -405,7 +400,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.customer_dimension,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -422,7 +417,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.customer_dimension,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -432,7 +427,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.846965+00:00", + "create_time": "2024-06-03 12:23:45.887434+00:00", "table_size": "2119 KB" }, "name": "customer_dimension", @@ -442,7 +437,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.customer_dimension", + "schemaName": "Vmart.public.customer_dimension", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -747,7 +742,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.customer_dimension,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -765,7 +760,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.customer_dimension,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -790,7 +785,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.date_dimension,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -801,6 +796,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -815,7 +811,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.date_dimension,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -832,7 +828,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.date_dimension,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -842,8 +838,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.857152+00:00", - "table_size": "138 KB" + "create_time": "2024-06-03 12:23:45.897002+00:00", + "table_size": "145 KB" }, "name": "date_dimension", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", @@ -852,7 +848,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.date_dimension", + "schemaName": "Vmart.public.date_dimension", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -1157,7 +1153,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.date_dimension,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1175,7 +1171,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.date_dimension,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1200,7 +1196,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.employee_dimension,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -1211,6 +1207,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -1225,7 +1222,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.employee_dimension,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1242,7 +1239,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.employee_dimension,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1252,7 +1249,7 @@ { 
"com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.863745+00:00", + "create_time": "2024-06-03 12:23:45.903227+00:00", "table_size": "327 KB" }, "name": "employee_dimension", @@ -1262,7 +1259,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.employee_dimension", + "schemaName": "Vmart.public.employee_dimension", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -1528,7 +1525,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.employee_dimension,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1546,7 +1543,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.employee_dimension,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1571,7 +1568,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.inventory_fact,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -1582,6 +1579,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -1596,7 +1594,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.inventory_fact,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1613,7 +1611,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.inventory_fact,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1623,8 +1621,8 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.873181+00:00", - "table_size": "2564 KB" + "create_time": "2024-06-03 12:23:45.912348+00:00", + "table_size": "2567 KB" }, "name": "inventory_fact", "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", @@ -1633,7 +1631,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.inventory_fact", + "schemaName": "Vmart.public.inventory_fact", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -1743,7 +1741,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.inventory_fact,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1761,7 +1759,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.inventory_fact,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1786,7 +1784,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.phrases,PROD)", "changeType": "UPSERT", "aspectName": "ownership", 
"aspect": { @@ -1797,6 +1795,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -1811,7 +1810,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.phrases,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1828,7 +1827,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.phrases,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1838,7 +1837,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:23:05.408507+00:00", + "create_time": "2024-06-03 12:24:31.194163+00:00", "table_size": "0 KB" }, "name": "phrases", @@ -1848,7 +1847,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.phrases", + "schemaName": "Vmart.public.phrases", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -1893,7 +1892,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.phrases,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1911,7 +1910,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.phrases,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1936,7 +1935,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.product_dimension,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -1947,6 +1946,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -1961,7 +1961,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.product_dimension,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1978,7 +1978,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.product_dimension,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1988,7 +1988,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.850505+00:00", + "create_time": "2024-06-03 12:23:45.890782+00:00", "table_size": "19 KB" }, "name": "product_dimension", @@ -1998,7 +1998,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.product_dimension", + "schemaName": "Vmart.public.product_dimension", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -2303,7 +2303,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.product_dimension,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2321,7 +2321,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.product_dimension,PROD)", 
"changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -2346,7 +2346,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.promotion_dimension,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -2357,6 +2357,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -2371,7 +2372,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.promotion_dimension,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -2388,7 +2389,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.promotion_dimension,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -2398,7 +2399,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.853878+00:00", + "create_time": "2024-06-03 12:23:45.893891+00:00", "table_size": "3 KB" }, "name": "promotion_dimension", @@ -2408,7 +2409,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.promotion_dimension", + "schemaName": "Vmart.public.promotion_dimension", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -2596,7 +2597,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.promotion_dimension,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ 
-2614,7 +2615,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.promotion_dimension,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -2639,7 +2640,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.readings,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -2650,6 +2651,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -2664,7 +2666,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.readings,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -2681,7 +2683,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.readings,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -2691,7 +2693,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:23:05.296044+00:00", + "create_time": "2024-06-03 12:24:31.046829+00:00", "table_size": "0 KB" }, "name": "readings", @@ -2701,7 +2703,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.readings", + "schemaName": "Vmart.public.readings", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -2772,7 +2774,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.readings,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2790,7 +2792,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.readings,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -2815,7 +2817,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.shipping_dimension,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -2826,6 +2828,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -2840,7 +2843,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.shipping_dimension,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -2857,7 +2860,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.shipping_dimension,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -2867,7 +2870,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.867119+00:00", + "create_time": "2024-06-03 12:23:45.906471+00:00", "table_size": "1 KB" }, "name": "shipping_dimension", @@ -2877,7 +2880,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.shipping_dimension", + "schemaName": "Vmart.public.shipping_dimension", "platform": 
"urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -2961,7 +2964,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.shipping_dimension,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2979,7 +2982,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.shipping_dimension,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -3004,7 +3007,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vendor_dimension,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -3015,6 +3018,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -3029,7 +3033,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vendor_dimension,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -3046,7 +3050,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vendor_dimension,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -3056,7 +3060,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.860541+00:00", + "create_time": "2024-06-03 12:23:45.900185+00:00", "table_size": "1 KB" 
}, "name": "vendor_dimension", @@ -3066,7 +3070,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.vendor_dimension", + "schemaName": "Vmart.public.vendor_dimension", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -3202,7 +3206,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vendor_dimension,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -3220,7 +3224,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vendor_dimension,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -3245,7 +3249,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vmart_load_success,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vmart_load_success,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -3256,6 +3260,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -3270,7 +3275,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vmart_load_success,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vmart_load_success,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -3287,7 +3292,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vmart_load_success,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vmart_load_success,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -3297,7 
+3302,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:23:04.970568+00:00", + "create_time": "2024-06-03 12:24:30.233405+00:00", "table_size": "0 KB" }, "name": "vmart_load_success", @@ -3307,7 +3312,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.vmart_load_success", + "schemaName": "Vmart.public.vmart_load_success", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -3352,7 +3357,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vmart_load_success,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vmart_load_success,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -3370,7 +3375,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vmart_load_success,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vmart_load_success,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -3395,7 +3400,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.warehouse_dimension,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -3406,6 +3411,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -3420,7 +3426,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.warehouse_dimension,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -3437,7 +3443,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.warehouse_dimension,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -3447,7 +3453,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.870169+00:00", + "create_time": "2024-06-03 12:23:45.909432+00:00", "table_size": "2 KB" }, "name": "warehouse_dimension", @@ -3457,7 +3463,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.warehouse_dimension", + "schemaName": "Vmart.public.warehouse_dimension", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -3567,7 +3573,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.warehouse_dimension,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -3585,7 +3591,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.warehouse_dimension,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -3610,7 +3616,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.sampleview,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -3621,6 +3627,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -3635,7 +3642,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.sampleview,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -3652,7 +3659,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.sampleview,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -3662,7 +3669,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:23:05.319029+00:00", + "create_time": "2024-06-03 12:24:31.075640+00:00", "table_size": "0 KB", "view_definition": "SELECT sum(customer_dimension.annual_income) AS SUM, customer_dimension.customer_state FROM public.customer_dimension WHERE (customer_dimension.customer_key IN (SELECT store_sales_fact.customer_key FROM store.store_sales_fact)) GROUP BY customer_dimension.customer_state ORDER BY customer_dimension.customer_state", "is_view": "True" @@ -3674,7 +3681,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.sampleview", + "schemaName": "Vmart.public.sampleview", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -3693,7 +3700,7 @@ }, "fields": [ { - "fieldPath": "SUM", + "fieldPath": "sum", "nullable": true, "description": "", "type": { @@ -3732,7 +3739,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.sampleview,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -3750,7 +3757,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.sampleview,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", 
"aspect": { @@ -3768,27 +3775,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.sampleview,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "browsePathsV2", "aspect": { "json": { - "upstreams": [ + "path": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension,PROD)", - "type": "TRANSFORMED" + "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact,PROD)", - "type": "TRANSFORMED" + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } ] } @@ -3800,8 +3799,96 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.sampleview,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "vertica", + "env": "PROD", + "database": "vmart", + "schema": "store" + }, + "name": "store" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"container", + "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:vertica" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:343f520ad0fb3259b298736800bb1385" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -3810,10 +3897,6 @@ { "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" - }, - { - "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", - "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } ] } @@ -3826,7 +3909,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_dimension,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -3837,6 +3920,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": 
"urn:li:corpuser:unknown" @@ -3851,12 +3935,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_dimension,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" } }, "systemMetadata": { @@ -3868,7 +3952,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_dimension,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -3878,23 +3962,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "ROS_Count": "1", - "Projection_Type": "is_super_projection", - "Is_Segmented": "True", - "Segmentation_key": "hash(date_dimension.date_key)", - "Projection_size": "138 KB", - "Partition_Key": "Not Available", - "Number_Of_Partitions": "0", - "Projection_Cached": "False" + "create_time": "2024-06-03 12:23:45.918904+00:00", + "table_size": "2 KB" }, - "name": "date_dimension_super", - "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "name": "store_dimension", + "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. 
In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.date_dimension_super", + "schemaName": "Vmart.store.store_dimension", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -3913,7 +3991,7 @@ }, "fields": [ { - "fieldPath": "date_key", + "fieldPath": "store_key", "nullable": true, "description": "", "type": { @@ -3923,36 +4001,10 @@ }, "nativeDataType": "INTEGER()", "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "date", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "full_date_description", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=18)", - "recursive": false, - "isPartOfKey": false + "isPartOfKey": true }, { - "fieldPath": "day_of_week", + "fieldPath": "store_name", "nullable": true, "description": "", "type": { @@ -3960,12 +4012,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=9)", + "nativeDataType": "VARCHAR(length=64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "day_number_in_calendar_month", + "fieldPath": "store_number", "nullable": true, "description": "", "type": { @@ -3978,85 +4030,85 @@ "isPartOfKey": false }, { - "fieldPath": "day_number_in_calendar_year", + "fieldPath": "store_address", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=256)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": 
"day_number_in_fiscal_month", + "fieldPath": "store_city", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "day_number_in_fiscal_year", + "fieldPath": "store_state", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "CHAR(length=2)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "last_day_in_week_indicator", + "fieldPath": "store_region", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "last_day_in_month_indicator", + "fieldPath": "floor_plan_type", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "calendar_week_number_in_year", + "fieldPath": "photo_processing_type", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "calendar_month_name", + "fieldPath": "financial_service_type", "nullable": true, "description": "", "type": { @@ -4064,12 +4116,12 @@ 
"com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=9)", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "calendar_month_number_in_year", + "fieldPath": "selling_square_footage", "nullable": true, "description": "", "type": { @@ -4082,20 +4134,7 @@ "isPartOfKey": false }, { - "fieldPath": "calendar_year_month", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=7)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "calendar_quarter", + "fieldPath": "total_square_footage", "nullable": true, "description": "", "type": { @@ -4108,33 +4147,33 @@ "isPartOfKey": false }, { - "fieldPath": "calendar_year_quarter", + "fieldPath": "first_open_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "CHAR(length=7)", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "calendar_half_year", + "fieldPath": "last_remodel_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "calendar_year", + "fieldPath": "number_of_employees", "nullable": true, "description": "", "type": { @@ -4147,41 +4186,41 @@ "isPartOfKey": false }, { - "fieldPath": "holiday_indicator", + "fieldPath": "annual_shrinkage", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=10)", + "nativeDataType": "INTEGER()", "recursive": false, 
"isPartOfKey": false }, { - "fieldPath": "weekday_indicator", + "fieldPath": "foot_traffic", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=7)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "selling_season", + "fieldPath": "monthly_rent_cost", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false } @@ -4199,13 +4238,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_dimension,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Projections" + "Table" ] } }, @@ -4217,44 +4256,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_dimension,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "browsePathsV2", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension,PROD)", - "type": "TRANSFORMED" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.date_dimension_super,PROD)", - "changeType": "UPSERT", - "aspectName": 
"browsePathsV2", - "aspect": { - "json": { - "path": [ + "path": [ { "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", - "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", + "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" } ] } @@ -4267,7 +4281,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_orders_fact,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -4278,6 +4292,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -4292,12 +4307,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_orders_fact,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" } }, "systemMetadata": { @@ -4309,7 +4324,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_orders_fact,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -4319,23 +4334,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "ROS_Count": "1", - "Projection_Type": "is_super_projection", - "Is_Segmented": "True", - "Segmentation_key": "hash(product_dimension.product_key, 
product_dimension.product_version)", - "Projection_size": "19 KB", - "Partition_Key": "Not Available", - "Number_Of_Partitions": "0", - "Projection_Cached": "False" + "create_time": "2024-06-03 12:23:45.929154+00:00", + "table_size": "8646 KB" }, - "name": "product_dimension_super", - "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "name": "store_orders_fact", + "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.product_dimension_super", + "schemaName": "Vmart.store.store_orders_fact", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -4380,137 +4389,111 @@ "isPartOfKey": false }, { - "fieldPath": "product_description", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=128)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "sku_number", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "category_description", + "fieldPath": "store_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": 
{} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "department_description", + "fieldPath": "vendor_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "package_type_description", + "fieldPath": "employee_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "package_size", + "fieldPath": "order_number", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "fat_content", + "fieldPath": "date_ordered", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "diet_type", + "fieldPath": "date_shipped", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "expected_delivery_date", "nullable": true, "description": "", "type": { "type": { - 
"com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight_units_of_measure", + "fieldPath": "date_delivered", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "shelf_width", + "fieldPath": "quantity_ordered", "nullable": true, "description": "", "type": { @@ -4523,7 +4506,7 @@ "isPartOfKey": false }, { - "fieldPath": "shelf_height", + "fieldPath": "quantity_delivered", "nullable": true, "description": "", "type": { @@ -4536,20 +4519,20 @@ "isPartOfKey": false }, { - "fieldPath": "shelf_depth", + "fieldPath": "shipper_name", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "product_price", + "fieldPath": "unit_price", "nullable": true, "description": "", "type": { @@ -4562,7 +4545,7 @@ "isPartOfKey": false }, { - "fieldPath": "product_cost", + "fieldPath": "shipping_cost", "nullable": true, "description": "", "type": { @@ -4575,7 +4558,7 @@ "isPartOfKey": false }, { - "fieldPath": "lowest_competitor_price", + "fieldPath": "total_order_cost", "nullable": true, "description": "", "type": { @@ -4588,7 +4571,7 @@ "isPartOfKey": false }, { - "fieldPath": "highest_competitor_price", + "fieldPath": "quantity_in_stock", "nullable": true, "description": "", "type": { @@ -4601,7 +4584,7 @@ "isPartOfKey": false }, { - "fieldPath": "average_competitor_price", + "fieldPath": "reorder_level", "nullable": true, 
"description": "", "type": { @@ -4614,7 +4597,7 @@ "isPartOfKey": false }, { - "fieldPath": "discontinued_flag", + "fieldPath": "overstock_ceiling", "nullable": true, "description": "", "type": { @@ -4640,38 +4623,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_orders_fact,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Projections" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension,PROD)", - "type": "TRANSFORMED" - } + "Table" ] } }, @@ -4683,7 +4641,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.product_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_orders_fact,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -4694,8 +4652,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", - "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", + "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" } ] } @@ -4708,7 +4666,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_sales_fact,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -4719,6 +4677,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -4733,12 +4692,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_sales_fact,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" } }, "systemMetadata": { @@ -4750,7 +4709,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_sales_fact,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -4760,23 +4719,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "ROS_Count": "1", - "Projection_Type": "is_super_projection", - "Is_Segmented": "True", - "Segmentation_key": "hash(promotion_dimension.promotion_key)", - "Projection_size": "3 KB", - "Partition_Key": "Not Available", - "Number_Of_Partitions": "0", - "Projection_Cached": "False" + "create_time": "2024-06-03 12:23:45.922050+00:00", + "table_size": "225096 KB" }, - "name": "promotion_dimension_super", - "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "name": "store_sales_fact", + "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.promotion_dimension_super", + "schemaName": "Vmart.store.store_sales_fact", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -4795,7 +4748,7 @@ }, "fields": [ { - "fieldPath": "promotion_key", + "fieldPath": "date_key", "nullable": true, "description": "", "type": { @@ -4808,111 +4761,111 @@ "isPartOfKey": false }, { - "fieldPath": "promotion_name", + "fieldPath": "product_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "price_reduction_type", + "fieldPath": "product_version", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "promotion_media_type", + "fieldPath": "store_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": 
"VARCHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "ad_type", + "fieldPath": "promotion_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "display_type", + "fieldPath": "customer_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "coupon_type", + "fieldPath": "employee_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "ad_media_name", + "fieldPath": "pos_transaction_number", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "display_provider", + "fieldPath": "sales_quantity", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "promotion_cost", + "fieldPath": "sales_dollar_amount", "nullable": true, "description": "", "type": { @@ -4925,77 +4878,117 
@@ "isPartOfKey": false }, { - "fieldPath": "promotion_begin_date", + "fieldPath": "cost_dollar_amount", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "promotion_end_date", + "fieldPath": "gross_profit_dollar_amount", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false - } - ] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Projections" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" + }, + { + "fieldPath": "transaction_type", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=16)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "transaction_time", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "TIME()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "tender_type", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=8)", + 
"recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_sales_date", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_sales_datetime", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "TIMESTAMP_WITH_PRECISION()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_sales_fact,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, @@ -5007,7 +5000,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.promotion_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_sales_fact,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -5018,8 +5011,117 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", - "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", + "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" + } + ] + } + }, + "systemMetadata": { + 
"lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "vertica", + "env": "PROD", + "database": "vmart", + "schema": "online_sales" + }, + "name": "online_sales" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:vertica" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:343f520ad0fb3259b298736800bb1385" + } + }, + "systemMetadata": { + "lastObserved": 
1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" } ] } @@ -5032,7 +5134,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.call_center_dimension,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -5043,6 +5145,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -5057,12 +5160,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.call_center_dimension,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "container": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" } }, "systemMetadata": { @@ -5074,7 +5177,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.call_center_dimension,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -5084,23 +5187,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "ROS_Count": "1", - "Projection_Type": "is_super_projection", - "Is_Segmented": "True", - "Segmentation_key": "hash(vendor_dimension.vendor_key)", - 
"Projection_size": "1 KB", - "Partition_Key": "Not Available", - "Number_Of_Partitions": "0", - "Projection_Cached": "False" + "create_time": "2024-06-03 12:23:45.938730+00:00", + "table_size": "6 KB" }, - "name": "vendor_dimension_super", - "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "name": "call_center_dimension", + "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.vendor_dimension_super", + "schemaName": "Vmart.online_sales.call_center_dimension", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -5119,7 +5216,7 @@ }, "fields": [ { - "fieldPath": "vendor_key", + "fieldPath": "call_center_key", "nullable": true, "description": "", "type": { @@ -5129,36 +5226,36 @@ }, "nativeDataType": "INTEGER()", "recursive": false, - "isPartOfKey": false + "isPartOfKey": true }, { - "fieldPath": "vendor_name", + "fieldPath": "cc_closed_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "vendor_address", + "fieldPath": "cc_open_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + 
"com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "vendor_city", + "fieldPath": "cc_name", "nullable": true, "description": "", "type": { @@ -5166,12 +5263,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(length=50)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "vendor_state", + "fieldPath": "cc_class", "nullable": true, "description": "", "type": { @@ -5179,70 +5276,104 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "VARCHAR(length=50)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "vendor_region", + "fieldPath": "cc_employees", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "deal_size", + "fieldPath": "cc_hours", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "CHAR(length=20)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "last_deal_update", + "fieldPath": "cc_manager", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "VARCHAR(length=40)", "recursive": false, "isPartOfKey": false - } - ] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - 
"entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Projections" + }, + { + "fieldPath": "cc_address", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=256)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "cc_city", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "cc_state", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "cc_region", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + } + ] + } + } ] } }, @@ -5254,20 +5385,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.call_center_dimension,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, @@ -5279,7 +5403,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.vendor_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.call_center_dimension,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -5290,8 +5414,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", - "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "id": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "urn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" } ] } @@ -5304,7 +5428,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_page_dimension,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -5315,6 +5439,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -5329,12 +5454,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_page_dimension,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "container": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" } }, "systemMetadata": { @@ -5346,7 +5471,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_page_dimension,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -5356,23 +5481,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { 
"customProperties": { - "ROS_Count": "1", - "Projection_Type": "is_super_projection", - "Is_Segmented": "True", - "Segmentation_key": "hash(customer_dimension.customer_key)", - "Projection_size": "2119 KB", - "Partition_Key": "Not Available", - "Number_Of_Partitions": "0", - "Projection_Cached": "False" + "create_time": "2024-06-03 12:23:45.935745+00:00", + "table_size": "9 KB" }, - "name": "customer_dimension_super", - "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "name": "online_page_dimension", + "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.customer_dimension_super", + "schemaName": "Vmart.online_sales.online_page_dimension", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -5391,7 +5510,7 @@ }, "fields": [ { - "fieldPath": "customer_key", + "fieldPath": "online_page_key", "nullable": true, "description": "", "type": { @@ -5401,62 +5520,36 @@ }, "nativeDataType": "INTEGER()", "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "customer_type", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=16)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "customer_name", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=256)", - "recursive": false, - "isPartOfKey": false + "isPartOfKey": true }, { - "fieldPath": "customer_gender", + "fieldPath": "start_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "title", + "fieldPath": "end_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "household_id", + "fieldPath": "page_number", "nullable": true, "description": "", "type": { @@ -5469,59 +5562,7 @@ "isPartOfKey": false }, { - "fieldPath": "customer_address", - "nullable": true, - "description": "", - "type": { - "type": { - 
"com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=256)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "customer_city", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=64)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "customer_state", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=2)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "customer_region", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=64)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "marital_status", + "fieldPath": "page_description", "nullable": true, "description": "", "type": { @@ -5529,51 +5570,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "customer_age", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "number_of_children", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "annual_income", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=100)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": 
"occupation", + "fieldPath": "page_type", "nullable": true, "description": "", "type": { @@ -5581,85 +5583,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "largest_bill_amount", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_membership_card", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "customer_since", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "deal_stage", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "deal_size", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "last_deal_update", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", + "nativeDataType": "VARCHAR(length=100)", "recursive": false, "isPartOfKey": false } @@ -5677,38 +5601,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_page_dimension,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Projections" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension,PROD)", - "type": "TRANSFORMED" - } + "Table" ] } }, @@ -5720,7 +5619,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.customer_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_page_dimension,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -5731,8 +5630,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", - "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "id": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "urn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" } ] } @@ -5745,7 +5644,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_sales_fact,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -5756,6 +5655,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -5770,12 +5670,12 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_sales_fact,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "container": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" } }, "systemMetadata": { @@ -5787,7 +5687,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_sales_fact,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -5797,23 +5697,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "ROS_Count": "1", - "Projection_Type": "is_super_projection", - "Is_Segmented": "True", - "Segmentation_key": "hash(employee_dimension.employee_key)", - "Projection_size": "327 KB", - "Partition_Key": "Not Available", - "Number_Of_Partitions": "0", - "Projection_Cached": "False" + "create_time": "2024-06-03 12:23:45.941712+00:00", + "table_size": "182385 KB" }, - "name": "employee_dimension_super", - "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "name": "online_sales_fact", + "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. 
", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.employee_dimension_super", + "schemaName": "Vmart.online_sales.online_sales_fact", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -5832,7 +5726,7 @@ }, "fields": [ { - "fieldPath": "employee_key", + "fieldPath": "sale_date_key", "nullable": true, "description": "", "type": { @@ -5845,72 +5739,72 @@ "isPartOfKey": false }, { - "fieldPath": "employee_gender", + "fieldPath": "ship_date_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "courtesy_title", + "fieldPath": "product_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_first_name", + "fieldPath": "product_version", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_middle_initial", + "fieldPath": "customer_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_last_name", + "fieldPath": "call_center_key", "nullable": true, "description": "", "type": { "type": { - 
"com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_age", + "fieldPath": "online_page_key", "nullable": true, "description": "", "type": { @@ -5923,85 +5817,85 @@ "isPartOfKey": false }, { - "fieldPath": "hire_date", + "fieldPath": "shipping_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_street_address", + "fieldPath": "warehouse_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_city", + "fieldPath": "promotion_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_state", + "fieldPath": "pos_transaction_number", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "employee_region", + "fieldPath": "sales_quantity", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=32)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "job_title", + "fieldPath": "sales_dollar_amount", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "FLOAT()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "reports_to", + "fieldPath": "ship_dollar_amount", "nullable": true, "description": "", "type": { @@ -6009,12 +5903,12 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "FLOAT()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "salaried_flag", + "fieldPath": "net_dollar_amount", "nullable": true, "description": "", "type": { @@ -6022,12 +5916,12 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "FLOAT()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "annual_salary", + "fieldPath": "cost_dollar_amount", "nullable": true, "description": "", "type": { @@ -6035,12 +5929,12 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "FLOAT()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "hourly_rate", + "fieldPath": "gross_profit_dollar_amount", "nullable": true, "description": "", "type": { @@ -6053,15 +5947,41 @@ "isPartOfKey": false }, { - "fieldPath": "vacation_days", + "fieldPath": "transaction_type", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=16)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"online_sales_saledate", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "online_sales_shipdate", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false } @@ -6079,13 +5999,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_sales_fact,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Projections" + "Table" ] } }, @@ -6097,19 +6017,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_sales_fact,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "browsePathsV2", "aspect": { "json": { - "upstreams": [ + "path": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension,PROD)", - "type": "TRANSFORMED" + "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" + }, + { + "id": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "urn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" } ] } @@ -6121,22 +6041,22 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.employee_dimension_super,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:343f520ad0fb3259b298736800bb1385", "changeType": "UPSERT", - "aspectName": 
"browsePathsV2", + "aspectName": "containerProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", - "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" - }, - { - "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", - "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" - } - ] + "customProperties": { + "platform": "vertica", + "env": "PROD", + "database": "vmart", + "cluster_type": "Enterprise", + "cluster_size": "52 GB", + "subcluster": " ", + "communal_storage_path": "" + }, + "name": "vmart" } }, "systemMetadata": { @@ -6146,22 +6066,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:343f520ad0fb3259b298736800bb1385", "changeType": "UPSERT", - "aspectName": "ownership", + "aspectName": "status", "aspect": { "json": { - "owners": [ - { - "owner": "urn:li:corpuser:dbadmin", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } + "removed": false } }, "systemMetadata": { @@ -6171,13 +6082,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:343f520ad0fb3259b298736800bb1385", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + "platform": "urn:li:dataPlatform:vertica" } }, "systemMetadata": { @@ -6187,133 +6098,14 @@ } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - 
"com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "ROS_Count": "1", - "Projection_Type": "is_super_projection", - "Is_Segmented": "True", - "Segmentation_key": "hash(warehouse_dimension.warehouse_key)", - "Projection_size": "2 KB", - "Partition_Key": "Not Available", - "Number_Of_Partitions": "0", - "Projection_Cached": "False" - }, - "name": "warehouse_dimension_super", - "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.warehouse_dimension_super", - "platform": "urn:li:dataPlatform:vertica", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "warehouse_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "warehouse_name", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=20)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "warehouse_address", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=256)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "warehouse_city", - "nullable": true, - 
"description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=60)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "warehouse_state", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=2)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "warehouse_region", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=32)", - "recursive": false, - "isPartOfKey": false - } - ] - } - } + "entityType": "container", + "entityUrn": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" ] } }, @@ -6324,15 +6116,22 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "containerProperties", "aspect": { "json": { - "typeNames": [ - "Projections" - ] + "customProperties": { + "platform": "vertica", + "env": "PROD", + "database": "vmart", + "schema": "public", + "projection_count": "12", + "udx_list": "APPROXIMATE_COUNT_DISTINCT_SYNOPSIS_INFO, APPROXIMATE_MEDIAN, APPROXIMATE_PERCENTILE, AcdDataToCount, AcdDataToLongSyn, AcdDataToSyn, AcdSynToCount, AcdSynToSyn, DelimitedExport, DelimitedExportMulti, EmptyMap, Explode, FAvroParser, FCefParser, FCsvParser, FDelimitedPairParser, FDelimitedParser, FIDXParser, FJSONParser, FRegexParser, FlexTokenizer, JsonExport, JsonExportMulti, KafkaAvroParser, KafkaCheckBrokers, KafkaExport, KafkaInsertDelimiters, KafkaInsertLengths, KafkaJsonParser, KafkaListManyTopics, 
KafkaListTopics, KafkaOffsets, KafkaParser, KafkaSource, KafkaTopicDetails, MSE, MapAggregate, MapAggregate, MapContainsKey, MapContainsKey, MapContainsValue, MapContainsValue, MapDelimitedExtractor, MapItems, MapItems, MapJSONExtractor, MapKeys, MapKeys, MapKeysInfo, MapKeysInfo, MapLookup, MapLookup, MapLookup, MapPut, MapRegexExtractor, MapSize, MapSize, MapToString, MapToString, MapValues, MapValues, MapValuesOrField, MapVersion, MapVersion, OrcExport, OrcExportMulti, PRC, ParquetExport, ParquetExportMulti, PickBestType, PickBestType, PickBestType, ROC, STV_AsGeoJSON, STV_AsGeoJSON, STV_AsGeoJSON, STV_Create_Index, STV_Create_Index, STV_Create_Index, STV_DWithin, STV_DWithin, STV_DWithin, STV_Describe_Index, STV_Drop_Index, STV_Export2Shapefile, STV_Extent, STV_Extent, STV_ForceLHR, STV_Geography, STV_Geography, STV_GeographyPoint, STV_Geometry, STV_Geometry, STV_GeometryPoint, STV_GeometryPoint, STV_GetExportShapefileDirectory, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_Intersect, STV_IsValidReason, STV_IsValidReason, STV_IsValidReason, STV_LineStringPoint, STV_LineStringPoint, STV_LineStringPoint, STV_MemSize, STV_MemSize, STV_MemSize, STV_NN, STV_NN, STV_NN, STV_PolygonPoint, STV_PolygonPoint, STV_PolygonPoint, STV_Refresh_Index, STV_Refresh_Index, STV_Refresh_Index, STV_Rename_Index, STV_Reverse, STV_SetExportShapefileDirectory, STV_ShpCreateTable, STV_ShpParser, STV_ShpSource, ST_Area, ST_Area, ST_Area, ST_AsBinary, ST_AsBinary, ST_AsBinary, ST_AsText, ST_AsText, ST_AsText, ST_Boundary, ST_Buffer, ST_Centroid, ST_Contains, ST_Contains, ST_Contains, ST_ConvexHull, ST_Crosses, ST_Difference, ST_Disjoint, ST_Disjoint, ST_Disjoint, ST_Distance, ST_Distance, ST_Distance, ST_Envelope, ST_Equals, ST_Equals, ST_Equals, ST_GeoHash, ST_GeoHash, ST_GeoHash, ST_GeographyFromText, ST_GeographyFromWKB, ST_GeomFromGeoHash, ST_GeomFromGeoJSON, ST_GeomFromGeoJSON, ST_GeomFromText, ST_GeomFromText, 
ST_GeomFromWKB, ST_GeomFromWKB, ST_GeometryN, ST_GeometryN, ST_GeometryN, ST_GeometryType, ST_GeometryType, ST_GeometryType, ST_Intersection, ST_Intersects, ST_Intersects, ST_IsEmpty, ST_IsEmpty, ST_IsEmpty, ST_IsSimple, ST_IsSimple, ST_IsSimple, ST_IsValid, ST_IsValid, ST_IsValid, ST_Length, ST_Length, ST_Length, ST_NumGeometries, ST_NumGeometries, ST_NumGeometries, ST_NumPoints, ST_NumPoints, ST_NumPoints, ST_Overlaps, ST_PointFromGeoHash, ST_PointN, ST_PointN, ST_PointN, ST_Relate, ST_SRID, ST_SRID, ST_SRID, ST_Simplify, ST_SimplifyPreserveTopology, ST_SymDifference, ST_Touches, ST_Touches, ST_Touches, ST_Transform, ST_Union, ST_Union, ST_Within, ST_Within, ST_Within, ST_X, ST_X, ST_X, ST_XMax, ST_XMax, ST_XMax, ST_XMin, ST_XMin, ST_XMin, ST_Y, ST_Y, ST_Y, ST_YMax, ST_YMax, ST_YMax, ST_YMin, ST_YMin, ST_YMin, ST_intersects, SetMapKeys, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_CatCol, Summarize_NumCol, Unnest, VoltageSecureAccess, VoltageSecureAccess, VoltageSecureConfigure, VoltageSecureConfigureGlobal, VoltageSecureProtect, VoltageSecureProtect, VoltageSecureProtectAllKeys, VoltageSecureRefreshPolicy, VoltageSecureVersion, append_centers, apply_bisecting_kmeans, apply_iforest, apply_inverse_pca, apply_inverse_svd, apply_kmeans, apply_kprototypes, apply_normalize, apply_one_hot_encoder, apply_pca, apply_svd, approximate_quantiles, ar_create_blobs, ar_final_newton, ar_save_model, ar_transition_newton, arima_bfgs, arima_line_search, arima_save_model, avg_all_columns_local, bisecting_kmeans_init_model, bk_apply_best_kmeans_results, bk_compute_totss_local, bk_finalize_model, bk_get_rows_in_active_cluster, bk_kmeans_compute_local_centers, bk_kmeans_compute_withinss, bk_kmeans_fast_random_init, bk_kmeans_slow_random_init, bk_kmeanspp_init_cur_cluster, bk_kmeanspp_reset_blob, bk_kmeanspp_select_new_centers, bk_kmeanspp_within_chunk_sum, bk_save_final_model, bk_write_new_cluster_level, blob_to_table, bufUdx, bufUdx, 
calc_pseudo_centers, calculate_alpha_linear, calculate_hessian_linear1, calculate_hessian_linear2, chi_squared, cleanup_kmeans_files, compute_and_save_global_center, compute_and_save_new_centers, compute_local_totss, compute_local_withinss, compute_new_local_centers, confusion_matrix, coordinate_descent_covariance, corr_matrix, count_rows_in_blob, create_aggregator_blob, error_rate, evaluate_naive_bayes_model, evaluate_reg_model, evaluate_svm_model, export_model_files, finalize_blob_resource_group, get_attr_minmax, get_attr_robust_zscore, get_attr_zscore, get_model_attribute, get_model_summary, get_robust_zscore_median, iforest_create_blobs, iforest_phase0_udf1, iforest_phase0_udf2, iforest_phase1_udf1, iforest_phase1_udf2, iforest_phase1_udf3, iforest_phase1_udf4, iforest_phase2_udf1, iforest_phase2_udf2, iforest_phase2_udf3, iforest_phase2_udf4, iforest_save_model, import_model_files, isOrContains, kmeansAddMetricsToModel, kmeans_init_blobs, kmeans_to_write_final_centers, lift_table, line_search_logistic1, line_search_logistic2, load_rows_into_blocks, map_factor, math_op, matrix_global_xtx, matrix_local_xtx, mode_finder, model_converter, naive_bayes_phase1, naive_bayes_phase1_blob, naive_bayes_phase2, pca_prep1_global, pca_prep1_local, pca_prep2, pmml_parser, predict_arima, predict_autoregressor, predict_linear_reg, predict_logistic_reg, predict_moving_average, predict_naive_bayes, predict_naive_bayes_classes, predict_pmml, predict_poisson_reg, predict_rf_classifier, predict_rf_classifier_classes, predict_rf_regressor, predict_svm_classifier, predict_svm_regressor, predict_xgb_classifier, predict_xgb_classifier_classes, predict_xgb_regressor, random_init, random_init_write, read_from_dfblob, read_map_factor, read_ptree, read_tree, reg_final_bfgs, reg_final_newton, reg_transition_bfgs, reg_transition_newton, reg_write_model, remove_blob, reverse_normalize, rf_blob, rf_clean, rf_phase0_udf1, rf_phase0_udf2, rf_phase1_udf1, rf_phase1_udf2, rf_phase1_udf3, 
rf_phase1_udf4, rf_phase2_udf1, rf_phase2_udf2, rf_phase2_udf3, rf_phase2_udf4, rf_predictor_importance, rf_save_model, rsquared, save_cv_result, save_pca_model, save_svd_model, save_svm_model, select_new_centers, store_minmax_model, store_one_hot_encoder_model, store_robust_zscore_model, store_zscore_model, table_to_blob, table_to_dfblob, tokenize, topk, update_and_return_sum_of_squared_distances, upgrade_model_format, writeInitialKmeansModelToDfs, xgb_create_blobs, xgb_phase0_udf1, xgb_phase0_udf2, xgb_phase1_udf1, xgb_phase1_udf2, xgb_phase1_udf3, xgb_phase2_udf1, xgb_phase2_udf2, xgb_phase2_udf3, xgb_predictor_importance, xgb_prune, xgb_save_model, yule_walker, ", + "udx_language": "ComplexTypesLib -- Functions for Complex Types | DelimitedExportLib -- Delimited data export package | JsonExportLib -- Json data export package | MachineLearningLib -- Machine learning package | OrcExportLib -- Orc export package | ParquetExportLib -- Parquet export package | ApproximateLib -- Approximate package | FlexTableLib -- Flexible Tables Data Load and Query | KafkaLib -- Kafka streaming load and export | PlaceLib -- Geospatial package | VoltageSecureLib -- Voltage SecureData Connector | TransformFunctions -- User-defined Python library | " + }, + "name": "public" } }, "systemMetadata": { @@ -6342,22 +6141,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "status", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension,PROD)", - "type": "TRANSFORMED" - } - ] + "removed": false } }, "systemMetadata": { @@ -6367,21 +6157,30 @@ } }, { - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.warehouse_dimension_super,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", - "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" - }, - { - "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", - "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" - } + "platform": "urn:li:dataPlatform:vertica" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" ] } }, @@ -6391,9 +6190,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:343f520ad0fb3259b298736800bb1385" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.date_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -6404,6 +6219,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -6418,7 +6234,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.date_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -6435,7 +6251,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.date_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -6448,20 +6264,20 @@ "ROS_Count": "1", "Projection_Type": "is_super_projection", "Is_Segmented": "True", - "Segmentation_key": "hash(shipping_dimension.shipping_key)", - "Projection_size": "1 KB", + "Segmentation_key": "hash(date_dimension.date_key)", + "Projection_size": "145 KB", "Partition_Key": "Not Available", "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "shipping_dimension_super", + "name": "date_dimension_super", "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.shipping_dimension_super", + "schemaName": "Vmart.public.date_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -6480,7 +6296,7 @@ }, "fields": [ { - "fieldPath": "shipping_key", + "fieldPath": "date_key", "nullable": true, "description": "", "type": { @@ -6493,20 +6309,20 @@ "isPartOfKey": false }, { - "fieldPath": "ship_type", + "fieldPath": "date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "CHAR(length=30)", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "ship_mode", + "fieldPath": "full_date_description", "nullable": true, "description": "", "type": { @@ -6514,12 +6330,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=10)", + "nativeDataType": "VARCHAR(length=18)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "ship_carrier", + "fieldPath": "day_of_week", "nullable": true, "description": "", "type": { @@ -6527,31 +6343,234 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=20)", + "nativeDataType": "VARCHAR(length=9)", "recursive": false, "isPartOfKey": false - } - ] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Projections" + }, 
+ { + "fieldPath": "day_number_in_calendar_month", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "day_number_in_calendar_year", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "day_number_in_fiscal_month", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "day_number_in_fiscal_year", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_day_in_week_indicator", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_day_in_month_indicator", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "calendar_week_number_in_year", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "calendar_month_name", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": 
"VARCHAR(length=9)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "calendar_month_number_in_year", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "calendar_year_month", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=7)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "calendar_quarter", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "calendar_year_quarter", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=7)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "calendar_half_year", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "calendar_year", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "holiday_indicator", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=10)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weekday_indicator", + "nullable": true, + "description": "", + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=7)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "selling_season", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + } + ] + } + } ] } }, @@ -6563,20 +6582,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.date_dimension_super,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Projections" ] } }, @@ -6588,7 +6600,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.shipping_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.date_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -6613,7 +6625,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.product_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -6624,6 +6636,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -6638,7 +6651,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.product_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -6655,7 +6668,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.product_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -6668,20 +6681,20 @@ "ROS_Count": "1", "Projection_Type": "is_super_projection", "Is_Segmented": "True", - "Segmentation_key": "hash(inventory_fact.date_key, inventory_fact.product_key, inventory_fact.product_version, inventory_fact.warehouse_key, inventory_fact.qty_in_stock)", - "Projection_size": "2564 KB", + "Segmentation_key": "hash(product_dimension.product_key, product_dimension.product_version)", + "Projection_size": "19 KB", "Partition_Key": "Not Available", "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "inventory_fact_super", + "name": "product_dimension_super", "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.inventory_fact_super", + "schemaName": "Vmart.public.product_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -6700,7 +6713,7 @@ }, "fields": [ { - "fieldPath": "date_key", + "fieldPath": "product_key", "nullable": true, "description": "", "type": { @@ -6713,7 +6726,7 @@ "isPartOfKey": false }, { - "fieldPath": "product_key", + "fieldPath": "product_version", "nullable": true, "description": "", "type": { @@ -6726,33 +6739,85 @@ "isPartOfKey": false }, { - "fieldPath": "product_version", + "fieldPath": "product_description", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=128)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "warehouse_key", + "fieldPath": "sku_number", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "CHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "qty_in_stock", + "fieldPath": "category_description", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "department_description", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=32)", + "recursive": 
false, + "isPartOfKey": false + }, + { + "fieldPath": "package_type_description", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "package_size", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "fat_content", "nullable": true, "description": "", "type": { @@ -6765,15 +6830,158 @@ "isPartOfKey": false }, { - "fieldPath": "inventory_date", + "fieldPath": "diet_type", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "CHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "weight_units_of_measure", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "shelf_width", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "shelf_height", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": 
false + }, + { + "fieldPath": "shelf_depth", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "product_price", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "product_cost", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "lowest_competitor_price", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "highest_competitor_price", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "average_competitor_price", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "discontinued_flag", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false } @@ -6791,7 +6999,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.product_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -6809,32 +7017,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact,PROD)", - "type": "TRANSFORMED" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.inventory_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.product_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -6859,7 +7042,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.promotion_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -6870,6 +7053,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -6884,7 +7068,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.promotion_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -6901,7 +7085,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", + 
"urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.promotion_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -6912,22 +7096,22 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "ROS_Count": "1", - "Projection_Type": "is_aggregate_projection, has_expressions", + "Projection_Type": "is_super_projection", "Is_Segmented": "True", - "Segmentation_key": "hash(readings.meter_id)", - "Projection_size": "0 KB", + "Segmentation_key": "hash(promotion_dimension.promotion_key)", + "Projection_size": "3 KB", "Partition_Key": "Not Available", "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "readings_topk", + "name": "promotion_dimension_super", "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.readings_topk", + "schemaName": "Vmart.public.promotion_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -6946,7 +7130,7 @@ }, "fields": [ { - "fieldPath": "meter_id", + "fieldPath": "promotion_key", "nullable": true, "description": "", "type": { @@ -6959,52 +7143,151 @@ "isPartOfKey": false }, { - "fieldPath": "recent_date", + "fieldPath": "promotion_name", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "TIMESTAMP_WITH_PRECISION()", + "nativeDataType": "VARCHAR(length=128)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "recent_value", + "fieldPath": "price_reduction_type", "nullable": true, "description": "", "type": { "type": { - 
"com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false - } - ] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Projections" + }, + { + "fieldPath": "promotion_media_type", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ad_type", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "display_type", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "coupon_type", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ad_media_name", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "display_provider", + "nullable": true, + "description": "", + "type": { + "type": { 
+ "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=128)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "promotion_cost", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "promotion_begin_date", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "promotion_end_date", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } ] } }, @@ -7016,20 +7299,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.promotion_dimension_super,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Projections" ] } }, @@ -7041,7 +7317,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.readings_topk,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.promotion_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -7066,7 +7342,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vendor_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -7077,6 +7353,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -7091,7 +7368,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vendor_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -7108,7 +7385,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vendor_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -7119,22 +7396,22 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "ROS_Count": "1", - "Projection_Type": "is_aggregate_projection, has_expressions", + "Projection_Type": "is_super_projection", "Is_Segmented": "True", - "Segmentation_key": "hash(clicks.page_id, (clicks.click_time)::date)", - "Projection_size": "0 KB", + "Segmentation_key": "hash(vendor_dimension.vendor_key)", + "Projection_size": "1 KB", "Partition_Key": "Not Available", "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "clicks_agg", + "name": "vendor_dimension_super", "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.clicks_agg", + "schemaName": "Vmart.public.vendor_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -7153,7 +7430,85 @@ }, "fields": [ { - "fieldPath": "page_id", + "fieldPath": "vendor_key", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "vendor_name", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "vendor_address", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "vendor_city", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "vendor_state", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "vendor_region", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + 
"fieldPath": "deal_size", "nullable": true, "description": "", "type": { @@ -7164,6 +7519,19 @@ "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "last_deal_update", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "isPartOfKey": false } ] } @@ -7179,7 +7547,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vendor_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -7197,32 +7565,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", - "type": "TRANSFORMED" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.clicks_agg,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.vendor_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -7247,7 +7590,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.customer_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -7258,6 +7601,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, 
"actor": "urn:li:corpuser:unknown" @@ -7272,7 +7616,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.customer_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -7289,7 +7633,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.customer_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -7302,20 +7646,20 @@ "ROS_Count": "1", "Projection_Type": "is_super_projection", "Is_Segmented": "True", - "Segmentation_key": "hash(phrases.phrase)", - "Projection_size": "0 KB", + "Segmentation_key": "hash(customer_dimension.customer_key)", + "Projection_size": "2119 KB", "Partition_Key": "Not Available", "Number_Of_Partitions": "0", "Projection_Cached": "False" }, - "name": "phrases_super", + "name": "customer_dimension_super", "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "public.phrases_super", + "schemaName": "Vmart.public.customer_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -7334,7 +7678,20 @@ }, "fields": [ { - "fieldPath": "phrase", + "fieldPath": "customer_key", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_type", "nullable": true, "description": "", "type": { @@ -7342,7 +7699,254 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=128)", + "nativeDataType": "VARCHAR(length=16)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_name", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=256)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_gender", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=8)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "title", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=8)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "household_id", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + 
"isPartOfKey": false + }, + { + "fieldPath": "customer_address", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=256)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_city", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_state", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_region", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "marital_status", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_age", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "number_of_children", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "annual_income", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + 
"recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "occupation", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "largest_bill_amount", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_membership_card", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_since", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "deal_stage", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=32)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "deal_size", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_deal_update", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false } @@ -7360,7 +7964,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.customer_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -7378,32 +7982,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases,PROD)", - "type": "TRANSFORMED" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,public.phrases_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.customer_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -7426,121 +8005,9 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "vertica", - "env": "PROD", - "database": "vmart", - "schema": "store", - "projection_count": "3", - "udx_list": "", - "udx_language": "" - }, - "name": "store" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": 
"container", - "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:vertica" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:343f520ad0fb3259b298736800bb1385" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", - "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.employee_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -7551,6 +8018,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": 
"urn:li:corpuser:unknown" @@ -7565,12 +8033,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.employee_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } }, "systemMetadata": { @@ -7582,7 +8050,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.employee_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -7592,17 +8060,23 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.879951+00:00", - "table_size": "2 KB" + "ROS_Count": "1", + "Projection_Type": "is_super_projection", + "Is_Segmented": "True", + "Segmentation_key": "hash(employee_dimension.employee_key)", + "Projection_size": "327 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + "Projection_Cached": "False" }, - "name": "store_dimension", - "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", + "name": "employee_dimension_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "store.store_dimension", + "schemaName": "Vmart.public.employee_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -7621,7 +8095,7 @@ }, "fields": [ { - "fieldPath": "store_key", + "fieldPath": "employee_key", "nullable": true, "description": "", "type": { @@ -7631,10 +8105,10 @@ }, "nativeDataType": "INTEGER()", "recursive": false, - "isPartOfKey": true + "isPartOfKey": false }, { - "fieldPath": "store_name", + "fieldPath": "employee_gender", "nullable": true, "description": "", "type": { @@ -7642,25 +8116,25 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(length=8)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "store_number", + "fieldPath": "courtesy_title", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=8)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "store_address", + "fieldPath": "employee_first_name", "nullable": true, "description": "", "type": { @@ -7668,12 +8142,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "VARCHAR(length=64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "store_city", + "fieldPath": "employee_middle_initial", "nullable": true, "description": "", "type": { @@ -7681,12 +8155,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "VARCHAR(length=8)", "recursive": false, 
"isPartOfKey": false }, { - "fieldPath": "store_state", + "fieldPath": "employee_last_name", "nullable": true, "description": "", "type": { @@ -7694,38 +8168,38 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "VARCHAR(length=64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "store_region", + "fieldPath": "employee_age", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "floor_plan_type", + "fieldPath": "hire_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "photo_processing_type", + "fieldPath": "employee_street_address", "nullable": true, "description": "", "type": { @@ -7733,12 +8207,12 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(length=256)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "financial_service_type", + "fieldPath": "employee_city", "nullable": true, "description": "", "type": { @@ -7746,64 +8220,64 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", + "nativeDataType": "VARCHAR(length=64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "selling_square_footage", + "fieldPath": "employee_state", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": 
"CHAR(length=2)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "total_square_footage", + "fieldPath": "employee_region", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "CHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "first_open_date", + "fieldPath": "job_title", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "VARCHAR(length=64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "last_remodel_date", + "fieldPath": "reports_to", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "number_of_employees", + "fieldPath": "salaried_flag", "nullable": true, "description": "", "type": { @@ -7816,7 +8290,7 @@ "isPartOfKey": false }, { - "fieldPath": "annual_shrinkage", + "fieldPath": "annual_salary", "nullable": true, "description": "", "type": { @@ -7829,7 +8303,7 @@ "isPartOfKey": false }, { - "fieldPath": "foot_traffic", + "fieldPath": "hourly_rate", "nullable": true, "description": "", "type": { @@ -7837,12 +8311,12 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "FLOAT()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "monthly_rent_cost", + "fieldPath": "vacation_days", "nullable": true, "description": "", "type": { @@ -7868,13 +8342,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension,PROD)", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.employee_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "Projections" ] } }, @@ -7886,7 +8360,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.employee_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -7897,8 +8371,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } ] } @@ -7911,7 +8385,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.warehouse_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -7922,6 +8396,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -7936,12 +8411,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.warehouse_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } }, "systemMetadata": { @@ -7953,7 +8428,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.warehouse_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -7963,17 +8438,23 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.890717+00:00", - "table_size": "8646 KB" + "ROS_Count": "1", + "Projection_Type": "is_super_projection", + "Is_Segmented": "True", + "Segmentation_key": "hash(warehouse_dimension.warehouse_key)", + "Projection_size": "2 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + "Projection_Cached": "False" }, - "name": "store_orders_fact", - "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", + "name": "warehouse_dimension_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "store.store_orders_fact", + "schemaName": "Vmart.public.warehouse_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -7992,72 +8473,7 @@ }, "fields": [ { - "fieldPath": "product_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "product_version", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "vendor_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "employee_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "order_number", + "fieldPath": "warehouse_key", "nullable": true, "description": "", "type": { @@ -8070,72 +8486,216 @@ "isPartOfKey": false }, { - "fieldPath": "date_ordered", + "fieldPath": "warehouse_name", "nullable": true, "description": "", "type": { "type": { - 
"com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "VARCHAR(length=20)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "date_shipped", + "fieldPath": "warehouse_address", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "VARCHAR(length=256)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "expected_delivery_date", + "fieldPath": "warehouse_city", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "VARCHAR(length=60)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "date_delivered", + "fieldPath": "warehouse_state", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "CHAR(length=2)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "quantity_ordered", + "fieldPath": "warehouse_region", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false - }, + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.warehouse_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + 
"aspect": { + "json": { + "typeNames": [ + "Projections" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.warehouse_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", + "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" + }, + { + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.shipping_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:dbadmin", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.shipping_dimension_super,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.shipping_dimension_super,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "ROS_Count": "1", + "Projection_Type": "is_super_projection", + "Is_Segmented": "True", + "Segmentation_key": "hash(shipping_dimension.shipping_key)", + "Projection_size": "1 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + "Projection_Cached": "False" + }, + "name": "shipping_dimension_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "Vmart.public.shipping_dimension_super", + "platform": "urn:li:dataPlatform:vertica", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ { - "fieldPath": "quantity_delivered", + "fieldPath": "shipping_key", "nullable": true, "description": "", "type": { @@ -8148,7 +8708,7 @@ "isPartOfKey": false }, { - "fieldPath": "shipper_name", + "fieldPath": "ship_type", "nullable": true, "description": "", "type": { @@ -8156,85 +8716,33 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "unit_price", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - 
"recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "shipping_cost", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "total_order_cost", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "quantity_in_stock", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", + "nativeDataType": "CHAR(length=30)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "reorder_level", + "fieldPath": "ship_mode", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "CHAR(length=10)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "overstock_ceiling", + "fieldPath": "ship_carrier", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "CHAR(length=20)", "recursive": false, "isPartOfKey": false } @@ -8252,13 +8760,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.shipping_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "Projections" ] } }, @@ -8270,7 +8778,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.shipping_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -8281,8 +8789,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } ] } @@ -8295,7 +8803,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.inventory_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -8306,6 +8814,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -8320,12 +8829,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.inventory_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } }, "systemMetadata": { @@ -8337,7 +8846,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.inventory_fact_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -8347,17 +8856,23 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 
11:22:37.883186+00:00", - "table_size": "225060 KB" + "ROS_Count": "1", + "Projection_Type": "is_super_projection", + "Is_Segmented": "True", + "Segmentation_key": "hash(inventory_fact.date_key, inventory_fact.product_key, inventory_fact.product_version, inventory_fact.warehouse_key, inventory_fact.qty_in_stock)", + "Projection_size": "2567 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + "Projection_Cached": "False" }, - "name": "store_sales_fact", - "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", + "name": "inventory_fact_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "store.store_sales_fact", + "schemaName": "Vmart.public.inventory_fact_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -8415,184 +8930,41 @@ "isPartOfKey": false }, { - "fieldPath": "store_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "promotion_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "customer_key", - "nullable": true, - 
"description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "employee_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pos_transaction_number", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "sales_quantity", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "sales_dollar_amount", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "cost_dollar_amount", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "gross_profit_dollar_amount", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "transaction_type", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=16)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "transaction_time", - "nullable": 
true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} - } - }, - "nativeDataType": "TIME()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "tender_type", + "fieldPath": "warehouse_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=8)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "store_sales_date", + "fieldPath": "qty_in_stock", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "store_sales_datetime", + "fieldPath": "inventory_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "TIMESTAMP_WITH_PRECISION()", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false } @@ -8610,13 +8982,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.inventory_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "Projections" ] } }, @@ -8628,7 +9000,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.inventory_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -8639,8 +9011,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" 
}, { - "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } ] } @@ -8653,7 +9025,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.readings_topk,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -8664,6 +9036,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -8678,12 +9051,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.readings_topk,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } }, "systemMetadata": { @@ -8695,7 +9068,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.readings_topk,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -8706,22 +9079,22 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "ROS_Count": "1", - "Projection_Type": "is_super_projection", + "Projection_Type": "is_aggregate_projection, has_expressions", "Is_Segmented": "True", - "Segmentation_key": "hash(store_dimension.store_key)", - "Projection_size": "2 KB", + "Segmentation_key": "hash(readings.meter_id)", + "Projection_size": "0 KB", "Partition_Key": "Not Available", "Number_Of_Partitions": "0", 
"Projection_Cached": "False" }, - "name": "store_dimension_super", + "name": "readings_topk", "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "store.store_dimension_super", + "schemaName": "Vmart.public.readings_topk", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -8740,202 +9113,7 @@ }, "fields": [ { - "fieldPath": "store_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_name", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=64)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_number", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_address", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=256)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_city", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=64)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_state", - "nullable": true, - "description": "", - "type": { - "type": { - 
"com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "CHAR(length=2)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_region", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=64)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "floor_plan_type", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "photo_processing_type", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "financial_service_type", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "selling_square_footage", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "total_square_footage", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "first_open_date", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "last_remodel_date", - "nullable": true, - 
"description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "number_of_employees", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "annual_shrinkage", + "fieldPath": "meter_id", "nullable": true, "description": "", "type": { @@ -8948,20 +9126,20 @@ "isPartOfKey": false }, { - "fieldPath": "foot_traffic", + "fieldPath": "recent_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "TIMESTAMP_WITH_PRECISION()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "monthly_rent_cost", + "fieldPath": "recent_value", "nullable": true, "description": "", "type": { @@ -8969,7 +9147,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "FLOAT()", "recursive": false, "isPartOfKey": false } @@ -8987,38 +9165,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.readings_topk,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { - "typeNames": [ - "Projections" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension_super,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - 
"auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Projections" ] } }, @@ -9030,7 +9183,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.readings_topk,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -9041,8 +9194,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } ] } @@ -9055,7 +9208,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.clicks_agg,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -9066,6 +9219,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -9080,12 +9234,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.clicks_agg,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } }, "systemMetadata": { @@ -9097,268 +9251,60 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact_super,PROD)", + "urn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.clicks_agg,PROD)", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "ROS_Count": "2", - "Projection_Type": "is_super_projection", - "Is_Segmented": "True", - "Segmentation_key": "hash(store_sales_fact.date_key, store_sales_fact.product_key, store_sales_fact.product_version, store_sales_fact.store_key, store_sales_fact.promotion_key, store_sales_fact.customer_key, store_sales_fact.employee_key, store_sales_fact.pos_transaction_number)", - "Projection_size": "225060 KB", - "Partition_Key": "Not Available", - "Number_Of_Partitions": "0", - "Projection_Cached": "False" - }, - "name": "store_sales_fact_super", - "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "store.store_sales_fact_super", - "platform": "urn:li:dataPlatform:vertica", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "date_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "product_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - 
"recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "product_version", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "promotion_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "customer_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "employee_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pos_transaction_number", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "sales_quantity", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "sales_dollar_amount", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - 
"isPartOfKey": false - }, - { - "fieldPath": "cost_dollar_amount", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "gross_profit_dollar_amount", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "transaction_type", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=16)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "transaction_time", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} - } - }, - "nativeDataType": "TIME()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "tender_type", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=8)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_sales_date", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "isPartOfKey": false - }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "ROS_Count": "1", + "Projection_Type": "is_aggregate_projection, has_expressions", + "Is_Segmented": "True", + "Segmentation_key": "hash(clicks.page_id, (clicks.click_time)::date)", + "Projection_size": "0 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + "Projection_Cached": 
"False" + }, + "name": "clicks_agg", + "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "Vmart.public.clicks_agg", + "platform": "urn:li:dataPlatform:vertica", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ { - "fieldPath": "store_sales_datetime", + "fieldPath": "page_id", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "TIMESTAMP_WITH_PRECISION()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false } @@ -9376,7 +9322,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.clicks_agg,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -9394,32 +9340,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact_super,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact,PROD)", - "type": "TRANSFORMED" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": 
"vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_sales_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.clicks_agg,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -9430,8 +9351,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } ] } @@ -9444,7 +9365,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.phrases_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -9455,6 +9376,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -9469,12 +9391,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.phrases_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "container": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } }, "systemMetadata": { @@ -9486,7 +9408,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.phrases_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -9499,195 +9421,39 @@ 
"ROS_Count": "1", "Projection_Type": "is_super_projection", "Is_Segmented": "True", - "Segmentation_key": "hash(store_orders_fact.product_key, store_orders_fact.product_version, store_orders_fact.store_key, store_orders_fact.vendor_key, store_orders_fact.employee_key, store_orders_fact.order_number, store_orders_fact.date_ordered, store_orders_fact.date_shipped)", - "Projection_size": "8646 KB", + "Segmentation_key": "hash(phrases.phrase)", + "Projection_size": "0 KB", "Partition_Key": "Not Available", - "Number_Of_Partitions": "0", - "Projection_Cached": "False" - }, - "name": "store_orders_fact_super", - "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "store.store_orders_fact_super", - "platform": "urn:li:dataPlatform:vertica", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "product_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "product_version", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "store_key", - "nullable": true, - "description": "", - "type": { - "type": { - 
"com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "vendor_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "employee_key", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "order_number", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "date_ordered", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "date_shipped", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "expected_delivery_date", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "date_delivered", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "quantity_ordered", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } 
- }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "quantity_delivered", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, + "Number_Of_Partitions": "0", + "Projection_Cached": "False" + }, + "name": "phrases_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "Vmart.public.phrases_super", + "platform": "urn:li:dataPlatform:vertica", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ { - "fieldPath": "shipper_name", + "fieldPath": "phrase", "nullable": true, "description": "", "type": { @@ -9695,85 +9461,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=32)", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "unit_price", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "shipping_cost", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "total_order_cost", - 
"nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "quantity_in_stock", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "reorder_level", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "overstock_ceiling", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=128)", "recursive": false, "isPartOfKey": false } @@ -9791,7 +9479,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.phrases_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -9809,32 +9497,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact_super,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact,PROD)", - "type": "TRANSFORMED" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,store.store_orders_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.public.phrases_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -9845,8 +9508,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", - "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" + "id": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4", + "urn": "urn:li:container:eb682025a9113b5543ec7ed26bfa21e4" } ] } @@ -9859,7 +9522,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -9868,12 +9531,12 @@ "platform": "vertica", "env": "PROD", "database": "vmart", - "schema": "online_sales", + "schema": "store", "projection_count": "3", "udx_list": "", "udx_language": "" }, - "name": "online_sales" + "name": "store" } }, "systemMetadata": { @@ -9884,7 +9547,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -9900,7 +9563,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -9916,7 +9579,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -9934,7 +9597,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + 
"entityUrn": "urn:li:container:342b43fc61f85b16580be55c11e89787", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -9948,30 +9611,9 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:343f520ad0fb3259b298736800bb1385", - "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -9982,6 +9624,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -9996,12 +9639,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" + "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" } }, "systemMetadata": { @@ -10013,7 +9656,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -10023,17 +9666,23 @@ { 
"com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.900841+00:00", - "table_size": "6 KB" + "ROS_Count": "1", + "Projection_Type": "is_super_projection", + "Is_Segmented": "True", + "Segmentation_key": "hash(store_dimension.store_key)", + "Projection_size": "2 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + "Projection_Cached": "False" }, - "name": "call_center_dimension", - "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", + "name": "store_dimension_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "online_sales.call_center_dimension", + "schemaName": "Vmart.store.store_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -10052,46 +9701,124 @@ }, "fields": [ { - "fieldPath": "call_center_key", + "fieldPath": "store_key", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_name", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"store_number", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_address", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=256)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_city", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=64)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_state", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "CHAR(length=2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_region", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "VARCHAR(length=64)", "recursive": false, - "isPartOfKey": true + "isPartOfKey": false }, { - "fieldPath": "cc_closed_date", + "fieldPath": "floor_plan_type", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cc_open_date", + "fieldPath": "photo_processing_type", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "VARCHAR(length=32)", "recursive": 
false, "isPartOfKey": false }, { - "fieldPath": "cc_name", + "fieldPath": "financial_service_type", "nullable": true, "description": "", "type": { @@ -10099,25 +9826,25 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cc_class", + "fieldPath": "selling_square_footage", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=50)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cc_employees", + "fieldPath": "total_square_footage", "nullable": true, "description": "", "type": { @@ -10130,80 +9857,80 @@ "isPartOfKey": false }, { - "fieldPath": "cc_hours", + "fieldPath": "first_open_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "CHAR(length=20)", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cc_manager", + "fieldPath": "last_remodel_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "VARCHAR(length=40)", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cc_address", + "fieldPath": "number_of_employees", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=256)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cc_city", + "fieldPath": "annual_shrinkage", "nullable": 
true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cc_state", + "fieldPath": "foot_traffic", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "CHAR(length=2)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cc_region", + "fieldPath": "monthly_rent_cost", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "VARCHAR(length=64)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false } @@ -10221,13 +9948,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "Projections" ] } }, @@ -10239,7 +9966,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -10250,8 +9977,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", - "urn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" + "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", + "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" 
} ] } @@ -10264,7 +9991,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_sales_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -10275,6 +10002,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -10289,12 +10017,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_sales_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" + "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" } }, "systemMetadata": { @@ -10306,7 +10034,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_sales_fact_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -10316,17 +10044,23 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_time": "2023-10-13 11:22:37.897788+00:00", - "table_size": "9 KB" + "ROS_Count": "2", + "Projection_Type": "is_super_projection", + "Is_Segmented": "True", + "Segmentation_key": "hash(store_sales_fact.date_key, store_sales_fact.product_key, store_sales_fact.product_version, store_sales_fact.store_key, store_sales_fact.promotion_key, store_sales_fact.customer_key, store_sales_fact.employee_key, store_sales_fact.pos_transaction_number)", + "Projection_size": "225096 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + 
"Projection_Cached": "False" }, - "name": "online_page_dimension", - "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", + "name": "store_sales_fact_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "online_sales.online_page_dimension", + "schemaName": "Vmart.store.store_sales_fact_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -10345,7 +10079,7 @@ }, "fields": [ { - "fieldPath": "online_page_key", + "fieldPath": "date_key", "nullable": true, "description": "", "type": { @@ -10355,36 +10089,36 @@ }, "nativeDataType": "INTEGER()", "recursive": false, - "isPartOfKey": true + "isPartOfKey": false }, { - "fieldPath": "start_date", + "fieldPath": "product_key", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "end_date", + "fieldPath": "product_version", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "page_number", + "fieldPath": 
"store_key", "nullable": true, "description": "", "type": { @@ -10397,7 +10131,111 @@ "isPartOfKey": false }, { - "fieldPath": "page_description", + "fieldPath": "promotion_key", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_key", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "employee_key", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "pos_transaction_number", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "sales_quantity", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "sales_dollar_amount", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "cost_dollar_amount", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "gross_profit_dollar_amount", + "nullable": true, + "description": "", + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "transaction_type", "nullable": true, "description": "", "type": { @@ -10405,20 +10243,59 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "VARCHAR(length=100)", + "nativeDataType": "VARCHAR(length=16)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "page_type", + "fieldPath": "transaction_time", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "TIME()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "tender_type", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=8)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_sales_date", + "nullable": true, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_sales_datetime", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.TimeType": {} } }, - "nativeDataType": "VARCHAR(length=100)", + "nativeDataType": "TIMESTAMP_WITH_PRECISION()", "recursive": false, "isPartOfKey": false } @@ -10436,13 +10313,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_sales_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "Projections" ] } }, @@ -10454,7 +10331,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_sales_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -10465,8 +10342,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", - "urn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" + "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", + "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" } ] } @@ -10479,7 +10356,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_orders_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -10490,6 +10367,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -10504,12 +10382,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_orders_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" + "container": "urn:li:container:342b43fc61f85b16580be55c11e89787" } }, "systemMetadata": { @@ -10521,7 +10399,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_orders_fact_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -10531,17 +10409,23 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { 
"customProperties": { - "create_time": "2023-10-13 11:22:37.903963+00:00", - "table_size": "182356 KB" + "ROS_Count": "1", + "Projection_Type": "is_super_projection", + "Is_Segmented": "True", + "Segmentation_key": "hash(store_orders_fact.product_key, store_orders_fact.product_version, store_orders_fact.store_key, store_orders_fact.vendor_key, store_orders_fact.employee_key, store_orders_fact.order_number, store_orders_fact.date_ordered, store_orders_fact.date_shipped)", + "Projection_size": "8646 KB", + "Partition_Key": "Not Available", + "Number_Of_Partitions": "0", + "Projection_Cached": "False" }, - "name": "online_sales_fact", - "description": "References the properties of a native table in Vertica. Vertica physically stores table data in projections, which are collections of table columns. Projections store data in a format that optimizes query execution. In order to query or perform any operation on a Vertica table, the table must have one or more projections associated with it. ", + "name": "store_orders_fact_super", + "description": "Vertica physically stores table data in projections, which are collections of table columns. 
Projections store data in a format that optimizes query execution For more info on projections and corresponding properties check out the Vertica Docs: https://www.vertica.com/docs", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "online_sales.online_sales_fact", + "schemaName": "Vmart.store.store_orders_fact_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -10560,7 +10444,7 @@ }, "fields": [ { - "fieldPath": "sale_date_key", + "fieldPath": "product_key", "nullable": true, "description": "", "type": { @@ -10573,7 +10457,7 @@ "isPartOfKey": false }, { - "fieldPath": "ship_date_key", + "fieldPath": "product_version", "nullable": true, "description": "", "type": { @@ -10586,7 +10470,7 @@ "isPartOfKey": false }, { - "fieldPath": "product_key", + "fieldPath": "store_key", "nullable": true, "description": "", "type": { @@ -10599,7 +10483,7 @@ "isPartOfKey": false }, { - "fieldPath": "product_version", + "fieldPath": "vendor_key", "nullable": true, "description": "", "type": { @@ -10612,7 +10496,7 @@ "isPartOfKey": false }, { - "fieldPath": "customer_key", + "fieldPath": "employee_key", "nullable": true, "description": "", "type": { @@ -10625,7 +10509,7 @@ "isPartOfKey": false }, { - "fieldPath": "call_center_key", + "fieldPath": "order_number", "nullable": true, "description": "", "type": { @@ -10638,59 +10522,59 @@ "isPartOfKey": false }, { - "fieldPath": "online_page_key", + "fieldPath": "date_ordered", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "shipping_key", + "fieldPath": "date_shipped", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - 
"nativeDataType": "INTEGER()", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "warehouse_key", + "fieldPath": "expected_delivery_date", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "promotion_key", + "fieldPath": "date_delivered", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.DateType": {} } }, - "nativeDataType": "INTEGER()", + "nativeDataType": "DATE()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pos_transaction_number", + "fieldPath": "quantity_ordered", "nullable": true, "description": "", "type": { @@ -10703,7 +10587,7 @@ "isPartOfKey": false }, { - "fieldPath": "sales_quantity", + "fieldPath": "quantity_delivered", "nullable": true, "description": "", "type": { @@ -10716,20 +10600,20 @@ "isPartOfKey": false }, { - "fieldPath": "sales_dollar_amount", + "fieldPath": "shipper_name", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "VARCHAR(length=32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "ship_dollar_amount", + "fieldPath": "unit_price", "nullable": true, "description": "", "type": { @@ -10737,12 +10621,12 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "net_dollar_amount", + "fieldPath": "shipping_cost", "nullable": true, "description": "", "type": { @@ -10750,12 +10634,12 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - 
"nativeDataType": "FLOAT()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cost_dollar_amount", + "fieldPath": "total_order_cost", "nullable": true, "description": "", "type": { @@ -10763,12 +10647,12 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "gross_profit_dollar_amount", + "fieldPath": "quantity_in_stock", "nullable": true, "description": "", "type": { @@ -10776,46 +10660,33 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "FLOAT()", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "transaction_type", - "nullable": true, - "description": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=16)", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "online_sales_saledate", + "fieldPath": "reorder_level", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "online_sales_shipdate", + "fieldPath": "overstock_ceiling", "nullable": true, "description": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "DATE()", + "nativeDataType": "INTEGER()", "recursive": false, "isPartOfKey": false } @@ -10833,13 +10704,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_orders_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", 
"aspect": { "json": { "typeNames": [ - "Table" + "Projections" ] } }, @@ -10851,7 +10722,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.store.store_orders_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -10862,8 +10733,8 @@ "urn": "urn:li:container:343f520ad0fb3259b298736800bb1385" }, { - "id": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", - "urn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7" + "id": "urn:li:container:342b43fc61f85b16580be55c11e89787", + "urn": "urn:li:container:342b43fc61f85b16580be55c11e89787" } ] } @@ -10874,9 +10745,100 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "vertica", + "env": "PROD", + "database": "vmart", + "schema": "online_sales", + "projection_count": "3", + "udx_list": "", + "udx_language": "" + }, + "name": "online_sales" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:vertica" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + 
"runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ae8df3182db1bb8b3a612998126beae7", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:343f520ad0fb3259b298736800bb1385" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "vertica-2020_04_14-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_page_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -10887,6 +10849,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -10901,7 +10864,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_page_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -10918,7 +10881,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_page_dimension_super,PROD)", "aspects": [ { 
"com.linkedin.pegasus2avro.common.Status": { @@ -10944,7 +10907,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "online_sales.online_page_dimension_super", + "schemaName": "Vmart.online_sales.online_page_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -11054,7 +11017,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_page_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -11072,32 +11035,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension_super,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension,PROD)", - "type": "TRANSFORMED" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_page_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_page_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -11122,7 +11060,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.call_center_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -11133,6 +11071,7 @@ "type": "DATAOWNER" } 
], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -11147,7 +11086,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.call_center_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -11164,7 +11103,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.call_center_dimension_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -11190,7 +11129,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "online_sales.call_center_dimension_super", + "schemaName": "Vmart.online_sales.call_center_dimension_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -11378,7 +11317,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.call_center_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -11396,32 +11335,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension_super,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension,PROD)", - "type": "TRANSFORMED" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": 
"vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.call_center_dimension_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.call_center_dimension_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -11446,7 +11360,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_sales_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -11457,6 +11371,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -11471,7 +11386,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_sales_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -11488,7 +11403,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact_super,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_sales_fact_super,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -11502,7 +11417,7 @@ "Projection_Type": "is_super_projection", "Is_Segmented": "True", "Segmentation_key": "hash(online_sales_fact.sale_date_key, online_sales_fact.ship_date_key, online_sales_fact.product_key, online_sales_fact.product_version, online_sales_fact.customer_key, online_sales_fact.call_center_key, online_sales_fact.online_page_key, online_sales_fact.shipping_key)", - "Projection_size": "182356 KB", + 
"Projection_size": "182385 KB", "Partition_Key": "Not Available", "Number_Of_Partitions": "0", "Projection_Cached": "False" @@ -11514,7 +11429,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "online_sales.online_sales_fact_super", + "schemaName": "Vmart.online_sales.online_sales_fact_super", "platform": "urn:li:dataPlatform:vertica", "version": 0, "created": { @@ -11806,7 +11721,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_sales_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -11824,32 +11739,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact_super,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact,PROD)", - "type": "TRANSFORMED" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "vertica-2020_04_14-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,online_sales.online_sales_fact_super,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:vertica,Vmart.online_sales.online_sales_fact_super,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { diff --git a/metadata-ingestion/tests/integration/vertica/vertica_to_file.yml b/metadata-ingestion/tests/integration/vertica/vertica_to_file.yml index a182e54bd53c71..6f628011279edd 100644 --- a/metadata-ingestion/tests/integration/vertica/vertica_to_file.yml +++ b/metadata-ingestion/tests/integration/vertica/vertica_to_file.yml @@ -11,6 
+11,7 @@ source: include_models: true include_view_lineage: true include_projection_lineage: true + include_view_column_lineage: true sink: diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/test_bigquery_source.py index c501593fbed012..ebdd59b9f0f080 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -820,6 +820,7 @@ def bigquery_view_1() -> BigqueryView: comment="comment1", view_definition="CREATE VIEW 1", materialized=False, + labels=None, ) @@ -833,6 +834,7 @@ def bigquery_view_2() -> BigqueryView: comment="comment2", view_definition="CREATE VIEW 2", materialized=True, + labels=None, ) diff --git a/metadata-ingestion/tests/unit/test_serialized_lru_cache.py b/metadata-ingestion/tests/unit/test_serialized_lru_cache.py new file mode 100644 index 00000000000000..2b937e700b4371 --- /dev/null +++ b/metadata-ingestion/tests/unit/test_serialized_lru_cache.py @@ -0,0 +1,92 @@ +import threading +import time + +from datahub.utilities.perf_timer import PerfTimer +from datahub.utilities.serialized_lru_cache import serialized_lru_cache + + +def test_cache_hit() -> None: + @serialized_lru_cache(maxsize=2) + def fetch_data(x): + return x * 2 + + assert fetch_data(1) == 2 # Cache miss + assert fetch_data(1) == 2 # Cache hit + assert fetch_data.cache_info().hits == 1 # type: ignore + assert fetch_data.cache_info().misses == 1 # type: ignore + + +def test_cache_eviction() -> None: + @serialized_lru_cache(maxsize=2) + def compute(x): + return x * 2 + + compute(1) + compute(2) + compute(3) # Should evict the first entry (1) + assert compute.cache_info().currsize == 2 # type: ignore + assert compute.cache_info().misses == 3 # type: ignore + assert compute(1) == 2 # Cache miss, since it was evicted + assert compute.cache_info().misses == 4 # type: ignore + + +def test_thread_safety() -> None: + @serialized_lru_cache(maxsize=5) + def compute(x): + 
time.sleep(0.2) # Simulate some delay + return x * 2 + + threads = [] + results = [None] * 10 + + def thread_func(index, arg): + results[index] = compute(arg) + + with PerfTimer() as timer: + for i in range(10): + thread = threading.Thread(target=thread_func, args=(i, i % 5)) + threads.append(thread) + thread.start() + + for thread in threads: + thread.join() + + assert len(set(results)) == 5 # Only 5 unique results should be there + assert compute.cache_info().currsize <= 5 # type: ignore + # Only 5 unique calls should miss the cache + assert compute.cache_info().misses == 5 # type: ignore + + # Should take less than 1 second. If not, it means all calls were run serially. + assert timer.elapsed_seconds() < 1 + + +def test_concurrent_access_to_same_key() -> None: + @serialized_lru_cache(maxsize=3) + def compute(x: int) -> int: + time.sleep(0.2) # Simulate some delay + return x * 2 + + threads = [] + results = [] + + def thread_func(): + results.append(compute(1)) + + with PerfTimer() as timer: + for _ in range(10): + thread = threading.Thread(target=thread_func) + threads.append(thread) + thread.start() + + for thread in threads: + thread.join() + + assert all(result == 2 for result in results) # All should compute the same result + + # 9 hits, as the first one is a miss + assert compute.cache_info().hits == 9 # type: ignore + # Only the first call is a miss + assert compute.cache_info().misses == 1 # type: ignore + + # Should take less than 1 second. If not, it means all calls were run serially. 
+ assert timer.elapsed_seconds() < 1 diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index ec25a2fee76d59..337288ab59c603 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -112,7 +112,7 @@ public EntityResponse getV2( @Nonnull @Deprecated public Entity get(@Nonnull OperationContext opContext, @Nonnull final Urn urn) { - return entityService.getEntity(opContext, urn, ImmutableSet.of()); + return entityService.getEntity(opContext, urn, ImmutableSet.of(), true); } @Nonnull @@ -175,7 +175,7 @@ public Map batchGetVersionedV2( @Deprecated public Map batchGet( @Nonnull OperationContext opContext, @Nonnull final Set urns) { - return entityService.getEntities(opContext, urns, ImmutableSet.of()); + return entityService.getEntities(opContext, urns, ImmutableSet.of(), true); } /** diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 353b83726611ef..80f976f9ae81e8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -210,7 +210,8 @@ public RecordTemplate getLatestAspect( public Map> getLatestAspects( @Nonnull OperationContext opContext, @Nonnull final Set urns, - @Nonnull final Set aspectNames) { + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect) { Map batchGetResults = getLatestAspect(opContext, urns, aspectNames); @@ -223,15 +224,17 @@ public Map> getLatestAspects( urnToAspects.putIfAbsent(urn, new ArrayList<>()); } - // Add "key" aspects for each urn. TODO: Replace this with a materialized key aspect. 
- urnToAspects - .keySet() - .forEach( - key -> { - final RecordTemplate keyAspect = - EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), key); - urnToAspects.get(key).add(keyAspect); - }); + if (alwaysIncludeKeyAspect) { + // Add "key" aspects for each urn. TODO: Replace this with a materialized key aspect. + urnToAspects + .keySet() + .forEach( + key -> { + final RecordTemplate keyAspect = + EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), key); + urnToAspects.get(key).add(keyAspect); + }); + } List systemAspects = EntityUtils.toSystemAspects( @@ -328,9 +331,12 @@ public EntityResponse getEntityV2( @Nonnull OperationContext opContext, @Nonnull final String entityName, @Nonnull final Urn urn, - @Nonnull final Set aspectNames) + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect) throws URISyntaxException { - return getEntitiesV2(opContext, entityName, Collections.singleton(urn), aspectNames).get(urn); + return getEntitiesV2( + opContext, entityName, Collections.singleton(urn), aspectNames, alwaysIncludeKeyAspect) + .get(urn); } /** @@ -348,9 +354,12 @@ public Map getEntitiesV2( @Nonnull OperationContext opContext, @Nonnull final String entityName, @Nonnull final Set urns, - @Nonnull final Set aspectNames) + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect) throws URISyntaxException { - return getLatestEnvelopedAspects(opContext, urns, aspectNames).entrySet().stream() + return getLatestEnvelopedAspects(opContext, urns, aspectNames, alwaysIncludeKeyAspect) + .entrySet() + .stream() .collect( Collectors.toMap( Map.Entry::getKey, @@ -370,9 +379,13 @@ public Map getEntitiesV2( public Map getEntitiesVersionedV2( @Nonnull OperationContext opContext, @Nonnull final Set versionedUrns, - @Nonnull final Set aspectNames) + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect) throws URISyntaxException { - return getVersionedEnvelopedAspects(opContext, versionedUrns, aspectNames).entrySet().stream() + return 
getVersionedEnvelopedAspects( + opContext, versionedUrns, aspectNames, alwaysIncludeKeyAspect) + .entrySet() + .stream() .collect( Collectors.toMap( Map.Entry::getKey, @@ -388,7 +401,10 @@ public Map getEntitiesVersionedV2( */ @Override public Map> getLatestEnvelopedAspects( - @Nonnull OperationContext opContext, @Nonnull Set urns, @Nonnull Set aspectNames) + @Nonnull OperationContext opContext, + @Nonnull Set urns, + @Nonnull Set aspectNames, + boolean alwaysIncludeKeyAspect) throws URISyntaxException { final Set dbKeys = @@ -404,7 +420,7 @@ public Map> getLatestEnvelopedAspects( .flatMap(List::stream) .collect(Collectors.toSet()); - return getCorrespondingAspects(opContext, dbKeys, urns); + return getCorrespondingAspects(opContext, dbKeys, urns, alwaysIncludeKeyAspect); } /** @@ -419,7 +435,8 @@ public Map> getLatestEnvelopedAspects( public Map> getVersionedEnvelopedAspects( @Nonnull OperationContext opContext, @Nonnull Set versionedUrns, - @Nonnull Set aspectNames) + @Nonnull Set aspectNames, + boolean alwaysIncludeKeyAspect) throws URISyntaxException { Map> urnAspectVersionMap = @@ -466,11 +483,15 @@ public Map> getVersionedEnvelopedAspects( versionedUrns.stream() .map(versionedUrn -> versionedUrn.getUrn().toString()) .map(UrnUtils::getUrn) - .collect(Collectors.toSet())); + .collect(Collectors.toSet()), + alwaysIncludeKeyAspect); } private Map> getCorrespondingAspects( - @Nonnull OperationContext opContext, Set dbKeys, Set urns) { + @Nonnull OperationContext opContext, + Set dbKeys, + Set urns, + boolean alwaysIncludeKeyAspect) { final Map envelopedAspectMap = getEnvelopedAspects(opContext, dbKeys); @@ -487,11 +508,14 @@ private Map> getCorrespondingAspects( for (Urn urn : urns) { List aspects = urnToAspects.getOrDefault(urn.toString(), Collections.emptyList()); + EnvelopedAspect keyAspect = EntityUtils.getKeyEnvelopedAspect(urn, opContext.getEntityRegistry()); // Add key aspect if it does not exist in the returned aspects - if (aspects.isEmpty() - || 
aspects.stream().noneMatch(aspect -> keyAspect.getName().equals(aspect.getName()))) { + if (alwaysIncludeKeyAspect + && (aspects.isEmpty() + || aspects.stream() + .noneMatch(aspect -> keyAspect.getName().equals(aspect.getName())))) { result.put( urn, ImmutableList.builder().addAll(aspects).add(keyAspect).build()); } else { @@ -1532,8 +1556,11 @@ public ListUrnsResult listUrns( public Entity getEntity( @Nonnull OperationContext opContext, @Nonnull final Urn urn, - @Nonnull final Set aspectNames) { - return getEntities(opContext, Collections.singleton(urn), aspectNames).values().stream() + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect) { + return getEntities(opContext, Collections.singleton(urn), aspectNames, alwaysIncludeKeyAspect) + .values() + .stream() .findFirst() .orElse(null); } @@ -1552,12 +1579,15 @@ public Entity getEntity( public Map getEntities( @Nonnull OperationContext opContext, @Nonnull final Set urns, - @Nonnull Set aspectNames) { + @Nonnull Set aspectNames, + boolean alwaysIncludeKeyAspect) { log.debug("Invoked getEntities with urns {}, aspects {}", urns, aspectNames); if (urns.isEmpty()) { return Collections.emptyMap(); } - return getSnapshotUnions(opContext, urns, aspectNames).entrySet().stream() + return getSnapshotUnions(opContext, urns, aspectNames, alwaysIncludeKeyAspect) + .entrySet() + .stream() .collect( Collectors.toMap(Map.Entry::getKey, entry -> EntityUtils.toEntity(entry.getValue()))); } @@ -1708,8 +1738,11 @@ public void ingestEntity( protected Map getSnapshotUnions( @Nonnull OperationContext opContext, @Nonnull final Set urns, - @Nonnull final Set aspectNames) { - return getSnapshotRecords(opContext, urns, aspectNames).entrySet().stream() + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect) { + return getSnapshotRecords(opContext, urns, aspectNames, alwaysIncludeKeyAspect) + .entrySet() + .stream() .collect( Collectors.toMap( Map.Entry::getKey, entry -> EntityUtils.toSnapshotUnion(entry.getValue()))); 
@@ -1719,8 +1752,11 @@ protected Map getSnapshotUnions( protected Map getSnapshotRecords( @Nonnull OperationContext opContext, @Nonnull final Set urns, - @Nonnull final Set aspectNames) { - return getLatestAspectUnions(opContext, urns, aspectNames).entrySet().stream() + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect) { + return getLatestAspectUnions(opContext, urns, aspectNames, alwaysIncludeKeyAspect) + .entrySet() + .stream() .collect( Collectors.toMap( Map.Entry::getKey, @@ -1731,8 +1767,11 @@ protected Map getSnapshotRecords( protected Map> getLatestAspectUnions( @Nonnull OperationContext opContext, @Nonnull final Set urns, - @Nonnull final Set aspectNames) { - return this.getLatestAspects(opContext, urns, aspectNames).entrySet().stream() + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect) { + return this.getLatestAspects(opContext, urns, aspectNames, alwaysIncludeKeyAspect) + .entrySet() + .stream() .collect( Collectors.toMap( Map.Entry::getKey, @@ -2001,7 +2040,7 @@ public Set exists( } else { // Additionally exclude status.removed == true Map> statusResult = - getLatestAspects(opContext, existing, Set.of(STATUS_ASPECT_NAME)); + getLatestAspects(opContext, existing, Set.of(STATUS_ASPECT_NAME), false); return existing.stream() .filter( urn -> diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index 9262d17ccde92f..4835ebe164e1c7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -455,7 +455,7 @@ private Optional getStructuredQuery( if (customQueryConfig != null) { executeStructuredQuery = customQueryConfig.isStructuredQuery(); } else { - executeStructuredQuery = 
!(isQuoted(sanitizedQuery) && exactMatchConfiguration.isExclusive()); + executeStructuredQuery = true; } if (executeStructuredQuery) { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 45d4fe4f46c99a..9a8186cc838ab2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -160,7 +160,7 @@ public void testIngestGetEntity() throws Exception { // 2. Retrieve Entity com.linkedin.entity.Entity readEntity = - _entityServiceImpl.getEntity(opContext, entityUrn, Collections.emptySet()); + _entityServiceImpl.getEntity(opContext, entityUrn, Collections.emptySet(), true); // 3. Compare Entity Objects assertEquals( @@ -206,7 +206,7 @@ public void testAddKey() throws Exception { // 2. Retrieve Entity com.linkedin.entity.Entity readEntity = - _entityServiceImpl.getEntity(opContext, entityUrn, Collections.emptySet()); + _entityServiceImpl.getEntity(opContext, entityUrn, Collections.emptySet(), true); // 3. Compare Entity Objects assertEquals( @@ -261,7 +261,7 @@ public void testIngestGetEntities() throws Exception { // 2. Retrieve Entities Map readEntities = _entityServiceImpl.getEntities( - opContext, ImmutableSet.of(entityUrn1, entityUrn2), Collections.emptySet()); + opContext, ImmutableSet.of(entityUrn1, entityUrn2), Collections.emptySet(), true); // 3. 
Compare Entity Objects diff --git a/metadata-service/openapi-servlet/models/build.gradle b/metadata-service/openapi-servlet/models/build.gradle index a0e1a553fe8146..e4100b2d094e04 100644 --- a/metadata-service/openapi-servlet/models/build.gradle +++ b/metadata-service/openapi-servlet/models/build.gradle @@ -6,14 +6,6 @@ dependencies { implementation project(':entity-registry') implementation project(':metadata-operation-context') implementation project(':metadata-auth:auth-api') - implementation project(':metadata-service:auth-impl') - implementation project(':metadata-io') - - implementation externalDependency.springWeb - implementation(externalDependency.springDocUI) { - exclude group: 'org.springframework.boot' - } - implementation externalDependency.swaggerAnnotations implementation externalDependency.jacksonDataBind implementation externalDependency.httpClient diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/exception/InvalidUrnException.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/exception/InvalidUrnException.java new file mode 100644 index 00000000000000..52a7f95f78da04 --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/exception/InvalidUrnException.java @@ -0,0 +1,9 @@ +package io.datahubproject.openapi.exception; + +import java.net.URISyntaxException; + +public class InvalidUrnException extends URISyntaxException { + public InvalidUrnException(String input, String reason) { + super(input, reason); + } +} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java index f4689a98628253..0e9fcbe15b525b 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java +++ 
b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java @@ -1,5 +1,7 @@ package io.datahubproject.openapi; +import io.datahubproject.openapi.exception.InvalidUrnException; +import java.util.Map; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.ConversionNotSupportedException; import org.springframework.core.Ordered; @@ -23,4 +25,9 @@ public GlobalControllerExceptionHandler() { public ResponseEntity handleConflict(RuntimeException ex) { return new ResponseEntity<>(ex.getMessage(), HttpStatus.BAD_REQUEST); } + + @ExceptionHandler(InvalidUrnException.class) + public static ResponseEntity> handleUrnException(InvalidUrnException e) { + return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.BAD_REQUEST); + } } diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java similarity index 87% rename from metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java rename to metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index a68d87434f7aa5..648fd0f5853191 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -15,7 +15,6 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.urn.Urn; -import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; @@ -46,6 +45,7 @@ import 
com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; +import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.openapi.exception.UnauthorizedException; import io.datahubproject.openapi.models.GenericEntity; import io.datahubproject.openapi.models.GenericEntityScrollResult; @@ -77,6 +77,7 @@ public abstract class GenericEntitiesController< E extends GenericEntity, S extends GenericEntityScrollResult> { + public static final String NOT_FOUND_HEADER = "Not-Found-Reason"; protected static final SearchFlags DEFAULT_SEARCH_FLAGS = new SearchFlags().setFulltext(false).setSkipAggregates(true).setSkipHighlighting(true); @@ -125,7 +126,7 @@ protected abstract E buildGenericEntity( protected abstract AspectsBatch toMCPBatch( @Nonnull OperationContext opContext, String entityArrayList, Actor actor) - throws JsonProcessingException, URISyntaxException; + throws JsonProcessingException, InvalidUrnException; @Tag(name = "Generic Entities", description = "API for interacting with generic entities.") @GetMapping(value = "/{entityName}", produces = MediaType.APPLICATION_JSON_VALUE) @@ -203,7 +204,7 @@ public ResponseEntity getEntity( Boolean withSystemMetadata) throws URISyntaxException { - Urn urn = UrnUtils.getUrn(entityUrn); + Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); if (!AuthUtil.isAPIAuthorizedEntityUrns( authentication, authorizationChain, READ, List.of(urn))) { @@ -218,20 +219,24 @@ public ResponseEntity getEntity( authentication, true); - return ResponseEntity.of( - buildEntityList(opContext, List.of(urn), aspectNames, withSystemMetadata).stream() - .findFirst()); + return buildEntityList(opContext, List.of(urn), aspectNames, withSystemMetadata).stream() + .findFirst() + .map(ResponseEntity::ok) + .orElse(ResponseEntity.notFound().header(NOT_FOUND_HEADER, "ENTITY").build()); } @Tag(name = 
"Generic Entities") @RequestMapping( - value = "/{entityName}/{entityUrn}", + value = "/{entityName}/{entityUrn:urn:li:.+}", method = {RequestMethod.HEAD}) @Operation(summary = "Entity exists") public ResponseEntity headEntity( - @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn) { + @PathVariable("entityName") String entityName, + @PathVariable("entityUrn") String entityUrn, + @PathVariable(value = "includeSoftDelete", required = false) Boolean includeSoftDelete) + throws InvalidUrnException { - Urn urn = UrnUtils.getUrn(entityUrn); + Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); if (!AuthUtil.isAPIAuthorizedEntityUrns( authentication, authorizationChain, EXISTS, List.of(urn))) { @@ -246,14 +251,14 @@ public ResponseEntity headEntity( authentication, true); - return exists(opContext, urn, null) + return exists(opContext, urn, null, includeSoftDelete) ? ResponseEntity.noContent().build() : ResponseEntity.notFound().build(); } @Tag(name = "Generic Aspects", description = "API for generic aspects.") @GetMapping( - value = "/{entityName}/{entityUrn}/{aspectName}", + value = "/{entityName}/{entityUrn:urn:li:.+}/{aspectName}", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Get an entity's generic aspect.") public ResponseEntity getAspect( @@ -264,7 +269,7 @@ public ResponseEntity getAspect( Boolean withSystemMetadata) throws URISyntaxException { - Urn urn = UrnUtils.getUrn(entityUrn); + Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); if (!AuthUtil.isAPIAuthorizedEntityUrns( authentication, authorizationChain, READ, List.of(urn))) { @@ -279,30 +284,32 @@ public ResponseEntity getAspect( authentication, true); - return ResponseEntity.of( - buildEntityList(opContext, List.of(urn), Set.of(aspectName), withSystemMetadata).stream() - .findFirst() - .flatMap( - e -> - 
e.getAspects().entrySet().stream() - .filter( - entry -> - entry.getKey().equals(lookupAspectSpec(urn, aspectName).getName())) - .map(Map.Entry::getValue) - .findFirst())); + return buildEntityList(opContext, List.of(urn), Set.of(aspectName), withSystemMetadata).stream() + .findFirst() + .flatMap( + e -> + e.getAspects().entrySet().stream() + .filter( + entry -> entry.getKey().equals(lookupAspectSpec(urn, aspectName).getName())) + .map(Map.Entry::getValue) + .findFirst()) + .map(ResponseEntity::ok) + .orElse(ResponseEntity.notFound().header(NOT_FOUND_HEADER, "ENTITY").build()); } @Tag(name = "Generic Aspects") @RequestMapping( - value = "/{entityName}/{entityUrn}/{aspectName}", + value = "/{entityName}/{entityUrn:urn:li:.+}/{aspectName}", method = {RequestMethod.HEAD}) @Operation(summary = "Whether an entity aspect exists.") public ResponseEntity headAspect( @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn, - @PathVariable("aspectName") String aspectName) { + @PathVariable("aspectName") String aspectName, + @PathVariable(value = "includeSoftDelete", required = false) Boolean includeSoftDelete) + throws InvalidUrnException { - Urn urn = UrnUtils.getUrn(entityUrn); + Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); if (!AuthUtil.isAPIAuthorizedEntityUrns( authentication, authorizationChain, EXISTS, List.of(urn))) { @@ -317,19 +324,20 @@ public ResponseEntity headAspect( authentication, true); - return exists(opContext, urn, lookupAspectSpec(urn, aspectName).getName()) + return exists(opContext, urn, lookupAspectSpec(urn, aspectName).getName(), includeSoftDelete) ? 
ResponseEntity.noContent().build() : ResponseEntity.notFound().build(); } @Tag(name = "Generic Entities") - @DeleteMapping(value = "/{entityName}/{entityUrn}") + @DeleteMapping(value = "/{entityName}/{entityUrn:urn:li:.+}") @Operation(summary = "Delete an entity") public void deleteEntity( - @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn) { + @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn) + throws InvalidUrnException { EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); - Urn urn = UrnUtils.getUrn(entityUrn); + Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); if (!AuthUtil.isAPIAuthorizedEntityUrns( authentication, authorizationChain, DELETE, List.of(urn))) { @@ -356,7 +364,7 @@ public ResponseEntity> createEntity( @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") Boolean withSystemMetadata, @RequestBody @Nonnull String jsonEntityList) - throws URISyntaxException, JsonProcessingException { + throws InvalidUrnException, JsonProcessingException { Authentication authentication = AuthenticationContext.getAuthentication(); @@ -385,14 +393,15 @@ public ResponseEntity> createEntity( } @Tag(name = "Generic Aspects") - @DeleteMapping(value = "/{entityName}/{entityUrn}/{aspectName}") + @DeleteMapping(value = "/{entityName}/{entityUrn:urn:li:.+}/{aspectName}") @Operation(summary = "Delete an entity aspect.") public void deleteAspect( @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn, - @PathVariable("aspectName") String aspectName) { + @PathVariable("aspectName") String aspectName) + throws InvalidUrnException { - Urn urn = UrnUtils.getUrn(entityUrn); + Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); if (!AuthUtil.isAPIAuthorizedEntityUrns( authentication, authorizationChain, 
DELETE, List.of(urn))) { @@ -413,7 +422,7 @@ public void deleteAspect( @Tag(name = "Generic Aspects") @PostMapping( - value = "/{entityName}/{entityUrn}/{aspectName}", + value = "/{entityName}/{entityUrn:urn:li:.+}/{aspectName}", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Create an entity aspect.") public ResponseEntity createAspect( @@ -427,7 +436,7 @@ public ResponseEntity createAspect( @RequestBody @Nonnull String jsonAspect) throws URISyntaxException { - Urn urn = UrnUtils.getUrn(entityUrn); + Urn urn = validatedUrn(entityUrn); EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); Authentication authentication = AuthenticationContext.getAuthentication(); @@ -472,7 +481,7 @@ public ResponseEntity createAspect( @Tag(name = "Generic Aspects") @PatchMapping( - value = "/{entityName}/{entityUrn}/{aspectName}", + value = "/{entityName}/{entityUrn:urn:li:.+}/{aspectName}", consumes = "application/json-patch+json", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Patch an entity aspect. 
(Experimental)") @@ -483,13 +492,13 @@ public ResponseEntity patchAspect( @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") Boolean withSystemMetadata, @RequestBody @Nonnull GenericJsonPatch patch) - throws URISyntaxException, + throws InvalidUrnException, NoSuchMethodException, InvocationTargetException, InstantiationException, IllegalAccessException { - Urn urn = UrnUtils.getUrn(entityUrn); + Urn urn = validatedUrn(entityUrn); EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); Authentication authentication = AuthenticationContext.getAuthentication(); if (!AuthUtil.isAPIAuthorizedEntityUrns( @@ -518,7 +527,7 @@ public ResponseEntity patchAspect( ChangeMCP upsert = toUpsertItem( opContext.getRetrieverContext().get().getAspectRetriever(), - UrnUtils.getUrn(entityUrn), + validatedUrn(entityUrn), aspectSpec, currentValue, genericPatchTemplate, @@ -534,16 +543,23 @@ public ResponseEntity patchAspect( true, true); - return ResponseEntity.of( - results.stream() - .findFirst() - .map(result -> buildGenericEntity(aspectSpec.getName(), result, withSystemMetadata))); + return results.stream() + .findFirst() + .map(result -> buildGenericEntity(aspectSpec.getName(), result, withSystemMetadata)) + .map(ResponseEntity::ok) + .orElse(ResponseEntity.notFound().header(NOT_FOUND_HEADER, "ENTITY").build()); } - protected Boolean exists(@Nonnull OperationContext opContext, Urn urn, @Nullable String aspect) { + protected Boolean exists( + @Nonnull OperationContext opContext, + Urn urn, + @Nullable String aspect, + @Nullable Boolean includeSoftDelete) { return aspect == null - ? entityService.exists(opContext, urn, true) - : entityService.exists(opContext, urn, aspect, true); + ? entityService.exists( + opContext, urn, includeSoftDelete != null ? includeSoftDelete : false) + : entityService.exists( + opContext, urn, aspect, includeSoftDelete != null ? 
includeSoftDelete : false); } protected Set resolveAspectNames(Set urns, Set requestedAspectNames) { @@ -638,4 +654,12 @@ protected static AspectSpec lookupAspectSpec(EntitySpec entitySpec, String aspec .findFirst() .get(); } + + protected static Urn validatedUrn(String urn) throws InvalidUrnException { + try { + return Urn.createFromString(urn); + } catch (URISyntaxException e) { + throw new InvalidUrnException(urn, "Invalid urn!"); + } + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index 23cd89147173ad..eeba41f9f819f5 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -30,6 +30,7 @@ import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.controller.GenericEntitiesController; +import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.openapi.exception.UnauthorizedException; import io.datahubproject.openapi.v2.models.BatchGetUrnRequest; import io.datahubproject.openapi.v2.models.BatchGetUrnResponse; @@ -120,7 +121,7 @@ public ResponseEntity getEntityBatch( @Override protected AspectsBatch toMCPBatch( @Nonnull OperationContext opContext, String entityArrayList, Actor actor) - throws JsonProcessingException { + throws JsonProcessingException, InvalidUrnException { JsonNode entities = objectMapper.readTree(entityArrayList); List items = new LinkedList<>(); @@ -131,7 +132,7 @@ protected AspectsBatch toMCPBatch( if (!entity.has("urn")) { throw new IllegalArgumentException("Missing `urn` field"); } - Urn entityUrn = UrnUtils.getUrn(entity.get("urn").asText()); + Urn 
entityUrn = validatedUrn(entity.get("urn").asText()); if (!entity.has("aspects")) { throw new IllegalArgumentException("Missing `aspects` field"); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index 86b03ccc467d93..93ad502c9f7d68 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -42,6 +42,7 @@ public class OpenAPIV3Generator { private static final String NAME_SYSTEM_METADATA = "systemMetadata"; private static final String NAME_ASYNC = "async"; private static final String NAME_SCROLL_ID = "scrollId"; + private static final String NAME_INCLUDE_SOFT_DELETE = "includeSoftDelete"; private static final String PROPERTY_VALUE = "value"; private static final String PROPERTY_URN = "urn"; private static final String PROPERTY_PATCH = "patch"; @@ -208,7 +209,12 @@ private static PathItem buildSingleEntityPath(final EntitySpec entity) { .in(NAME_PATH) .name("urn") .description("The entity's unique URN id.") - .schema(new Schema().type(TYPE_STRING)))) + .schema(new Schema().type(TYPE_STRING)), + new Parameter() + .in(NAME_QUERY) + .name(NAME_INCLUDE_SOFT_DELETE) + .description("If enabled, soft deleted items will exist.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(false)))) .tags(List.of(entity.getName() + " Entity")) .responses( new ApiResponses() @@ -274,6 +280,7 @@ private static PathItem buildListEntityPath(final EntitySpec entity) { .summary(String.format("Scroll/List %s.", upperFirst)) .parameters(parameters) .tags(List.of(entity.getName() + " Entity")) + .description("Scroll indexed entities. 
Will not include soft deleted entities.") .responses(new ApiResponses().addApiResponse("200", successApiResponse))); // Post Operation @@ -631,6 +638,13 @@ private static PathItem buildSingleEntityAspectPath( new Operation() .summary(String.format("%s on %s existence.", aspect, upperFirstEntity)) .tags(tags) + .parameters( + List.of( + new Parameter() + .in(NAME_QUERY) + .name(NAME_INCLUDE_SOFT_DELETE) + .description("If enabled, soft deleted items will exist.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(false)))) .responses( new ApiResponses() .addApiResponse("200", successHeadResponse) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index 20e917f1f452ea..be5558f821d4ff 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -5,7 +5,6 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.linkedin.common.urn.Urn; -import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; @@ -25,6 +24,7 @@ import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.openapi.controller.GenericEntitiesController; +import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.openapi.v3.models.GenericEntityScrollResultV3; import io.datahubproject.openapi.v3.models.GenericEntityV3; import java.net.URISyntaxException; @@ -83,16 +83,15 @@ protected List buildEntityList( urnsSet, resolveAspectNames(urnsSet, aspectNames).stream() .map(AspectSpec::getName) - 
.collect(Collectors.toSet())); + .collect(Collectors.toSet()), + false); return urns.stream() + .filter(urn -> aspects.containsKey(urn) && !aspects.get(urn).isEmpty()) .map( u -> GenericEntityV3.builder() - .build( - objectMapper, - u, - toAspectMap(u, aspects.getOrDefault(u, List.of()), withSystemMetadata))) + .build(objectMapper, u, toAspectMap(u, aspects.get(u), withSystemMetadata))) .collect(Collectors.toList()); } } @@ -153,7 +152,7 @@ private List toRecordTemplates( @Override protected AspectsBatch toMCPBatch( @Nonnull OperationContext opContext, String entityArrayList, Actor actor) - throws JsonProcessingException { + throws JsonProcessingException, InvalidUrnException { JsonNode entities = objectMapper.readTree(entityArrayList); List items = new LinkedList<>(); @@ -164,7 +163,7 @@ protected AspectsBatch toMCPBatch( if (!entity.has("urn")) { throw new IllegalArgumentException("Missing `urn` field"); } - Urn entityUrn = UrnUtils.getUrn(entity.get("urn").asText()); + Urn entityUrn = validatedUrn(entity.get("urn").asText()); Iterator> aspectItr = entity.fields(); while (aspectItr.hasNext()) { diff --git a/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java b/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java index 998ebe6a80096a..b70b643b10f323 100644 --- a/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java +++ b/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java @@ -151,7 +151,8 @@ public Map> getLatestEnvelopedAspects( public Map> getVersionedEnvelopedAspects( @Nonnull OperationContext opContext, @Nonnull Set versionedUrns, - @Nonnull Set aspectNames) + @Nonnull Set aspectNames, + boolean alwaysIncludeKeyAspect) throws URISyntaxException { return null; } diff --git a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json index 
32e7a0e58e5355..888d55639d02be 100644 --- a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json +++ b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json @@ -87,6 +87,18 @@ "default" : "unset" } ], "returns" : "string" + }, { + "name" : "ingestProposalBatch", + "javaMethodName" : "ingestProposalBatch", + "parameters" : [ { + "name" : "proposals", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.mxe.MetadataChangeProposal\" }" + }, { + "name" : "async", + "type" : "string", + "default" : "unset" + } ], + "returns" : "string" }, { "name" : "restoreIndices", "javaMethodName" : "restoreIndices", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 32912e0c7364ad..bfa887ffda1175 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -4122,6 +4122,18 @@ "default" : "unset" } ], "returns" : "string" + }, { + "name" : "ingestProposalBatch", + "javaMethodName" : "ingestProposalBatch", + "parameters" : [ { + "name" : "proposals", + "type" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.mxe.MetadataChangeProposal\" }" + }, { + "name" : "async", + "type" : "string", + "default" : "unset" + } ], + "returns" : "string" }, { "name" : "restoreIndices", "javaMethodName" : "restoreIndices", diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 606c479cd14b5f..8dc73e45846edc 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ 
b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -53,6 +53,7 @@ import io.opentelemetry.extension.annotations.WithSpan; import java.net.URISyntaxException; import java.time.Clock; +import java.util.Arrays; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -69,10 +70,12 @@ public class AspectResource extends CollectionResourceTaskTemplate ingestProposal( @ActionParam(PARAM_PROPOSAL) @Nonnull MetadataChangeProposal metadataChangeProposal, @ActionParam(PARAM_ASYNC) @Optional(UNSET) String async) throws URISyntaxException { - log.info("INGEST PROPOSAL proposal: {}", metadataChangeProposal); + log.info("INGEST PROPOSAL proposal: {}", metadataChangeProposal); + + final boolean asyncBool; + if (UNSET.equals(async)) { + asyncBool = Boolean.parseBoolean(System.getenv(ASYNC_INGEST_DEFAULT_NAME)); + } else { + asyncBool = Boolean.parseBoolean(async); + } + + return ingestProposals(List.of(metadataChangeProposal), asyncBool); + } + + @Action(name = ACTION_INGEST_PROPOSAL_BATCH) + @Nonnull + @WithSpan + public Task ingestProposalBatch( + @ActionParam(PARAM_PROPOSALS) @Nonnull MetadataChangeProposal[] metadataChangeProposals, + @ActionParam(PARAM_ASYNC) @Optional(UNSET) String async) + throws URISyntaxException { + log.info("INGEST PROPOSAL BATCH proposals: {}", Arrays.asList(metadataChangeProposals)); + + final boolean asyncBool; + if (UNSET.equals(async)) { + asyncBool = Boolean.parseBoolean(System.getenv(ASYNC_INGEST_DEFAULT_NAME)); + } else { + asyncBool = Boolean.parseBoolean(async); + } + + return ingestProposals(Arrays.asList(metadataChangeProposals), asyncBool); + } - final boolean asyncBool; - if (UNSET.equals(async)) { - asyncBool = Boolean.parseBoolean(System.getenv(ASYNC_INGEST_DEFAULT_NAME)); - } else { - asyncBool = Boolean.parseBoolean(async); - } + private Task ingestProposals( + @Nonnull List metadataChangeProposals, + boolean asyncBool) + throws 
URISyntaxException { Authentication authentication = AuthenticationContext.getAuthentication(); - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(ACTION_INGEST_PROPOSAL, metadataChangeProposal.getEntityType()), _authorizer, authentication, true); - - /* - Ingest Authorization Checks - */ - List> exceptions = isAPIAuthorized(authentication, _authorizer, ENTITY, - opContext.getEntityRegistry(), List.of(metadataChangeProposal)) + + Set entityTypes = metadataChangeProposals.stream() + .map(MetadataChangeProposal::getEntityType) + .collect(Collectors.toSet()); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(ACTION_INGEST_PROPOSAL, entityTypes), _authorizer, authentication, true); + + // Ingest Authorization Checks + List> exceptions = isAPIAuthorized(authentication, _authorizer, ENTITY, + opContext.getEntityRegistry(), metadataChangeProposals) .stream().filter(p -> p.getSecond() != HttpStatus.S_200_OK.getCode()) .collect(Collectors.toList()); - if (!exceptions.isEmpty()) { - throw new RestLiServiceException( - HttpStatus.S_403_FORBIDDEN, "User is unauthorized to modify entity: " + exceptions.stream() + if (!exceptions.isEmpty()) { + String errorMessages = exceptions.stream() .map(ex -> String.format("HttpStatus: %s Urn: %s", ex.getSecond(), ex.getFirst().getEntityUrn())) - .collect(Collectors.toList())); - } - + .collect(Collectors.joining(", ")); + throw new RestLiServiceException( + HttpStatus.S_403_FORBIDDEN, "User is unauthorized to modify entity: " + errorMessages); + } String actorUrnStr = authentication.getActor().toUrnStr(); final AuditStamp auditStamp = new AuditStamp().setTime(_clock.millis()).setActor(Urn.createFromString(actorUrnStr)); return RestliUtil.toTask(() -> { - log.debug("Proposal: {}", metadataChangeProposal); + log.debug("Proposals: {}", metadataChangeProposals); try { final AspectsBatch 
batch = AspectsBatchImpl.builder() - .mcps(List.of(metadataChangeProposal), auditStamp, opContext.getRetrieverContext().get()) + .mcps(metadataChangeProposals, auditStamp, opContext.getRetrieverContext().get()) .build(); Set results = _entityService.ingestProposal(opContext, batch, asyncBool); - java.util.Optional one = results.stream().findFirst(); + for (IngestResult result : results) { + // Update runIds, only works for existing documents, so ES document must exist + Urn resultUrn = result.getUrn(); - // Update runIds, only works for existing documents, so ES document must exist - Urn resultUrn = one.map(IngestResult::getUrn).orElse(metadataChangeProposal.getEntityUrn()); - if (one.map(result -> result.isProcessedMCL() || result.isUpdate()).orElse(false)) { - tryIndexRunId(opContext, - resultUrn, metadataChangeProposal.getSystemMetadata(), entitySearchService); + if (resultUrn != null && (result.isProcessedMCL() || result.isUpdate())) { + tryIndexRunId(opContext, resultUrn, result.getRequest().getSystemMetadata(), entitySearchService); + } } - return resultUrn.toString(); + + // TODO: We don't actually use this return value anywhere. Maybe we should just stop returning it altogether? + return "success"; } catch (ValidationException e) { throw new RestLiServiceException(HttpStatus.S_422_UNPROCESSABLE_ENTITY, e.getMessage()); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 4116b8ad30b943..4ad668d0b1054d 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -208,7 +208,7 @@ public Task get( aspectNames == null ? 
Collections.emptySet() : new HashSet<>(Arrays.asList(aspectNames)); - final Entity entity = entityService.getEntity(opContext, urn, projectedAspects); + final Entity entity = entityService.getEntity(opContext, urn, projectedAspects, true); if (entity == null) { throw RestliUtil.resourceNotFoundException(String.format("Did not find %s", urnStr)); } @@ -248,7 +248,7 @@ public Task> batchGet( aspectNames == null ? Collections.emptySet() : new HashSet<>(Arrays.asList(aspectNames)); - return entityService.getEntities(opContext, urns, projectedAspects).entrySet().stream() + return entityService.getEntities(opContext, urns, projectedAspects, true).entrySet().stream() .collect( Collectors.toMap( entry -> entry.getKey().toString(), diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index 0794ba72ff6923..27358c4c0e2790 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -115,12 +115,23 @@ default boolean exists(@Nonnull OperationContext opContext, @Nonnull Urn urn) { * * @param urns set of urns to fetch aspects for * @param aspectNames aspects to fetch for each urn in urns set + * @param alwaysIncludeKeyAspect historically the key aspect was always added, allow disabling + * this behavior * @return a map of provided {@link Urn} to a List containing the requested aspects. 
*/ Map> getLatestAspects( @Nonnull OperationContext opContext, @Nonnull final Set urns, - @Nonnull final Set aspectNames); + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect); + + @Deprecated + default Map> getLatestAspects( + @Nonnull OperationContext opContext, + @Nonnull final Set urns, + @Nonnull final Set aspectNames) { + return getLatestAspects(opContext, urns, aspectNames, true); + } Map getLatestAspectsForUrn( @Nonnull OperationContext opContext, @@ -152,15 +163,28 @@ RecordTemplate getAspect( * @param entityName name of the entity to fetch * @param urn urn of entity to fetch * @param aspectNames set of aspects to fetch + * @param alwaysIncludeKeyAspect historically the key aspect was always added, allow disabling + * this behavior * @return a map of {@link Urn} to {@link Entity} object */ EntityResponse getEntityV2( @Nonnull OperationContext opContext, @Nonnull final String entityName, @Nonnull final Urn urn, - @Nonnull final Set aspectNames) + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect) throws URISyntaxException; + @Deprecated + default EntityResponse getEntityV2( + @Nonnull OperationContext opContext, + @Nonnull final String entityName, + @Nonnull final Urn urn, + @Nonnull final Set aspectNames) + throws URISyntaxException { + return getEntityV2(opContext, entityName, urn, aspectNames, true); + } + /** * Retrieves the latest aspects for the given set of urns as dynamic aspect objects (Without * having to define union objects) @@ -168,15 +192,27 @@ EntityResponse getEntityV2( * @param entityName name of the entity to fetch * @param urns set of urns to fetch * @param aspectNames set of aspects to fetch + * @param alwaysIncludeKeyAspect historically the key aspect was always added, allow disabling + * this behavior * @return a map of {@link Urn} to {@link Entity} object */ Map getEntitiesV2( @Nonnull OperationContext opContext, @Nonnull final String entityName, @Nonnull final Set urns, - @Nonnull final Set aspectNames) 
+ @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect) throws URISyntaxException; + default Map getEntitiesV2( + @Nonnull OperationContext opContext, + @Nonnull final String entityName, + @Nonnull final Set urns, + @Nonnull final Set aspectNames) + throws URISyntaxException { + return getEntitiesV2(opContext, entityName, urns, aspectNames, true); + } + /** * Retrieves the aspects for the given set of urns and versions as dynamic aspect objects (Without * having to define union objects) @@ -184,39 +220,75 @@ Map getEntitiesV2( * @param versionedUrns set of urns to fetch with versions of aspects specified in a specialized * string * @param aspectNames set of aspects to fetch + * @param alwaysIncludeKeyAspect historically the key aspect was always added, allow disabling + * this behavior * @return a map of {@link Urn} to {@link Entity} object */ Map getEntitiesVersionedV2( @Nonnull OperationContext opContext, @Nonnull final Set versionedUrns, - @Nonnull final Set aspectNames) + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect) throws URISyntaxException; + @Deprecated + default Map getEntitiesVersionedV2( + @Nonnull OperationContext opContext, + @Nonnull final Set versionedUrns, + @Nonnull final Set aspectNames) + throws URISyntaxException { + return getEntitiesVersionedV2(opContext, versionedUrns, aspectNames, true); + } + /** * Retrieves the latest aspects for the given set of urns as a list of enveloped aspects * * @param urns set of urns to fetch * @param aspectNames set of aspects to fetch + * @param alwaysIncludeKeyAspect historically the key aspect was always added, allow disabling + * this behavior * @return a map of {@link Urn} to {@link EnvelopedAspect} object */ Map> getLatestEnvelopedAspects( - @Nonnull OperationContext opContext, @Nonnull Set urns, @Nonnull Set aspectNames) + @Nonnull OperationContext opContext, + @Nonnull Set urns, + @Nonnull Set aspectNames, + boolean alwaysIncludeKeyAspect) throws URISyntaxException; + 
@Deprecated + default Map> getLatestEnvelopedAspects( + @Nonnull OperationContext opContext, @Nonnull Set urns, @Nonnull Set aspectNames) + throws URISyntaxException { + return getLatestEnvelopedAspects(opContext, urns, aspectNames, true); + } + /** * Retrieves the latest aspects for the given set of urns as a list of enveloped aspects * * @param versionedUrns set of urns to fetch with versions of aspects specified in a specialized * string * @param aspectNames set of aspects to fetch + * @param alwaysIncludeKeyAspect historically the key aspect was always added, allow disabling + * this behavior * @return a map of {@link Urn} to {@link EnvelopedAspect} object */ Map> getVersionedEnvelopedAspects( @Nonnull OperationContext opContext, @Nonnull Set versionedUrns, - @Nonnull Set aspectNames) + @Nonnull Set aspectNames, + boolean alwaysIncludeKeyAspect) throws URISyntaxException; + @Deprecated + default Map> getVersionedEnvelopedAspects( + @Nonnull OperationContext opContext, + @Nonnull Set versionedUrns, + @Nonnull Set aspectNames) + throws URISyntaxException { + return getVersionedEnvelopedAspects(opContext, versionedUrns, aspectNames, true); + } + /** * Retrieves the latest aspect for the given urn as a list of enveloped aspects * @@ -320,13 +392,15 @@ ListUrnsResult listUrns( Entity getEntity( @Nonnull OperationContext opContext, @Nonnull final Urn urn, - @Nonnull final Set aspectNames); + @Nonnull final Set aspectNames, + boolean alwaysIncludeKeyAspect); @Deprecated Map getEntities( @Nonnull OperationContext opContext, @Nonnull final Set urns, - @Nonnull Set aspectNames); + @Nonnull Set aspectNames, + boolean alwaysIncludeKeyAspect); Pair, Boolean> alwaysProduceMCLAsync( @Nonnull OperationContext opContext,