forked from datahub-project/datahub
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(ingest): add urn validation test files (datahub-project#12036)
- Loading branch information
1 parent
26e0596
commit da85dc0
Showing
12 changed files
with
115 additions
and
136 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Basic URN format tests | ||
urn:li:abc | ||
urn:li:abc: | ||
urn:li:abc:() | ||
urn:li:abc:(abc,) | ||
urn:li:corpuser:abc) | ||
|
||
# Reserved characters | ||
urn:li:corpuser:foo␟bar | ||
urn:li:tag:a,b,c | ||
|
||
# CorpUser URN tests | ||
urn:li:corpuser:(part1,part2) | ||
|
||
# Dataset URN tests | ||
urn:li:dataset:(urn:li:user:abc,dataset,prod) | ||
urn:li:dataset:(urn:li:user:abc,dataset) | ||
urn:li:dataset:(urn:li:user:abc,dataset,invalidEnv) | ||
|
||
# DataFlow URN tests | ||
urn:li:dataFlow:(airflow,flow_id) | ||
|
||
# DataJob URN tests | ||
urn:li:dataJob:(urn:li:user:abc,job_id) | ||
urn:li:dataJob:(urn:li:dataFlow:(airflow,flow_id,prod)) | ||
|
||
# Domain URN tests | ||
urn:li:domain:(part1,part2) | ||
|
||
# Tag URN tests | ||
urn:li:tag:(part1,part2) | ||
|
||
# Notebook URN tests | ||
urn:li:notebook:(part1,part2,part3) | ||
|
||
# CorpGroup URN tests | ||
urn:li:corpGroup:(part1,part2) | ||
|
||
# DataProcessInstance URN tests | ||
urn:li:dataProcessInstance:(part1,part2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,17 @@ | ||
import logging | ||
import pathlib | ||
from typing import List | ||
|
||
import pytest | ||
|
||
from datahub.metadata.urns import ( | ||
CorpUserUrn, | ||
DashboardUrn, | ||
DataPlatformUrn, | ||
DatasetUrn, | ||
Urn, | ||
) | ||
from datahub.metadata.urns import CorpUserUrn, DatasetUrn, Urn | ||
from datahub.utilities.urns.error import InvalidUrnError | ||
|
||
pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning") | ||
|
||
_CURRENT_DIR = pathlib.Path(__file__).parent | ||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def test_parse_urn() -> None: | ||
simple_urn_str = "urn:li:dataPlatform:abc" | ||
|
@@ -40,38 +41,12 @@ def test_url_encode_urn() -> None: | |
) | ||
|
||
|
||
def test_invalid_urn() -> None:
    """Check that the generic parser raises InvalidUrnError for malformed urns."""
    malformed_urns = (
        "urn:li:abc",
        "urn:li:abc:",
        "urn:li:abc:()",
        "urn:li:abc:(abc,)",
        "urn:li:corpuser:abc)",
    )

    # Each candidate is checked on its own so one failure pinpoints one string.
    for candidate in malformed_urns:
        with pytest.raises(InvalidUrnError):
            Urn.from_string(candidate)
|
||
|
||
def test_urn_colon() -> None: | ||
# Colon characters are valid in urns, and should not mess up parsing. | ||
|
||
urn = Urn.from_string( | ||
"urn:li:dashboard:(looker,dashboards.thelook::customer_lookup)" | ||
) | ||
assert isinstance(urn, DashboardUrn) | ||
|
||
assert DataPlatformUrn.from_string("urn:li:dataPlatform:abc:def") | ||
assert DatasetUrn.from_string( | ||
"urn:li:dataset:(urn:li:dataPlatform:abc:def,table_name,PROD)" | ||
) | ||
assert Urn.from_string("urn:li:corpuser:foo:[email protected]") | ||
# There's a bunch of other, simpler tests for special characters in the valid_urns test. | ||
|
||
# This test ensures that the type dispatch and fields work fine here. | ||
# I'm not sure why you'd ever want this, but technically it's a valid urn. | ||
|
||
urn = Urn.from_string("urn:li:corpuser::") | ||
assert isinstance(urn, CorpUserUrn) | ||
assert urn.username == ":" | ||
|
@@ -85,9 +60,48 @@ def test_urn_coercion() -> None: | |
assert urn == Urn.from_string(urn.urn()) | ||
|
||
|
||
def test_urn_type_dispatch() -> None: | ||
def test_urn_type_dispatch_1() -> None:
    """Check dataset dispatch from Urn and rejection of a corpuser urn by DatasetUrn."""
    dataset_urn_str = "urn:li:dataset:(urn:li:dataPlatform:abc,def,PROD)"
    parsed = Urn.from_string(dataset_urn_str)
    assert isinstance(parsed, DatasetUrn)

    # The dataset-specific parser must refuse an urn of another entity type.
    expected_message = "Passed an urn of type corpuser"
    with pytest.raises(InvalidUrnError, match=expected_message):
        DatasetUrn.from_string("urn:li:corpuser:foo")
|
||
|
||
def test_urn_type_dispatch_2() -> None:
    """Check that a nested dataJob urn round-trips and is rejected by CorpUserUrn."""
    data_job_urn = "urn:li:dataJob:(urn:li:dataFlow:(airflow,flow_id,prod),job_id)"

    # Parsing and re-serializing must reproduce the input string exactly.
    round_tripped = Urn.from_string(data_job_urn).urn()
    assert round_tripped == data_job_urn

    expected_message = "Passed an urn of type dataJob"
    with pytest.raises(InvalidUrnError, match=expected_message):
        CorpUserUrn.from_string(data_job_urn)
|
||
|
||
def _load_urns(file_name: pathlib.Path) -> List[str]:
    """Read a urn fixture file and return its urns in file order.

    Blank lines and lines whose first non-whitespace character is ``#`` are
    skipped. Leading and trailing whitespace is removed from each urn.

    Args:
        file_name: Path to a text file holding one urn per line.

    Returns:
        The urns found in the file, in the order they appear.

    Raises:
        AssertionError: If the file yields no urns, which would otherwise let
            a test that loops over the result pass without checking anything.
    """
    # Fixture files may contain non-ASCII characters (such as U+241F), so the
    # encoding is pinned rather than taken from the platform locale.
    raw_lines = file_name.read_text(encoding="utf-8").splitlines()

    # Strip before the comment check so indented comment lines are skipped too.
    urns = [
        stripped
        for stripped in (line.strip() for line in raw_lines)
        if stripped and not stripped.startswith("#")
    ]
    assert urns, f"No urns found in {file_name}"
    return urns
|
||
|
||
def test_valid_urns() -> None:
    """Check that every urn in valid_urns.txt parses and serializes back unchanged."""
    urn_strings = _load_urns(_CURRENT_DIR / "valid_urns.txt")

    for urn_str in urn_strings:
        logger.info(f"Testing valid URN: {urn_str}")
        # Round-trip through the parser; the output must equal the input.
        round_tripped = Urn.from_string(urn_str).urn()
        assert round_tripped == urn_str
|
||
|
||
def test_invalid_urns() -> None:
    """Check that every urn in invalid_urns.txt is rejected with InvalidUrnError."""
    urn_strings = _load_urns(_CURRENT_DIR / "invalid_urns.txt")

    for urn_str in urn_strings:
        # Log first so the offending urn appears in the captured output on failure.
        logger.info(f"Testing invalid URN: {urn_str}")
        with pytest.raises(InvalidUrnError):
            Urn.from_string(urn_str)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Unknown entity types become generic urns | ||
urn:li:abc:foo | ||
urn:li:abc:(foo,bar) | ||
urn:li:abc:(urn:li:dataPlatform:abc,def,prod) | ||
|
||
# A bunch of pretty normal urns | ||
urn:li:corpuser:foo | ||
urn:li:corpGroup:bar | ||
urn:li:dataset:(urn:li:dataPlatform:abc,def/ghi,prod) | ||
urn:li:dataFlow:(airflow,def,prod) | ||
urn:li:dataJob:(urn:li:dataFlow:(airflow,flow_id,prod),job_id) | ||
urn:li:tag:abc | ||
urn:li:chart:(looker,chart_name) | ||
urn:li:dashboard:(looker,dashboard_name) | ||
urn:li:dataProcessInstance:abc | ||
urn:li:domain:abc | ||
urn:li:notebook:(querybook,123) | ||
|
||
# Urns with colons and other special characters | ||
urn:li:tag:dbt:bar | ||
urn:li:tag:: | ||
urn:li:dashboard:(looker,dashboards.thelook::customer_lookup) | ||
urn:li:dataPlatform:abc:def | ||
urn:li:corpuser:foo:bar@example.com |