Skip to content

Commit

Permalink
feat(ingest): add tests for colon characters in urns (datahub-project…
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored and sleeperdeep committed Dec 17, 2024
1 parent 4f2ca2d commit d1caf76
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 7 deletions.
3 changes: 2 additions & 1 deletion metadata-ingestion/src/datahub/utilities/urn_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
# NOTE: Frontend relies on encoding the three characters ",", "(" and ")". Specifically, we decode and encode schema fields for column level lineage.
# If this changes, make appropriate changes to datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
# We also rely on encoding these exact three characters when generating schemaField urns in our graphQL layer. Update SchemaFieldUtils if this changes.
# The unit-separator symbol "␟" (U+241F) is reserved in addition to those three.
# Also see https://datahubproject.io/docs/what/urn/#restrictions
RESERVED_CHARS = {",", "(", ")", "␟"}
# Extended variant: everything in RESERVED_CHARS plus the percent sign.
RESERVED_CHARS_EXTENDED = RESERVED_CHARS.union({"%"})


Expand Down
49 changes: 43 additions & 6 deletions metadata-ingestion/tests/unit/urns/test_urn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import pytest

from datahub.metadata.urns import DatasetUrn, Urn
from datahub.metadata.urns import (
CorpUserUrn,
DashboardUrn,
DataPlatformUrn,
DatasetUrn,
Urn,
)
from datahub.utilities.urns.error import InvalidUrnError

pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
Expand Down Expand Up @@ -36,20 +42,51 @@ def test_url_encode_urn() -> None:

def test_invalid_urn() -> None:
    """Check that ``Urn.from_string`` rejects malformed urn strings.

    Every input is parsed inside its own ``pytest.raises`` context so that
    each one is individually required to raise ``InvalidUrnError``; a second
    call placed after a raising call in the same context would never run.
    """
    malformed_urns = [
        "urn:li:abc",  # no entity id part at all
        "urn:li:abc:",  # empty entity id
        "urn:li:abc:()",  # empty parenthesised id
        "urn:li:abc:(abc,)",  # trailing comma inside the parentheses
        "urn:li:corpuser:abc)",  # closing parenthesis without an opening one
    ]

    for raw_urn in malformed_urns:
        with pytest.raises(InvalidUrnError):
            Urn.from_string(raw_urn)


def test_urn_colon() -> None:
    """Check that colon characters inside an urn do not break parsing."""
    # Colon characters are valid in urns, and should not mess up parsing.

    urn = Urn.from_string(
        "urn:li:dashboard:(looker,dashboards.thelook::customer_lookup)"
    )
    assert isinstance(urn, DashboardUrn)

    assert DataPlatformUrn.from_string("urn:li:dataPlatform:abc:def")
    assert DatasetUrn.from_string(
        "urn:li:dataset:(urn:li:dataPlatform:abc:def,table_name,PROD)"
    )
    # A user id that contains both a colon and an e-mail style address.
    # NOTE(review): the address stands in for a literal that had been replaced
    # by an e-mail-obfuscation placeholder; confirm the exact value upstream.
    assert Urn.from_string("urn:li:corpuser:foo:bar@example.com")

    # I'm not sure why you'd ever want this, but technically it's a valid urn.
    urn = Urn.from_string("urn:li:corpuser::")
    assert isinstance(urn, CorpUserUrn)
    assert urn.username == ":"
    assert urn == CorpUserUrn(":")


def test_urn_coercion() -> None:
    """Check that a "␟" in a corp user id is percent-encoded and round-trips.

    The urn string produced for the id ``foo␟bar`` must carry the character
    as ``%E2%90%9F``, and parsing that string back must compare equal to the
    urn it was produced from.
    """
    user = CorpUserUrn("foo␟bar")
    assert user.urn() == "urn:li:corpuser:foo%E2%90%9Fbar"

    reparsed = Urn.from_string(user.urn())
    assert user == reparsed


def test_urn_type_dispatch() -> None:
urn = Urn.from_string("urn:li:dataset:(urn:li:dataPlatform:abc,def,prod)")
urn = Urn.from_string("urn:li:dataset:(urn:li:dataPlatform:abc,def,PROD)")
assert isinstance(urn, DatasetUrn)

with pytest.raises(InvalidUrnError, match="Passed an urn of type corpuser"):
Expand Down

0 comments on commit d1caf76

Please sign in to comment.