Skip to content

Commit

Permalink
feat(ingest): update sqlglot fork (datahub-project#10022)
Browse files: browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Mar 11, 2024
1 parent 4dddd7f commit b6956f9
Show file tree
Hide file tree
Showing 7 changed files with 14 additions and 14 deletions.
2 changes: 1 addition & 1 deletion metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@
sqlglot_lib = {
# Using an Acryl fork of sqlglot.
# https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1
"acryl-sqlglot==21.1.2.dev10",
"acryl-sqlglot==22.3.1.dev3",
}

sql_common = (
Expand Down
8 changes: 4 additions & 4 deletions metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def get_query_type_of_sql(
sqlglot.exp.Update: QueryType.UPDATE,
sqlglot.exp.Delete: QueryType.DELETE,
sqlglot.exp.Merge: QueryType.MERGE,
sqlglot.exp.Subqueryable: QueryType.SELECT, # unions, etc. are also selects
sqlglot.exp.Query: QueryType.SELECT, # unions, etc. are also selects
}

for cls, query_type in mapping.items():
Expand Down Expand Up @@ -296,12 +296,12 @@ def _table_level_lineage(
# TODO: Once Python 3.10 is the minimum supported version (PEP 604), we can
# unify these into a single type that also works with isinstance(). See
# https://peps.python.org/pep-0604/#isinstance-and-issubclass.
_SupportedColumnLineageTypes = Union[
# Note that Select and Union inherit from Subqueryable.
sqlglot.exp.Subqueryable,
# Note that Select and Union inherit from Query.
sqlglot.exp.Query,
# For actual subqueries, the statement type might also be DerivedTable.
sqlglot.exp.DerivedTable,
]
_SupportedColumnLineageTypesTuple = (sqlglot.exp.Subqueryable, sqlglot.exp.DerivedTable)
_SupportedColumnLineageTypesTuple = (sqlglot.exp.Query, sqlglot.exp.DerivedTable)


class UnsupportedStatementTypeError(TypeError):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"query_type": "SELECT",
"query_type_props": {},
"query_fingerprint": "3316d40c409d45e97615e8dece5ea9ba11020aca4bb8d903100ee8c81372e73d",
"query_fingerprint": "96b854716f22f34eeeba89d8ec99f4fa7c0432f3712b0bd23838d03c7197b7d0",
"in_tables": [
"urn:li:dataset:(urn:li:dataPlatform:bigquery,bq-proj.dataset.table_yyyymmdd,PROD)"
],
Expand Down Expand Up @@ -46,6 +46,6 @@
],
"debug_info": {
"confidence": 0.9,
"generalized_statement": "SELECT * FROM `bq-proj`.dataset.`table_2023*`"
"generalized_statement": "SELECT * FROM `bq-proj.dataset.table_2023*`"
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"query_type": "SELECT",
"query_type_props": {},
"query_fingerprint": "68b038ff09626bbe2c4bc79be39ce51b50937457008e08461cdd6ed3b6ae3f2e",
"query_fingerprint": "9fd825981276bd1604efd2f277e6990b5415079d24adb8ac8f566a3fb350a091",
"in_tables": [
"urn:li:dataset:(urn:li:dataPlatform:bigquery,bq-proj.dataset.table_yyyymmdd,PROD)"
],
Expand Down Expand Up @@ -46,6 +46,6 @@
],
"debug_info": {
"confidence": 0.9,
"generalized_statement": "SELECT * FROM `bq-proj`.dataset.table_20230101"
"generalized_statement": "SELECT * FROM `bq-proj.dataset.table_20230101`"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"query_type_props": {
"kind": "VIEW"
},
"query_fingerprint": "4b2d3a58d47ddc4c1beeaddf5d296ff460a85ad5142009950aa072bb97fe771d",
"query_fingerprint": "53c10f64d18f777d45e6d13b9eab03957db1ac3a353db30c672965180035de8d",
"in_tables": [
"urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project2.my-dataset2.test_physical_table,PROD)"
],
Expand Down Expand Up @@ -68,6 +68,6 @@
],
"debug_info": {
"confidence": 0.35,
"generalized_statement": "CREATE VIEW `my-project`.`my-dataset`.test_table AS SELECT * REPLACE (LOWER(something) AS something) FROM `my-project2`.`my-dataset2`.test_physical_table"
"generalized_statement": "CREATE VIEW `my-project.my-dataset.test_table` AS SELECT * REPLACE (LOWER(something) AS something) FROM `my-project2.my-dataset2.test_physical_table`"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"native_column_type": "NUMBER"
"native_column_type": "DOUBLE PRECISION"
},
"upstreams": []
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"kind": "TABLE",
"temporary": true
},
"query_fingerprint": "252f5a0232a14a4533919960412ad2681c14b14b8045c046b23ac3d2411c4c5e",
"query_fingerprint": "55195d697586ac4fdf8a6df745cb158a38878c2d2bb3ab3950b13fa618f02491",
"in_tables": [
"urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.other_schema.table1,PROD)"
],
Expand Down Expand Up @@ -51,6 +51,6 @@
],
"debug_info": {
"confidence": 0.35,
"generalized_statement": "CREATE TABLE #my_custom_name DISTKEY(1) SORTKEY(\"1\", \"2\") AS WITH cte AS (SELECT * FROM other_schema.table1) SELECT * FROM cte"
"generalized_statement": "CREATE TABLE #my_custom_name DISTKEY(\"1\") SORTKEY(\"1\", \"2\") AS WITH cte AS (SELECT * FROM other_schema.table1) SELECT * FROM cte"
}
}

0 comments on commit b6956f9

Please sign in to comment.