diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 886f455390e5db..22626dcf2bddfc 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -99,7 +99,7 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 - "acryl-sqlglot==21.1.2.dev10", + "acryl-sqlglot==22.3.1.dev3", } sql_common = ( diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py index 31b3a756f8d703..9150f2d93b7390 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py @@ -131,7 +131,7 @@ def get_query_type_of_sql( sqlglot.exp.Update: QueryType.UPDATE, sqlglot.exp.Delete: QueryType.DELETE, sqlglot.exp.Merge: QueryType.MERGE, - sqlglot.exp.Subqueryable: QueryType.SELECT, # unions, etc. are also selects + sqlglot.exp.Query: QueryType.SELECT, # unions, etc. are also selects } for cls, query_type in mapping.items(): @@ -296,12 +296,12 @@ def _table_level_lineage( # TODO: Once PEP 604 is supported (Python 3.10), we can unify these into a # single type. See https://peps.python.org/pep-0604/#isinstance-and-issubclass. _SupportedColumnLineageTypes = Union[ - # Note that Select and Union inherit from Subqueryable. - sqlglot.exp.Subqueryable, + # Note that Select and Union inherit from Query. + sqlglot.exp.Query, # For actual subqueries, the statement type might also be DerivedTable. sqlglot.exp.DerivedTable, ] -_SupportedColumnLineageTypesTuple = (sqlglot.exp.Subqueryable, sqlglot.exp.DerivedTable) +_SupportedColumnLineageTypesTuple = (sqlglot.exp.Query, sqlglot.exp.DerivedTable) class UnsupportedStatementTypeError(TypeError): diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json index 5e926fca87a7e4..5b7bd588870fab 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json @@ -1,7 +1,7 @@ { "query_type": "SELECT", "query_type_props": {}, - "query_fingerprint": "3316d40c409d45e97615e8dece5ea9ba11020aca4bb8d903100ee8c81372e73d", + "query_fingerprint": "96b854716f22f34eeeba89d8ec99f4fa7c0432f3712b0bd23838d03c7197b7d0", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:bigquery,bq-proj.dataset.table_yyyymmdd,PROD)" ], @@ -46,6 +46,6 @@ ], "debug_info": { "confidence": 0.9, - "generalized_statement": "SELECT * FROM `bq-proj`.dataset.`table_2023*`" + "generalized_statement": "SELECT * FROM `bq-proj.dataset.table_2023*`" } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json index ad2cda34a73be5..656deeb2bfd2ec 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json @@ -1,7 +1,7 @@ { "query_type": "SELECT", "query_type_props": {}, - "query_fingerprint": "68b038ff09626bbe2c4bc79be39ce51b50937457008e08461cdd6ed3b6ae3f2e", + "query_fingerprint": "9fd825981276bd1604efd2f277e6990b5415079d24adb8ac8f566a3fb350a091", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:bigquery,bq-proj.dataset.table_yyyymmdd,PROD)" ], @@ -46,6 +46,6 @@ ], "debug_info": { "confidence": 0.9, - "generalized_statement": "SELECT * FROM `bq-proj`.dataset.table_20230101" + "generalized_statement": "SELECT * FROM `bq-proj.dataset.table_20230101`" } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json index ba3075f43851c7..25ece72a189d56 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json @@ -3,7 +3,7 @@ "query_type_props": { "kind": "VIEW" }, - "query_fingerprint": "4b2d3a58d47ddc4c1beeaddf5d296ff460a85ad5142009950aa072bb97fe771d", + "query_fingerprint": "53c10f64d18f777d45e6d13b9eab03957db1ac3a353db30c672965180035de8d", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project2.my-dataset2.test_physical_table,PROD)" ], @@ -68,6 +68,6 @@ ], "debug_info": { "confidence": 0.35, - "generalized_statement": "CREATE VIEW `my-project`.`my-dataset`.test_table AS SELECT * REPLACE (LOWER(something) AS something) FROM `my-project2`.`my-dataset2`.test_physical_table" + "generalized_statement": "CREATE VIEW `my-project.my-dataset.test_table` AS SELECT * REPLACE (LOWER(something) AS something) FROM `my-project2.my-dataset2.test_physical_table`" } } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json index ecc104e36c89ba..2f154e9b49d316 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json @@ -34,7 +34,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "native_column_type": "NUMBER" + "native_column_type": "DOUBLE PRECISION" }, "upstreams": [] }, diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_temp_table_shortcut.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_temp_table_shortcut.json index a56480f41c6f3f..e4ce4598fd623a 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_temp_table_shortcut.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_redshift_temp_table_shortcut.json @@ -4,7 +4,7 @@ "kind": "TABLE", "temporary": true }, - "query_fingerprint": "252f5a0232a14a4533919960412ad2681c14b14b8045c046b23ac3d2411c4c5e", + "query_fingerprint": "55195d697586ac4fdf8a6df745cb158a38878c2d2bb3ab3950b13fa618f02491", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:redshift,my_db.other_schema.table1,PROD)" ], @@ -51,6 +51,6 @@ ], "debug_info": { "confidence": 0.35, - "generalized_statement": "CREATE TABLE #my_custom_name DISTKEY(1) SORTKEY(\"1\", \"2\") AS WITH cte AS (SELECT * FROM other_schema.table1) SELECT * FROM cte" + "generalized_statement": "CREATE TABLE #my_custom_name DISTKEY(\"1\") SORTKEY(\"1\", \"2\") AS WITH cte AS (SELECT * FROM other_schema.table1) SELECT * FROM cte" } } \ No newline at end of file