diff --git a/docs/managed-datahub/release-notes/v_0_3_7.md b/docs/managed-datahub/release-notes/v_0_3_7.md index dc1c702c89fb2..59b7a23b5e583 100644 --- a/docs/managed-datahub/release-notes/v_0_3_7.md +++ b/docs/managed-datahub/release-notes/v_0_3_7.md @@ -7,7 +7,7 @@ Release Availability Date Recommended CLI/SDK --- -- `v0.14.1.7` with release notes at https://github.com/datahub-project/datahub/releases/tag/v0.14.1.7 +- `v0.14.1.11` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.14.1.11 If you are using an older CLI/SDK version, then please upgrade it. This applies for all CLI/SDK usages, if you are using it through your terminal, GitHub Actions, Airflow, in Python SDK somewhere, Java SDK, etc. This is a strong recommendation to upgrade, as we keep on pushing fixes in the CLI, and it helps us support you better. @@ -19,7 +19,7 @@ If you are using an older CLI/SDK version, then please upgrade it. This applies - Breaking Changes - Authentication & RestAPI Authorization enabled by default (since v0.3.6) - - Helm Chart Requirement: 1.4.136+ + - Helm Chart Requirement: 1.4.137+ - Recommend setting timezone for `datahub-gc` and `datahub-usage-reporting` - ```yaml acryl-datahub: @@ -91,7 +91,7 @@ If you are using an older CLI/SDK version, then please upgrade it. 
This applies - Improved UX for setting up and managing SSO - Ingestion changes - - In addition to the improvements listed here: https://github.com/datahub-project/datahub/releases/tag/v0.14.1.7 + - In addition to the improvements listed here: https://github.com/acryldata/datahub/releases/tag/v0.14.1.11 - PowerBI: Support for PowerBI Apps and cross-workspace lineage - Fivetran: Major improvements to configurability and improved reliability with large Fivetran setups - Snowflake & BigQuery: Improved handling of temporary tables and swap statements when generating lineage diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py index e28eb08e492ee..db83dde7cf613 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py @@ -249,7 +249,7 @@ def post(self, url: str, data: str) -> Dict: ) return response.json() - def execute_query(self, query: str, timeout: int = 300) -> List[Dict[str, Any]]: + def execute_query(self, query: str, timeout: int = 3600) -> List[Dict[str, Any]]: """Execute SQL query with timeout and error handling""" try: response = self.post(url="/sql", data=json.dumps({"sql": query})) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py index 1c247c7d1f7bc..161e8141c8852 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_sql_queries.py @@ -235,21 +235,34 @@ class DremioSQLQueries: TABLE_NAME ASC """ + # Dremio Documentation: https://docs.dremio.com/current/reference/sql/system-tables/jobs_recent/ + # queried_datasets incorrectly documented as [varchar]. Observed as varchar. 
+ # LENGTH used as opposed to ARRAY_SIZE QUERY_ALL_JOBS = """ SELECT - * + job_id, + user_name, + submitted_ts, + query, + queried_datasets FROM SYS.JOBS_RECENT WHERE STATUS = 'COMPLETED' - AND ARRAY_SIZE(queried_datasets)>0 + AND LENGTH(queried_datasets)>0 AND user_name != '$dremio$' AND query_type not like '%INTERNAL%' """ + # Dremio Documentation: https://docs.dremio.com/cloud/reference/sql/system-tables/jobs-historical + # queried_datasets correctly documented as [varchar] QUERY_ALL_JOBS_CLOUD = """ SELECT - * + job_id, + user_name, + submitted_ts, + query, + CONCAT('[', ARRAY_TO_STRING(queried_datasets, ','), ']') as queried_datasets FROM sys.project.history.jobs WHERE