diff --git a/docs-website/src/pages/_components/Hero/index.js b/docs-website/src/pages/_components/Hero/index.js index ffa298b27a822..97a04eb21fa73 100644 --- a/docs-website/src/pages/_components/Hero/index.js +++ b/docs-website/src/pages/_components/Hero/index.js @@ -8,6 +8,7 @@ import { QuestionCircleOutlined } from "@ant-design/icons"; import styles from "./hero.module.scss"; import CodeBlock from "@theme/CodeBlock"; import CardCTAs from "../CardCTAs"; +import TownhallButton from "../TownhallButton"; const HeroAnnouncement = ({ message, linkUrl, linkText }) => (
@@ -46,6 +47,7 @@ const Hero = ({}) => { Join our Slack +
diff --git a/docs-website/src/pages/_components/TownhallButton/index.jsx b/docs-website/src/pages/_components/TownhallButton/index.jsx new file mode 100644 index 0000000000000..11dc2dc5c8476 --- /dev/null +++ b/docs-website/src/pages/_components/TownhallButton/index.jsx @@ -0,0 +1,31 @@ +import React from 'react'; +import styles from "./townhallbutton.module.scss"; +import clsx from "clsx"; +import Link from "@docusaurus/Link"; + +const TownhallButton = () => { + const today = new Date(); + const currentDay = today.getDate(); + const lastDayOfMonth = new Date(today.getFullYear(), today.getMonth() + 1, 0); + const lastThursday = lastDayOfMonth.getDate() - ((lastDayOfMonth.getDay() + 7 - 4) % 7); + + const daysUntilLastThursday = lastThursday - currentDay; + + let showButton = false; + let currentMonth = ''; + + if (daysUntilLastThursday > 0 && daysUntilLastThursday <= 14) { + showButton = true; + currentMonth = new Intl.DateTimeFormat('en-US', { month: 'long' }).format(today); + } + + return ( + showButton && ( + + Join {currentMonth} Townhall! ✨ + + ) + ); +}; + +export default TownhallButton; diff --git a/docs-website/src/pages/_components/TownhallButton/townhallbutton.module.scss b/docs-website/src/pages/_components/TownhallButton/townhallbutton.module.scss new file mode 100644 index 0000000000000..951bc99015302 --- /dev/null +++ b/docs-website/src/pages/_components/TownhallButton/townhallbutton.module.scss @@ -0,0 +1,14 @@ +.feature { + color: white; + border: 1px solid transparent; + background-image: linear-gradient(to right, #1890ff 0%, #9c27b0 100%); + background-origin: border-box; + opacity: 90%; + + &:hover { + opacity: 100%; + background: linear-gradient(to right, #1890ff 0%, #9c27b0 100%); + background-image: linear-gradient(to right, #1890ff 0%, #9c27b0 100%); + background-origin: border-box; + } +} diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index 0f89324f5efc6..267f7cf074909 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -1,5 +1,6 @@ from typing import List, Optional +from datahub.configuration.common import AllowDenyPattern from datahub.configuration.time_window_config import BucketDuration from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain from datahub.ingestion.source.snowflake.snowflake_config import DEFAULT_TABLES_DENY_LIST @@ -551,6 +552,8 @@ def usage_per_object_per_time_bucket_for_time_window( use_base_objects: bool, top_n_queries: int, include_top_n_queries: bool, + email_domain: Optional[str], + email_filter: AllowDenyPattern, ) -> str: if not include_top_n_queries: top_n_queries = 0 @@ -561,6 +564,9 @@ def usage_per_object_per_time_bucket_for_time_window( objects_column = ( "BASE_OBJECTS_ACCESSED" if use_base_objects else "DIRECT_OBJECTS_ACCESSED" ) + email_filter_query = SnowflakeQuery.gen_email_filter_query(email_filter) + + email_domain = f"@{email_domain}" if email_domain else "" return f""" WITH object_access_history AS @@ -578,12 +584,16 @@ def usage_per_object_per_time_bucket_for_time_window( query_id, query_start_time, user_name, + NVL(USERS.email, CONCAT(user_name, '{email_domain}')) AS user_email, {objects_column} from snowflake.account_usage.access_history + LEFT JOIN + snowflake.account_usage.users USERS WHERE query_start_time >= to_timestamp_ltz({start_time_millis}, 3) AND query_start_time < to_timestamp_ltz({end_time_millis}, 3) + {email_filter_query} ) t, lateral flatten(input => t.{objects_column}) object @@ -705,6 +715,34 @@ def usage_per_object_per_time_bucket_for_time_window( basic_usage_counts.bucket_start_time """ + @staticmethod + def gen_email_filter_query(email_filter: AllowDenyPattern) -> str: + allow_filters = [] + allow_filter = "" + if len(email_filter.allow) == 1 and email_filter.allow[0] == ".*": + allow_filter = "" + else: + for allow_pattern in email_filter.allow: + allow_filters.append( + f"rlike(user_name, '{allow_pattern}','{'i' if email_filter.ignoreCase else 'c'}')" + ) + if allow_filters: + allow_filter = " OR ".join(allow_filters) + allow_filter = f"AND ({allow_filter})" + deny_filters = [] + deny_filter = "" + for deny_pattern in email_filter.deny: + deny_filters.append( + f"rlike(user_name, '{deny_pattern}','{'i' if email_filter.ignoreCase else 'c'}')" + ) + if deny_filters: + deny_filter = " OR ".join(deny_filters) + deny_filter = f"({deny_filter})" + email_filter_query = allow_filter + ( + " AND" + f" NOT {deny_filter}" if deny_filter else "" + ) + return email_filter_query + @staticmethod def table_upstreams_with_column_lineage( start_time_millis: int, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py index 8f571313f1888..f75e994303954 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py @@ -214,6 +214,8 @@ def _get_workunits_internal( use_base_objects=self.config.apply_view_usage_to_tables, top_n_queries=self.config.top_n_queries, include_top_n_queries=self.config.include_top_n_queries, + email_domain=self.config.email_domain, + email_filter=self.config.user_email_pattern, ), ) except Exception as e: diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py index 78e5499697311..b21cea5f0988d 100644 --- a/metadata-ingestion/tests/integration/snowflake/common.py +++ b/metadata-ingestion/tests/integration/snowflake/common.py @@ -1,6 +1,7 @@ import json from datetime import datetime, timezone +from datahub.configuration.common import AllowDenyPattern from datahub.configuration.time_window_config import BucketDuration from datahub.ingestion.source.snowflake import snowflake_query from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery @@ -263,6 +264,8 @@ def default_query_results( # noqa: C901 top_n_queries=10, include_top_n_queries=True, time_bucket_size=BucketDuration.DAY, + email_domain=None, + email_filter=AllowDenyPattern.allow_all(), ) ): return [] diff --git a/metadata-ingestion/tests/unit/test_snowflake_source.py b/metadata-ingestion/tests/unit/test_snowflake_source.py index aaff878b81eee..343f4466fd6fd 100644 --- a/metadata-ingestion/tests/unit/test_snowflake_source.py +++ b/metadata-ingestion/tests/unit/test_snowflake_source.py @@ -3,6 +3,7 @@ import pytest from pydantic import ValidationError +from datahub.configuration.common import AllowDenyPattern from datahub.configuration.oauth import OAuthConfiguration from datahub.configuration.pattern_utils import UUID_REGEX from datahub.ingestion.api.source import SourceCapability @@ -16,6 +17,7 @@ SnowflakeV2Config, ) from datahub.ingestion.source.snowflake.snowflake_query import ( + SnowflakeQuery, create_deny_regex_sql_filter, ) from datahub.ingestion.source.snowflake.snowflake_usage_v2 import ( @@ -661,3 +663,44 @@ def test_snowflake_temporary_patterns_config_rename(): } ) assert conf.temporary_tables_pattern == [".*tmp.*"] + + +def test_email_filter_query_generation_with_one_deny(): + email_filter = AllowDenyPattern(deny=[".*@example.com"]) + filter_query = SnowflakeQuery.gen_email_filter_query(email_filter) + assert filter_query == " AND NOT (rlike(user_name, '.*@example.com','i'))" + + +def test_email_filter_query_generation_without_any_filter(): + email_filter = AllowDenyPattern() + filter_query = SnowflakeQuery.gen_email_filter_query(email_filter) + assert filter_query == "" + + +def test_email_filter_query_generation_one_allow(): + email_filter = AllowDenyPattern(allow=[".*@example.com"]) + filter_query = SnowflakeQuery.gen_email_filter_query(email_filter) + assert filter_query == "AND (rlike(user_name, '.*@example.com','i'))" + + +def test_email_filter_query_generation_one_allow_and_deny(): + email_filter = AllowDenyPattern( + allow=[".*@example.com", ".*@example2.com"], + deny=[".*@example2.com", ".*@example4.com"], + ) + filter_query = SnowflakeQuery.gen_email_filter_query(email_filter) + assert ( + filter_query + == "AND (rlike(user_name, '.*@example.com','i') OR rlike(user_name, '.*@example2.com','i')) AND NOT (rlike(user_name, '.*@example2.com','i') OR rlike(user_name, '.*@example4.com','i'))" + ) + + +def test_email_filter_query_generation_with_case_insensitive_filter(): + email_filter = AllowDenyPattern( + allow=[".*@example.com"], deny=[".*@example2.com"], ignoreCase=False + ) + filter_query = SnowflakeQuery.gen_email_filter_query(email_filter) + assert ( + filter_query + == "AND (rlike(user_name, '.*@example.com','c')) AND NOT (rlike(user_name, '.*@example2.com','c'))" + )