From a6be4869500355af0d178669fede6b6c7316e98a Mon Sep 17 00:00:00 2001
From: Daniel Bachhuber
Date: Fri, 10 Jan 2025 05:01:14 -0800
Subject: [PATCH] chore(experiments): Tests covering materialized columns support (#27067)

Co-authored-by: Marius Andra
---
 .../test_experiment_trends_query_runner.py | 257 +++++++++++++++++-
 1 file changed, 256 insertions(+), 1 deletion(-)

diff --git a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py
index 58aef9b8c05c7..15ec7d4fbfd38 100644
--- a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py
+++ b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py
@@ -1,4 +1,5 @@
 from django.test import override_settings
+from ee.clickhouse.materialized_columns.columns import get_enabled_materialized_columns, materialize
 from posthog.hogql_queries.experiments.experiment_trends_query_runner import ExperimentTrendsQueryRunner
 from posthog.models.experiment import Experiment, ExperimentHoldout
 from posthog.models.feature_flag.feature_flag import FeatureFlag
@@ -8,6 +9,7 @@
     ExperimentSignificanceCode,
     ExperimentTrendsQuery,
     ExperimentTrendsQueryResponse,
+    PersonsOnEventsMode,
     TrendsQuery,
 )
 from posthog.settings import (
@@ -17,7 +19,13 @@
     OBJECT_STORAGE_SECRET_ACCESS_KEY,
     XDIST_SUFFIX,
 )
-from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, _create_person, flush_persons_and_events
+from posthog.test.base import (
+    APIBaseTest,
+    ClickhouseTestMixin,
+    _create_event,
+    _create_person,
+    flush_persons_and_events,
+)
 from freezegun import freeze_time
 from typing import cast
 from django.utils import timezone
@@ -1038,6 +1046,253 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self)
             [0.0, 500.0, 1250.0, 101250.0, 101250.0, 102050.0, 102050.0, 102050.0, 102050.0, 102050.0],
         )
 
+    def test_query_runner_with_data_warehouse_series_internal_user_filter(self):
+        table_name = self.create_data_warehouse_table_with_usage()
+        materialize("person", "email")
+        materialize("events", "email", table_column="person_properties")
+
+        self.team.modifiers = {"personsOnEventsMode": PersonsOnEventsMode.PERSON_ID_NO_OVERRIDE_PROPERTIES_ON_EVENTS}
+        self.team.save()
+
+        feature_flag = self.create_feature_flag()
+        experiment = self.create_experiment(
+            feature_flag=feature_flag,
+            start_date=datetime(2023, 1, 1),
+        )
+
+        feature_flag_property = f"$feature/{feature_flag.key}"
+
+        self.team.test_account_filters = [
+            {
+                "key": "email",
+                "value": "@posthog.com",
+                "operator": "not_icontains",
+                "type": "person",
+            },
+            {
+                "key": "$host",
+                "type": "event",
+                "value": "^(localhost|127\\.0\\.0\\.1)($|:)",
+                "operator": "not_regex",
+            },
+        ]
+        self.team.save()
+        count_query = TrendsQuery(
+            series=[
+                DataWarehouseNode(
+                    id=table_name,
+                    distinct_id_field="userid",
+                    id_field="id",
+                    table_name=table_name,
+                    timestamp_field="ds",
+                    math="avg",
+                    math_property="usage",
+                    math_property_type="data_warehouse_properties",
+                )
+            ],
+            filterTestAccounts=True,
+        )
+        exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=True)
+
+        experiment_query = ExperimentTrendsQuery(
+            experiment_id=experiment.id,
+            kind="ExperimentTrendsQuery",
+            count_query=count_query,
+            exposure_query=exposure_query,
+        )
+
+        experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}]
+        experiment.save()
+
+        # Populate exposure events
+        for variant, count in [("control", 7), ("test", 9)]:
+            for i in range(count):
+                _create_event(
+                    team=self.team,
+                    event="$feature_flag_called",
+                    distinct_id=f"distinct_{variant}_{i}",
+                    properties={
+                        "$feature_flag_response": variant,
+                        feature_flag_property: variant,
+                        "$feature_flag": feature_flag.key,
+                        "$user_id": f"user_{variant}_{i}",
+                    },
+                    timestamp=datetime(2023, 1, i + 1),
+                )
+
+        _create_person(
+            team=self.team,
+            uuid="018f14b8-6cf3-7ffd-80bb-5ef1a9e4d328",
+            distinct_ids=["018f14b8-6cf3-7ffd-80bb-5ef1a9e4d328", "internal_test_1"],
+            properties={"email": "internal_test_1@posthog.com"},
+        )
+
+        _create_event(
+            team=self.team,
+            event="$feature_flag_called",
+            distinct_id="internal_test_1",
+            properties={
+                feature_flag_property: "test",
+                "$feature_flag_response": "test",
+                "$feature_flag": feature_flag.key,
+                "$user_id": "internal_test_1",
+            },
+            timestamp=datetime(2023, 1, 3),
+        )
+
+        # "user_test_3" first exposure (feature_flag_property="control") is on 2023-01-03
+        # "user_test_3" relevant exposure (feature_flag_property="test") is on 2023-01-04
+        # "user_test_3" other event (feature_flag_property="control") is on 2023-01-05
+        # "user_test_3" purchase is on 2023-01-06
+        # "user_test_3" second exposure (feature_flag_property="control") is on 2023-01-09
+        # "user_test_3" should fall into the "test" variant, not the "control" variant
+        _create_event(
+            team=self.team,
+            event="$feature_flag_called",
+            distinct_id="distinct_test_3",
+            properties={
+                "$feature_flag_response": "control",
+                feature_flag_property: "control",
+                "$feature_flag": feature_flag.key,
+                "$user_id": "user_test_3",
+            },
+            timestamp=datetime(2023, 1, 3),
+        )
+        _create_event(
+            team=self.team,
+            event="Some other event",
+            distinct_id="distinct_test_3",
+            properties={
+                "$feature_flag_response": "control",
+                feature_flag_property: "control",
+                "$feature_flag": feature_flag.key,
+                "$user_id": "user_test_3",
+            },
+            timestamp=datetime(2023, 1, 5),
+        )
+        _create_event(
+            team=self.team,
+            event="$feature_flag_called",
+            distinct_id="distinct_test_3",
+            properties={
+                "$feature_flag_response": "control",
+                feature_flag_property: "control",
+                "$feature_flag": feature_flag.key,
+                "$user_id": "user_test_3",
+            },
+            timestamp=datetime(2023, 1, 9),
+        )
+
+        flush_persons_and_events()
+
+        query_runner = ExperimentTrendsQueryRunner(
+            query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team
+        )
+        with freeze_time("2023-01-07"):
+            # Build and execute the query to get the ClickHouse SQL
+            queries = query_runner.count_query_runner.to_queries()
+            response = execute_hogql_query(
+                query_type="TrendsQuery",
+                query=queries[0],
+                team=query_runner.count_query_runner.team,
+                modifiers=query_runner.count_query_runner.modifiers,
+                limit_context=query_runner.count_query_runner.limit_context,
+            )
+
+            materialized_columns = get_enabled_materialized_columns("events")
+            self.assertIn("mat_pp_email", [col.name for col in materialized_columns.values()])
+            # Assert the expected email where statement in the clickhouse SQL
+            expected_email_where_statement = "ifNull(notILike(e__events.poe___properties___email, %(hogql_val_25)s), 1)"
+            self.assertIn(
+                expected_email_where_statement,
+                str(response.clickhouse),
+            )
+
+            result = query_runner.calculate()
+
+        trend_result = cast(ExperimentTrendsQueryResponse, result)
+
+        self.assertEqual(len(result.variants), 2)
+
+        control_result = next(variant for variant in trend_result.variants if variant.key == "control")
+        test_result = next(variant for variant in trend_result.variants if variant.key == "test")
+
+        control_insight = next(variant for variant in trend_result.insight if variant["breakdown_value"] == "control")
+        test_insight = next(variant for variant in trend_result.insight if variant["breakdown_value"] == "test")
+
+        self.assertEqual(control_result.count, 1000)
+        self.assertEqual(test_result.count, 2050)
+        self.assertEqual(control_result.absolute_exposure, 1)
+        self.assertEqual(test_result.absolute_exposure, 3)
+
+        self.assertEqual(
+            control_insight["data"][:10],
+            [1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0],
+        )
+        self.assertEqual(
+            test_insight["data"][:10],
+            [0.0, 500.0, 1250.0, 1250.0, 1250.0, 2050.0, 2050.0, 2050.0, 2050.0, 2050.0],
+        )
+
+        # Run the query again with filter_test_accounts=False
+        # as a point of comparison to above
+        count_query = TrendsQuery(
+            series=[
+                DataWarehouseNode(
+                    id=table_name,
+                    distinct_id_field="userid",
+                    id_field="id",
+                    table_name=table_name,
+                    timestamp_field="ds",
+                    math="avg",
+                    math_property="usage",
+                    math_property_type="data_warehouse_properties",
+                )
+            ],
+            filterTestAccounts=False,
+        )
+        exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=False)
+
+        experiment_query = ExperimentTrendsQuery(
+            experiment_id=experiment.id,
+            kind="ExperimentTrendsQuery",
+            count_query=count_query,
+            exposure_query=exposure_query,
+        )
+
+        experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}]
+        experiment.save()
+
+        query_runner = ExperimentTrendsQueryRunner(
+            query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team
+        )
+        with freeze_time("2023-01-07"):
+            result = query_runner.calculate()
+
+        trend_result = cast(ExperimentTrendsQueryResponse, result)
+
+        self.assertEqual(len(result.variants), 2)
+
+        control_result = next(variant for variant in trend_result.variants if variant.key == "control")
+        test_result = next(variant for variant in trend_result.variants if variant.key == "test")
+
+        control_insight = next(variant for variant in trend_result.insight if variant["breakdown_value"] == "control")
+        test_insight = next(variant for variant in trend_result.insight if variant["breakdown_value"] == "test")
+
+        self.assertEqual(control_result.count, 1000)
+        self.assertEqual(test_result.count, 102050)
+        self.assertEqual(control_result.absolute_exposure, 1)
+        self.assertEqual(test_result.absolute_exposure, 4)
+
+        self.assertEqual(
+            control_insight["data"][:10],
+            [1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0],
+        )
+        self.assertEqual(
+            test_insight["data"][:10],
+            [0.0, 500.0, 1250.0, 101250.0, 101250.0, 102050.0, 102050.0, 102050.0, 102050.0, 102050.0],
+        )
+
     def test_query_runner_with_data_warehouse_series_expected_query(self):
         table_name = self.create_data_warehouse_table_with_payments()