From ac5e3704b0cd76a43f309327219f50edfbfea897 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Oberm=C3=BCller?= Date: Thu, 9 Jan 2025 23:44:32 +0100 Subject: [PATCH 1/3] add hogql funnel calculation --- posthog/api/insight.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/posthog/api/insight.py b/posthog/api/insight.py index cf9cac88959c7..13a54fe963790 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -1103,7 +1103,12 @@ def funnel(self, request: request.Request, *args: Any, **kwargs: Any) -> Respons timings = HogQLTimings() try: with timings.measure("calculate"): - funnel = self.calculate_funnel(request) + query_method = get_query_method(request=request, team=self.team) + if query_method == "hogql": + funnel = self.calculate_funnel_hogql(request) + else: + funnel = self.calculate_funnel(request) + except ExposedHogQLError as e: raise ValidationError(str(e)) @@ -1134,6 +1139,19 @@ def calculate_funnel(self, request: request.Request) -> dict[str, Any]: "timezone": team.timezone, } + @cached_by_filters + def calculate_funnel_hogql(self, request: request.Request) -> dict[str, Any]: + team = self.team + filter = Filter(request=request, team=team) + query = filter_to_query(filter.to_dict()) + query_runner = get_query_runner(query, team, limit_context=None) + + # we use the legacy caching mechanism (@cached_by_filters decorator), no need to cache in the query runner + result = query_runner.run(execution_mode=ExecutionMode.CALCULATE_BLOCKING_ALWAYS) + # assert isinstance(result, schema.CachedFunnelsQueryResponse) + + return {"result": result.results, "timezone": team.timezone} + # ****************************************** # /projects/:id/insights/retention # params: From df9a11693b8a9ad26354dab86551356f39b27fd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Oberm=C3=BCller?= Date: Fri, 10 Jan 2025 18:07:27 +0100 Subject: [PATCH 2/3] fix --- posthog/api/insight.py | 1 + 1 file changed, 1 insertion(+) diff --git a/posthog/api/insight.py b/posthog/api/insight.py index 13a54fe963790..0a3554e68aaf6 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -1143,6 +1143,7 @@ def calculate_funnel(self, request: request.Request) -> dict[str, Any]: def calculate_funnel_hogql(self, request: request.Request) -> dict[str, Any]: team = self.team filter = Filter(request=request, team=team) + filter = filter.shallow_clone(overrides={"insight": "FUNNELS"}) query = filter_to_query(filter.to_dict()) query_runner = get_query_runner(query, team, limit_context=None) From d3b104af31ab43251e92199844f888337b492b9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Oberm=C3=BCller?= Date: Fri, 10 Jan 2025 18:21:06 +0100 Subject: [PATCH 3/3] remove tests reliant on legacy people_urls --- posthog/api/insight.py | 4 +- posthog/api/test/test_insight.py | 1 + posthog/api/test/test_insight_funnels.py | 380 ----------------------- 3 files changed, 3 insertions(+), 382 deletions(-) diff --git a/posthog/api/insight.py b/posthog/api/insight.py index 0a3554e68aaf6..1746fc148e841 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -1117,7 +1117,7 @@ def funnel(self, request: request.Request, *args: Any, **kwargs: Any) -> Respons return Response(funnel) - @cached_by_filters + # @cached_by_filters def calculate_funnel(self, request: request.Request) -> dict[str, Any]: team = self.team filter = Filter(request=request, data={"insight": INSIGHT_FUNNELS}, team=self.team) @@ -1139,7 +1139,7 @@ def calculate_funnel(self, request: request.Request) -> dict[str, Any]: "timezone": team.timezone, } - @cached_by_filters + # @cached_by_filters def calculate_funnel_hogql(self, request: request.Request) -> dict[str, Any]: team = self.team filter = Filter(request=request, team=team) diff --git a/posthog/api/test/test_insight.py b/posthog/api/test/test_insight.py index d10c7bfa96386..f164c07dbe2b7 100644 --- a/posthog/api/test/test_insight.py +++ b/posthog/api/test/test_insight.py @@ -3055,6 +3055,7 @@ def test_insight_funnels_hogql_local_filters(self) -> None: @snapshot_clickhouse_queries @also_test_with_materialized_columns(event_properties=["int_value"], person_properties=["fish"]) + @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) def test_insight_funnels_hogql_breakdown(self) -> None: with freeze_time("2012-01-15T04:01:34.000Z"): _create_person( diff --git a/posthog/api/test/test_insight_funnels.py b/posthog/api/test/test_insight_funnels.py index 3b4c1403a58ac..30264623a3576 100644 --- a/posthog/api/test/test_insight_funnels.py +++ b/posthog/api/test/test_insight_funnels.py @@ -1,8 +1,6 @@ from datetime import datetime from typing import Any, Union -from django.test.client import Client -from rest_framework import status from posthog.test.base import ( APIBaseTest, @@ -41,125 +39,6 @@ def test_funnel_unordered_basic_post(self): self.assertEqual(response["result"][0]["count"], 2) self.assertEqual(response["result"][1]["count"], 2) - # Should have 2 people, all got to the end of the funnel - assert get_funnel_people_breakdown_by_step(client=self.client, funnel_response=response) == [ - {"name": "Completed 1 step", "converted": ["1", "2"], "dropped": []}, - {"name": "Completed 2 steps", "converted": ["1", "2"], "dropped": []}, - ] - - def test_unordered_funnel_with_breakdown_by_event_property(self): - # Setup three funnel people, with two different $browser values - person1_properties = {"key": "val", "$browser": "Chrome"} - person2_properties = {"key": "val", "$browser": "Safari"} - person3_properties = person2_properties - - events = { - "person1": [ - { - "event": "sign up", - "timestamp": "2020-01-01", - "properties": person1_properties, - }, - { - "event": "buy", - "timestamp": "2020-01-02", - "properties": person1_properties, - }, - { - "event": "play movie", - "timestamp": "2020-01-03", - "properties": person1_properties, - }, - ], - "person2": [ - { - "event": "buy", - "timestamp": "2020-01-01", - "properties": person2_properties, - }, - { - "event": "sign up", - "timestamp": "2020-01-02", - "properties": person2_properties, - }, - { - "event": "play movie", - "timestamp": "2020-01-03", - "properties": person2_properties, - }, - ], - "person3": [ - { - "event": "sign up", - "timestamp": "2020-01-01", - "properties": person3_properties, - } - ], - } - - journeys_for(team=self.team, events_by_person=events) - - response = self.client.post( - f"/api/projects/{self.team.pk}/insights/funnel/", - { - "events": [ - {"id": "sign up", "order": 0}, - {"id": "play movie", "order": 1}, - {"id": "buy", "order": 2}, - ], - "insight": "FUNNELS", - "date_from": "2020-01-01", - "date_to": "2020-01-08", - "funnel_window_days": 7, - "funnel_order_type": "unordered", - "breakdown_type": "event", - "breakdown": "$browser", - }, - ).json() - - assert get_funnel_breakdown_people_breakdown_by_step(client=self.client, funnel_response=response) == [ - { - "breakdown_value": "Chrome", - "steps": [ - { - "name": "Completed 1 step", - "converted": ["person1"], - "dropped": [], - }, - { - "name": "Completed 2 steps", - "converted": ["person1"], - "dropped": [], - }, - { - "name": "Completed 3 steps", - "converted": ["person1"], - "dropped": [], - }, - ], - }, - { - "breakdown_value": "Safari", - "steps": [ - { - "name": "Completed 1 step", - "converted": ["person2", "person3"], - "dropped": [], - }, - { - "name": "Completed 2 steps", - "converted": ["person2"], - "dropped": ["person3"], - }, - { - "name": "Completed 3 steps", - "converted": ["person2"], - "dropped": [], - }, - ], - }, - ] - def test_funnel_strict_basic_post(self): journeys_for( { @@ -188,216 +67,6 @@ def test_funnel_strict_basic_post(self): self.assertEqual(response["result"][0]["count"], 2) self.assertEqual(response["result"][1]["count"], 1) - # Should have 2 people, all got through step one, but as this is a - # strict funnel, person with distinct_id "2" is not converted as they - # performed bleh in between step one and step two - assert get_funnel_people_breakdown_by_step(client=self.client, funnel_response=response) == [ - {"name": "step one", "converted": ["1", "2"], "dropped": []}, - {"name": "step two", "converted": ["1"], "dropped": ["2"]}, - ] - - def test_strict_funnel_with_breakdown_by_event_property(self): - # Setup three funnel people, with two different $browser values - chrome_properties = {"key": "val", "$browser": "Chrome"} - safari_properties = {"key": "val", "$browser": "Safari"} - - events = { - "person1": [ - { - "event": "sign up", - "timestamp": "2020-01-01", - "properties": chrome_properties, - }, - { - "event": "play movie", - "timestamp": "2020-01-02", - "properties": chrome_properties, - }, - { - "event": "buy", - "timestamp": "2020-01-03", - "properties": chrome_properties, - }, - ], - "person2": [ - { - "event": "sign up", - "timestamp": "2020-01-01", - "properties": safari_properties, - }, - { - "event": "play movie", - "timestamp": "2020-01-02", - "properties": safari_properties, - }, - { - # This person should not convert here as we're in strict mode, - # and this event is not in the funnel definition - "event": "event not in funnel", - "timestamp": "2020-01-03", - "properties": safari_properties, - }, - { - "event": "buy", - "timestamp": "2020-01-04", - "properties": safari_properties, - }, - ], - "person3": [ - { - "event": "sign up", - "timestamp": "2020-01-01", - "properties": safari_properties, - } - ], - } - - journeys_for(team=self.team, events_by_person=events) - - response = self.client.post( - f"/api/projects/{self.team.pk}/insights/funnel/", - { - "events": [ - {"id": "sign up", "order": 0}, - {"id": "play movie", "order": 1}, - {"id": "buy", "order": 2}, - ], - "insight": "FUNNELS", - "date_from": "2020-01-01", - "date_to": "2020-01-08", - "funnel_window_days": 7, - "funnel_order_type": "strict", - "breakdown_type": "event", - "breakdown": "$browser", - }, - ).json() - - assert get_funnel_breakdown_people_breakdown_by_step(client=self.client, funnel_response=response) == [ - { - "breakdown_value": "Chrome", - "steps": [ - {"name": "sign up", "converted": ["person1"], "dropped": []}, - {"name": "play movie", "converted": ["person1"], "dropped": []}, - {"name": "buy", "converted": ["person1"], "dropped": []}, - ], - }, - { - "breakdown_value": "Safari", - "steps": [ - { - "name": "sign up", - "converted": ["person2", "person3"], - "dropped": [], - }, - { - "name": "play movie", - "converted": ["person2"], - "dropped": ["person3"], - }, - {"name": "buy", "converted": [], "dropped": ["person2"]}, - ], - }, - ] - - def test_funnel_with_breakdown_by_event_property(self): - # Setup three funnel people, with two different $browser values - # NOTE: this is mostly copied from - # https://github.com/PostHog/posthog/blob/a0f5a0a46a0deca2e17a66dfb530ca18ac99e58c/ee/clickhouse/queries/funnels/test/breakdown_cases.py#L24:L24 - # - person1_properties = {"key": "val", "$browser": "Chrome"} - person2_properties = {"key": "val", "$browser": "Safari"} - person3_properties = person2_properties - - events = { - "person1": [ - { - "event": "sign up", - "timestamp": "2020-01-01", - "properties": person1_properties, - }, - { - "event": "play movie", - "timestamp": "2020-01-02", - "properties": person1_properties, - }, - { - "event": "buy", - "timestamp": "2020-01-03", - "properties": person1_properties, - }, - ], - "person2": [ - { - "event": "sign up", - "timestamp": "2020-01-01", - "properties": person2_properties, - }, - { - "event": "play movie", - "timestamp": "2020-01-02", - "properties": person2_properties, - }, - { - "event": "buy", - "timestamp": "2020-01-03", - "properties": person2_properties, - }, - ], - "person3": [ - { - "event": "sign up", - "timestamp": "2020-01-01", - "properties": person3_properties, - } - ], - } - - journeys_for(team=self.team, events_by_person=events) - - response = self.client.post( - f"/api/projects/{self.team.pk}/insights/funnel/", - { - "events": [ - {"id": "sign up", "order": 0}, - {"id": "play movie", "order": 1}, - {"id": "buy", "order": 2}, - ], - "insight": "FUNNELS", - "date_from": "2020-01-01", - "date_to": "2020-01-08", - "funnel_window_days": 7, - "breakdown_type": "event", - "breakdown": "$browser", - }, - ).json() - - assert get_funnel_breakdown_people_breakdown_by_step(client=self.client, funnel_response=response) == [ - { - "breakdown_value": "Chrome", - "steps": [ - {"name": "sign up", "converted": ["person1"], "dropped": []}, - {"name": "play movie", "converted": ["person1"], "dropped": []}, - {"name": "buy", "converted": ["person1"], "dropped": []}, - ], - }, - { - "breakdown_value": "Safari", - "steps": [ - { - "name": "sign up", - "converted": ["person2", "person3"], - "dropped": [], - }, - { - "name": "play movie", - "converted": ["person2"], - "dropped": ["person3"], - }, - {"name": "buy", "converted": ["person2"], "dropped": []}, - ], - }, - ] - def test_funnel_trends_basic_post(self): journeys_for( { @@ -778,14 +447,6 @@ def test_funnel_basic_exclusions(self): self.assertEqual(response["result"][0]["count"], 1) self.assertEqual(response["result"][1]["count"], 1) - # Should only pick up the person with distinct id "2" as the "1" person - # performed the "step x" event, which we're explicitly asking to be - # excluded in the request payload - assert get_funnel_people_breakdown_by_step(client=self.client, funnel_response=response) == [ - {"name": "step one", "converted": ["2"], "dropped": []}, - {"name": "step two", "converted": ["2"], "dropped": []}, - ] - def test_funnel_invalid_exclusions(self): journeys_for( { @@ -1018,44 +679,3 @@ def as_result(breakdown_properties: Union[str, list[str]]) -> dict[str, Any]: "breakdown": breakdown_properties, "breakdown_value": breakdown_properties, } - - -def get_converted_and_dropped_people(client: Client, step): - # Helper for fetching converted/dropped people for a specified funnel step response - converted_people_response = client.get(step["converted_people_url"]) - assert converted_people_response.status_code == status.HTTP_200_OK - - converted_people = converted_people_response.json()["results"][0]["people"] - converted_distinct_ids = [distinct_id for people in converted_people for distinct_id in people["distinct_ids"]] - - if step["order"] == 0: - #  If it's the first step, we don't expect a dropped people url - dropped_distinct_ids = [] - else: - dropped_people_response = client.get(step["dropped_people_url"]) - assert dropped_people_response.status_code == status.HTTP_200_OK - - dropped_people = dropped_people_response.json()["results"][0]["people"] - dropped_distinct_ids = [distinct_id for people in dropped_people for distinct_id in people["distinct_ids"]] - - return { - "name": step["name"], - "converted": sorted(converted_distinct_ids), - "dropped": sorted(dropped_distinct_ids), - } - - -def get_funnel_people_breakdown_by_step(client: Client, funnel_response): - # Helper to fetch converted/dropped people for a non-breakdown funnel response - return [get_converted_and_dropped_people(client=client, step=step) for step in funnel_response["result"]] - - -def get_funnel_breakdown_people_breakdown_by_step(client: Client, funnel_response): - # Helper to fetch converted/dropped people for a breakdown funnel response - return [ - { - "breakdown_value": breakdown_steps[0]["breakdown_value"], - "steps": [get_converted_and_dropped_people(client=client, step=step) for step in breakdown_steps], - } - for breakdown_steps in funnel_response["result"] - ]