From f75ab7368245ea0bfe0164bc9b342c6f03c828d0 Mon Sep 17 00:00:00 2001
From: feba-rajan <Feba.Rajan@levellingup.gov.uk>
Date: Wed, 28 Aug 2024 16:13:27 +0100
Subject: [PATCH] Difference in counts (#292)

* add tests and revert count to use the historic way that worked. This may be innefficient

* added exclude_field parameter in json extension and test functions

* Refactored exclude_field parameter logic

* added acceptance testcases and removed unused funtion _apply_exclusion_filters()

* make some changes

* new unit tests for _get_geojson()

* Updated acceptance tests for exclude-field and used FastAPI TestClient

* Updated acceptance tests with TestClient and db_session.

* Added .scalar() method to count

* revert to the .over method to try to check the canary's failure

* Rrmoved .over method and added .scalar()

---------

Co-authored-by: eveleighoj <35256612+eveleighoj@users.noreply.github.com>
---
 application/data_access/entity_queries.py     | 24 ++---
 tests/conftest.py                             |  4 -
 .../data_access/test_entity_queries.py        | 93 +++++++++++++++++++
 3 files changed, 103 insertions(+), 18 deletions(-)
 create mode 100644 tests/integration/data_access/test_entity_queries.py

diff --git a/application/data_access/entity_queries.py b/application/data_access/entity_queries.py
index 05883057..10d6d897 100644
--- a/application/data_access/entity_queries.py
+++ b/application/data_access/entity_queries.py
@@ -64,21 +64,16 @@ def get_entities(session, dataset: str, limit: int) -> List[EntityModel]:
 def get_entity_search(session: Session, parameters: dict):
     params = normalised_params(parameters)
     count: int
-    entities: [EntityModel]
+    entities: list[EntityModel]
 
     # get count
-    query_args = [func.count(EntityOrm.entity).label("count")]
-    query = session.query(*query_args)
-    query = _apply_base_filters(query, params)
-    query = _apply_date_filters(query, params)
-    query = _apply_location_filters(session, query, params)
-    query = _apply_period_option_filter(query, params)
-
-    entities = query.all()
-    if entities:
-        count = entities[0].count
-    else:
-        count = 0
+    subquery = session.query(EntityOrm.entity)
+    subquery = _apply_base_filters(subquery, params)
+    subquery = _apply_date_filters(subquery, params)
+    subquery = _apply_location_filters(session, subquery, params)
+    subquery = _apply_period_option_filter(subquery, params).subquery()
+    count_query = session.query(func.count()).select_from(subquery)
+    count = count_query.scalar()
 
     # get entities
     query_args = [EntityOrm]
@@ -285,7 +280,8 @@ def _apply_location_filters(session, query, params):
 
     # final step to add a group by if more than one condition is being met.
     if len(intersecting_entities) > 1 or len(references) > 0 or len(curies) > 1:
-        query = query.group_by(EntityOrm)
+        # if len(intersecting_entities) > 1 or len(curies) > 1:
+        query = query.group_by(EntityOrm.entity)
     elif len(intersecting_entities) + len(curies) > 1:
         query = query.group_by(EntityOrm)
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 61dce632..3fe5d108 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -14,7 +14,6 @@
 import time
 import csv
 import json
-import logging
 
 from application.app import create_app  # noqa: E402
 
@@ -51,8 +50,6 @@ def test_settings() -> Settings:
         READ_DATABASE_URL=DEFAULT_TEST_DATABASE_URL,
     )
 
-    logging.warning(settings)
-
     return settings
 
 
@@ -63,7 +60,6 @@ def create_db(test_settings) -> PostgresDsn:
     if database_exists(database_url):
         drop_database(database_url)
     create_database(database_url)
-    logging.error(database_url)
 
     # apply migrations in new db, this assumes we will always want a properly set-up db
     config = Config("alembic.ini")
diff --git a/tests/integration/data_access/test_entity_queries.py b/tests/integration/data_access/test_entity_queries.py
new file mode 100644
index 00000000..eab8f19b
--- /dev/null
+++ b/tests/integration/data_access/test_entity_queries.py
@@ -0,0 +1,93 @@
+import logging
+import pytest
+
+from application.db.models import EntityOrm
+from application.data_access.entity_queries import get_entity_search
+
+# set up logging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.parametrize(
+    "entities, parameters, expected_count, expected_entities",
+    [
+        # a test case with a reference but onlly one
+        (
+            [
+                {
+                    "entity": 1,
+                    "dataset": "conservation-area",
+                    "reference": "CA1",
+                    "geometry": "POLYGON((-0.39877874932762813 52.37557396590839,-0.49691286828496717 52.31309909388361,-0.3325311902998525 52.28924588007146,-0.280273249515528 52.35293530122911,-0.39877874932762813 52.37557396590839))",  # noqa E501
+                },
+                {
+                    "entity": 2,
+                    "reference": "CA2",
+                    "dataset": "conservation-area",
+                    "geometry": "POLYGON((-0.39877874932762813 52.37557396590839,-0.49691286828496717 52.31309909388361,-0.3325311902998525 52.28924588007146,-0.280273249515528 52.35293530122911,-0.39877874932762813 52.37557396590839))",  # noqa E501
+                },
+                {
+                    "entity": 3,
+                    "reference": "LPLAD1",
+                    "dataset": "local-authority-district",
+                    "geometry": "POLYGON((-0.7489300741242954 52.4863973669523,-0.9505678287521635 52.27316949066329,-0.3760369581498711 52.142463344337585,-0.21381926107403462 52.37856061047788,-0.7489300741242954 52.4863973669523))",  # noqa E501
+                },
+            ],
+            {"dataset": ["conservation-area"], "geometry_reference": ["LPLAD1"]},
+            2,
+            [1, 2],
+        ),
+        # a test case with multiple entities uner the same reference
+        (
+            [
+                {
+                    "entity": 1,
+                    "dataset": "conservation-area",
+                    "reference": "CA1",
+                    "geometry": "POLYGON((-0.39877874932762813 52.37557396590839,-0.49691286828496717 52.31309909388361,-0.3325311902998525 52.28924588007146,-0.280273249515528 52.35293530122911,-0.39877874932762813 52.37557396590839))",  # noqa E501
+                },
+                {
+                    "entity": 2,
+                    "reference": "CA2",
+                    "dataset": "conservation-area",
+                    "geometry": "POLYGON((-0.39877874932762813 52.37557396590839,-0.49691286828496717 52.31309909388361,-0.3325311902998525 52.28924588007146,-0.280273249515528 52.35293530122911,-0.39877874932762813 52.37557396590839))",  # noqa E501
+                },
+                {
+                    "entity": 3,
+                    "reference": "LPLAD1",
+                    "dataset": "local-authority-district",
+                    "geometry": "POLYGON((-0.7489300741242954 52.4863973669523,-0.9505678287521635 52.27316949066329,-0.3760369581498711 52.142463344337585,-0.21381926107403462 52.37856061047788,-0.7489300741242954 52.4863973669523))",  # noqa E501
+                },
+                {
+                    "entity": 4,
+                    "reference": "LPLAD1",
+                    "dataset": "local-authority-district",
+                    "geometry": "POLYGON((-0.07955040597019639 52.51338781409572,-0.5510502032408139 52.32210614595394,0.03009961574087244 52.18134255828252,-0.07955040597019639 52.51338781409572))",  # noqa E501
+                },
+            ],
+            {"dataset": ["conservation-area"], "geometry_reference": ["LPLAD1"]},
+            2,
+            [1, 2],
+        ),
+    ],
+)
+def test_get_entity_search_geometry_reference_queries_returns_correct_results(
+    entities, parameters, expected_count, expected_entities, db_session
+):
+    """
+    A test to check if the correct results are returned when using the geometry_reference parameter
+    """
+    # load data points into entity table
+    # add datasets
+    for entity in entities:
+        db_session.add(EntityOrm(**entity))
+
+    # run query and get results
+    results = get_entity_search(db_session, parameters)
+
+    # assert count
+    assert results["count"] == expected_count, results
+
+    for entity in expected_entities:
+        assert entity in [entity.entity for entity in results["entities"]], results