Skip to content

Commit

Permalink
DO NOT MERGE: disable caching of collection summaries
Browse files Browse the repository at this point in the history
With the cache enabled, every query on the server is pulling and processing 17k rows of dataset summaries, because filtering is disabled when the cache is on — which applies to all queries.

This is more like 1 or 60 rows if we use the filter.

The processing of these rows seems to be taking excessive synchronous CPU time (300ms+ on the server).
  • Loading branch information
dhirving committed Dec 5, 2024
1 parent 5eb02ad commit 761aad6
Showing 1 changed file with 5 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -364,9 +364,9 @@ def fetch_summaries(
sql = sqlalchemy.sql.select(*columns).select_from(fromClause)
sql = sql.where(coll_col.in_([coll.key for coll in non_chains]))
# For caching we need to fetch complete summaries.
if self._caching_context.collection_summaries is None:
if dataset_type_names is not None:
sql = sql.where(self._dataset_type_table.columns["name"].in_(dataset_type_names))
# if self._caching_context.collection_summaries is None:
if dataset_type_names is not None:
sql = sql.where(self._dataset_type_table.columns["name"].in_(dataset_type_names))

# Run the query and construct CollectionSummary objects from the result
# rows. This will never include CHAINED collections or collections
Expand Down Expand Up @@ -407,8 +407,8 @@ def fetch_summaries(
for chain, children in chains.items():
summaries[chain.key] = CollectionSummary.union(*(summaries[child.key] for child in children))

if self._caching_context.collection_summaries is not None:
self._caching_context.collection_summaries.update(summaries)
# if self._caching_context.collection_summaries is not None:
# self._caching_context.collection_summaries.update(summaries)

return summaries

Expand Down

0 comments on commit 761aad6

Please sign in to comment.