Skip to content

Commit

Permalink
DO NOT MERGE: disable caching of collection summaries
Browse files Browse the repository at this point in the history
With the cache enabled, every query on the server is pulling and processing 17k rows of dataset summaries, because filtering is disabled when the cache is on — which applies to all queries.

This is more like 1 or 60 rows if we use the filter.

The processing of these rows seems to be taking excessive synchronous CPU time (300ms+ on the server).
  • Loading branch information
dhirving committed Dec 5, 2024
1 parent 5eb02ad commit 761aad6
Showing 1 changed file with 5 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -364,9 +364,9 @@ def fetch_summaries(
sql = sqlalchemy.sql.select(*columns).select_from(fromClause)
sql = sql.where(coll_col.in_([coll.key for coll in non_chains]))
# For caching we need to fetch complete summaries.
if self._caching_context.collection_summaries is None:
if dataset_type_names is not None:
sql = sql.where(self._dataset_type_table.columns["name"].in_(dataset_type_names))
# if self._caching_context.collection_summaries is None:
if dataset_type_names is not None:
sql = sql.where(self._dataset_type_table.columns["name"].in_(dataset_type_names))

# Run the query and construct CollectionSummary objects from the result
# rows. This will never include CHAINED collections or collections
Expand Down Expand Up @@ -407,8 +407,8 @@ def fetch_summaries(
for chain, children in chains.items():
summaries[chain.key] = CollectionSummary.union(*(summaries[child.key] for child in children))

if self._caching_context.collection_summaries is not None:
self._caching_context.collection_summaries.update(summaries)
# if self._caching_context.collection_summaries is not None:
# self._caching_context.collection_summaries.update(summaries)

return summaries

Expand Down

0 comments on commit 761aad6

Please sign in to comment.