From 86c3e37fc23312cbabc1ad1a578a101d9fff531d Mon Sep 17 00:00:00 2001 From: Bartosz Prusinowski Date: Wed, 18 Oct 2023 13:08:39 +0200 Subject: [PATCH] refactor: Use CONSTRUCT query instead of SELECT for dimension values metadata --- app/rdf/queries.ts | 196 ++++++++++++++++++++--------------------- app/rdf/query-cache.ts | 23 ++--- 2 files changed, 111 insertions(+), 108 deletions(-) diff --git a/app/rdf/queries.ts b/app/rdf/queries.ts index eeafe9469..45ef98503 100644 --- a/app/rdf/queries.ts +++ b/app/rdf/queries.ts @@ -1,4 +1,5 @@ -import { descending, group, index } from "d3"; +import { CONSTRUCT } from "@tpluscode/sparql-builder"; +import { descending, group, index, rollups } from "d3"; import { Maybe } from "graphql-tools"; import keyBy from "lodash/keyBy"; import { @@ -9,7 +10,7 @@ import { View, } from "rdf-cube-view-query"; import rdf from "rdf-ext"; -import { Literal, NamedNode } from "rdf-js"; +import { Literal, NamedNode, Quad } from "rdf-js"; import { ParsingClient } from "sparql-http-client/ParsingClient"; import { LRUCache } from "typescript-lru-cache"; @@ -270,6 +271,31 @@ export const getCubeDimensionValues = async ({ export const dimensionIsVersioned = (dimension: CubeDimension) => dimension.out(ns.schema.version)?.value ? 
true : false; +type DimensionPredicate = keyof DimensionValue | "sameAs"; + +const getDimensionPredicate = (predicate: string): DimensionPredicate => { + switch (predicate) { + case "value": + return "value"; + case ns.schema.identifier.value: + return "identifier"; + case ns.schema.name.value: + return "label"; + case ns.schema.alternateName.value: + return "alternateName"; + case ns.schema.description.value: + return "description"; + case ns.schema.position.value: + return "position"; + case ns.schema.color.value: + return "color"; + case ns.schema.sameAs.value: + return "sameAs"; + default: + throw new Error(`Unknown predicate: ${predicate}`); + } +}; + export const getCubeDimensionValuesWithMetadata = async ({ dimension, cube, @@ -335,112 +361,86 @@ export const getCubeDimensionValuesWithMetadata = async ({ return []; } + const result: DimensionValue[] = []; + /** * If the dimension is versioned, we're loading the "unversioned" values to store in the config, * so cubes can be upgraded to newer versions without the filters breaking. */ - const result: DimensionValue[] = []; - if (namedNodes.length > 0) { - const scaleType = getScaleType(dimension); - const [labels, descriptions, literals, unversioned] = await Promise.all([ - loadResourceLabels({ - ids: namedNodes, - locale, - sparqlClient, - labelTerm: schema.name, - cache, - }), - scaleType === "Ordinal" || scaleType === "Nominal" - ? loadResourceLabels({ - ids: namedNodes, - locale, - sparqlClient, - labelTerm: schema.description, - cache, - }) - : [], - loadResourceLiterals({ - ids: namedNodes, - sparqlClient, - predicates: { - identifier: - scaleType === "Ordinal" || scaleType === "Nominal" - ? { - predicate: schema.identifier, - } - : null, - position: - scaleType === "Ordinal" - ? { - predicate: schema.position, - } - : null, - color: - scaleType === "Nominal" || scaleType === "Ordinal" - ? 
{ predicate: schema.color } - : null, - alternateName: { - predicate: schema.alternateName, - locale: locale, - }, - }, - cache, - }), - dimensionIsVersioned(dimension) - ? loadUnversionedResources({ ids: namedNodes, sparqlClient, cache }) - : [], - ]); - - const lookup = new Map( - literals.map(({ iri, alternateName, identifier, position, color }) => [ - iri.value, - { - alternateName: alternateName - ? parseRDFLiteral(alternateName) - : undefined, - identifier: identifier - ? parseRDFLiteral(identifier) - : undefined, - position: position ? parseRDFLiteral(position) : undefined, - color: color ? parseRDFLiteral(color) : undefined, - }, - ]) - ); - - const labelsLookup = new Map( - labels.map(({ iri, label }) => [iri.value, label?.value]) - ); - - const descriptionsLookup = new Map( - descriptions.map(({ iri, label }) => [iri.value, label?.value]) - ); - - const unversionedLookup = new Map( - unversioned.map(({ iri, sameAs }) => [iri.value, sameAs?.value]) - ); - - namedNodes.forEach((iri) => { - const lookupValue = lookup.get(iri.value); + const query: any = CONSTRUCT` + ?s ${ns.schema.identifier} ?identifier . + ?s ${ns.schema.name} ?name . + ?s ${ns.schema.alternateName} ?alternateName . + ?s ${ns.schema.description} ?description . + ?s ${ns.schema.position} ?position . + ?s ${ns.schema.color} ?color . + ?s ${ns.schema.sameAs} ?sameAs . + `.WHERE` + VALUES ?s { + ${namedNodes.map((d) => `<${d.value}>`).join(`\n`)} + } + { + ?s schema:identifier ?identifier . + } + UNION { + ?s schema:name ?name . + FILTER(LANG(?name) = "${locale}" || LANG(?name) = "") + } + UNION { + ?s schema:alternateName ?alternateName . + FILTER(LANG(?alternateName) = "${locale}" || LANG(?alternateName) = "") + } + UNION { + ?s schema:description ?description . + FILTER(LANG(?description) = "${locale}" || LANG(?description) = "") + } + UNION { + ?s schema:position ?position . + } + UNION { + ?s schema:color ?color . + } + UNION { + ?s schema:sameAs ?sameAs . 
+ }`; + + await executeWithCache(sparqlClient, query, cache, (queryResult) => { + const parsed: Record<DimensionPredicate, ObservationValue>[] = + queryResult.map((d: Quad) => { + const key = getDimensionPredicate(d.predicate.value); + + return { + value: d.subject.value, + [key]: + d.object.termType === "Literal" + ? parseRDFLiteral(d.object) + : d.object.value, + }; + }); - result.push({ - value: unversionedLookup.get(iri.value) ?? iri.value, - label: labelsLookup.get(iri.value) ?? "", - description: descriptionsLookup.get(iri.value), - position: lookupValue?.position, - identifier: lookupValue?.identifier, - color: lookupValue?.color, - alternateName: lookupValue?.alternateName, - }); + const grouped = rollups( + parsed, + (v) => Object.assign({}, ...v), + (d) => d.value + ) as [ObservationValue, Record<DimensionPredicate, ObservationValue>][]; + + for (const [_, { sameAs, value, ...rest }] of grouped) { + result.push({ + ...rest, + value: sameAs ?? value, + } as DimensionValue); + } }); + } else if (literals.length > 0) { - literals.forEach((v) => { + literals.forEach(({ value }) => result.push({ - value: v.value, - label: v.value, - }); - }); + value, + label: value, + }) + ); } if (undValues.length > 0) { diff --git a/app/rdf/query-cache.ts b/app/rdf/query-cache.ts index e298e4241..216689405 100644 --- a/app/rdf/query-cache.ts +++ b/app/rdf/query-cache.ts @@ -2,8 +2,8 @@ import { SparqlQuery, SparqlQueryExecutable, } from "@tpluscode/sparql-builder/lib"; -import StreamClient from "sparql-http-client"; import { ParsingClient } from "sparql-http-client/ParsingClient"; +import { StreamClient } from "sparql-http-client/StreamClient"; import { LRUCache } from "typescript-lru-cache"; type SparqlClient = StreamClient | ParsingClient; @@ -16,16 +16,19 @@ export const executeWithCache = async ( ) => { const key = `${sparqlClient.query.endpoint.endpointUrl} - ${query.build()}`; const cached = cache?.get(key); + if (cached) { return cached as T; - } else { - const result = await query.execute(sparqlClient.query, { - operation: "postUrlencoded",
- }); - const parsed = parse(result) as T; - if (cache) { - cache.set(key, parsed); - } - return parsed; } + + const result = await query.execute(sparqlClient.query, { + operation: "postUrlencoded", + }); + const parsed = parse(result) as T; + + if (cache) { + cache.set(key, parsed); + } + + return parsed; };