From 279fdd50d7870cc404a58a5c9afbf6b3c7c432ec Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 6 Nov 2023 19:51:20 -0600 Subject: [PATCH 1/6] fix(security): fix for zookeeper CVE-2023-44981 (#9190) --- build.gradle | 4 ++-- metadata-service/restli-api/build.gradle | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index bd282535fa13cd..31e005e001cf05 100644 --- a/build.gradle +++ b/build.gradle @@ -1,7 +1,7 @@ buildscript { ext.junitJupiterVersion = '5.6.1' // Releases: https://github.com/linkedin/rest.li/blob/master/CHANGELOG.md - ext.pegasusVersion = '29.45.0' + ext.pegasusVersion = '29.46.8' ext.mavenVersion = '3.6.3' ext.springVersion = '5.3.29' ext.springBootVersion = '2.7.14' @@ -212,7 +212,7 @@ project.ext.externalDependency = [ 'testContainersOpenSearch': 'org.opensearch:opensearch-testcontainers:2.0.0', 'typesafeConfig':'com.typesafe:config:1.4.1', 'wiremock':'com.github.tomakehurst:wiremock:2.10.0', - 'zookeeper': 'org.apache.zookeeper:zookeeper:3.4.14', + 'zookeeper': 'org.apache.zookeeper:zookeeper:3.7.2', 'wire': 'com.squareup.wire:wire-compiler:3.7.1', 'charle': 'com.charleskorn.kaml:kaml:0.53.0', 'common': 'commons-io:commons-io:2.7', diff --git a/metadata-service/restli-api/build.gradle b/metadata-service/restli-api/build.gradle index ed4f4118dba307..f182d11b6baebf 100644 --- a/metadata-service/restli-api/build.gradle +++ b/metadata-service/restli-api/build.gradle @@ -8,4 +8,10 @@ dependencies { restClientCompile spec.product.pegasus.d2 restClientCompile spec.product.pegasus.restliClient + + constraints { + restClientCompile(externalDependency.zookeeper) { + because("CVE-2023-44981") + } + } } \ No newline at end of file From ac9a0140570b3ada060ce716304f33ff62a1348a Mon Sep 17 00:00:00 2001 From: John Joyce Date: Mon, 6 Nov 2023 18:33:02 -0800 Subject: [PATCH 2/6] refactor(ui): Rename "dataset details" button text to "view details" on lineage sidebar profile (#9196) --- datahub-web-react/src/app/lineage/LineageExplorer.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/lineage/LineageExplorer.tsx b/datahub-web-react/src/app/lineage/LineageExplorer.tsx index f59d1843b8a99b..28cd7025f51f45 100644 --- a/datahub-web-react/src/app/lineage/LineageExplorer.tsx +++ b/datahub-web-react/src/app/lineage/LineageExplorer.tsx @@ -217,7 +217,7 @@ export default function LineageExplorer({ urn, type }: Props) { Close ) From 45770013c9bdaadfb49950c67a838aef879a8e8a Mon Sep 17 00:00:00 2001 From: John Joyce Date: Mon, 6 Nov 2023 18:33:13 -0800 Subject: [PATCH 3/6] feat(ui): Add command-k icons to search bar (#9194) --- .../src/app/home/HomePageHeader.tsx | 1 + datahub-web-react/src/app/search/CommandK.tsx | 29 +++++++++++++++ .../src/app/search/SearchBar.tsx | 37 ++++++++++++------- .../src/app/search/SearchHeader.tsx | 1 + 4 files changed, 55 insertions(+), 13 deletions(-) create mode 100644 datahub-web-react/src/app/search/CommandK.tsx diff --git a/datahub-web-react/src/app/home/HomePageHeader.tsx b/datahub-web-react/src/app/home/HomePageHeader.tsx index e5c01252a865b6..0052d54f562ebd 100644 --- a/datahub-web-react/src/app/home/HomePageHeader.tsx +++ b/datahub-web-react/src/app/home/HomePageHeader.tsx @@ -276,6 +276,7 @@ export const HomePageHeader = () => { combineSiblings showQuickFilters showViewAllResults + showCommandK /> {searchResultsToShow && searchResultsToShow.length > 0 && ( diff --git a/datahub-web-react/src/app/search/CommandK.tsx b/datahub-web-react/src/app/search/CommandK.tsx new file mode 100644 index 00000000000000..13e55a0e3f2661 --- /dev/null +++ b/datahub-web-react/src/app/search/CommandK.tsx @@ -0,0 +1,29 @@ +import React from 'react'; +import styled from 'styled-components'; +import { ANTD_GRAY } from '../entity/shared/constants'; + +const Container = styled.div` + color: ${ANTD_GRAY[6]}; + background-color: #ffffff; + opacity: 0.9; + border-color: black; + border-radius: 6px; + border: 1px solid ${ANTD_GRAY[6]}; + padding-right: 6px; + padding-left: 6px; + margin-right: 4px; + margin-left: 4px; +`; + +const Letter = styled.span` + padding: 2px; +`; + +export const CommandK = () => { + return ( + + + K + + ); +}; diff --git a/datahub-web-react/src/app/search/SearchBar.tsx b/datahub-web-react/src/app/search/SearchBar.tsx index 5f797e68fe0e8b..a23ead83caf541 100644 --- a/datahub-web-react/src/app/search/SearchBar.tsx +++ b/datahub-web-react/src/app/search/SearchBar.tsx @@ -23,6 +23,7 @@ import { navigateToSearchUrl } from './utils/navigateToSearchUrl'; import ViewAllSearchItem from './ViewAllSearchItem'; import { ViewSelect } from '../entity/view/select/ViewSelect'; import { combineSiblingsInAutoComplete } from './utils/combineSiblingsInAutoComplete'; +import { CommandK } from './CommandK'; const StyledAutoComplete = styled(AutoComplete)` width: 100%; @@ -114,6 +115,7 @@ interface Props { fixAutoComplete?: boolean; hideRecommendations?: boolean; showQuickFilters?: boolean; + showCommandK?: boolean; viewsEnabled?: boolean; combineSiblings?: boolean; setIsSearchBarFocused?: (isSearchBarFocused: boolean) => void; @@ -142,6 +144,7 @@ export const SearchBar = ({ fixAutoComplete, hideRecommendations, showQuickFilters, + showCommandK = false, viewsEnabled = false, combineSiblings = false, setIsSearchBarFocused, @@ -153,6 +156,8 @@ export const SearchBar = ({ const [searchQuery, setSearchQuery] = useState(initialQuery); const [selected, setSelected] = useState(); const [isDropdownVisible, setIsDropdownVisible] = useState(false); + const [isFocused, setIsFocused] = useState(false); + useEffect(() => setSelected(initialQuery), [initialQuery]); const searchEntityTypes = entityRegistry.getSearchEntityTypes(); @@ -277,11 +282,13 @@ export const SearchBar = ({ function handleFocus() { if (onFocus) onFocus(); handleSearchBarClick(true); + setIsFocused(true); } function handleBlur() { if (onBlur) onBlur(); handleSearchBarClick(false); + setIsFocused(false); } function handleSearch(query: string, type?: EntityType, appliedQuickFilters?: FacetFilterInput[]) { @@ -294,18 +301,21 @@ export const SearchBar = ({ const searchInputRef = useRef(null); useEffect(() => { - const handleKeyDown = (event) => { - // Support command-k to select the search bar. - // 75 is the keyCode for 'k' - if ((event.metaKey || event.ctrlKey) && event.keyCode === 75) { - (searchInputRef?.current as any)?.focus(); - } - }; - document.addEventListener('keydown', handleKeyDown); - return () => { - document.removeEventListener('keydown', handleKeyDown); - }; - }, []); + if (showCommandK) { + const handleKeyDown = (event) => { + // Support command-k to select the search bar. + // 75 is the keyCode for 'k' + if ((event.metaKey || event.ctrlKey) && event.keyCode === 75) { + (searchInputRef?.current as any)?.focus(); + } + }; + document.addEventListener('keydown', handleKeyDown); + return () => { + document.removeEventListener('keydown', handleKeyDown); + }; + } + return () => null; + }, [showCommandK]); return ( @@ -377,7 +387,7 @@ export const SearchBar = ({ data-testid="search-input" onFocus={handleFocus} onBlur={handleBlur} - allowClear={{ clearIcon: }} + allowClear={(isFocused && { clearIcon: }) || false} prefix={ <> {viewsEnabled && ( @@ -411,6 +421,7 @@ export const SearchBar = ({ } ref={searchInputRef} + suffix={(showCommandK && !isFocused && ) || null} /> diff --git a/datahub-web-react/src/app/search/SearchHeader.tsx b/datahub-web-react/src/app/search/SearchHeader.tsx index 91f9753a3d6012..76e78a11d3e9d9 100644 --- a/datahub-web-react/src/app/search/SearchHeader.tsx +++ b/datahub-web-react/src/app/search/SearchHeader.tsx @@ -108,6 +108,7 @@ export const SearchHeader = ({ fixAutoComplete showQuickFilters showViewAllResults + showCommandK /> From 88cde08d060041bfb6f585ed7a486f6ba5886733 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Mon, 6 Nov 2023 21:34:17 -0500 Subject: [PATCH 4/6] feat(ui): Update Apollo cache to work with union types (#9193) --- datahub-web-react/codegen.yml | 3 ++ datahub-web-react/package.json | 1 + datahub-web-react/src/App.tsx | 3 ++ datahub-web-react/yarn.lock | 73 ++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+) diff --git a/datahub-web-react/codegen.yml b/datahub-web-react/codegen.yml index 96a2bd61379205..35728e8aeb7d49 100644 --- a/datahub-web-react/codegen.yml +++ b/datahub-web-react/codegen.yml @@ -20,6 +20,9 @@ generates: src/types.generated.ts: plugins: - 'typescript' + src/possibleTypes.generated.ts: + plugins: + - 'fragment-matcher' src/: preset: near-operation-file presetConfig: diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index 2d9329919fdc1b..019295f3e6ffeb 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -11,6 +11,7 @@ "@apollo/client": "^3.3.19", "@craco/craco": "^6.1.1", "@data-ui/xy-chart": "^0.0.84", + "@graphql-codegen/fragment-matcher": "^5.0.0", "@miragejs/graphql": "^0.1.11", "@monaco-editor/react": "^4.3.1", "@react-hook/window-size": "^3.0.7", diff --git a/datahub-web-react/src/App.tsx b/datahub-web-react/src/App.tsx index b6bc608dccbbb0..342a89f350429f 100644 --- a/datahub-web-react/src/App.tsx +++ b/datahub-web-react/src/App.tsx @@ -36,6 +36,7 @@ import { DataPlatformEntity } from './app/entity/dataPlatform/DataPlatformEntity import { DataProductEntity } from './app/entity/dataProduct/DataProductEntity'; import { DataPlatformInstanceEntity } from './app/entity/dataPlatformInstance/DataPlatformInstanceEntity'; import { RoleEntity } from './app/entity/Access/RoleEntity'; +import possibleTypesResult from './possibleTypes.generated'; /* Construct Apollo Client @@ -77,6 +78,8 @@ const client = new ApolloClient({ }, }, }, + // need to define possibleTypes to allow us to use Apollo cache with union types + possibleTypes: possibleTypesResult.possibleTypes, }), credentials: 'include', defaultOptions: { diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock index 590f3ebcef8c33..ce0f2f514dad1e 100644 --- a/datahub-web-react/yarn.lock +++ b/datahub-web-react/yarn.lock @@ -2298,6 +2298,14 @@ "@graphql-tools/utils" "^6" tslib "~2.0.1" +"@graphql-codegen/fragment-matcher@^5.0.0": + version "5.0.0" + resolved "https://registry.yarnpkg.com/@graphql-codegen/fragment-matcher/-/fragment-matcher-5.0.0.tgz#2a016715e42e8f21aa08830f34a4d0a930e660fe" + integrity sha512-mbash9E8eY6RSMSNrrO+C9JJEn8rdr8ORaxMpgdWL2qe2q/TlLUCE3ZvQvHkSc7GjBnMEk36LncA8ApwHR2BHg== + dependencies: + "@graphql-codegen/plugin-helpers" "^5.0.0" + tslib "~2.5.0" + "@graphql-codegen/near-operation-file-preset@^1.17.13": version "1.18.6" resolved "https://registry.yarnpkg.com/@graphql-codegen/near-operation-file-preset/-/near-operation-file-preset-1.18.6.tgz#2378ac75feaeaa1cfd2146bd84bf839b1fe20d9d" @@ -2331,6 +2339,18 @@ lodash "~4.17.0" tslib "~2.3.0" +"@graphql-codegen/plugin-helpers@^5.0.0": + version "5.0.1" + resolved "https://registry.yarnpkg.com/@graphql-codegen/plugin-helpers/-/plugin-helpers-5.0.1.tgz#e2429fcfba3f078d5aa18aa062d46c922bbb0d55" + integrity sha512-6L5sb9D8wptZhnhLLBcheSPU7Tg//DGWgc5tQBWX46KYTOTQHGqDpv50FxAJJOyFVJrveN9otWk9UT9/yfY4ww== + dependencies: + "@graphql-tools/utils" "^10.0.0" + change-case-all "1.0.15" + common-tags "1.8.2" + import-from "4.0.0" + lodash "~4.17.0" + tslib "~2.5.0" + "@graphql-codegen/typescript-operations@1.17.13": version "1.17.13" resolved "https://registry.yarnpkg.com/@graphql-codegen/typescript-operations/-/typescript-operations-1.17.13.tgz#a5b08c1573b9507ca5a9e66e795aecc40ddc5305" @@ -2584,6 +2604,16 @@ dependencies: tslib "^2.4.0" +"@graphql-tools/utils@^10.0.0": + version "10.0.8" + resolved "https://registry.yarnpkg.com/@graphql-tools/utils/-/utils-10.0.8.tgz#c7b84275ec83dc42ad9f3d4ffc424ff682075759" + integrity sha512-yjyA8ycSa1WRlJqyX/aLqXeE5DvF/H02+zXMUFnCzIDrj0UvLMUrxhmVFnMK0Q2n3bh4uuTeY3621m5za9ovXw== + dependencies: + "@graphql-typed-document-node/core" "^3.1.1" + cross-inspect "1.0.0" + dset "^3.1.2" + tslib "^2.4.0" + "@graphql-tools/utils@^6": version "6.2.4" resolved "https://registry.yarnpkg.com/@graphql-tools/utils/-/utils-6.2.4.tgz#38a2314d2e5e229ad4f78cca44e1199e18d55856" @@ -2618,6 +2648,11 @@ resolved "https://registry.yarnpkg.com/@graphql-typed-document-node/core/-/core-3.1.0.tgz#0eee6373e11418bfe0b5638f654df7a4ca6a3950" integrity sha512-wYn6r8zVZyQJ6rQaALBEln5B1pzxb9shV5Ef97kTvn6yVGrqyXVnDqnU24MXnFubR+rZjBY9NWuxX3FB2sTsjg== +"@graphql-typed-document-node/core@^3.1.1": + version "3.2.0" + resolved "https://registry.yarnpkg.com/@graphql-typed-document-node/core/-/core-3.2.0.tgz#5f3d96ec6b2354ad6d8a28bf216a1d97b5426861" + integrity sha512-mB9oAsNCm9aM3/SOv4YtBMqZbYj10R7dkq8byBqxGY/ncFwhf2oQzMV+LCRlWoDSEBJ3COiR1yeDvMtsoOsuFQ== + "@hapi/address@2.x.x": version "2.1.4" resolved "https://registry.yarnpkg.com/@hapi/address/-/address-2.1.4.tgz#5d67ed43f3fd41a69d4b9ff7b56e7c0d1d0a81e5" @@ -7001,6 +7036,22 @@ change-case-all@1.0.14: upper-case "^2.0.2" upper-case-first "^2.0.2" +change-case-all@1.0.15: + version "1.0.15" + resolved "https://registry.yarnpkg.com/change-case-all/-/change-case-all-1.0.15.tgz#de29393167fc101d646cd76b0ef23e27d09756ad" + integrity sha512-3+GIFhk3sNuvFAJKU46o26OdzudQlPNBCu1ZQi3cMeMHhty1bhDxu2WrEilVNYaGvqUtR1VSigFcJOiS13dRhQ== + dependencies: + change-case "^4.1.2" + is-lower-case "^2.0.2" + is-upper-case "^2.0.2" + lower-case "^2.0.2" + lower-case-first "^2.0.2" + sponge-case "^1.0.1" + swap-case "^2.0.2" + title-case "^3.0.3" + upper-case "^2.0.2" + upper-case-first "^2.0.2" + change-case@^4.1.2: version "4.1.2" resolved "https://registry.yarnpkg.com/change-case/-/change-case-4.1.2.tgz#fedfc5f136045e2398c0410ee441f95704641e12" @@ -7357,6 +7408,11 @@ common-tags@1.8.0, common-tags@^1.8.0: resolved "https://registry.yarnpkg.com/common-tags/-/common-tags-1.8.0.tgz#8e3153e542d4a39e9b10554434afaaf98956a937" integrity sha512-6P6g0uetGpW/sdyUy/iQQCbFF0kWVMSIVSyYz7Zgjcgh8mgw8PQzDNZeyZ5DQ2gM7LBoZPHmnjz8rUthkBG5tw== +common-tags@1.8.2: + version "1.8.2" + resolved "https://registry.yarnpkg.com/common-tags/-/common-tags-1.8.2.tgz#94ebb3c076d26032745fd54face7f688ef5ac9c6" + integrity sha512-gk/Z852D2Wtb//0I+kRFNKKE9dIIVirjoqPoA1wJU+XePVXZfGeBpk45+A1rKO4Q43prqWBNY/MiIeRLbPWUaA== + commondir@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/commondir/-/commondir-1.0.1.tgz#ddd800da0c66127393cca5950ea968a3aaf1253b" @@ -7698,6 +7754,13 @@ cross-fetch@^3.1.5: dependencies: node-fetch "2.6.7" +cross-inspect@1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/cross-inspect/-/cross-inspect-1.0.0.tgz#5fda1af759a148594d2d58394a9e21364f6849af" + integrity sha512-4PFfn4b5ZN6FMNGSZlyb7wUhuN8wvj8t/VQHZdM4JsDcruGJ8L2kf9zao98QIrBPFCpdk27qst/AGTl7pL3ypQ== + dependencies: + tslib "^2.4.0" + cross-spawn@7.0.3, cross-spawn@^7.0.0, cross-spawn@^7.0.2, cross-spawn@^7.0.3: version "7.0.3" resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6" @@ -8595,6 +8658,11 @@ dotenv@^8.2.0: resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-8.6.0.tgz#061af664d19f7f4d8fc6e4ff9b584ce237adcb8b" integrity sha512-IrPdXQsk2BbzvCBGBOTmmSH5SodmqZNt4ERAZDmW4CT+tL8VtvinqywuANaFu4bOMWki16nqf0e4oC0QIaDr/g== +dset@^3.1.2: + version "3.1.3" + resolved "https://registry.yarnpkg.com/dset/-/dset-3.1.3.tgz#c194147f159841148e8e34ca41f638556d9542d2" + integrity sha512-20TuZZHCEZ2O71q9/+8BwKwZ0QtD9D8ObhrihJPr+vLLYlSuAU3/zL4cSlgbfeoGHTjCSJBa7NGcrF9/Bx/WJQ== + duplexer3@^0.1.4: version "0.1.4" resolved "https://registry.yarnpkg.com/duplexer3/-/duplexer3-0.1.4.tgz#ee01dd1cac0ed3cbc7fdbea37dc0a8f1ce002ce2" @@ -18712,6 +18780,11 @@ tslib@~2.3.0: resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.3.1.tgz#e8a335add5ceae51aa261d32a490158ef042ef01" integrity sha512-77EbyPPpMz+FRFRuAFlWMtmgUWGe9UOG2Z25NqCwiIjRhOf5iKGuzSe5P2w1laq+FkRy4p+PCuVkJSGkzTEKVw== +tslib@~2.5.0: + version "2.5.3" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.5.3.tgz#24944ba2d990940e6e982c4bea147aba80209913" + integrity sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w== + tsutils@^3.17.1: version "3.21.0" resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623" From 23c98ecf7a88d11e3b195d457ab42c763818df47 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 7 Nov 2023 14:40:48 -0600 Subject: [PATCH 5/6] feat(policy): enable support for 10k+ policies (#9177) Co-authored-by: Pedro Silva --- .../policy/ListPoliciesResolver.java | 26 ++---- .../metadata/client/JavaEntityClient.java | 2 +- .../metadata/search/SearchService.java | 18 ++-- .../authorization/DataHubAuthorizer.java | 21 ++--- .../datahub/authorization/PolicyFetcher.java | 62 +++++++++++--- .../authorization/DataHubAuthorizerTest.java | 82 +++++++++++++------ .../src/main/resources/application.yml | 1 + .../auth/DataHubAuthorizerFactory.java | 5 +- .../linkedin/entity/client/EntityClient.java | 2 +- .../entity/client/RestliEntityClient.java | 7 +- .../cypress/e2e/settings/managing_groups.js | 2 +- 11 files changed, 153 insertions(+), 75 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java index 516d6fa2d31372..b44da1c2f832c6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java @@ -40,23 +40,15 @@ public CompletableFuture get(final DataFetchingEnvironment e final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); final String query = input.getQuery() == null ? DEFAULT_QUERY : input.getQuery(); - return CompletableFuture.supplyAsync(() -> { - try { - // First, get all policy Urns. - final PolicyFetcher.PolicyFetchResult policyFetchResult = - _policyFetcher.fetchPolicies(start, count, query, context.getAuthentication()); - - // Now that we have entities we can bind this to a result. - final ListPoliciesResult result = new ListPoliciesResult(); - result.setStart(start); - result.setCount(count); - result.setTotal(policyFetchResult.getTotal()); - result.setPolicies(mapEntities(policyFetchResult.getPolicies())); - return result; - } catch (Exception e) { - throw new RuntimeException("Failed to list policies", e); - } - }); + return _policyFetcher.fetchPolicies(start, query, count, context.getAuthentication()) + .thenApply(policyFetchResult -> { + final ListPoliciesResult result = new ListPoliciesResult(); + result.setStart(start); + result.setCount(count); + result.setTotal(policyFetchResult.getTotal()); + result.setPolicies(mapEntities(policyFetchResult.getPolicies())); + return result; + }); } throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index a69c6008fea474..dff9a22de8efd8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -381,7 +381,7 @@ public SearchResult searchAcrossEntities( @Nonnull @Override public ScrollResult scrollAcrossEntities(@Nonnull List entities, @Nonnull String input, - @Nullable Filter filter, @Nullable String scrollId, @Nonnull String keepAlive, int count, + @Nullable Filter filter, @Nullable String scrollId, @Nullable String keepAlive, int count, @Nullable SearchFlags searchFlags, @Nonnull Authentication authentication) throws RemoteInvocationException { final SearchFlags finalFlags = searchFlags != null ? searchFlags : new SearchFlags().setFulltext(true); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java index 94b8d57efcc160..c99e4a94feb291 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java @@ -147,15 +147,23 @@ public SearchResult searchAcrossEntities(@Nonnull List entities, @Nonnul return result; } + /** + * If no entities are provided, fallback to the list of non-empty entities + * @param inputEntities the requested entities + * @return some entities to search + */ private List getEntitiesToSearch(@Nonnull List inputEntities) { List nonEmptyEntities; List lowercaseEntities = inputEntities.stream().map(String::toLowerCase).collect(Collectors.toList()); - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getNonEmptyEntities").time()) { - nonEmptyEntities = _entityDocCountCache.getNonEmptyEntities(); - } - if (!inputEntities.isEmpty()) { - nonEmptyEntities = nonEmptyEntities.stream().filter(lowercaseEntities::contains).collect(Collectors.toList()); + + if (lowercaseEntities.isEmpty()) { + try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getNonEmptyEntities").time()) { + nonEmptyEntities = _entityDocCountCache.getNonEmptyEntities(); + } + } else { + nonEmptyEntities = lowercaseEntities; } + return nonEmptyEntities; } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java index f8b28f6c182a72..f8f99475de23e2 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java @@ -72,11 +72,13 @@ public DataHubAuthorizer( final EntityClient entityClient, final int delayIntervalSeconds, final int refreshIntervalSeconds, - final AuthorizationMode mode) { + final AuthorizationMode mode, + final int policyFetchSize) { _systemAuthentication = Objects.requireNonNull(systemAuthentication); _mode = Objects.requireNonNull(mode); _policyEngine = new PolicyEngine(systemAuthentication, Objects.requireNonNull(entityClient)); - _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache, readWriteLock.writeLock()); + _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache, + readWriteLock.writeLock(), policyFetchSize); _refreshExecutorService.scheduleAtFixedRate(_policyRefreshRunnable, delayIntervalSeconds, refreshIntervalSeconds, TimeUnit.SECONDS); } @@ -244,29 +246,28 @@ static class PolicyRefreshRunnable implements Runnable { private final PolicyFetcher _policyFetcher; private final Map> _policyCache; private final Lock writeLock; + private final int count; @Override public void run() { try { // Populate new cache and swap. Map> newCache = new HashMap<>(); + Integer total = null; + String scrollId = null; - int start = 0; - int count = 30; - int total = 30; - - while (start < total) { + while (total == null || scrollId != null) { try { final PolicyFetcher.PolicyFetchResult - policyFetchResult = _policyFetcher.fetchPolicies(start, count, _systemAuthentication); + policyFetchResult = _policyFetcher.fetchPolicies(count, scrollId, _systemAuthentication); addPoliciesToCache(newCache, policyFetchResult.getPolicies()); total = policyFetchResult.getTotal(); - start = start + count; + scrollId = policyFetchResult.getScrollId(); } catch (Exception e) { log.error( - "Failed to retrieve policy urns! Skipping updating policy cache until next refresh. start: {}, count: {}", start, count, e); + "Failed to retrieve policy urns! Skipping updating policy cache until next refresh. count: {}, scrollId: {}", count, scrollId, e); return; } } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java index 92d12bad41c9f5..c06da4d245f917 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java @@ -8,8 +8,8 @@ import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntity; -import com.linkedin.metadata.search.SearchResult; import com.linkedin.policy.DataHubPolicyInfo; import com.linkedin.r2.RemoteInvocationException; import java.net.URISyntaxException; @@ -18,11 +18,14 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import lombok.Value; import lombok.extern.slf4j.Slf4j; +import javax.annotation.Nullable; + import static com.linkedin.metadata.Constants.DATAHUB_POLICY_INFO_ASPECT_NAME; import static com.linkedin.metadata.Constants.POLICY_ENTITY_NAME; @@ -38,22 +41,53 @@ public class PolicyFetcher { private static final SortCriterion POLICY_SORT_CRITERION = new SortCriterion().setField("lastUpdatedTimestamp").setOrder(SortOrder.DESCENDING); - public PolicyFetchResult fetchPolicies(int start, int count, Authentication authentication) - throws RemoteInvocationException, URISyntaxException { - return fetchPolicies(start, count, "", authentication); + /** + * This is to provide a scroll implementation using the start/count api. It is not efficient + * and the scroll native functions should be used instead. This does fix a failure to fetch + * policies when deep pagination happens where there are >10k policies. + * Exists primarily to prevent breaking change to the graphql api. + */ + @Deprecated + public CompletableFuture fetchPolicies(int start, String query, int count, Authentication authentication) { + return CompletableFuture.supplyAsync(() -> { + try { + PolicyFetchResult result = PolicyFetchResult.EMPTY; + String scrollId = ""; + int fetchedResults = 0; + + while (PolicyFetchResult.EMPTY.equals(result) && scrollId != null) { + PolicyFetchResult tmpResult = fetchPolicies(query, count, scrollId.isEmpty() ? null : scrollId, authentication); + fetchedResults += tmpResult.getPolicies().size(); + scrollId = tmpResult.getScrollId(); + if (fetchedResults > start) { + result = tmpResult; + } + } + + return result; + } catch (Exception e) { + throw new RuntimeException("Failed to list policies", e); + } + }); } - public PolicyFetchResult fetchPolicies(int start, int count, String query, Authentication authentication) + public PolicyFetchResult fetchPolicies(int count, @Nullable String scrollId, Authentication authentication) + throws RemoteInvocationException, URISyntaxException { + return fetchPolicies("", count, scrollId, authentication); + } + + public PolicyFetchResult fetchPolicies(String query, int count, @Nullable String scrollId, Authentication authentication) throws RemoteInvocationException, URISyntaxException { - log.debug(String.format("Batch fetching policies. start: %s, count: %s ", start, count)); - // First fetch all policy urns from start - start + count - SearchResult result = - _entityClient.search(POLICY_ENTITY_NAME, query, null, POLICY_SORT_CRITERION, start, count, authentication, - new SearchFlags().setFulltext(true)); + log.debug(String.format("Batch fetching policies. count: %s, scroll: %s", count, scrollId)); + + // First fetch all policy urns + ScrollResult result = _entityClient.scrollAcrossEntities(List.of(POLICY_ENTITY_NAME), query, null, scrollId, + null, count, new SearchFlags().setSkipCache(true).setSkipAggregates(true) + .setSkipHighlighting(true).setFulltext(true), authentication); List policyUrns = result.getEntities().stream().map(SearchEntity::getEntity).collect(Collectors.toList()); if (policyUrns.isEmpty()) { - return new PolicyFetchResult(Collections.emptyList(), 0); + return PolicyFetchResult.EMPTY; } // Fetch DataHubPolicyInfo aspects for each urn @@ -64,7 +98,7 @@ public PolicyFetchResult fetchPolicies(int start, int count, String query, Authe .filter(Objects::nonNull) .map(this::extractPolicy) .filter(Objects::nonNull) - .collect(Collectors.toList()), result.getNumEntities()); + .collect(Collectors.toList()), result.getNumEntities(), result.getScrollId()); } private Policy extractPolicy(EntityResponse entityResponse) { @@ -82,6 +116,10 @@ private Policy extractPolicy(EntityResponse entityResponse) { public static class PolicyFetchResult { List policies; int total; + @Nullable + String scrollId; + + public static final PolicyFetchResult EMPTY = new PolicyFetchResult(Collections.emptyList(), 0, null); } @Value diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java index 24ecfa6fefc856..babb1c5d00ee8a 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java @@ -22,6 +22,7 @@ import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchResult; @@ -35,6 +36,8 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; + import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -89,30 +92,58 @@ public void setupTest() throws Exception { final EnvelopedAspectMap childDomainPolicyAspectMap = new EnvelopedAspectMap(); childDomainPolicyAspectMap.put(DATAHUB_POLICY_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(childDomainPolicy.data()))); - final SearchResult policySearchResult = new SearchResult(); - policySearchResult.setNumEntities(3); - policySearchResult.setEntities( - new SearchEntityArray( - ImmutableList.of( - new SearchEntity().setEntity(activePolicyUrn), - new SearchEntity().setEntity(inactivePolicyUrn), - new SearchEntity().setEntity(parentDomainPolicyUrn), - new SearchEntity().setEntity(childDomainPolicyUrn) - ) - ) - ); - - when(_entityClient.search(eq("dataHubPolicy"), eq(""), isNull(), any(), anyInt(), anyInt(), any(), - eq(new SearchFlags().setFulltext(true)))).thenReturn(policySearchResult); - when(_entityClient.batchGetV2(eq(POLICY_ENTITY_NAME), - eq(ImmutableSet.of(activePolicyUrn, inactivePolicyUrn, parentDomainPolicyUrn, childDomainPolicyUrn)), eq(null), any())).thenReturn( - ImmutableMap.of( - activePolicyUrn, new EntityResponse().setUrn(activePolicyUrn).setAspects(activeAspectMap), - inactivePolicyUrn, new EntityResponse().setUrn(inactivePolicyUrn).setAspects(inactiveAspectMap), - parentDomainPolicyUrn, new EntityResponse().setUrn(parentDomainPolicyUrn).setAspects(parentDomainPolicyAspectMap), - childDomainPolicyUrn, new EntityResponse().setUrn(childDomainPolicyUrn).setAspects(childDomainPolicyAspectMap) - ) - ); + final ScrollResult policySearchResult1 = new ScrollResult() + .setScrollId("1") + .setNumEntities(4) + .setEntities( + new SearchEntityArray( + ImmutableList.of(new SearchEntity().setEntity(activePolicyUrn)))); + + final ScrollResult policySearchResult2 = new ScrollResult() + .setScrollId("2") + .setNumEntities(4) + .setEntities( + new SearchEntityArray( + ImmutableList.of(new SearchEntity().setEntity(inactivePolicyUrn)))); + + final ScrollResult policySearchResult3 = new ScrollResult() + .setScrollId("3") + .setNumEntities(4) + .setEntities( + new SearchEntityArray( + ImmutableList.of(new SearchEntity().setEntity(parentDomainPolicyUrn)))); + + final ScrollResult policySearchResult4 = new ScrollResult() + .setNumEntities(4) + .setEntities( + new SearchEntityArray( + ImmutableList.of( + new SearchEntity().setEntity(childDomainPolicyUrn)))); + + when(_entityClient.scrollAcrossEntities(eq(List.of("dataHubPolicy")), eq(""), isNull(), any(), isNull(), + anyInt(), eq(new SearchFlags().setFulltext(true).setSkipAggregates(true).setSkipHighlighting(true).setSkipCache(true)), any())) + .thenReturn(policySearchResult1) + .thenReturn(policySearchResult2) + .thenReturn(policySearchResult3) + .thenReturn(policySearchResult4); + + when(_entityClient.batchGetV2(eq(POLICY_ENTITY_NAME), any(), eq(null), any())).thenAnswer(args -> { + Set inputUrns = args.getArgument(1); + Urn urn = inputUrns.stream().findFirst().get(); + + switch (urn.toString()) { + case "urn:li:dataHubPolicy:0": + return Map.of(activePolicyUrn, new EntityResponse().setUrn(activePolicyUrn).setAspects(activeAspectMap)); + case "urn:li:dataHubPolicy:1": + return Map.of(inactivePolicyUrn, new EntityResponse().setUrn(inactivePolicyUrn).setAspects(inactiveAspectMap)); + case "urn:li:dataHubPolicy:2": + return Map.of(parentDomainPolicyUrn, new EntityResponse().setUrn(parentDomainPolicyUrn).setAspects(parentDomainPolicyAspectMap)); + case "urn:li:dataHubPolicy:3": + return Map.of(childDomainPolicyUrn, new EntityResponse().setUrn(childDomainPolicyUrn).setAspects(childDomainPolicyAspectMap)); + default: + throw new IllegalStateException(); + } + }); final List userUrns = ImmutableList.of(Urn.createFromString("urn:li:corpuser:user3"), Urn.createFromString("urn:li:corpuser:user4")); final List groupUrns = ImmutableList.of(Urn.createFromString("urn:li:corpGroup:group3"), Urn.createFromString("urn:li:corpGroup:group4")); @@ -146,7 +177,8 @@ childDomainPolicyUrn, new EntityResponse().setUrn(childDomainPolicyUrn).setAspec _entityClient, 10, 10, - DataHubAuthorizer.AuthorizationMode.DEFAULT + DataHubAuthorizer.AuthorizationMode.DEFAULT, + 1 // force pagination logic ); _dataHubAuthorizer.init(Collections.emptyMap(), createAuthorizerContext(systemAuthentication, _entityClient)); _dataHubAuthorizer.invalidateCache(); diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index 91b10a75c922e2..e9113d339e81d7 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -39,6 +39,7 @@ authorization: defaultAuthorizer: enabled: ${AUTH_POLICIES_ENABLED:true} cacheRefreshIntervalSecs: ${POLICY_CACHE_REFRESH_INTERVAL_SECONDS:120} + cachePolicyFetchSize: ${POLICY_CACHE_FETCH_SIZE:1000} # Enables authorization of reads, writes, and deletes on REST APIs. Defaults to false for backwards compatibility, but should become true down the road restApiAuthorization: ${REST_API_AUTHORIZATION_ENABLED:false} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java index 5b298a453547a7..663234e2519faf 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java @@ -32,6 +32,9 @@ public class DataHubAuthorizerFactory { @Value("${authorization.defaultAuthorizer.cacheRefreshIntervalSecs}") private Integer policyCacheRefreshIntervalSeconds; + @Value("${authorization.defaultAuthorizer.cachePolicyFetchSize}") + private Integer policyCacheFetchSize; + @Value("${authorization.defaultAuthorizer.enabled:true}") private Boolean policiesEnabled; @@ -44,6 +47,6 @@ protected DataHubAuthorizer getInstance() { : DataHubAuthorizer.AuthorizationMode.ALLOW_ALL; return new DataHubAuthorizer(systemAuthentication, entityClient, 10, - policyCacheRefreshIntervalSeconds, mode); + policyCacheRefreshIntervalSeconds, mode, policyCacheFetchSize); } } diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java index b9661ec75e1b1f..84d0ed6b9594df 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -241,7 +241,7 @@ public SearchResult searchAcrossEntities(@Nonnull List entities, @Nonnul */ @Nonnull ScrollResult scrollAcrossEntities(@Nonnull List entities, @Nonnull String input, - @Nullable Filter filter, @Nullable String scrollId, @Nonnull String keepAlive, int count, @Nullable SearchFlags searchFlags, + @Nullable Filter filter, @Nullable String scrollId, @Nullable String keepAlive, int count, @Nullable SearchFlags searchFlags, @Nonnull Authentication authentication) throws RemoteInvocationException; diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index 47a00e711a9350..2716e27518fcc5 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -482,11 +482,11 @@ public SearchResult searchAcrossEntities(@Nonnull List entities, @Nonnul @Nonnull @Override public ScrollResult scrollAcrossEntities(@Nonnull List entities, @Nonnull String input, - @Nullable Filter filter, @Nullable String scrollId, @Nonnull String keepAlive, int count, + @Nullable Filter filter, @Nullable String scrollId, @Nullable String keepAlive, int count, @Nullable SearchFlags searchFlags, @Nonnull Authentication authentication) throws RemoteInvocationException { final EntitiesDoScrollAcrossEntitiesRequestBuilder requestBuilder = - ENTITIES_REQUEST_BUILDERS.actionScrollAcrossEntities().inputParam(input).countParam(count).keepAliveParam(keepAlive); + ENTITIES_REQUEST_BUILDERS.actionScrollAcrossEntities().inputParam(input).countParam(count); if (entities != null) { requestBuilder.entitiesParam(new StringArray(entities)); @@ -500,6 +500,9 @@ public ScrollResult scrollAcrossEntities(@Nonnull List entities, @Nonnul if (searchFlags != null) { requestBuilder.searchFlagsParam(searchFlags); } + if (keepAlive != null) { + requestBuilder.keepAliveParam(keepAlive); + } return sendClientRequest(requestBuilder, authentication).getEntity(); } diff --git a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js index 9559435ff01c85..8d689c7e2303c4 100644 --- a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js +++ b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js @@ -81,7 +81,7 @@ describe("create and manage group", () => { cy.focused().type(expected_name); cy.get(".ant-select-item-option").contains(expected_name, { matchCase: false }).click(); cy.focused().blur(); - cy.contains(expected_name).should("have.length", 1); + cy.contains(expected_name, { matchCase: false }).should("have.length", 1); cy.get('[role="dialog"] button').contains("Done").click(); cy.waitTextVisible("Owners Added"); cy.contains(expected_name, { matchCase: false }).should("be.visible"); From 353584c10acbee7554c2eb255512173f24e86785 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 7 Nov 2023 18:22:18 -0600 Subject: [PATCH 6/6] feat(browsepathv2): Allow system-update to reprocess browse paths v2 (#9200) --- .../steps/BackfillBrowsePathsV2Step.java | 86 ++++++++++++++----- .../env/docker-without-neo4j.env | 1 + docker/datahub-upgrade/env/docker.env | 1 + docker/docker-compose.dev.yml | 4 + .../docker-compose-m1.quickstart.yml | 1 + ...er-compose-without-neo4j-m1.quickstart.yml | 1 + ...ocker-compose-without-neo4j.quickstart.yml | 1 + .../quickstart/docker-compose.quickstart.yml | 1 + .../client/CachingEntitySearchService.java | 16 ++-- .../elasticsearch/query/ESSearchDAO.java | 4 +- .../query/request/SearchRequestHandler.java | 8 +- .../src/main/resources/application.yml | 2 + .../metadata/search/EntitySearchService.java | 4 +- 13 files changed, 94 insertions(+), 36 deletions(-) diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java index 7547186ccfb230..08a752d9597f42 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java @@ -6,6 +6,7 @@ import com.linkedin.common.BrowsePathsV2; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.StringArray; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; @@ -13,6 +14,7 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -37,6 +39,8 @@ public class BackfillBrowsePathsV2Step implements UpgradeStep { public static final String BACKFILL_BROWSE_PATHS_V2 = "BACKFILL_BROWSE_PATHS_V2"; + public static final String REPROCESS_DEFAULT_BROWSE_PATHS_V2 = "REPROCESS_DEFAULT_BROWSE_PATHS_V2"; + public static final String DEFAULT_BROWSE_PATH_V2 = "␟Default"; private static final Set ENTITY_TYPES_TO_MIGRATE = ImmutableSet.of( Constants.DATASET_ENTITY_NAME, @@ -81,27 +85,14 @@ public Function executable() { private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, String scrollId) { - // Condition: has `browsePaths` AND does NOT have `browsePathV2` - Criterion missingBrowsePathV2 = new Criterion(); - missingBrowsePathV2.setCondition(Condition.IS_NULL); - missingBrowsePathV2.setField("browsePathV2"); - // Excludes entities without browsePaths - Criterion hasBrowsePathV1 = new Criterion(); - hasBrowsePathV1.setCondition(Condition.EXISTS); - hasBrowsePathV1.setField("browsePaths"); - - CriterionArray criterionArray = new CriterionArray(); - criterionArray.add(missingBrowsePathV2); - criterionArray.add(hasBrowsePathV1); - - ConjunctiveCriterion conjunctiveCriterion = new ConjunctiveCriterion(); - conjunctiveCriterion.setAnd(criterionArray); + final Filter filter; - ConjunctiveCriterionArray conjunctiveCriterionArray = new ConjunctiveCriterionArray(); - conjunctiveCriterionArray.add(conjunctiveCriterion); - - Filter filter = new Filter(); - filter.setOr(conjunctiveCriterionArray); + if (System.getenv().containsKey(REPROCESS_DEFAULT_BROWSE_PATHS_V2) + && Boolean.parseBoolean(System.getenv(REPROCESS_DEFAULT_BROWSE_PATHS_V2))) { + filter = backfillDefaultBrowsePathsV2Filter(); + } else { + filter = backfillBrowsePathsV2Filter(); + } final ScrollResult scrollResult = _searchService.scrollAcrossEntities( ImmutableList.of(entityType), @@ -109,9 +100,9 @@ private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, S filter, null, scrollId, - "5m", + null, BATCH_SIZE, - null + new SearchFlags().setFulltext(true).setSkipCache(true).setSkipHighlighting(true).setSkipAggregates(true) ); if (scrollResult.getNumEntities() == 0 || scrollResult.getEntities().size() == 0) { return null; @@ -129,6 +120,55 @@ private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, S return scrollResult.getScrollId(); } + private Filter backfillBrowsePathsV2Filter() { + // Condition: has `browsePaths` AND does NOT have `browsePathV2` + Criterion missingBrowsePathV2 = new Criterion(); + missingBrowsePathV2.setCondition(Condition.IS_NULL); + missingBrowsePathV2.setField("browsePathV2"); + // Excludes entities without browsePaths + Criterion hasBrowsePathV1 = new Criterion(); + hasBrowsePathV1.setCondition(Condition.EXISTS); + hasBrowsePathV1.setField("browsePaths"); + + CriterionArray criterionArray = new CriterionArray(); + criterionArray.add(missingBrowsePathV2); + criterionArray.add(hasBrowsePathV1); + + ConjunctiveCriterion conjunctiveCriterion = new ConjunctiveCriterion(); + conjunctiveCriterion.setAnd(criterionArray); + + ConjunctiveCriterionArray conjunctiveCriterionArray = new ConjunctiveCriterionArray(); + conjunctiveCriterionArray.add(conjunctiveCriterion); + + Filter filter = new Filter(); + filter.setOr(conjunctiveCriterionArray); + return filter; + } + + private Filter backfillDefaultBrowsePathsV2Filter() { + // Condition: has default `browsePathV2` + Criterion hasDefaultBrowsePathV2 = new Criterion(); + hasDefaultBrowsePathV2.setCondition(Condition.EQUAL); + hasDefaultBrowsePathV2.setField("browsePathV2"); + StringArray values = new StringArray(); + values.add(DEFAULT_BROWSE_PATH_V2); + hasDefaultBrowsePathV2.setValues(values); + hasDefaultBrowsePathV2.setValue(DEFAULT_BROWSE_PATH_V2); // not used, but required field? + + CriterionArray criterionArray = new CriterionArray(); + criterionArray.add(hasDefaultBrowsePathV2); + + ConjunctiveCriterion conjunctiveCriterion = new ConjunctiveCriterion(); + conjunctiveCriterion.setAnd(criterionArray); + + ConjunctiveCriterionArray conjunctiveCriterionArray = new ConjunctiveCriterionArray(); + conjunctiveCriterionArray.add(conjunctiveCriterion); + + Filter filter = new Filter(); + filter.setOr(conjunctiveCriterionArray); + return filter; + } + private void ingestBrowsePathsV2(Urn urn, AuditStamp auditStamp) throws Exception { BrowsePathsV2 browsePathsV2 = _entityService.buildDefaultBrowsePathV2(urn, true); log.debug(String.format("Adding browse path v2 for urn %s with value %s", urn, browsePathsV2)); @@ -142,7 +182,7 @@ private void ingestBrowsePathsV2(Urn urn, AuditStamp auditStamp) throws Exceptio _entityService.ingestProposal( proposal, auditStamp, - false + true ); } diff --git a/docker/datahub-upgrade/env/docker-without-neo4j.env b/docker/datahub-upgrade/env/docker-without-neo4j.env index c399f71b7b15c8..04d888f076cd68 100644 --- a/docker/datahub-upgrade/env/docker-without-neo4j.env +++ b/docker/datahub-upgrade/env/docker-without-neo4j.env @@ -21,6 +21,7 @@ DATAHUB_GMS_PORT=8080 ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml BACKFILL_BROWSE_PATHS_V2=true +REPROCESS_DEFAULT_BROWSE_PATHS_V2=${REPROCESS_DEFAULT_BROWSE_PATHS_V2:-false} # Uncomment and set these to support SSL connection to Elasticsearch # ELASTICSEARCH_USE_SSL= diff --git a/docker/datahub-upgrade/env/docker.env b/docker/datahub-upgrade/env/docker.env index 491470406153b2..b2a0d01e5d4ae8 100644 --- a/docker/datahub-upgrade/env/docker.env +++ b/docker/datahub-upgrade/env/docker.env @@ -25,6 +25,7 @@ DATAHUB_GMS_PORT=8080 ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml BACKFILL_BROWSE_PATHS_V2=true +REPROCESS_DEFAULT_BROWSE_PATHS_V2=${REPROCESS_DEFAULT_BROWSE_PATHS_V2:-false} # Uncomment and set these to support SSL connection to Elasticsearch # ELASTICSEARCH_USE_SSL= diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index c4e5ee7fa0cae9..774c4e17bee21f 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -54,6 +54,8 @@ services: - ${HOME}/.datahub/plugins:/etc/datahub/plugins datahub-upgrade: image: acryldata/datahub-upgrade:debug + ports: + - ${DATAHUB_MAPPED_UPGRADE_DEBUG_PORT:-5003}:5003 build: context: datahub-upgrade dockerfile: Dockerfile @@ -63,6 +65,8 @@ services: - SKIP_ELASTICSEARCH_CHECK=false - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-dev} - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + - REPROCESS_DEFAULT_BROWSE_PATHS_V2=${REPROCESS_DEFAULT_BROWSE_PATHS_V2:-false} + - JAVA_TOOL_OPTIONS=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5003 volumes: - ../datahub-upgrade/build/libs/:/datahub/datahub-upgrade/bin/ - ../metadata-models/src/main/resources/:/datahub/datahub-gms/resources diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index 3b6d02c83d0f07..c96baf37551b29 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -151,6 +151,7 @@ services: - DATAHUB_GMS_PORT=8080 - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - BACKFILL_BROWSE_PATHS_V2=true + - REPROCESS_DEFAULT_BROWSE_PATHS_V2=false hostname: datahub-upgrade image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head} labels: diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index e45bafc3da480e..b1cb6c208a42d6 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -144,6 +144,7 @@ services: - DATAHUB_GMS_PORT=8080 - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - BACKFILL_BROWSE_PATHS_V2=true + - REPROCESS_DEFAULT_BROWSE_PATHS_V2=false hostname: datahub-upgrade image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head} labels: diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 020ef5e9a97b96..ab5182bf98ae50 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -144,6 +144,7 @@ services: - DATAHUB_GMS_PORT=8080 - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - BACKFILL_BROWSE_PATHS_V2=true + - REPROCESS_DEFAULT_BROWSE_PATHS_V2=false hostname: datahub-upgrade image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head} labels: diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index 8adc2b9063b840..8a66521cbb5221 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -151,6 +151,7 @@ services: - DATAHUB_GMS_PORT=8080 - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml - BACKFILL_BROWSE_PATHS_V2=true + - REPROCESS_DEFAULT_BROWSE_PATHS_V2=false hostname: datahub-upgrade image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head} labels: diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java index ceaf37a1289d99..db414d70603dc7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java @@ -16,7 +16,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; -import org.javatuples.Quintet; +import org.javatuples.Septet; import org.javatuples.Sextet; import org.springframework.cache.Cache; import org.springframework.cache.CacheManager; @@ -154,8 +154,9 @@ public SearchResult getCachedSearchResults( batchSize, querySize -> getRawSearchResults(entityNames, query, filters, sortCriterion, querySize.getFrom(), querySize.getSize(), flags, facets), - querySize -> Sextet.with(entityNames, query, filters != null ? toJsonString(filters) : null, - sortCriterion != null ? toJsonString(sortCriterion) : null, facets, querySize), flags, enableCache).getSearchResults(from, size); + querySize -> Septet.with(entityNames, query, filters != null ? toJsonString(filters) : null, + sortCriterion != null ? toJsonString(sortCriterion) : null, flags != null ? toJsonString(flags) : null, + facets, querySize), flags, enableCache).getSearchResults(from, size); } @@ -175,7 +176,8 @@ public AutoCompleteResult getCachedAutoCompleteResults( if (enableCache(flags)) { try (Timer.Context ignored2 = MetricUtils.timer(this.getClass(), "getCachedAutoCompleteResults_cache").time()) { Timer.Context cacheAccess = MetricUtils.timer(this.getClass(), "autocomplete_cache_access").time(); - Object cacheKey = Quintet.with(entityName, input, field, filters != null ? toJsonString(filters) : null, limit); + Object cacheKey = Sextet.with(entityName, input, field, filters != null ? toJsonString(filters) : null, + flags != null ? toJsonString(flags) : null, limit); String json = cache.get(cacheKey, String.class); result = json != null ? toRecordTemplate(AutoCompleteResult.class, json) : null; cacheAccess.stop(); @@ -210,7 +212,8 @@ public BrowseResult getCachedBrowseResults( if (enableCache(flags)) { try (Timer.Context ignored2 = MetricUtils.timer(this.getClass(), "getCachedBrowseResults_cache").time()) { Timer.Context cacheAccess = MetricUtils.timer(this.getClass(), "browse_cache_access").time(); - Object cacheKey = Quintet.with(entityName, path, filters != null ? toJsonString(filters) : null, from, size); + Object cacheKey = Sextet.with(entityName, path, filters != null ? toJsonString(filters) : null, + flags != null ? toJsonString(flags) : null, from, size); String json = cache.get(cacheKey, String.class); result = json != null ? toRecordTemplate(BrowseResult.class, json) : null; cacheAccess.stop(); @@ -247,9 +250,10 @@ public ScrollResult getCachedScrollResults( ScrollResult result; if (enableCache(flags)) { Timer.Context cacheAccess = MetricUtils.timer(this.getClass(), "scroll_cache_access").time(); - Object cacheKey = Sextet.with(entities, query, + Object cacheKey = Septet.with(entities, query, filters != null ? toJsonString(filters) : null, sortCriterion != null ? toJsonString(sortCriterion) : null, + flags != null ? toJsonString(flags) : null, scrollId, size); String json = cache.get(cacheKey, String.class); result = json != null ? toRecordTemplate(ScrollResult.class, json) : null; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index cbaf70ca22617d..290e8c60deb000 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -157,7 +157,7 @@ private AggregationMetadataArray transformIndexIntoEntityName(AggregationMetadat @Nonnull @WithSpan private ScrollResult executeAndExtract(@Nonnull List entitySpecs, @Nonnull SearchRequest searchRequest, @Nullable Filter filter, - @Nullable String scrollId, @Nonnull String keepAlive, int size) { + @Nullable String scrollId, @Nullable String keepAlive, int size) { try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "executeAndExtract_scroll").time()) { final SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); // extract results, validated against document model as well @@ -166,7 +166,7 @@ private ScrollResult executeAndExtract(@Nonnull List entitySpecs, @N .extractScrollResult(searchResponse, filter, scrollId, keepAlive, size, supportsPointInTime())); } catch (Exception e) { - log.error("Search query failed", e); + log.error("Search query failed: {}", searchRequest, e); throw new ESQueryException("Search query failed:", e); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 49571a60d5f211..0df6afd49c3735 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -241,7 +241,9 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi BoolQueryBuilder filterQuery = getFilterQuery(filter); searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).filter(filterQuery)); - _aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation); + if (!finalSearchFlags.isSkipAggregates()) { + _aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation); + } if (!finalSearchFlags.isSkipHighlighting()) { searchSourceBuilder.highlighter(_highlights); } @@ -366,7 +368,7 @@ public SearchResult extractResult(@Nonnull SearchResponse searchResponse, Filter @WithSpan public ScrollResult extractScrollResult(@Nonnull SearchResponse searchResponse, Filter filter, @Nullable String scrollId, - @Nonnull String keepAlive, int size, boolean supportsPointInTime) { + @Nullable String keepAlive, int size, boolean supportsPointInTime) { int totalCount = (int) searchResponse.getHits().getTotalHits().value; List resultList = getResults(searchResponse); SearchResultMetadata searchResultMetadata = extractSearchResultMetadata(searchResponse, filter); @@ -376,7 +378,7 @@ public ScrollResult extractScrollResult(@Nonnull SearchResponse searchResponse, if (searchHits.length == size) { Object[] sort = searchHits[searchHits.length - 1].getSortValues(); long expirationTimeMs = 0L; - if (supportsPointInTime) { + if (keepAlive != null && supportsPointInTime) { expirationTimeMs = TimeValue.parseTimeValue(keepAlive, "expirationTime").getMillis() + System.currentTimeMillis(); } nextScrollId = new SearchAfterWrapper(sort, searchResponse.pointInTimeId(), expirationTimeMs).toScrollId(); diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index e9113d339e81d7..a06891699607bb 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -285,6 +285,8 @@ bootstrap: enabled: ${UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED:false} # enable to run the upgrade to migrate legacy default browse paths to new ones backfillBrowsePathsV2: enabled: ${BACKFILL_BROWSE_PATHS_V2:false} # Enables running the backfill of browsePathsV2 upgrade step. There are concerns about the load of this step so hiding it behind a flag. Deprecating in favor of running through SystemUpdate + reprocessDefaultBrowsePathsV2: + enabled: ${REPROCESS_DEFAULT_BROWSE_PATHS_V2:false} # reprocess V2 browse paths which were set to the default: {"path":[{"id":"Default"}]} policies: file: ${BOOTSTRAP_POLICIES_FILE:classpath:boot/policies.json} # eg for local file diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index 64f59780b887f3..cbfeeaef860d34 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -193,7 +193,7 @@ BrowseResult browse(@Nonnull String entityName, @Nonnull String path, @Nullable */ @Nonnull ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size, @Nullable SearchFlags searchFlags); + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, @Nullable SearchFlags searchFlags); /** * Gets a list of documents that match given search request. The results are aggregated and filters are applied to the @@ -210,7 +210,7 @@ ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String inpu */ @Nonnull ScrollResult structuredScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size, @Nullable SearchFlags searchFlags); + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, @Nullable SearchFlags searchFlags); /** * Max result size returned by the underlying search backend