From db7e51bf10b20a28d5c70366f32517688e7de167 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 3 Dec 2024 19:23:10 +0530 Subject: [PATCH 01/28] fix(ingest/gc): do not cleanup empty job/flow (#12013) --- .../src/datahub/ingestion/source/gc/dataprocess_cleanup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py index 2d042c7ea68ec7..90641b7059ca40 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py @@ -114,11 +114,11 @@ class DataProcessCleanupConfig(ConfigModel): ) delete_empty_data_jobs: bool = Field( - True, description="Whether to delete Data Jobs without runs" + False, description="Whether to delete Data Jobs without runs" ) delete_empty_data_flows: bool = Field( - True, description="Whether to delete Data Flows without runs" + False, description="Whether to delete Data Flows without runs" ) hard_delete_entities: bool = Field( From aca1cd7fe41627e31b1e5ce9df3f1b1474b5c35b Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 3 Dec 2024 09:25:24 -0600 Subject: [PATCH 02/28] fix(test): fix metadata-io tests (#12006) --- .../search/fixtures/SampleDataFixtureTestBase.java | 8 ++++---- .../src/test/resources/search_config_fixture_test.yml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index 504eb5f5fc13db..fd663de40e0050 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -1367,8 +1367,8 @@ public void testScrollAcrossEntities() throws IOException { resultUrns.addAll(result.getEntities().stream().map(SearchEntity::getEntity).toList()); scrollId = result.getScrollId(); } while (scrollId != null); - // expect 2 total matching results - assertEquals(totalResults, 2, String.format("query `%s` Results: %s", query, resultUrns)); + // expect 8 total matching results + assertEquals(totalResults, 8, String.format("query `%s` Results: %s", query, resultUrns)); } @Test @@ -1745,7 +1745,7 @@ public void testOr() { String.format("%s - Expected search results to include matched fields", query)); assertEquals( result.getEntities().size(), - 2, + 8, String.format( "Query: `%s` Results: %s", query, @@ -1776,7 +1776,7 @@ public void testNegate() { String.format("%s - Expected search results to include matched fields", query)); assertEquals( result.getEntities().size(), - 2, + 8, String.format( "Query: `%s` Results: %s", query, diff --git a/metadata-io/src/test/resources/search_config_fixture_test.yml b/metadata-io/src/test/resources/search_config_fixture_test.yml index 08e713c6b1cd38..e3c97c267188fb 100644 --- a/metadata-io/src/test/resources/search_config_fixture_test.yml +++ b/metadata-io/src/test/resources/search_config_fixture_test.yml @@ -57,9 +57,9 @@ queryConfigurations: boost_mode: replace # Criteria for exact-match only - # Contains quotes, is a single term with `_`, `.`, or `-` (normally consider for tokenization) then use exact match query + # Contains quotes - queryRegex: >- - ^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$ + ^["'].+["']$ simpleQuery: false prefixMatchQuery: true exactMatchQuery: true From 230bd2674bb10ae004a1753a30e9e3d0e0a573e9 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Tue, 3 Dec 2024 20:01:12 +0100 Subject: [PATCH 03/28] fix(ingest/looker): Don't fail on unknown liquid filters (#12014) --- 
.../datahub/ingestion/source/looker/looker_liquid_tag.py | 9 ++++++++- .../tests/integration/lookml/test_lookml.py | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py index 7d4ebf00cc06ef..f48ba6758564bf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py @@ -4,6 +4,7 @@ from liquid import Environment from liquid.ast import Node from liquid.context import Context +from liquid.filter import string_filter from liquid.parse import expect, get_parser from liquid.stream import TokenStream from liquid.tag import Tag @@ -81,12 +82,18 @@ def parse(self, stream: TokenStream) -> Node: custom_tags = [ConditionTag] +@string_filter +def sql_quote_filter(variable: str) -> str: + return f"'{variable}'" + + @lru_cache(maxsize=1) def _create_env() -> Environment: - env: Environment = Environment() + env: Environment = Environment(strict_filters=False) # register tag. 
One time activity for custom_tag in custom_tags: env.add_tag(custom_tag) + env.add_filter("sql_quote", sql_quote_filter) return env diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index ab55321a4d7342..4cd2777dc7dcad 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -889,7 +889,7 @@ def test_view_to_view_lineage_and_liquid_template(pytestconfig, tmp_path, mock_t @freeze_time(FROZEN_TIME) def test_special_liquid_variables(): - text: str = """ + text: str = """{% assign source_table_variable = "source_table" | sql_quote | non_existing_filter_where_it_should_not_fail %} SELECT employee_id, employee_name, @@ -903,7 +903,7 @@ def test_special_liquid_variables(): 'default_table' as source {% endif %}, employee_income - FROM source_table + FROM {{ source_table_variable }} """ input_liquid_variable: dict = {} @@ -958,7 +958,7 @@ def test_special_liquid_variables(): expected_text: str = ( "\n SELECT\n employee_id,\n employee_name,\n \n " "prod_core.data.r_metric_summary_v2\n ,\n employee_income\n FROM " - "source_table\n " + "'source_table'\n " ) assert actual_text == expected_text From b31d849b9f9f6a57fc73d58b881e6ae955c9aeb1 Mon Sep 17 00:00:00 2001 From: Jay <159848059+jayacryl@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:04:54 -0500 Subject: [PATCH 04/28] feat(docs-website) fix links (#12019) --- docs-website/src/pages/cloud/UnifiedTabs/index.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs-website/src/pages/cloud/UnifiedTabs/index.js b/docs-website/src/pages/cloud/UnifiedTabs/index.js index c0fbc25a8de6bc..d17138fcee629c 100644 --- a/docs-website/src/pages/cloud/UnifiedTabs/index.js +++ b/docs-website/src/pages/cloud/UnifiedTabs/index.js @@ -11,21 +11,21 @@ const TabbedComponent = () => { title: 'Discovery', description: 'All the search and discovery features 
of DataHub Core you already love, enhanced.', icon: "/img/assets/data-discovery.svg", - link: "https://www.acryldata.io/acryl-datahub", + link: "https://datahubproject.io/solutions/discovery", image: 'https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/saas/demo/discovery.webm', }, { title: 'Observability', description: 'Detect, resolve, and prevent data quality issues before they impact your business. Unify data health signals from all your data quality tools, including dbt tests and more.', icon: "/img/assets/data-ob.svg", - link: "https://www.acryldata.io/observe", + link: "https://datahubproject.io/solutions/observability", image: 'https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/saas/demo/observe.webm', }, { title: 'Governance', description: 'Powerful Automation, Reporting and Organizational tools to help you govern effectively.', icon: "/img/assets/data-governance.svg", - link: "https://www.acryldata.io/acryl-datahub#governance", + link: "https://datahubproject.io/solutions/governance", image: 'https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/saas/demo/governance.webm', }, ]; From a004c9293d68b4e04e6d1024b74ffc27d4546336 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:47:08 -0600 Subject: [PATCH 05/28] fix(ci): fix datahub-client validatePythonEnv (#12023) --- metadata-integration/java/datahub-client/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index af71227809d2a7..2535d091f6ce52 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -62,7 +62,7 @@ compileJava.dependsOn copyAvroSchemas // Add Python environment validation task -task validatePythonEnv { +task validatePythonEnv(dependsOn: 
[":metadata-ingestion:installDev"]) { doFirst { def venvPath = System.getProperty('python.venv.path', '../../../metadata-ingestion/venv') def isWindows = System.getProperty('os.name').toLowerCase().contains('windows') From 82774bb65eb53d4b2126b08ff343d8410226038e Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:48:46 -0600 Subject: [PATCH 06/28] test(urn-validation): additional test case (#12001) --- .../entity/validation/ValidationApiUtilsTest.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/validation/ValidationApiUtilsTest.java b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/validation/ValidationApiUtilsTest.java index a2c9a15d92f90a..2ab6a50945ba37 100644 --- a/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/validation/ValidationApiUtilsTest.java +++ b/metadata-io/metadata-io-api/src/test/java/com/linkedin/metadata/entity/validation/ValidationApiUtilsTest.java @@ -79,11 +79,19 @@ public void testUrnWithIllegalDelimiter() { } @Test(expectedExceptions = IllegalArgumentException.class) - public void testComplexUrnWithParens() { + public void testComplexUrnWithParens1() { Urn invalidUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,(illegal),PROD)"); ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); } + @Test(expectedExceptions = IllegalArgumentException.class) + public void testComplexUrnWithParens2() { + Urn invalidUrn = + UrnUtils.getUrn( + "urn:li:dataJob:(urn:li:dataFlow:(mssql,1/2/3/4.c_n on %28LOCAL%29,PROD),1/2/3/4.c_n on (LOCAL))"); + ValidationApiUtils.validateUrn(entityRegistry, invalidUrn, true); + } + @Test(expectedExceptions = IllegalArgumentException.class) public void testSimpleUrnWithParens() { Urn invalidUrn = UrnUtils.getUrn("urn:li:corpuser:(foo)123"); From eef9759f880b74369567ba7f297ebb0406345f6f 
Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Tue, 3 Dec 2024 14:59:34 -0800 Subject: [PATCH 07/28] feat(hudi): add hudi platform to the list of default platforms (#11993) Co-authored-by: Chris Collins --- datahub-web-react/src/images/hudilogo.png | Bin 0 -> 75205 bytes .../src/main/resources/bootstrap_mcps.yaml | 2 +- .../bootstrap_mcps/data-platforms.yaml | 10 ++++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 datahub-web-react/src/images/hudilogo.png diff --git a/datahub-web-react/src/images/hudilogo.png b/datahub-web-react/src/images/hudilogo.png new file mode 100644 index 0000000000000000000000000000000000000000..4b58cc5a34826df31e370f810a93116f033c5c4d GIT binary patch literal 75205 zcmcG$c|6qX8$bL(LJAY5MTjDGvK$q%j7mffqEn|0sgqWFjRrH5$RwetRMu2z?^J}w zjHR@aC<-&9&6b&wWiV#ux$c?KIp5#&d;WYn=hbn>XYTvDujPHM_vfUShs(4nbEaSz zHqCXNlQ)Jbu`x_>rSfF>0d1u65pjU>MN|!zghWHU=M2x-jgpIfnK4W7x_J z4ATzDEZblQznC1b-o*(MqknQ~1!?e!QiSUtYn8gDDrhNd-_Ji>4Ijw`hk0z2tG`0H z1S?^#POCOXe)=IKCw=8#)ElvXzr68Uo;7RVnc+_*y^(uT(v#2FSS|HFyZ-#s*;W4p zw0MNCP*V#Wx!T=7*~aYN{<^W$Npj75mQ>`AG>0{b6}6B5p>R^|$=ly%I$HA+^1drR zTXDAi{nTh1>jS|}Q#E$y*L-(hf3w~6WY749KO5h_-|?P)qh~0nXw&PP+ zg!s>qPG6_(ml>W5lcYQ<)R(h!^!&hSJ4v-N8gHEPMM~k~)tLI*v`4n5aQ#9PkzriF z^k55(C`z+ZAK7lhg&v(OUNo`v3*hbK-F}xGlCLRZxLx6du+PG%#`q0%?QjMX+!G;OXKV`kAMlK)H3DlP@lVJhN%>U;| zpoKw24l!8;_e`oPuR*E&<8HNoOr*Xl>dY{g9g>j_hjfNzhNzFv5tB`(%N}=6l|EiN z8^^t#y~}xmRe=kDRUrKvBbTqrj?UHsN_Oq;^H^fw>t^rri@;se845{KIqvDP30p1W zpZN(`IcegYE@~6!JOP9xgAn)zpSQx}?y|=t%#!UBYzP8UGw9n1M69mYr`P1naDGjY&r|9M zsmEs41T_oh!};?j#L50k_Bi7;P~)-a0O5n-=S@8}))q1`oBxTHdHE^pGOj-eKG&T1 zJQP0ndH!i0{pk4w+2J#kfpU`xc`9id%XsMV2T(W#DAd6P1BFEss)DTBI(|Z+N|uJQ z=aWHItglTdZslv4pebv>Vm*Lin+!j{ZwgOMmu(vf))L|d%g@5GJnfZAWsIRDYBCfu zSAF2vQK{>TI`s*%lX|TJ$}J}pS1s%h8GmOW#vF%TC5n5zN~SQ5Ct=q*{R4!V2A}hr z!oAkXG+@zeIPC1dWy-4*FH;!Gbb#2*4N?H{u3xoJX1o$J4@`e#yV>BgQc-7a`UExl zK#dAe6NL+1I;4|+Np>iSts}6oGVt}VS2{OwC>^j3cgG3!UA0ig8+~KYN6oX3Y}E}u 
z17s8~lZj~ourdR%VM_{iTTv%PSN8Gi3^1kDpi6t4o?IrXS;xtC-2-03d_J&J1E+T_ zTOnD74HOd`?CSM_^U&b)wWe^!1jP8qqFv!o1cT3~;d2w&VcAKt&!vZ@Sjr?053uEo zemg+mtgU$j&&N$XpMjn~59=6y{sy0G$v*d=3OtSj8xC-a#t-S3|1IPFEKr<`SgBj^ zu9vekkSY!ZSns~}1Lu}Y&F!XeM|s)Pi`T=U6lJ(;ue3lWDT&4VfW?Df*wAC6?;?{( zdN%OJZNgj?!q!Rk!_frv;5vXZ$Ejv6urgCq3MgK4K!7GMK4mSz^&f{_kI86Xv7@x8r5ox{HAV0%Y#Ez z!iAa)>3B_mWY}6b)ZZW}2`~WtQLWHgMx+mbWX3TdQWF<^HCrLlNy-C84e($O*uovB8g*d2j+^Xr zCm?i1hIAN3GM-tVh0jmG z^Q)Y;yV)y+7|EztL*xKvNwpX=a+Ymth^Pl_`bLd}Sc9=qw#;JWC)fPa;*6EjQ#mRk z>OuQf7P+<^Gfv!$f_Pe!DJ(nviH#ZAT-_mh~e{{0D8fh~5GvfUg3Xw5IF=KhpP6?yn| zz^@>u8a}LU?kIgcWC}d413D%>DV87uR-H6hT?yh1dxIcg3|m<$Y>EXCV*_US3fB+b zAM=ncV-6^vs_;vw>1)v~z9R|L7Ahyu3m~9=_>61w8?H3XvU16SLG$3fu!9!JpTOK)1`U z<5Om9J`3NI5dR}^el7|j*~?_#v@-Y{1r)7YH>Hh3H{D^#C5KEGh3R!X_GB~Y#g8j=MOJg@`8BeP=`m${Eli3e+6ZNQ6D z2znxNj;P;C)Ru6ZrLCke@+C8uW&d@w-GbE|Q>DET_Qa+F2z>?hlZIY$Uel{aV&}|d z&`SoyRO+kyf&uc+{L9*;!#n{*uJF900ccJp*!OMfV}K9JS==*i#$&dk)3PGL4t+Km-}rj0tCVaXTekQ z*q3d`Te&fM;!3Vio&1weAKdwaMZwppCX<;_wolqc19|)e{P(wJR}WSp8a7@APS^gz zG{>jPUo_`TVJCRLXf~}V83_J-FY$cNVG$ocwoO~?8?lC8io=uM?mQ;owdSOZR8-eB z66hVOzrjirxA0h9ebx$ZbL5v|P|lMYd>b-14ID1`klCByYr0onnu;3r$cFnL_0Q!DXp4s9#oxr0GbQO7 z%6h_XHQOUsjm;q74LDbIuiQ$0G^%SsXc~_46NZzA$n&BubA&EmQftSlwY-_*g@256 z%vO)~Eg~O6YMEgVdM3JE0OG3Bb*>5HDsEWtbEH*o9!<#fiTX{D;?SQ%>IC!NgARGa z6$m8HEmFb?@7Y2=$@f;@V9|b>e{AW%r}21l$k`lro2s}$Y*4n1lhb9d)=JS6RfGz8 z)t7mKk^w#*nX6+wpnTXJ?eXXg(6A}Q15x`(_Cp$f68h<+$RBUevC~T-iGu#cDI#E;`UDdMEyRA-{Z7^n&^2zV+j2ELODMk7GG0{g-*w zt-R_T5|G^p_>`+p%zG^fg>qdb*Q&S>TzQ6b)6E3ISERe z__yH@hAqtJ+h-{hbQ%T(dAD<`=ZIgFh>rZo%B5@Md%jIkW!FFL2<*zbf|N$z05CZI zoo&kaPVi7}tGQ8i?z;rEV9v|;^k`1FpU|fW?}7+?37L9IY}X>{ynF!wKYv_jk2$Jz#nfM{ zWr@0a8$Xe_H}ONIdxSkh#y;dbXrFX&9l8DzeXzegmC@Z5;~BgM9il{Z>Y8=5LIq3c z@I-~&GC7*8D!Nx&s2e$#i@rAlN<)jTBQ?}QJifGtFSs{WKoi9X#Giws$5l$N_AN{R*P(IzK3SObUx{^NeabvW3J%-*wLc z!ZIW}qYpZ>>=b-)DDYCwP4e)o95-PpQf4m)IHi0~mIss_)F|=|f^78{WvqndK|*cv zQnjEb>OnNU4|4bs>-`G8+6ZYRfn4GNdk~spzqvtBPfSjhp+`MG=kA+-1TlXJFPbg= 
zfEUK&HNg#q?i^>EjD?2|nv8WYgpL^gdowCNQsf~*(rR`FfSeL=6l{LUkcLkBegW-n zKfzD@s)#%d9>NxBgMNh`T1&X8NolBfKwHLgv!A5oaZjq#ZjXe_&Eg+^6GDakoBiF3-?o44Fh~IGDt2*pQWkif(d? za5SzW4;*2PJs0hIvpJ^-Ka?p|APueS*2HzdlPqmMl6A{@14LGTMLM}i9Y-}FrpIe9x_K(j!j2(GN+m> z<`?gtqnH~0>W~Nqsb?mAH^U z?4l4{l8`Ovf7_blRHMgdvbt$~gG|vw{%|0!Ka?vJY45@7W&ho*eF(P1;zLcroYxw| z5x}{OZk!uwtBva)A&?_)QX#;(t11HBe_AX4PAJ)BW}d^P_2uNGw5YzfU86V9*rNB) zC8GYV%a3?Yc&K?KSG;mcJ5r@7&sB&NpE)a+i}x$DwJInVHJ&mZG zT(E-Np?5WV=X69sfQ$e&Ai(8bzgns)ku`90#o&=U&jGwol(Cgc+5~fXU&`2m%LC|d zvqXnms?2974son(krq@rai<9sySD;g{k2?@-gQ+81bz8OPEz~bl1zP%g6EkS82eMI zh#v#kHSEG#^a}hjz;qo$fJ-1$^6`7NYe$p62=AGW#LVTDNz#>{^^@%w6v(>kPy`-n z36^s-mrwJ5`A+EIE&*b!cBg`_z5xWc&Q7-y-pt{LDpM2BA>`PNlO>^Q7}TvcdIRFlR%YHU$X22#3|45r zhA?Mv=pP{E{as)3bx~8eHstGwbtDzoy<3sn3nd$kCkkKBG5OIqr`dlu2)OeD?F*)p zU;fnL$u}kd!hTIs+vIWfu94J z?{aXR=luk3?$=dGO_QIPA}2$UUkQm2#Z^+K3WbuQo4qP#e1&fx_$+e;i!hUxh)^Za zZ{(n`sFT$B1wTT|8P486$UHK+g4P&CCUqSW@2YdDsV8#DZQ`P+V90f*_K3&04#X>b z`g;zC6NJc0&72mQ%LAI`Lz&FEMt${&nwJxirF!EA;wpF<3#pg+dJ5BB^iWb>+J@^7 z>)C=?&!BOihOh_6zp#WIw{QHC+2DZZvyUDPIjRL+NOEp#Fm)~OyPYvh449Itt{i(K z9Qv=m)x&N*vw!A>3Wg@U$oN}1buBk|)E;HHiWEzTD|`&cScmB;Ax_%SVj<{cXB1cO zgCdJdX;zekTNkP1)vk!wb|FZRG!-gT`X#^+3rW@K&D#MrubHr@2Vc-3T~i}Z zLH9W-ZBfBcp>XsY=8hsUTW5Zh9WvB=LegX04qnzVzM&d7mP>xUa|dsTDF86wAFk+) zMbYTGSCj2hmnd|8kLP0F^7%Atwan}ks!coh%km)Fha_6sPg(p9otfu~o!{l$Jh@So zA4&MF|A)4|gSf>Na$`XKw$oIuOU+Qz0A@Y|eBG@Nmg5mWSU2v#x3Zu8ETQXw&vl6r z+?zI1`Ns%Rr$PlkLYMfj#fnG~GJcn{OHB$Nz|H|+OwuI_^6$37H^<)h);Hx4IROwg zBf>$NueXEMh6d^8{tj|X;qWczbj9j1!|*qhNRN^Jjzg@vO%kg@IqulS?ySpxr>S~z zu=Jm3=?HpGrc}dwAfU5^k`9zzd{M_|CbkE~kvC!}_Tv*hf|mxFEU;6Pu!k>tTuJiRK2 zshacZZp^~E4~pbprj}^~rj{EGpy&pmm`$K8MnTRlkF#9cz#xI}$NxGMe4qV$2%M%y zM-}iz<8NC%BG!yhsX0ST(tqrX`&iI*k}3WU(OfyAhp`bHixE^PHs!YtnMNxW>e9Fi z0|YU@|Hr9h%$v(&I6=@85K+RsdkrjL0dRQpp{~9*WMfNPxoI2dMB&A;#(MFo6!9Bw zP;KQ>zG!rmO1lkip?1)!QxGh;%o9!Fu5P~CB`@5Cf z*w7nG{=KaA$ACi0GioinHj`^#H>5jsthNpQPh5ZsiTZ~jgm$)6&Z@2co+bi^KAu9Q&F!^VX 
zj`yWh1PNlwdN$w1`;dD|PeB0*{`eCo@fd$eT*e&&=<^@|d$AMUBW+iMRr?8FN z3}Le)rz^%xqn$gttVt37q>uE@bcC41iRI4ysF*X>jH;Zu2jOu;=!`4`c&hqfizPG) zd{>vT0zIX9)=@AyDive^fsUCJMz=$}s8+@b56D#|MZOYV-NDOW!9FYjy9N4`ghcS^ z(9zqswgyViCXhRPJquiUC(KTkT4~sH#+rD5m^ajxMCt`Z^#&F z^MF>9!5w5TZNLmim&|M~-mg)ZtvBHwvy-YlK}^vSF}>kCNIlbaC{NFLYik3Z-U=OS zC-B|^c>fNzfUrZd#ctT*t0Y)DxVC0}QJ`F|Iap3@NM{L$uQR7nkwt}&AmxD^7=$E< zQvqh31ZM4rE(yvZ6)9B^=7p8923)1`vB_L-kC`7u0yNl~*E^4r)*$fp-&MxaYY=Ap z(M(5Jqa*4hZUL=e4z6K#B;FvPfPaL zOCS)e5V_VCb9r9B!`hYx97q=_X}}J&_aD-|`&HO^PH-4#?acM4QwRwovR-}j=L=f( zmbQ}Wt)zDRbaVI?jl}vFN3pstS@K|=g@dAp*~31vpD>`;ljMixkoCl`)J5+-NdpJO zb_SwF2BfyAF_$&LB}x_T;tWx14x|mCKeBFdbBee*C3KM?x5OF~<O=!GU1!-XMgc*sIP^{ORsS0 ztg+$8WC@Bv-!(x8Ec|lAG?v7rc#rr)?PU3o#_JP({II=kB;lzdNzI5oGKrV!gYXW= zJyAixjMi|Yj?S~T7o&_0at|A+bWnKL4VwfiFioIPk5Q1y*A0#_!Hd5KT=!aRRdV6G z-c{Y$kA;$=i9%ZAe{8m2nLXcS@)!#F6)E1MA0Rz-?wN004u&V}JWRjv79yvFLlt@- z6z~wmM-I@WxQ5!$^Nj~-3g!!erx0n`bV~xMMNm%sN&#xGJ8`2*rx6bKlFaT~?BJcY z-ljdiU8h|VBtr9>JwMNvB_MW1DfzDijZ5P2!x|;OOC`%ku7ft{9VhYML~E1{B`FEMd{Xc8}gWa1RyE23u}7*QVsii8=BLf>hU6y9td zz6UD5`Z5MVC%!rfgct@eCHuyb=fhEl{|`sNhBRN&F+EMSf~3c>2vG0L7Zev>>f`U< z2i2BW7Gs0suAU9eU%I3f$2Ik?%PuXUx9q(${soGz0ErJ(*gGdnqj{JLk;r2|FsuvZ z#$~Wfkt8W8>NLHct>DqsZ18#D^!JyYKI+J|G%lbdptf~90^V&1>zaU7$2kOp06*<< z-g$xRzjcYalgnY_S-AC37TpTz6u8SvlX&K9L49W7z{77vdcB;+opDV-D~qV~F+ZMH zE-8JuL3z~&nsioI%122kc23U*Z!^0=sGBFqlB?~C%}D`7t$*6EgBPQ;f?YdaJ_Q%* zFQC;+8Wz_U0*T<Jy8!$u#3Ts1>sVi(_ER#QdUSm9I}_q|11>1{!l!n24Uji^jh zGH#nVz&#Ti##OA2(;OVgTRYgd71EXOcz#EQ} zRkRXi3&bb^h8{=stcMN>vcBw{(Dr43G;^S|?)OK*uWbMdI$$;nn$XO3_Da~fe12A> zG=|IiQuwcaAAh-WJvB&rb`3_g#b6bG8-!fBXAxl%UlJ(DO558;yggf~{%y+2wE#Gx z#|2#p;;d^|>K;$KfBHwPr|>}K;2(P@R4g8Xoiz~guSNj>2*D3jA5SW` zb2ELRR}k2fBdUVw9k1DGQ{PB6c&~iqECX0u2~Ls5AsF4bKmKWR!0)n{&a%6Y?;v5X z7L1-vh6>tqIU3g~jzg%sBl@A5^Ioxjs=MKDQgU}fIA9FQxn@MoaVvm?zY{+Sp1jWf zzlL-JT5ajYEWtsE9j%j(RPM0FN+F*s``7rgT&3*q?#xMgkaa~*8wu`EY(&0;s zgY~27>5%9x{)20qA}#D4`-z+o#8$b{*Ps02I*#G9uZn<$K|;7T$;O^VuDCA9a=nQc 
zR0Z~W+VgkokP=Bv-vyzb4eG%ae4Dj?qzB5i(tWI9pY?DikFJ_nq8f4k-_6pxD z@_%tB5zBU)UZIc*$>YsKkC$xY+xl2uSFON75db_JX}h-FH_KOEZ4%IlH=q=Rfd+h~ zG6};C!n$SMv4PX~{7b3^!^Z@X3LYpvsjGrnGVc}$eQtvy`7g8a$GRhp5y*E`@Vw(X zicytr5sXNkL;B$YGHjJaHCbCEni#dJ%x5rj-IZU2#sZ-lK&T;+Wy2V@ec+91zCE7! zoNL*~tK4SCN)1$F5-&X)0=`TLCg<1M*z4hzw{l;5qbhkaG^{*{i50poIalxz;IzjA zTz}EtrpfTxk7#%1+A5*qg-^hx32hRAK3^zj z7jFy#Ef}n@JljY3Daoj>XCE7XF$%qkY(ITi-*GLFbo?}ur5R8&*P+y&FO95hk9y0p zdi?AH+qGn<$Q5**DpKw!mUzrC7tAvc!UrOCf_1+F9b{Ss$cfl_N=OH#EVN z&9q>b+|`qKTAR$G>F1!DISb6QDj8gB8SBMO!!ovdYc&3r5~;~(ocBE{$8=!%%Kl|@-ke*43;nyR zJD&QNPXC%`F|3D&hIA8PXj%OPo)j2<2-3eriog>z@`4nd416hs8hPEY?a}7mSg|&0 zh_xqkjzrZkCr3|x(g**N+dLB>!4Wq)*VNMZ0_~O~l9J`Nxsl0(iO|I2k2;5c8?e26 z0BZ6Y{!g64|F$TrjC8Vcr~l4tw>sf4PV6~a&#rl3b!o|QR`jg)+=}2O!}3C<EZSRcEf(xvu7lKF(!-Gi@&0( zFaAf&K<#~eNJ@)djX0A1#b8)|1ka(hpQK@=NZSg%8tyaSS@aZrQZdMlto76ukMG)i zsIvMigpR8BwF_BYv>i!7_ecv3ObfEl_{^T#V>Oxe>ADh@0XP+pBHadI0AtB@x#yR) zv$F3N=dXa)-9fs~0zH{u@Uh~cmYvlS+=tc~XJz9RV2X%y6*iJV!WjAazUvQ2`FnPU6{7|@wdmmT)T@FSw z8EM;SycokLi|10t8wAN5f?IH|sxRgjwh-EM*HyEasRW_F$T#Bn zXjB<|`5#UAGRjrg6x_k|jRz~j|HAxK+qvn_9krwB&8WSjcY36v(sTshO+$4zy(fBI zvDysJ%`YVG*k^C-k;R{@TU5VWRn3mVa-AmezCv3?I#7!-=2twTEkkDe&UP_{jKU;{ zU#0j3Zv~QJ)kDLD2?aY*|M+ocS6wjyN#`Fb>hWrheX$L{UlMHoDFAIlSrL3T(J zrAeY&m=o8@JjR4%k+Co4+MDQYAXlD6NmKhjOUOx%ET&*hmFx|)Ke-J@_#M=b0J!fg;+1^7Hf^xNtJL>t`&s=Jr)qVd&Yf_Ly z<6Oqt05s0l^vP?(@nxOgw$*-|0)08j<%_-6ko4;uCOTZ1ThpwEitW9mf`cS9o95{N zeR4rH(@M#+fe(|0po((ik9`J@$MA3-4W2s`C1Ow8o?rUU3}5Qh2ZQEikRJ#naG(1pj z!X6}c7D53*VOICfi1K!P321oad0q@7st9jlee7~=(Pm18G_7MiToyzZU2C!oIWhxg z7H+;%ReN3lgJ6^x)3yAy7QxW*0#v*WiV>7w8up-J`<8X~I+$0qeB@auLE3iAV5e@@ zbFK+?syPRz{^pZ;Nw47CSlaPjW3tM{^8}ob`>3Url!7LG<`%O|9>ByybTPCpJ)H?R zX>_5(h=^UHq}Q3C+eu6rW9+)aZ{f4vzZJ9_6TQVmt-oeech!T_f-%5J?;kxV9o8?? zeoBch)4`d!)Re=VJIJ|V@13@N%QnySQJ;)yrD53%poj;G43p1n3fvwT1~WnAmHT`e zKXgsBpko%GNI)5@DE3)jGd+Vi+2igNu)RN{JLE>+x!GeO2b=Q+{PQgCq1wB!vz3p! 
zBBteXh_E?J_rW=Y_kDkrL6FWqQ(_Phv#GTLDq2uVz9zc5GcKWlCgSN2>3^+#Tilml z#}~Cqj{WG_F7e+zHalmMe&z_Tk=ww>=Tk>EaJylMAPDM~d!c|S5EY7jpe*RhJ!Cw* z@{2&ogI|j5hR4|anEtZfQy(=CEZZ#Fu+fT=LLK>Y2k+{;)~iCe21BuoG9wE6%f85= zIoj77CG!omuIwZ7?})S7&E{-|a#P8kQFYFrUXNo}`O>-u?P3h;wu93Y@!y*ji@x%+ zatapz58x{dksgck^Gc4diWcu03kLfFQaClfn7EPITB%R%~Av&d#c=FpwCUR8hV5%f-r?31X$PkrfvO|i3A zc#91f$`JbAmkfbb5pG;14UIwCm|YHoiyp64j=&+CK_2ECnghf|W939RBHYKZM7J~Y zTk|GewV3tf?sdDnG$==!892BLRpr-!WYa&r-DPU<9sJ~4RBkf-9PRX4-F{Aj-3k2n zXm02oexbVEceSHa#U-^We?Ae!WM~_tzJswTN>M2&RjtTew5KNzqrlMfYnZeC&4sU? z$N2HhC{hpU#?;7JI8caH{jyf3$Bg&?_AB5vKwZ?q$~4Jp7rPSrxhsGhU_ggqwteF% zwaDwHzivZJA)sJ}vtzcig0U)P*mv>^ztKMe< z2im;>80ymp*0_Pvmr2?RFiZwP7JFSWOZ^{3Pd7A9dA3wbslLhVy6zM#!%ohwu^88m z-!9*{naYk?w`2(0n9U?4ujkw9T$xCzhu$jUL(5pTKdUBr?}K^d=kXnNBeqa5JBMyT zrp>1W?nl*h>zk7wct2#*f?*_a<0Z&S3Ws!tcO2*Tk`nIx!h&a&>=<6_fT3>ZF0wLy zG4)p(aeL+bfxQ5{0T4GEZkJaWE{FR9a90DkdK!Yk@fsC}ZKp81RJNU_?HDZ}b}wZnpwk$Lha@EEozo#gjKa|rW-%YjCR?90fDCK>ha zbZTSzUh?~`<3kXV+{*;q{Y;|r&wcLvDDlyDu6Y$yks(e^X@xG@D6>G7v@Gvu|Hs(C z??+ZN7he%{dQ;MRatc{JS%pv-(7-}cvFww8_t4IlT-bq*&=T-&y+ukFPcV(dFwp8X z8|+vUR=#995)BRh#lH6(r%29x#H7r)ht2|oelSBF8#SN=N{4HppRKm&BL7P3K*iQ$ zj1Z(7Rlz9TSZ_Zm-lEOowqRbe`Tv$2qT*f{L!haJ#>La0J-P?$l^RewUr#2oj=dN$ zFSF-bt#2A2r4KbJcoBgf%XV(avK_pAAZ$&u?f%!{a9g34?-m%cgp?0@;{iw8cn-^U z-Mf~r9?AG<2iOEN;eIAN>ji(&ZttE)bw9-rBOrqpWAylfK z@r25&$rIEce|X2G<}zsiaZt-gkkTM27fzS63w+lbL(hiO9^)GXgi4wV`YMM&!tdS* z>RffK!SRQeu{uU1Z5!WIj+QW~g_NUg=I{SOa#H5Ct`8?fxrA(yGpJ@RT;s?DE6Sj% z8-Cur_(%{b9sKp8g*0yF#Xf@d#j!`)(5013@6A|X=U}^y_j)VnfaA3`7zqGy=4Tct zQMzyRaX65G%l8ab1jjcHgv?}}r2cuiH0qxtwPF(sa6uplcl$}B%Rk#47AxKma&P)U z=EqfKckKSJr*InI;57c2KbUfY8KfRvF18SsKol#6HE8W`8cO9Z@Uu=zikha;32D+vB1zFcfAvASXY$g>J%>P{9RJ>t%sJe6 zn|X9>Ns*E~r(FEaG352rRSv9&v=ZG+Z!?i8Q6U;N(3msqwD6i;=G9aHI zLVLvTR3hLlh}vwg|9Ld?yO`@8QQs(rlyzVCNhQhyO-8r@TGD|^Mb`Vi>G;>}!0z-| zCc$Vm-!}8K>BvRgOwVi}X&)NNR`5*>@*H;t+_nVA=(SNkGWmA1paQ8!Mnc0h1!Mwn zDZz+KocoIJQ@<*Y2GJYYYDnYjw%eWexS#kti&-77=e%Nx!gSiA^@jSYSFi=!cwbg@ 
z@P#F|gMFdmH`>{K{Gy*#llNR{E+2>Mw%Cd7DOAE-Nf`IH4cc{?e56N!eObFBqkpi3 z$-S<7)&GN!y5r97n7{z=KD(tj)@=qI1ecR4Cktw4+C69jBxNXHfP2~`4Y20luIegd ziH=zPeUy?!cxuSGJQyOa06&s@4;; zl>5(Bc36@zrE}dcxK#|rAEf2D-YpkY1F~3yCBH)r*7{~d+x!1p#pz~9qOY|b$Jmg(A>oAaqkl>3u-I<>?Iafo6P|2Bwd?KZAv0~{tE>CitywX|kwtHH~t zm=*X^bzX+VDP&Ugb+Km}49iWBD>|dAosM1Vc3`jF640n zt{*`%A&%_R2!|^z;AibZ6IWfNgzlB7NB4$bO@y)8DpU@hHxf*rw1Zd0{`f$#zNzov z9eXP0@N$ZQW{rZQGJEzRdnk42%7^v9rLKP@6XNS$AiUZR<%gPXSg=u%yrP(u_OnW9 zPafD0MFs9AdF_*rRD;U?Wi(A>qhEmXYI5}lf%`*mb;qFY80a!Z(EY~oLG*)ydDny8 zwWAGT#t5(d9+GwWf?cZFTlPW-;a@fR_{cXfea0j-( zAb}4p1yfiKdc2pI+Rt`Ge#`QFI(njyka0#j#Z}F(u6ySR4aPjV=mw7l=-exsQT%TJ z_;c2laHi%S1jPWDPSn?>xPc%Jg3HvGAAX{ZJ5_c)tpzS7ltK`8(w5)WOtK#zRoZj? z-=H*Ee!A#7cpxdMiKex_8e~H-DD$QAk-Cz6XwYt|;B?=$qpii==-hW8LbwGH0{z~Y zNOJ=o9KU(trDwOBFWoPBL25D?+i;mzYrhsh&LVZRkm_fc#rc0ikZ-O4b`}M$IzsIRPKZAGkxPLAWKTLY%aZjhKPk#E}Y`=wWXcg z`JH}|xT)|)p6N)(A%5DC%H~}W+R?1%rXx-CnY+kY2kD(>w4KbCL(Oqp$J*zPL*QZb zplF7mCLW1!iwe6tCmXrG=O>Y4WoZn4dr9n(Tj#;G^#Qzn_fC;~u!=c;U$Oon=ohAc z=7M3sP4Jvh!Njap<)WY3*sG_2SN8h|!OS1_%7UTT<^;I+J{yjk$RRw>I|igJMWm&C zqL@W!Kk4IdAE;8kjzFCf2XdYZJlrJ1+K@T&ksqqRYqzCP=c3fS>A1N!AoYSgepXR~ zP;E3<9E#JPudqE?f}1GLv;Z-OOqU;-5>C`LF!)UCI>CjzSyg|5 zIi|)j2_6Tf&7tSFcSTgz*Ftx;Ghh(v$j=1Frj1z830DP&?+bk=`RZ3LlbqBTVg)|% z>9CN8X~km|S2D8`s9nP})sZQT!?A7<7+n2fivokrne?$YA#%!L`BF8^A^%=?B$2co z^K0@MtZE+z177wBR3_E3WHlb{HO!~ny)(qquHSf}f*(LM8^T^EF$vedSM>-~L!q=a zFYLN>nr*%(%z7khyO}RVaQr;4k~A{6C_$1!&Vc;Ns=E8=jo#|FeBX_5LNr5Yb5436 z1?)fi#{Wl~>D#Cl&8x)vV5Q{@q!bOp2Nsypj$}#PfoceDJv%Y~46Gsmb#cpDxn67e zh5-#*daFKx*;(ezj4dkVy|h&=W6h__#{PaXhNI>mt(??|axcd^nih@^E(5$Q#?Y6G z9=es#IIyvC8;J8%Eba3r-%G4S&Z$L<%s@Zhh15O(6)BYrJCv!4tR+L|15O#+?7&T& zKaKAGFs@M}-)|veKbYV&`VvGQi!U@8U+ zXm6i{#GY*Zxcva!?8tSTT>sAOs_taUeoe;aI3T5|n)-NY(-P!C(2%K3+O7zC0*A2Z z1jO43Lscog$MAn>k2|Q8v*RB>{aZDA-%*edra0j3bPz%_g15T7<6#x{&0WjO?#jpA zK&d9!c(ads)z`tgG&1-GBV;;5Ey2QKST*&=k;+z*nW9;){~XB`O2-TE(gNVKFVsXb 
z;J)w;7t9Y{xBw#uFz2JNr;FtDThCe3+Ym2iECLpNI>zCTeGBwcYv+2+n!$TpjM21vcp>PUJQTJXRw&H{;8@-Mj^Q8+aZcm*|&U?sR-dtgDAbG z$&Z|c!3x`km}`0)D<$Ie*m9wD_+9jpvKb0mmzjh;pWDLy1Vi_PYVkgjW{^+)4tM+7 zsVawFRnN3*ya%{Nnf_P@$ATKk*5VGij1_XxXwGbPL^&&O3KKNL&%=I_C31~{*ZEnl zwjo3j*o3E(aNMn@lG3>Ef7;w=I=;|y(9(V_NId;XZmjD8bluhouG@kdLky*luBTPB zeW-q>{XS&dh_4M85!)R!MYHox1@BGa`+s2Eq`*hki}Z^ZpNN0-4SbwyE4ujhVmnOK zf$<|zA(L37Cl`%|luvh6TvC6@LFFo9&-{#1kdrmch-6;m5DJr34p-)!lFiq9ej9*^ zt3AGdRn`Dh*;A?L%>`booAXzg=BwwvFeGas+JQVG{%9y=m%<1uE?IaKQ+w+VJiJ|3rwFkxhWCQa8eL znHc~nYx%b6f9)E5bhqYX85`i&^Bppe=}#E3uE?4lUwk~FOzUz(RTF?TYCTjKpaBsI7BJ0w?kN<2x{#h8`0MkMjxbaKB>Lc+!f(n zp>it^%RGrypm9HE$%`T~rgzvTpnHif0KUum{smuNfYYL^e@8LfO~zoxq<##Lu6QL7 z=`z?6JSM#B^7|Ws=K9N^-f@z%rQmCP-d8;Xfsgo77ZJ<(UuZBtb9XjQpzVt(i?4;r|e@WKoejf!v` zAU*`3;5Brg3r}1j0G!-iX7Kph_bwPn-0>N3$3Nk4YJ@e-Cf^LBJ0HWZ*)YKD z-Wp;BAX`Y4fUIny46m}fr3bv|RS1AEuTX#_Wam48;YkJ*v&T@GN`mRBPKrv@k~pqT zBQo-#sXDp*tc&l2YVW<9Oe0&laoYaoa!A^%r)fLtKGDG;M;_3#3VRNw(%=4vJ?7qm zVB%Gktw$HM6L_FaM*E$be^wmY2vT2)nz_C4IWTq$v#hb&6#-y+s6s`dQXk83?W*o1 zN;Hn4{TBq{N$5oj6cx!$X`3MpEfg0xfmgrIg~2t|n_N6@XaF)ug*`Vzdmrujq1f#L z&9E6Y^*^*|eT2!8^Czbv0K$fkLRH~o$XQ*AgIx4|36-h5tOqEjB(hwqhiy0dObl0^ z5w&WUy@OEF6-B)i1qcVEaf&{6)tF+2=A%!eYjO};SA-iO)y9Rw9rPIx)}LGnS8i4B zJ{c&owz?~rw-_c|V5YmPalqJqtzSyMXM^y*RU%CvB( z9!Y>{Hpi{W4Et%dTo*?WLu zbRNu*)&?aVK-)Wrkx(mr8euj5x8=7Gyg^XQrN38$QhRM6vM$bO=fd>%4qo*(j&WbC z-Xl4<1|j-u2d|hlyIR%K`X*o;Hmi556I$v#P{#P7!d5ksyrBeMcmuC=V_hs{P9BD+ zuW%NPyC7dMkGjZ7`$_7#4F@a3jX*`_;`+fjup|Us2bXRB0Q!!CPjg%Ok+z7^OQ$Jp@4>}I?v7%X&1AE;>|4kn8TTQY1%QUY zPak`I0;_QRg%s9?u;RlrM9P!~LEWwYglS&23WNVs0@Rzq$oZ~C3ptnH65DwT*%Z6w z&RVND@I}g4Bgpyu`z8g(Y!%kU8vw7HtgEeIVBhuMV6CM<8m#SP*#T`1W=-thB($Bh zTj`nuMtOZb=BEL1i>2g^7j^8N)|=lP1heyk$>ocQ6Y-jonvdjyE8J3fVLc9mmv8uK z+0e|5)FUMN7QPJq7~9z-t^2JeV2R(59^RFduV8~Un4bL^UFT2d$R-}|S9Z5FK zcio!bwSA8UuHxI8yGe_xYn*DvJH^?31do@hE38kU04rp1GhAJf#NYaudp1^qLEvYD z`sfNCMS2xb>Qg=kskq}wz{v9_Y|kI2E9^Sm9UENJu!C3okpdU(@aE4)=@-VDW2*qz 
zwxD_^#iv}V$2S2j{r{Q7v$+7n%rG1oP;PQ>*Lh;=N4Sfa4|nI&0Mppk-U!n=r-HI9 z24%SoFXRi^a~G@t0m^Cs6lmf{i`rFMxmwMA5*N3|3n6+sikf?7wR0g&u}=OKz3DxI z8U9gb3BLl435CDFeqVIa`9W`ky(=`5G_X1kEN2%qMQp)zeafIQ0ojl8;+yA)l(P*2 z5APJ=hu~h4Qu5M2Ay*+7F=pBoJVMA}^cjX<>5`J9G8Sk^sV}k3>r4+AOXg*z|H|^< zQoikjBe?<^Y4qcT>YS65U6%r2h&Kr?2sg(UWS(0Z=>Z$TVm)s_Z{dv(SY|fnPynL{ z{*vqq?rFy@O?7=MsN49Scp@kHSa=HdoQ{Xxn~>2fO2y78%J!liF1-`$EqYfpAd*B^zq4TA55jScfD|;{_V9<9^*2 zc`(-bX1T7w(}!G_e&QFEoUKj=(PFxC(dacS=OjaV*m9+F{!?fTuztSSM6&E7w%>gx^u_$}_&Q6VE~=$#g%8nfcqwiC zsM(pv;Jmw`H@3Mk(`Yaqj%Ag|`H5fqiLYhY@~sSx9C~88w3uyEgiO?2v-!&0Dwy$6 z#u79cov-%y$gw9$;$`T!>Z}1rs0ayi+gqWUySMP8wu3DcG>OBVg{lgI&TPMJ(!Jz!)Q3&+z!i+u*8A zyaLk6c*sHCE~M1U0W+Uhzs3Dw0l!c65nVRg{3~!fRwS9iOdC1cLl(Uv8`UirxfJ2` z7C)V7XN;;Q*xARr?Y<)rCCE8qIobeDlwD;XTa8X!fw)7z0PZaXZh9uWc%$a-gIKu@ z?ot_l;rR?gCq_N!&9eZ%y2_=g^s#*h&>e=r(f%QP`H1SHaBaBf zS%2IO)X5d8ljH9fu9U0LGl?3M#hQ!)f-$J%)Yz3Ta{;q-O7%qIgopdrsDr@VlVr%; zd4T-AhdjM}!>RVdc}&3_)$Ag~Q^{i95bgo`eDeTkz2S!Ccm)fD5c;!~`9S9dA9XoR zh?FRqklDT>vR**Li1Axiu_}q9Evhnm{RS@z=ou|qwvDIr3XO?$iAwb-`=d~+29(ip zh6=l}OmhY8c}W5;)Nn|{Tm``AHt&KL4!qrcbj{l0?a5n$c^TKFc+HJ^E&K@YYk(Km zgxrlPQ4)2TMp`e~#>;d9t;W7q-F6PY_FhuSo|Nl0iFdMvRG=T!a`xJ8@NlUaD^s63qrGl9Q$fT~81%u=F$=r*AYi4jAYBmuZ)pqbdSn3j~>x4WW1f#(y z2nWHKpIEx(AN#{R!e&Bb9`@CGw9ng$?xN!XoIJ)^R)2*r6WvpXuv(ZI{HD&BhQ_<&~ta4tBHviBQ=m@jg z#Ui;f_Qg4%11=IVy@MK!kFvVCz5K{*bWaCRWqd0%H(}molKU2T#PaJE#nNkndC!mM z!+NA{GZEZ_t5ODldk0G79O1VYD(**NbOoY8WmHYAXJ$dvrObk&0CZa9uixeyq^bMV zTPOf*21v_z`UTF1E1MlgRmnt7nx_N3Hf@mEe07}|SU<)}>mhT@VCv>UB@X0`k}xbassxZo znyG1^PF4MfFu*zuX!3zO^nV7~y!qc!lX2uo#&oAfAPNh_8~f7eA%}ChF+qF3#E`Ye zL(InIh*AzpUIL_5wH|kWew-3SI^#*utT~5XZmVWp^M~<`qD#y@kz2|&hAPr(CORw% zM>qb87P}OALw@5`e#tFat)~n7o`pOowU~^BBxY}f#PlKXsRkQOY46TCP43+nu zg579c#7L5hI+-IzWKM5P!DsNd8S*slSJy*b7KEVwo2;qrTt^S-EH+%a`XHv&@qj_p(6tVJs9z+E}Ozg9wEv zr)hCW){01kvX!mH*1n7!N=nWVLYpilA*Dj9Nt7iO6v4_3nM}d^A(=>R`s4A+XM6>(koBtQV$(~T*pK?pl^OV=aNm|L#JB)SDy2@!yvf%qCkPI}m zZuO#vrNRUpv1A~>AUOo?hk^_UNNj#Q;_i_AE~AQZwVt;vJ=;H^T6tfVo!(Vi?xdLV 
z4pWoay+$3W9ny{&yn62`qp-t9!~tbXF8<}rsvgNU#tE=E=8jl6Wf89)vF?cjDpDKr za4B(}0L0@vI%x&tRCPG-ZX$DrCI5en#+{MZ7~aaG23jp7t{u zh*mZ^mGbm+b#MxlTSL{9V<}vxiMv5{IR!R(bINs$MllJv&7uqVM{Y!fwh~TPF@zh1 z!8AE#a=33q%@Kg06d=(MoQ%hnh0z&G80A@-d-NTT4!2yYBhS{##M7nD5TuT5k;r?2 zgd4amBvOxW?C2M^3;9cEBC?xpQA1+APT33))S(eU)w&>8;kpm&Hyu+cnYuWYC;RAo zr-iB>hxw#Wju~AS&MPA_Tml>-8iNQ8JDf_mxaHiCf&}bB?fRy+jf~3(fetulXDJce zsy%GZjXIdlaM9cib^2MIFVEg-&(hOumh~9-N!G)Dxn~1LrVH8}HY9OJe>j2&VKN5$ zATy9p&Ue+MiMjF06=;c*7&g=jbZj;VqDNYE-wM~AueqNdq9HW%DpGnztoa*A9%=NHU3{(Xm<|<@-i7{h;uHAy`%0SuCYlD_Z5)pJm}CO6bM}+A$ykKK~6bqDyv$TJ{tKYM7te>1y1BdlDrpr3NFZe z*$UiYI;LAB!!l{0vWtMfCXnuuKx|8vQhMt(GcC1Fhd~Q5G_}u87pIQHm1Uo!?if|& z4({gEo+;(n=|dR{WgwJ!N1C}RiF@eC8nXgF2YIjm0gq%u9tl5v|L9vpHE^iHyJgSx zj7$;t;>-4=*$PmzWKMNmjd#;USt_mUrKUS5V&mU@{i{Q`Q;1bx?(&*;$`4QDi0m*7 z8+1uTY3f&mI?@{}j$&sEy)CWy-Hy=ZsH4l^VP|Sq%nf-34jG+mcX^p}BLmUset6@z z)FKj+uH*b4xO=p1UukDko@ZxE#WlA@w#jlSi8ujh)P0^Kr!|Zdu%Xb^qRCCEv@73` z_jt2(H}`rXSQ!Ool-W|TaglOAm_xX1Bo|>A& z%+j9EnSwg6wsh?-c+IZYob)hjw zzNPUAzr$BQu`F=#1g6R8dE6=0)`Ep|>a=vEPbVtwVmMIc12WW#iz8e6o%S71P)_*_+A%wQc5y+x_m79?67t{F1&}`*)lR9{iQuP zTcT>E_p*6iNop16gDexpjTbG#Y78%DcPwbIy`kr-x>4=yVV37CJYhgZ>7MI04Eg4> z!xdGu#kxm-h)@mQ-6a(1Ez~a-j*2)P>sx}o%k?d2>EWiz$?OAQ4r6vaN&n&{J!RJ6 zaTm>T1Y3Cb&ZtsxB}Pxvf%5Tba$RZVUA>nm&s$3D4#t=ATAzkMZ^1(2*ImkC39rSa zf1fivm*boITPHZ<0n=6vmJYf=^f~#>&5SAq8@;kYhRo z!%6}wYY2L)J@#4i(KV*}e1d3)KVt0I#^?{c_fAx;k_QLMnZSAFp^-LnSR0$yr9qf2$#{5MnpF@#L^{Zi>J7q@ z3y-&+RpE zw$C^?pZB`WfT17SVg6{hQIj)K+27 z@g-kE-poaVii!xvE2j{jR?s3ExpZ11XjgxZ+>mk&Kx zLW}EB0ieo)&BFODE9~W8GFc0EkSfz;`~bnW`p|G3?m2GFbFhJ|TNRW1x~6n*0SHVL zazK;70jYSGVa^uk^`(zprELmS*|hHgw=x!?3^y#Hk`YyEzNY%&-DX2)bo>VMxeGD0dY+TP!hTaqH52}S@88M+qA1ER++x~ zt=hOV(sg|JEl8|B5eq?}5)j|I8hcHV(uz3b9az+5H1(aQgx`1+Js#yVoh=WTF2XCm zepkHFEnl*F)Wj-?zbs@vZ>hh}nsXlnRT-)swpQ4mvNG9aqY45@4`uKaqa<~42EezHLt@!&r^sGA-lCqGM`ldfm*sRXZa9@WkZ)KU z8lH~h2&YwMVik(83YVcSvtOjSQx6pSGP(}Ak8??(yGL_NV%iS4NkzWd!&}Zz(}i3} zbC%_RagEmXV$uHSz){(+|9p=lI%OXTxoQ0j*&V3KO2yONAb9g{^!92kI6BkW8Zuau 
z`3wup|CRpUx4Hg2>&{GrMbN8ZCTg&LIMuuJ1w^4EbY&|RY`J?Z?Pf>lYUs*j7eKK~ zL+;Rv9c`Kh+a(pF?`~e#{nA%}Q#y?N)BGf^9Gd^&BA*l@zQfspAG(=y4x&P1-?1I` z5P2+;UZz*L$nN5i;r^(L_e@OT`goOPSdJdcTX6zj)l7Ocgowu&mhU}<+=7LQn@;$) zVPQbZ1VU*HZ9VDK5QE7skSy)0sR33T{B?{Q&sa%6qu_YS{gyV8e~&f*H%Ld-E(O1> zZ_H!%xZ+=bT3xHoK^N&YMOpD$*pO*f#jMu}JyPsK;b|ZY0}}zB35o}tWNQ3QT<^k7=gDA**t1%_1vpe}l5*Pb z-p)%tK+;79h_R_qVR_-3RhT-2zbvS`i9kRpcHQK>0V%k7{kI?|c5)Z&Gy?!(_u#4t zlO`}v{Y~{ckl0vP>?tw+D5IR$_98EB94T*AUm_<)&lL2hYZxJ?#w-2{*%tb>Vrx!( z{Vlm2Yd<+DUx8ucOS2AOxLnpH;UzY-zn`+L&*clrJS0Bg2n0^Kf)lFc>dL!_l_B1^ z;sb;r1g`lR`@bgoWLP8j%PtCIAo{G2S-W_x26WitvJLWCmT?vzz_Pz)n zi`I|+T)|c^eEy>hM@(Ez1|v*OtNBq@As-cs&A{1SKT$KPwX!+|TX9W)*9G07#9y3M z+n6RYMAPyFru<BteZ@g4%3NVk>^1k<#3Zz#NcJ=^jLINJbovR3_BM8G?lABoMaZZ z=r7#_!2=qzpETgv#V{DLJ1kAPbir)#ot0pPUFB3R zX_9&XuJ=59lmImbB*wRJZpP~k5ZF*3=0$etV1&U$eJx3Rpan4*;dS@3Zjxm0E8|fh z>7i<;Oq0L}Six|O7Ip8h_3>G{2ks4+ECvmlXC69?CL5$}VKD48ayr>J-Zpow9U^Rs zxMkogjJuv>LIYjhKKf9YVr~vx$)5;t>wJ8Ur^uEM_ngo+Uf*Rfw*v-5T|Gn=$$`{8 zS{$eW-)~rp{DEmX6XZ}a>zy_SZvHNFLgq70s?uM&QPL|=8Xs@-%v+AY9He-xuX>P` z-$^YU`m_{agy$fubR0jAMBQ;Ds5+*}{7O@cyXB8*c3pFIe^>Mgd ziX9pHp%?dz=~!2U$)T5;VfpA=;Dm38QLL6@oPQ2%cKigJx-DDhwW9Kw#Ns_b3SNez zriB^b#1;`iIj$!STFs$YE&ab3Hg$TVWH2ql5Qho|T$z}x#RrA}PdS2o&u76@B1NJ; zIErZL!;Kvid~oK<;=Aru=}mQZ>=s% zx|FvfyGMkQOl-e04SDTdl}P{OjrU_nuhH&c-fQ%YxE>y#ZD$vfJ*4Fp5@ zEuR9o^*%P#ojtpqT}mG$t~HPCB*k0Tvd1kAuO_VK?-PjMtr0NB5Y?-(5Bm?{uA?)iB3a6t zQqbtl(Rm=Lo8_3H*ID=1C}zSo*jWu?&$&OZ7kcTrDa8hk0>oi zl#sf?s;lR3!W)KOyxshkN8ft@fLd=EbR&YCZh5Rywo9Bd+a=Q#mD zCmTBouA@|*jregJM?5L@?vtoJY_l5)MwZmW<)mUPGsgXcasMT2rGzh!&KitX1Lui7 z&R@C(aajiMeU-4Nj!o_!V8;z7bb&7t(x}oVs5iYSMAM5QDFQ=X+MN1F0NX0U@79TlsA}{l=-}% ze*Ji#TLzE84zb)vurrH2?x9r=291jO^u78-E;|jDdH|4(i9X&*8t;V0B4j56=C|&r zwN%6PdvU~91N}rU0d<}{J?H>c2A1+h&b}tu%RSo#eYs(Qc-|lHyd_lP2)g%m#tyH* z3g@`u3X_nO)7H#nz3t1DC0`n0)q^ULMOb)-QQ*Ts%=C`}e9ebVsoRo&^^^>?aM~!g z4+Kke_vO@ZgBy7`J=(GZ2vtIo;|I{Rt71ylOWN7b~VY_d_6h{MhYX2SG!! 
zvHL-6_R>zW&>_y4mn4Yml=J{+tyE%p1Ybsk?AFON&eoT>P4X%3^lSop6EcZVho#^k zDqa(62F2Xx*teyvMz67HUF2FjX;R;ZdVOOJ2|GiGber{D>j%L%m35~_chk?!ASHx- zQ+P$hAFfmCP>0cW1X1}7JPWy+5+qTvDWsM102l}^;qm;zCjQ#42VwgG4oqfax15x# z9JQ88s|k+{5WBKAQLPZIP}XaK16?;ypj5(tD$^2rbjJcH8)sD<|8gIFfok)pS~rX4 ze({RF@~2ll+5+VJ0mpCRODaWXi4;1SRM2A5qg(U#eob2YC;D@GR5gZ2Wklg&1aZ$% z<>@|1Q`1l7U-dOdyZV&Tja zWw|I7?piaMC0$c+uk&@T5VFYKuQ{*iZ8Ccj0nSI*$ZsfMNfNrGpO;~k?!eFqNig-

QC6MHl-Ni#XX0jMZKr+v z{y6%qMB)`XHXrr&rM+?9>lr8MsTn!%AN0KUJ|Uc(*feXT%*_W?xASnksr6%Q0_(K| z|59J*GY3ya*7O*^dm~K_g&(*LFHe8r1q)W6hzf~XVf?$$ zis&(Q6UD@^%BXEzcb;gFWjqh#Z5EAHBLkmQpg;mBk9u>fA*!L`PMzVh ziJt>vkL5RZaHZ0$GH!+RIwW@=Hj zm$pAwWLx6I=cDAH2I}fHSR(sBE!?#lrkcQ)|@Js;$Y=QSXCj#tLbDn6hc;yReyfcK%NWP;xO zRLc~5s@+59+TCAQP0#5)>_aIzFS@1G!5XIPylAlTv(yq)n<$~R#)>ihlZaL$rt*i3 z|H0%%nEP~*wL5Pg|977{>{OiCX~@d+48a~Cs(kXXCDQC?RgFF1n%3vOQs|`1xAu;# z*%OfcQCiy|-F~bm9zY(JssUzlAWW>GK(Z3K%oASzh_I4`dNH~k^fi1PO%^?lh_``} z-Y<-wGV_WM5m8-an8def^26a_@%M?i1SYN(x_S_aS2n^;TO%xGGTPmq`PX}M+`KS$ zahNuYARN#qHT;n&RDoM@qB6TKXf6vgu+-2-VpWULbWzb00z0)SoA|Q9U;ms=pD1HO zHm+yTk$xxL=N5?Iz%8LYohuZ=fjM9gzd-@YX9Vs?FC6*tC$ey~A07PFkIq>L{Rp)m zDH6V>f7ZgbzX;b5>?#+0YL6({O)-5yEKSZdPrs5(aI%`g2@x~<7nL#2CK?oiI72NK z=G&Fd6PP>hkWDA6mG)Ykgt&w+fB>XNK*_${2WbjDxO^X{agc;_`&h}7DT*gjTbd6L z5(XR$SO>j!WUo3`tr^%x|3ChY+K!1@=~r!0~nOI@qkQvCv-n&FuKZ0UBvhi=BU->Su9bLYwA$c<+g z$6wxJJ*{kKF?-C|hleM*R_X*bULP5^XzAKmrRioxJ?`P1?$p-#MUkBsp5T~+y4$z9 zo=fS!GKyzV`C8~&Rqaf_efHA~K?PKwvs@)T-+#n8$a0G%6Qyh`|s>i zpGk@7pI^K;-4Nk-=;fZY3%8QEZl{yE+ZyD$+w4aran~zi(j%pa^U?}}a zY4YKcJ%tSSMGe~}B6dzG7Nd5`0!Qq-%UNepl-hSO_@TIEK$*8v_x%Zu-1YLf<+UoA zuw#Ndscw9A?SbkW3KY3Y))EmLM|Isi-7t&_vUybZr6YVDmS+T(=iVEev1{ED=E54< zCU&i)OP=_hc#6c{iQVA|2iOYkGBQ5J6Iib9!(cK12^aZU{mQaFN$p72gGkRS9( zU7J$JlfUzluv##$uUQk1+_)?Kz(~N~Zx1(eT_8W*+UD?R=1!jCIBOqDx9Ez5z^7PD z<;_w@iW;+JXID?f5l-*$$@^Is3Os8TUryrcx$8X?=b(Qu1MBRt2M>zVE$3-V=5Vvy zn=;HC2Xl{1)?xFo-RAUmhvinP!bIBdaz>*`AYR}t!F@B+I54*=W{C9FFUld2J{xwu zrj0o0S+6#gyteH(&vE$XY6-o>bKd-B!B6$w-74p9-z?{TTl1wHv+NAsa!wsAbE>vP z4%Y{_&1@LXK7-p5`~IgPo#C)MYpX`NjAcV&?HLl`YLmp^<@8 z@d1GSeql#nc}rMLJ{M(}H?c}7{s4QJR04xqH|8_N@%Un=rp>5EtfHx%p0uPbqo7S)6&_&i=;GR{wbA4% zleqXx7Z&Fkyp|3N(?3~yb$GyKcN=wuTI_O~7n3B%G+#AQyHPGbPW+bS8eTS8cm?wRci`R4I zumpMboJCE{oBYsf!>_gqF^@|#I)UYfB*Wjxv+5=glkY@6x&T1@7sDdzu!NH*FVrxee^jUb$-DqK7YY z2bgN*NKrSUhe-3a;eYg9{nq^EPWCOLCwF5&iFY5rr*?+m(%J5n7W~WYm6f?9#GgR8 zO}OX!x6gwDZ=ZoHomm{@kbiPvI3~Ij8CWm%o3Hsq^PaUp*VV;yCh(1$agK 
zc!o)crCPg1P4NUOI(cdm2ZBD60gX;_RmW!Fr2M((W(Z#0tvWA(MgI5^i>$8fnVTy% za%V>7zjG_a6zx7uN9OrZ+Wmrq%}$K!$yJ8N(TW&VLA9@QsKS8B^ow1ol@K(&aB?di zEqb>(cY8c}8TMX-g|Rfjn?Z8aX(L% zbQdq@BU#lpI#4KurZzG?K3^5C?(~IiU)rtRwbat*eF5jxJhgMPJ&KYHOX&poh?N~3 zcj{#n>l+XqWBpz$ah>q`SBR|z+4g+mrAZ@FeZ-RaIoe#_ZEv?dn>CtblXcjrY?;q* z8^)<0lVeU_S|`o7z$WG1Mpn|qx~XL4*L}&YywP|sdEY4o@R^NHXRoJ=>oq;>HjJv> z=7$qFQHX3I`*j2RrG!2E$M!LeD%vQkiRfkCevIcJ0w$^LNY)f#-6_j+S47J#K_mryjpy z=p20mC?WqKZsv7HfmIc<10K3jm_$$iCCc1Rc*`j=pQVv^2F`FckK3xmO;O|6?y!ki zVnZGd2gyWemGHL5Jm~Nm38)~7A&qQd3@JxQNbnWR*Ou^i2v(pj#J-ad#P#J4n@MbI zG!=agqR)xCI(sZ&(#s-xjAHub?jv3AOj~)F6CuevP9<<+)my|e2A(3~UnOp3k|&A6 z=l?Xe+>0OABli6n{h&0oVVL?vVwSmz%()#}+LXrHXVQE(vN103SwH?WjL@4~aK5xJ z6-CSq_ey?<9#a;hQL2rUWHo`923g=%z$Gg+?SUGra4iQ5`o)cF_(f)W?87gNkTrK@29|jTzJljM zBC7~XFuwwenx{Q*7>S>8h|w3Wy_aylcat{8Cqf^re=5wFRudXBOtg;0M3@?Fat6aAUZ2>SA_M?qqilK`h6D-8iO9Gg%_F+xN zMJn`$)y$*Agq$!;Tn8gdhl!(rEq~%*8Dv8iF`7RNblFapX>P%xhlO^d8jv3`S z0sUmB1{`UyqN=e<-o0|&_gb1W!SF$g@hvxRAsbUoe75QYn8X97^$cbijZU6l66Pf3 zjkX!~B2SGo5qherqm*E}`(*68NmFVOQvg zP4*cb*NSPg_@0cX7s8R2F6xFOwR#i@G3Zs2%(e311f#Lnld#uIlDJdPk^4BgxE_@E z+g_L*SS%KC5U-nQOT2shTi?bG5?i<6!HSE7@X5d2JH59Nbl;21Fpb%CzMn8&xi`N; z$2nMGz%sLXMz<#X=Q;78`aGc#`dt%N6Xy3ws`1sz$rbY;rugrfPD(m?Ul@5IzytC^ zlcT?ehJP&hNAxrwOED$MogN_!>6{=YrN4-2$5Y(9!-uu{_YMNGnzlF&@;g8<#<-D2 zdFg!*u{H$QF5c;pv?@{40$i`9UOeZZpKeIxloNR27tLDuUmScc7MLR_=AMKA<){KH zeq%#&#daGiBCTJPn0)qLh1iT^;a?DbHN!aMrwme(>{yb@ieZ~S>GRCXsqf7FXEX}fCwGi94VDm$A#JB zIRO^?SKR4Q6vp9#C5RI~7EQ@0hgPbFx#S}(O~vz0{v0KJRbnTB<^)_ z1kkf}`?L&zo7Z`lX_+dq^ErJ$$Q}i z+qkA>p!amDD_tD#a1BvlooOd!?-s|wDh62YlHDsly%j#f5Bb&Xd6C>hIp!n+4t9}1 zqg<6&Jl}j5G4It_MCpP*;dAU}q>l@Kb%n0gh!anrw_j5CT8TF=`8~k1EO5^Bf&>%h z#5_bB6Te7Bk}`F2a`jiyac;BFTWux*6A|1z{Uv<+V*X!~^G2ak$S$4`G0;GVZTJ{* zR`vtQ9M|!KC#W#akkF82m$7bSGS|>9t@JRB9?UPm1*_>CtNLjbY-zzRX?!ebjtHHf z!Po<-!Q=MoEq+|uFg*5(9tm5B_zzkr49pan5PKGOYC>e)v8soCozsOmO54vwB9&DVJI#1kl!L7kXW z+1lNerz*@ll@otP;ejwmL(DPLu5%sxX{20L(=aaf3bE|mWo|4lWNJ#qu%_7B1gQ`^@b~ 
z3Bibv%Sq~CDsRRUePHI|iPn@by4NABXOXae*7|EvFAR4+Q-|4QNb^#piNV=s;K<{= zM*v}`Dzh7?*buUEC$Vx8l$m*vreL4@!I?=dtxwLui^=k}#^fbyR%g0=Nqi>E+Z{@E zL_^8kBt+bzO#s@B4U!Pn#wMDITrt+-^?jWwyoJJubnoa7aQjpxv4C8X&+M4&f;CeB zKF#l4DrgDKTvxi!zIwbiCe})L4sHkm`)WSu{V5ctFz54Rb?9`n8tEp~kgz!BQv>4} z`sK1MbUhE2hcdDMl-|{IIy%PAQHsz4TLz()-@p$d?9VWf+xV-Mq(}tDFVqjk50zv9 z^^u*hTG#SDeTp~PW^_!p&&Yl^L-2_+b8kih^V>H0I-dS$Fus`1woF3!mHAlp| z4f81$GapU%O2L^Gb)4W*2d8aG9FCVAC{6En?dj~`8G_2QT{934ajqYelnDnn@TdPX zr*$}aqrXHeeF7(mc_Rdh-UKo7=|?e~1ifG`AABEurTl1Q4h}JQxTPf7yihD{D|T&r z3nfj-;;;--|C%#Jc$j{+A-VYp%}h3fN?c9&DJ9!mC3F=dwcH_7&Rs2XOsY8NB6zV+ zDIdOjL(vkFaH@ zo-vp!X%+^FfmBXX%MT`sr+>?wxX&MWy4S*mMxn%RT!|(K@zOgLWRox8K9<75cvkv6 zOi`{la@YPhs0X3<6X$DZeUN& zBG@Kx)gTrq!F8;eTH=6d;1a`6(ue0bWL1Z z|Cmj&*v`AR5v`;X7P4p4H^K1<#C)T1avfdH(HEz&cm7VSm(_W)6U#L*IRaEetZA+z zZNV2_$aU}B@R_ENM~49dzEa#F!P?Obmnh8rjy9w9`d{~?uVuo4eDiPDE&d5>s<63o z-TbsJzMMOb1WE+b(?15Rqw$8opVWroanp(OUcKHqNbh7|ZlYsYnqW0@0%&;^!~d7c zDLy%|%GPo47=nV#X-m9G*J7v%2~8+_rMx)0 z=pB!#x2~bcPlv)MZ;KlrC$%rZJuNg!3rIYtK%LECFsV2PQ3BcjHG!he6S{I~zMlobSVpuVt5z)fF)^ z!pDA1mKvON+)iYhNW@v{o`MiST@u?6zAM9Y8jVZ<4TOuS0l@VNdqiy_1K_gwJ{K)keoE~x$sAtu$pfUnr&g(8XZzWyOY z>i_R=?39J9BdE202cL*j&J9Z;|bw5%1P$LW4RkF}&!^S8kg z9>x+9O)8w0JzFPFN`GP_d5EcXV!edRR9{bHtGh0{g?}5%`Y%dF1pWc10}{w`U%f&? 
zAM=;mXv?A2WNvooIVD{K&R8hcPnot(_a~L}K@>(=#jyVq#Qx2v3D+#{FKmxy{5JNm zUQ4WOpOfQY$}Edm{7D7pF)tX*fI?xJ&RRR?XZ|G4^%nprlxKzA@>$>GgtDh=7=QqW0Le7LsAgJ$eXeUQeaO*w4`x5Ir%; z4CbQN;g&!FzjD2O|8B|2lLXfKKH5|5 zVW=u3E><8fOHj|Pu(L?`{LTN}^*s9l{AEBqw)NYEBkCrHgn9M35ZnU$ak$x5ytt@h zZvdt|dY*a~(1(poC+7D8aCyuQkb*fEv9`PMKPZ{dD77R!7YcT%VKMzkkXv5IdO>4r zWQn4Jx-oGgm!BvQiDTf@{x`yVu^)!Cm>5#anjA(qskO zHWCzTt=SI^ONMF67Rj_xs{(NSmH5{Ne$OX`Sl{TA35>77VdQQ|Hl%tP~a$T_VfqJhid-Wx={q0|K3=*@NFb7X z#^eB=pv>Xk`cJ?(JqP=}5Bu#-+X)}6h0ar$)ijZhHGOquqLRoT%pu04(ntr<9d&8$ z9qQT){b&6cj+_^S`2-<;sM_NPUzV%G!@Z#h>Lh!{Ka_*{- zOj=%a24b!`hR6Gh)E`T-VK)9S28VS>tq;iSU5M?ECK)aLdirt0QHi13hgw+U5pLzM z4z?CfsA*}0$)KJ68;pztZ#(y*0hMI;r$OIRTDJt&k#_di#M!qXke0255`*qPjaJDe zkG`_#dz+CRxu(3)$Z=5cKZyG$_Wuia1$xDc7BZMQpv5TGvFNX?hJ>7Tzns<&Ub6op zy#s^Cw~ObiRFHT9@WY)6;?yft6f<`!3z5WO1Ly&0{|F)>_z?{3-iZ-PTH=+8teWY-g+mp_NvV9XW-t}?>bIjBNl;ut@N(sCsUc(!)D^bu@_73a5UK9-IPTrYZAzxucX{vgiWn zxL^mVNzpOn9AftyimlDH(niGQ&z1l zZx~0KDvVu7WN1jxRv>&_G?^1XTK9r%eDWo-tGOJC4b}q`8l;JM3IQZd)8!|EGp7{W zaP<#qT$zox*)IvCRvH_(FCt8h#5o(bf= zs9!+r;gb9Rt9KZNSY+}LduSzhm9$?xUeS1IFzeqR;B3ULek|w5M^elKab!u!eaWk~ zl2_g2U+n4CcOUOK7@5a5pxC?b%5QpLoEO_?1cKeAnN9kBcfZ~*9pVrIRjD9SGn3_M z5_!tU4|qA@dVakLy=m8C4y(a$W#A(vV-v;uD!b5@TsXtUf=@^$s)dA$_(UgZxFFm1 z_b-$e&B7m6CfjN-Qe|fUNb@~j z?PH?L5wR0Nlx^$6vMC~1E_#mWuKT`1TjcIw9}+7X5i9P^kmko;AZ|Zeq|A9?j&IaR z>|P0|wf2X)6Y7B<#W_vy(MFoCT!BK&T)S9|o3zZ6rS}3`KaL6U1Ml_`7!l??}B^-SEv#NdaoLzF$hzc=Dgkz)kcj z0-bQ@`FXH-U6WusB}f@2Q$)82xFix4786FAX7!(x0P-cTf*gSazg>vm`XnKMRXu#| z`qz^5$4=lT694Fho43qMxNwRGNpo3YfYm=g;N5&JVJWu<<4m6n+bTuY&~j9 zr-+(j5qDrI|GVjTo@B!9HW3>UT5h8EfAh64a>v4Usq;Tz-u#1WJ9@1}sW-tDsd8&0 zV9KuFd%il-*7@rOXuLr!LyBu4`+VnL8-Hdtu#Y#;Uj>`Jf?K6y)mDgHWBa>rS{td@ zp&|gho@^gk2CEIw0OxW@Mk#< zEtim8di~$6vVCNE|9p|F|B8O`7e6X#tagBQ0WehtFr_6+Y~|nkC{D+p2>*l;av+LQ z@cR;G+wyzKp=40#a#+qQ?xTu5zpU3 zuaEys--PcQxsd4h2E{AOVAo?hGN7Ag&=n)f{__RzaYS(JO`dz#A0jhR=f;S0{4js~ zb~3s--S>_C|1_p4NA}%Zy!aKwPW6DrOm;^q>MnZ0b1eFI^d#=B?^pS0kT8D*4yzsj 
zjV8gogyy7H?D+xn`se?QdB1A)iT3Ooga#~STU3F&G{Bmm_hle7*M60*`sVMg4D~k% zQ!c#rG_ksrtV9?sJ{_jMbekdxrQ$p4WtBq8O@Ot04c?fotRbp(KB3l8(Ppq`4EqXt zkwgxZ@PM@M;k%VQU9=*`CX4GqKSX!Li2Ox#s{)~KJ~Twgv=cTnLM8fX=%|G2q`@yu z_C7q47ocsx@w(E#SFnma=oKG?ni0}`{&f-o6E^gNysXiL$Zd{&ZX1T22n!HIWDA?xRXhkBizfq;>%c! zi6T~3-4cnwek)YVbD_>g-S|$}#Fs}ehgIbcm^pcu5Kc2`I6=S14O{Zvm&2zoeZ9&0 zPYKXT6pwN9>+Srf2ZB?mE7lW(XvIshAI=5J=8=HYbxq{W`@ZX66s%_T>oV*?h_-K~pxgXQ!D}VtYyX$5eZj3U2UO{5aJ1DpX})?1dFjhPKwZ`oXaqGA z=CJ+oq=I+EhX|7QFDRq~qQ)bokV_J?`(s^7a^5fG52ax?QAy-ggg&e!!ES+QVW#_t z=u|hn3ia_i0iQOamtvh+_f*EQ9kM!&6(`<$6B$9OoY9^y`tV<+;+{q868l;Aml#l-?d+~=pmq!x^aL9 zZSV%f;58nhG-fXw^QR7HzI&oV$2_U{BVztw?@wJu7Yt%>zRQ%P5}XEq&XZa%IioMK^DGYZm%;8o!mAhl4_=2=1h`CDZ8oc3_HqdDgxnwf|_R87?_K&rX$WSEs5cb}Dw{s8F&DVOQ zOpN=%JN0Kk@KzyNdTdLm&lW*|4`%f;WX=}F6Ik&pwRHM=TSP@G>%wlKpiMJ0lGJo) z!$NZm3BYl+!fc`twhhDP^_&kpc?7mfEfw>1EMy$$snPjTP22o!wf!m#+d~q zGJw2q(5`ROB?=M4g$90qZ${v?-|4P<>?u;JV-lw&9^u~u&bKot8@^#(@*S0$;MgzX zM%2AkbT@?IkA8!73)zdyc!j-xVmLmONX9K-r)(F2sKnY-}R-L5V~?Uq$<0 zXidX+$v3qlrhmFxiJr$NXXhKGTjD3)u`IrfNXBMJ)LRmWn-p^Plt#CA+OikqhU^*CHPDGVaGl zVT8js!3lOJwor)2=nSxmE`@>=21Jwk+!Xt&ktr(HR-g}V|E$#8W-2U{QhBtDBtH}d z`=D8(f?f(Mf7Y>lq0Hq3UmcXOYCr7JkQ?8hOIM{vcXAr3XX^sQxlmC@z zp%U*tm&Vt8#CFC)Buur5Iz8;&BG5-ghn^8;5JNh&ysj37mJc^*wsPJ~koOP+OPJ3R z`l&XwNv;KZGcq^P3#P5wC5$PHJC*Np50Cv>bIpjo_Mf6_D#FaHN30hwHRcnseNR!s ziH!zO5;+M)?cQ;f&dxSO#x5aT=W5Jh?O*Y;`h>mvMt>hAkn)G-0yZqRmEq38y~dEa zH4`OTf3k7q8mjPYyEfZz&-XzmDq^dyL8sR%i1~tAT+4!O9?&bl?59AX zbRFS(koGA3Fu6pXviqRK?IJCmoy5ysL}tYu2`$-jgg(|mMegl!;3i)6z7x`cR#Wje zM8d1Wx%kK5W0 z0J8Rd2Y?hyLy@aT8fDcOaH8a{jS+0V-*x0PjW9W;M%nlJhqA*bEQz?8vZ}*y z88kd+-(wrqH5ZohIuyIxeMF5XDtY#9O!sy4n;xQ;*NAz(Ly=5hb4?BZ41%N^N~9cY zz3`IPnQUq8TxdSYgi@Ii>A?@POGlz7ObL61kg>$r8kvjzKH3m*9)%xUMUg9kM$o@I zGZq&AGR(km81=v8$q~cAv8iMWi&Ob0jP94v8dy!jJQnrQrpGDWYsA{3m$^8Fpk14C z+W%eWMX_Hyv(k&hX|l|bM!$SpC?)rMYL z1Iig;&JfNbQ>>#c_+#>1WS+^&QH0Oc*NIpQN)r3IY%S}pJCr|vmxN1CU^eASD;n(pK7m&v?SDv{s5T-(q`X#XRi~8 
z8OQnNLpY-FvEud*MjW@KAH1U^tHnIyVV1G}WW^r$cSfsA|3meK1^GTSUHu$j)Ra4tqlXnl{>4q^+(zGJ&M*?#pnn2Om7p@hRM3v(CSFiifgpO4NKwF2hSVbbz}0DEL^=PgmW@ZV;}5^NMaR+?}602A)VeIcleTTQ|*CV2-s?C6j9 zmi&nL+$~5cgM=ra&FF4K*tYsWCM19VwI|2=vlguwZMD8vQ#*sJWP8R()sj5J+6ch< z?FuunU=XN5Xb~G^PryK~l-LcspS(K}CNENqQNfbZ{NxUVOVt|D=FqKG0s&2?5JnH> z}WWettVG2~@|Jj<0_(=3SFjtt5%>jV7Gc5*-n8>>;XOZK_qHI(}a(uDqtNk>{ zNBR%+y@zg{e%L%%c^l1kSbgZeBrMI$fA7cO>4&cU*0*LilisSj8tzN4OdYmoXS6Xn zI=`edH`$o$;*%{< zDkbvMmJAjMUaIB2Sno-pnPCRc?9Ep*8}QpNBwxh#|LW%%U-wiLy!G<+^%^VF=Y}WF z!P7P)=Nrt&C=T8-Jdl1%%fI`+9JvqdOC+rNufF&BrWQR5PH$i9>>s78hyv;BMjmiL zcYxLp#>J-*sR*XW3X3#wN`SGY`{++(9t8SdM*ct{J%VYL_ol6>`=V;QW_-DWO2n8B zFH>KA4oc76#@yANHzt3PBDbanPo~=F0%C>;`eWK|r+4Yznz@X|tyI8w-L&yO;zrf& zPi6&vo?ePeF`98AaHDCa36szw;#Y@S4NJJg)R+$K6vkeM5rX{Sv}gp4Ep5o!b`b?!Kv-f_fNFoBsh2djo(laLeSN<8l%V^MaQ}QeT?50ZNxVw(A?Y|@iW>!LlBSO zDj3*J!B}ztXKZ>UK0u4Jk!%pVG2*Ttp6#qlcM@cnY_(kY>G0l~;m3X;S}C)o-|AlD zr9en!-0EzBCiaHxk74m8^B5oGrCiiM!N(*7Ph8|=>=PE^t%wI8kDH(E2h7c$f1j}^ z@FPRYQ=ct$WDQC4^~GRJdWU@aYLcRfxKeUIGyx;qx=oM9hAoV$t$f}O4w(HzGqc7r zLbWAA=e75cT{|e`roh8%o73dA% zH2#=UEN0D}m3E0%y3a8Q6LL`Mzu`#^!mk5Ks@DgfmbupBh*Gwp%_QR0*G1U)Ml3CD z(@{%*b0T^%dKy2GL_t+r+H3_EfBUmoD@qQSyb2Os%A+p{8OjJ#3GuycP8=Umx{1Bp ze^|-%Z(;(z&KEQ3<$N0i0Ozg3;X!7(gkxQ~+OLUT*pk5r4~rUW@Y&K70m}W_G~$Nnzm#0m9Hyww2u4Q#~&~``=VgtD1Vh>gSwl^ zDI|e7z9u`DD!9!sqps`S4cUgD$Rp7dNc#7RP z$p{cEW&$&f5rBm8`|mP+ z^O#wj^?)5X6~_Rw29)NE?tj~iJ@j7WiZP@x_S(4JhqW{&?PBlhK2_yN>?kQhsB5CO zuI}BU+Z%YP@;fs-bkqEbZXDb2S*aCb@r%48dW-}(c@w(CvN_wj%Sm!uLMj$pXI9>4 zOw3&$6;->-A9@6fI~A{mG9BvLx^J9#p_x~~#>=fuc)kh(cgd_29?pFL3XA$1$R>GM z`59}!k>%_ezl?E)J-~@f`Ra<*QBc;ilyN#1m0T&Nn@66@^Q(zji}*RJ3?&Ps{Klcs z#O_oN{H#fI#|FfzWb061l_Appv4LOs?*GdB5`QS$@Bdqpr#whWi=wos2t`>#g;r5| zEJ>ymT7+b0W=cg`C@m;MCDMjeBx5M0MW`&(0Ys?xMa-9Hs zgVO@)9*(x$23nv7mP>bAS>B?={!MV!03-yJlmsUsq6fnlB(CZSA!0D@+EX-uCJAp% zJ65Ca-^0-6ih8jTVbkrP7HIw8{q-JAQ9K7taIAw?**30j5>3_^+Z7u73~uIhvB#Zr zxUjXrE-1k$2a#|HC|2&`-F}@g)=wjdjIEzETE*+^go;$Q-eTz*utN}_OcA7$oGhg( 
zo2xABGfj*{WvW4~uqRfKVWE+K&nx3gQraCNRRb-LIQ9_wxnsIQl<@vdaEQ`y1(%QS z8|5Qh8L6#wMKxM+|7w5Gab!wqeHL*QLNT6^Rn?$XS#yh|w7x*gzWuaTcPy0lHyf)I z^|<8bW;|Wb)41?!X)AuMrgJot5RO!GEnoVwzE$jZ2sqQzu(0!u^hTf&-&`Mr!8|5p=(D`efs4^kRebDZI_9TOuZh2P1D z-G{`EJS@y`(iSJ*Jh%c4UShTE2x93z;m-a`0(8> zTTu#OkkE62>^_vUm1JXb*)7w>nc%nNeCGGzL;jacwS}8V==WjLu9=Vo!3u^t8vovdOG$;uJT_g- z4m=1`sY)S$zCa*60NZnYCm1abhyA1G;;j}=&!!<15A5O9(O!$ARp`|9(g2w2NgDD& z;1|A}grG}Im%hhYyLCesUU%?s>G-&dK$483V6ACDl}^%c%sLpNwUFOistH(XkCh@a z77XP9v9*VI{^V@nck+!o85JlSw5GsCbP^)9@H~npyR05>gbbksp&4WAzgMGeMW8jv z6>-7j5K=-p+>o~rGXuN^gc~V@Ti%c=E}+Yz*`;F(HL-_@e{IJwnGHunzJb#JZEyK~ z!bILhYDdfG@ywE_`OwN^nfR?}$arXqZ=y4DaM*`f5zt1k3Z!L$!39vf$KS!>^KX5< z`G9%^@t%#(rpDGYNc)G0ATWeX^JgNzsS?Vq6P`nLRECuYS+N+fu-0l$E`RISE?pY7 zag2D=RUb*pQ1$?)^D-(4*M^@g0O4o|exv_2pF)t1pv#Uv7->MnrtvZzke2Y@`&$)e zZk75Y2&-5TklGMFY(>?%fSm<>{z7ixq)FkBc9wlMpvK+-MTe^_jHU1#J>@ulln8*Q z4OqDsrIVG}$OR7qsk(1WfY>;S*OLgdtTyw!!>zd4!aGF-UQ!NOsJAErfS;n0AE_8T z16j?<-HE-oXh6NAMqp}uhMBw2pdW$&OA~e@xn$)@c04crE^=e^4u24Pf8fIL>t;@Y zt+Fac@oJt!;u~ViAX!u7MB9ZSu;VpTYR!tgvUZ(hxw|}rB>U%>h;fw9xS$S#sz8w) zK@w_yr|iHfaB_$On}7`|2ucyyP^U*$d?9}6z%#-PrFl^(O)}h99F6of?!{noRl|gb zUZC5xfSsZAfClab#m;}>`-$?&{>rE!GKxeg`vcf^|cA6fFR=PlpERKPq1>QgII* zRV(D1qy0h9T(#`LzV*b7%ib`@_*Jk!K!1XuKOUHwSo>;p6AWx!8z?zM4TcN?>b-(u zyw@%1%F*L;NEuB4gEdtB_6fRfF#9L)Bno6pKI5``q5H`6S_NyCO5GSR8$QCh7usdO zT2G@G&JEss6zVd3a`2iLX>|Itv(!{77A~eBpX8D-qb1#C@vzn53N4h-1tO0+grc000Mqg$Td#tQc)o`l!)y<) zt@()kP(1QU!oz-KJ?X zzR8)7e5^Isps?sn>=vLJuW#HVB7)!kEHbbhr7F#e5A1VrduIi=Ab9BdWJEPj#DJz8 z)ax#5psfM^0WH3$##PFn8=Tcz;%hh5`o~8eitD28a%_2Na|;yu!anYp zbme5UX{z#&;TB+_(M@!&;STzlcnB^b;8sn%zvT-w8^Ke+J}9iHrl03f;%j|iED{_% z`ac{C`7em;?Cs!ALxi-GUIRUnpWrqE{?stO6Ch-dc0Ugxzy(jSbq8TD0jLaOaDPn_ z`*RH+osJ(1X~hRX(EmpRG;h&AK+{^dO+oA42cT1{;mtS3y9ni`Bf;6cH4s7rXBUdl z%*939|Mi*=V~2CvG*^!{Mq=fqe>wt5PGBC$Rz@?IH=m+cgHvG>es6*v~pof7vvPCxqe{Rz>X`Y{sf-p##r@(A1DD|6gKGTB{}g7=SY%Cy=;-46kywP47cP?SNf=h3}pJM(6)J_!Js(v^>7WVA^g)|}Nv 
zmRt=@xePV#;4Lm{^-oEn>kP(Tq}P(DmjVJF^?7HNa#Q-&afsb=&p6$M*3BfhZeP78 z-Q4beYs+q#d&S(f)F5rizA{^Ge~cIh-K;#g{Lf$uo_bmzbNN(0#5=oM)%4b>YPpm=5>K^=$y4)oDY>*uWp3}v2^s-0=mtvu=X6|DecM$4gwjX9i zSXyu^n2~z_xq^9n2~sf_bPx~KO+ix^;`OLRtq&%^20Lm%aNPXRrdf>gZvEC`V&6t# zqWQ5-QfbV7{(TMTOfV!rC!$dH=B(Kz>Ug0&_~j#Y9~Aw5b!3yl z(zZ)ghJ6MT0JJRBBAt8QpSxtpg@9&aKsu6yabJeeUHiKCgb)fD*j>eq&o4GwFRS<{ zWgg;YvY(Ggfqn`f(&QO$!9@fBJbBc5v;e;nQV`nZ~3*7 z)Gi^==1?J6@ZND%YQ!~&PQUy&m zStY#zq9pUjfdSr;G`1pF1LthTQO-6GAJ5tSg|WxD_a9aF^&4^bayt>L@8$NhXs^7u zqRn1|)<60ilJ3%6duZ-~EJp~&5OYN~Bmu17E17B#0aEy%a-k&DydF_5al&QweMsX`pwb}DY;}9Fh$E9d>u&dA{0KEH_V*l zor)xgP83kUIf7(`S6YO+0+zo&x1Q!=f47w+P;d z?;zojRLzBdMB8(U?A` zASUPtntpjfg8mL6~&jguiLT+ zU5vHB)DzLE0Wqu3vb`qv5AaNiCzRe`#>(LxavA(uF7AnpzSS+6#pf#A+YXI&Jb*$` zZo?^efa-7rfruT=vGLiYz9DZTs;mkOfB=Gw=9g*x$GD=KGJ=ml?dfQ4Wq`Tb*oMk& zb{I(J){=6s@L@_VzquSo5;vAxW2l#f8oNmj(0kh)(g_$$|8na=Y|}o9Y;Xa_r=KOI zPj-^bEttKNpK=aLx~NlQ^OUJQ>|86{*s7w<=R*a2kb}n?bnNKp;))w6r#neiT7&|N zbLhTeFpK#qoAKg2dkT9OSE!&$rL9 z24<3fcR!`{F`s^qiCWY*9Oa*dV^FYeKE%CFS z2P|Jbp_Upmz65&56h`mR)^$LuW}fdsn7oHd$MCjyT|duD@pIw*??KYb%WxKHZuWz_ zpfIkR6*CSWsp_P7ssnc<$Y^~i)-7!Aey*Ib5%DTuU)t8&N0Dl~v_ha0k;D~Dd=!wv z>b=PQ9dASJj!#OObg7d;>IE;6k6hiwY1FamQs6cp*3;Wl$cWdr>X-g+RxyLrSKJqA zR>)Xf)W@K4Ul#V0qKYAr#wvVB`dANR5zV*=e7?SA`t^9o(=Xd+HTmOj4H?SjEN9ZZ zvjX4Ab|u7ry>&WmSrMO5y1h!~nmeOn&gv!|58ta14=~5Y2eTVJW1QdNO0fM=U;rR& zIg2=1VRZvMi342v_h8(*iZS&26Nd&XmEWe=#Yo%AT{yu z#Va5{Oe(bO08z&IcpgkpAOih33@cuWv7&0s`N|75v*$;_MPb4&ZIR$OUvOW$&^EO0tikHvrk1X zr>J4~;}71^}oYJCRv3L*vYaAs&zT&gi2XAt$TVK#u?-KQ+Z`(9Ghl>Fd zR{pi-#svj_u`PuS*&d?p1>bpc<9ftmAn=L9`D(T5$q?~JeXDXbI} z#WA-lOWYV10Vu@_YfXhP3}$LLYO0_-IS;o5I~ps7Klzf#KTzK?e=2NGv@oN1Tt#5U zXB?~JA-L(L1D9hP{EdnjPWaeSB>dW-+ynYpOk2=lY-?A8pq0Nl7!C4z<843F|3dAvU!(7%>7L9|;*CihFfD$s%Zl@Pm(#V zZl+{sPQM}-M*w{8#qEkz@VdqAUIRminwVyh&jy1Y1vWEv29I^Hf`}D`TvlJPDU*BG zTJaH!VWv9hr^|H;_GGJqrS{n4{WZ;_VD;eD3*cS+Z9)bI)jm`5^9&T`ays4eq_ zKmwjC8STZwo@nrVGz+B4qU@zrA))gwt)7o^5O@|swMHfC5+C8{*9=wR1k_mvW77b% z4m0;0*E$J}>X;7acTAqdlRK(^369NS3xdU^<43PvX2 
zhncb)c`_Xx{9cYv-CZ>~=QsBl5i9&R_Xcq^5Aby3Wwxcr0FEG@gp5VqZ~Flj8-<2i z7D~fgoRRVs>o9SC=GsR!9j4|#2b*GWQ@!=+~+m)>>MVG9%&~yH=AGUJ&e`Tx$>BMSPpz2+u@`z`L^)!%y^iF6%_ZX zvLDA|IVYi&{GHLdsynh{Qi9gYa&H#ztM@I3n;8j`V)cUGH#&BKSqoEkqCXn3$k z{VV+93hC%)(ZPiDbPI+|U+e{`&4Zj5(G~7D$9J%x9+2TKvOEWL;0r5spzd*fo3+7; zn}QzmVrlW|v%9^{Cimw?>=@hKiIFWQw0EL0*5MEZgPB|=&5?6K81s|G5f2;|3=;&9 z&E4F@e&>Q3odhttRv?=X4zdYVV-VJjfS2SsAlrq|7)Zwtb6zuwjG7s;b8wa$Z7;(z zE{)hK!?QnlJ0QdbGI-BE<^IE6-stGaig*APFK!`Kp5SGTV^G?R{GiumW)I!5nve3K zIg-RRdCMqm({vddp>o?$}|Voflk&K@^CN-i1R@$vGQezC}ZWHK?k4!Xi&)pz&5t4t>_S= zw`qPEwjVN1{x>G_P}eOM1v3Lgm+JHfU}= z;?+$pR3nISp1|*is?OUoU%N#gj?-w|a^!e`Z%tleGcpMTy!*~5fxIn)>#Lv`0CW0E z?-z&G^nX;f;5pgLLva=uHGq#8_<+wT)YBU{#PrpG9{x+Gakkk$(ua!aKHoJS{&H?T z;+u|GxVYIRi?+c?uVoD|T2H4nJ>>mReG%C!0Y@R3ivYzkrg?+F&!P9X!r|eaK}pK_ zJ9wZ$=PASMS?l~)SoFNheX&=pA#7#7A%1c7T`H>#AlHqhC-zojP5&$Us{9#+5NCBF z?z=)|m3WW^AFdvGt;#s_ z>b2l_SAs`Y0?g>z?|@e^OVVMOY@c_J6h0|80PpBl5d#CLxFNdG5P)3?Q^(M9FIB_QHW(qODfv1HeqFGTNy82IqGJPg|VN6>4h;K7(nRMt{pn&A7RB@t1^ zfA5f+g5qk+fdeMPnH7_`aI@~B7OHFqCYnGPQ|f@7<6*ddJq%MdpL(DK z?6E=rXk?!pZOiMScLFzTc{Qe}A!;t~`e_6FAr=)whiZAS6QShm`DZv_`Qkq{S^o49 zO*S;_P8wxUj5o#CH2oTIb_@Qm9_YA%*NoHYjT$`*hKz$Ki*d!3e}GboiB&mIC49_t z?q(BjE`ZDw7B{YbA?m=`a>~4H2qt63GfC-S)E;6|wb5d;Csr$vZ2^*YV$T~LtvNV{ z?rjKcw&nRBh3HXt!PMhnPcrfNCcNMSydWnG7-ao6JIH8m^EQW-1pL)%S=)0Zp+ z`J5mLEip68kCdc%68J?`Zc$kqfr{qW#%B-{ z!iOudS9u^yHYbeAg20ntsNd>wHAxBgCfqR?%N|rGz8;hZt-8-H43*3&$@b7CA+Vb< zG3BeOYyMTZw$rbw1I(-yy1IPUr{2oI?K3 z)W6&#hKw(o);P(ViDKp%XYeruBdF$pw!dox19IRf3a=}jp|WNHAPPZysKE&%-1$p} zpXiFjUCL?rat(Z$Z#cL};Kh`PuIN(DX*bbpDe<;m1*di5z?jNAY)8!ylu zhre2P)`p-yVDgrRj2m7w>=%sAOnkgHz>>f7VBS7Z?$L;cXpKWSIP6&kq%JR@r1@6x zk>KtH1I|sxow1CHC5;SyWaFSY=ITB4*GS{6`!fQB<@|!}&$PSh>6)rwLnvjAVm1O(Pk=Fyi=oj5 zKn3U9>-baM4d6aFZ61@Q$3g{hzlYe3e7bqfc|h8r7OFB#3LuyLnVSbE;txB`ED!9S zDCbe#vXKfbo8mVzUKtExS>6lI{X8_KuElj<#2uYnk&T1!wK;fM+>~Sn1Y}T=6GnLU zcXR`@Jn&%11|BpP=#R5#%0y`HM1_D!-2ET9@p!yQs-uPKatv6~_QfAcfqZT>m~6cY z3^ek=5^p0AbapgNXDAKmBgB5Ly$6R4vDI@zBGxI3l)ljDNBbP 
zA#=dogrF`_Uu}`_jwU%R&{HL_PENceT@XZ|{ zp+RWV<;&5w+&E3Yv@YJ`6Md34zOYp{kk`OH3W>u*T(#yKbZd?;Q77+BVCB9=a&l}n zD;69#gl7Q3tMef4bv3-1^5=jS2MRN8fvyK6V+@O)Hwu5#dmBnfTi+p6pZN2rKIu{M z0cOO~L_Sfa)5E540d#RqvlUA3MHlc0kr#D4Q+O9v{bEeZB6hUuk156M^LX1Cs(Qt4 zwi!t=EY`t!+l<5?!p_#$V%QS$R=o{-;2uQnM3$RnJC2xS9u6`2ycTK*gljVd$K`~< zu)tof8NpWMLCL8i#2Ug12vlzJW*!17@<}vG>#!S+Ta68VMqT+oOg10CZs{686Nc~! zQj!szmdOByBC7OuENUudynSqN(=i>W1|=2Vu@a!z%ANikV}p@3n*U<3v2BsTmJ>mB zKMI9J8Nz>Hg+t76IAWPzg4Ci*H%RgpstIJ5PTx2<)VYFfL*67 z0fkwIznyV>2&4^3@yg=={F}%eT%!38z#&LQjm2|c)6|&d=bWbAq3#jS!WogFPS8Oo$Pb9P26xqHB@PV)Rvi|R_SYxYdEQ1)>rPY_@y>$Db#TEFR!)S?!SeO~X3R0Pz_1{^FY%6b zMjPHfR85Lq9-V{6&;n&bI0s+33SYTAVNeZ*saX79z~1aBBemQafr+f0_Yn!ck4N*y z$OfZ8RF25{P1r~j_GYTw5%eI;8ZnMmrc6i|zJQW&*!`0Lp2#)?@6KF(*i=rK0U}6Y z6?@9JtaA_8&EWq~sP3kxfv&5^V5(smDCMU|2%nUvqt`NC(=_@n$}e7j4(= z_``~mi~DY398IiISYFT2f8dBXTd#wGJ<_`y9OvCo=ev}3p)+_vj7t_KLWz& z=^QkvE|3P*3+{|XSB;e-iO2w_MSKpFlOVfzb05~r(554LpA~Iwc@O}eq$GUMdbF-oN_e3^A>Jci^iANQa1sTl&KNDFZ$sm#K=MVEO zVH%!e^65o1ejX`vkcrGa&3~?c;MX!nF=+sq=fenI#ej_i_n{ERli`?eTyAScUeIlK zqI7%SySYb}l-wj8RxR{;^QHv*dauH^)}b~ zFgvy4XDih?{=T1Ro8=$L?C(Ed6J6Q-^j0dir6J;EK6Nc!c#@i|ut(Ic90=d&_QCYo z+)R0ortQ3WJ}u|3bkvOy1?eR!?2ea zaO#j}%U)nTNUDs~vI-T*>)Dc7Y^meQm!Kcw$s8;=2cuXs_+4EOHunZJw9p4?86ruH z(>Yt=ZfDFhSi%01=;7U)8)o?}PrFULEI&zXcYAlyb}Q`o1y82&-HHMMY)@6w8&dnm zeWf8p!RN~$L5MZWJ)`+JuzwgG7c^W@aUsAv?jm8;V1Yn}M4HVP&xbcBYX!elenpAg zXN{J68ZC8SMy=efKMJ)0ySZu&{UD1(>v)0HmOveE`8Id zsYA?cabM{E9hAP4^ZV{w&tjhO6&;fB@$EL5jY&GqxxcEuKThcMvhQTUrkK#)HGp0# z(CCg~4>Rsrv>W9qQUpjVm+Cl54BYE&cRBz&Duf;FY1w}%oGJZ4VRHC3eyN9RfeaeK zVC9jU#EY%dZ>CN$4-xkfzC82o=D22ZU>cC!@upwfJD^|)P_Sh0fgbOX?d)kNkY_JH zUlT8scgA|hO3F&uh#3&wX~Sl*j=_`pzN)T^@~6ye*LAS!AJkdbzu+SUQDIM>yvNZU z)6yw@aYAIF&>U|jtfq7$tmdLg62m>c@2m9L5~gllG-6-=7`sfP*fOK9?vdN5qF>L* z3c*GSU?WdHrMYj>MGws^duKHmAx&F8DDrD3VxV=g9&NS9R+nm|&#cK4j(K&DsrRp@ zZ1O#63U$_?Q_69La{)BYleClOXPxm2LHW5W^>0kejR6!gbq1;|~MA!pN&UG=!v@ z45p>qe%_FLD|L$6JAX0fAFxp)v{4zEz{gBZ=EcM$@8HxqhF?g 
zcN9C-4U}i~@7c!r1oxrQ^XKFh?7r7<*~}}^c~qpifAoV3B=?5f9tkfk;k~rEbA)`D}G5uEpH?=_LFrA%%I+&KQ%__teu?YnZ~)Z67q5nDdRcbyco z{a`Y81>37JRso9%Wr>8XJpb-M(>zQ@=HBu^Pr>+*a~mV-In4~tiIx`5i6#w;7B+W2 zhxR^MqHxg9WDk3v%fC8HRBxy&g+4OSZ^`0j)tU9 zuJ@ldN|D^urF7;+aD7f`3=DKM^j^^+d1+|1&F3-%N!|k;Tux(3cy7v-j!CDhOigLh z+}=o{*MJ@A_ttw{{`!|Keiy2y1zGfLI4`RHakq2rIk`6skDA^CKEKsmS7A?oHyTMj zP*^m()idO}y_Abx>~aOGt+dMfNi(-fP)@l8R)4>_Kl#UnpGC*M%>g+PIhRuyc?+)Z z+VZJM@0+8g#T*|foH6xg-0Tcio$9er(q%@w{*I-U*|8y_gk?ej3PCg(nP<6o_De=; zYjOeuJWe#Fero(h%)4Y1pL1tF#f$psh+XaxYKTFhWbU0@iuEarnuyZU-abX{!TLTq zYd9b5#T{1d9!J*h*q_uFVY{w)7b{2w&s}xJoR&VNRcrM?a}d|oocO!s&g22-9kErL z?QSluG?Mxu@!oh6ML6V4`~~WupX%aj+Jk3)MX3&7JU?37(D(6U(R=oJRz0eN9K@#e z#8X;60}URF^9Fw$bu-eimQ6WhwZu&!(s9>Yb@vm?d!9x3GCpZ=s+`!r2Iz%fcEQ`u zaeWee;nIFHvcjI862I?>%RRN*Aek@En%SPTwQIXrqN=ve)+c$(N+U0XTws-yEvuLP z)JqNfvArffAa?3NN$Rrh57)jJKHs^`SEC|*`C6q*g|A}$TUvYu%{aU2d(G0XrHWwo zFRJtzHnOFuQcX*fBSYRR|1^pzG>R>;IWUR%JoI4Q$$GUwHHzoc!&BMnYu}y&F$o;F zK}RP2cB;tU8YP3TEdMwmKGUSIr%4#LH#()SdhJ&bv{sxhpCOP$qOX)v z-sj|XwQS+`2t{hoMWgR3SVoMv(T`{Kt0TM=(~_erOw05Se#-l_&h4uCZx^p7cimls z&NaWhUlcJVQxYV-`vo0gy&FL?yf_2O&}0S-?VfGC&$lrww_9;=Uy}x`(;mdkyg7MQ z+S{xIFT0A1#UD-I=xv%5~Guz}=>`bPZSWtFsP<7WnR@I$;h1-MLajrMK zTy~}#?Ni~SefD7;j(xmj&JwYYA9Cg5eJw$dziQQ5QGQQ^nR(!cLU2>=;n^1s_Bz-9 zC}Cci9j6I`twBG_GVWrObB?$FlzdQvCoD|dQyaC9q>$EWN6=5ChkBMrcW9S{6J>8Y zuGjG$)H3YNV5SLo{1@JQkrVm#P3x0+gyulIbSG*G=W|~64~8VB+Z~t`zt_dGrSMhF z`6-WTWp1ix`_Gn~ zEA^~>Iash3h?1|S+S4OS-(oe{uIYgS+2HllUXibhK;{>w?de)smXk->>1d?m%Pr~O zDIq^gXs2DCYK7CM?)s85M7OfG8f7U`QS-Z^QCAo%F!? 
z;C6=}njWPsm#mf)m2Z$anDl1LlNVM*=O&ND&`bU+ZB5#~d~Y*Je)`f-=lEH1pSsT* zSXChQ4(R#FxvnZG-KCdiA(9G~5mtB=hHsj0s=vYXQFRrDg{#~Y_gUYyd$#V=1qNA$ zU)}AcKxo*#Q(GVD>$bGdS<5gjZy6(YW^0JR<|LsY?MSck79Vy*T=tV|WXGh#E4n*# z$dUA`gd6qemw;~aOzdo#O`8k)BjeR9BTiEOlCN*xTDmG%9*GNivqipd^+(Uy7ebEo z9PFFZaq744N-^slS5)ol-(_EDUL?J{y|-CEi?qXP^2<}IGIRyO3UpB($?}O}PCMUN z6I;!87sHBmYqn*7B}rZb=a z>hG#;Y+km+*5Kw+hU^uamy-wXWh>rR6ty(b;@-G*(JtLX85ApFQ&H&V_4|~+Hdko1 zn$WD4+{`_sc(diYX-;(c#WUA6u-=Ot;_cXMQp=)Ftx2c>isu_V@KhHo~QQsn+W*fj!*g%#k!pEwc0|t-kYS z%fDv57IP`}c8)@IONuTEOl8l^zUgApVZ=1qQBnWthxircPp8Fw=wNOw^sb|B`Xc`B zE-P49D)3&YdxLhxiU7`)5If9sf}OWDA>#Y*eE=^kh~Mp>@f&5~3868@`m zUSsDxW=Qj`we7d4^p(k(=URx(jDh~#T03haPyi~$^UCrz?;}XIthE+xYStpvH+OX= z{tA5&`eR>Yu?OjQwzlV;y+30p#|IMTpITd`{4$x;z6&wqf0>T@*}8K<=_iRU-F4?{R;k-wYfpOoNmlOL#kema zok6#>Y@Ztrp8CE1;EU-;cK!CXm_&J>J5N%{m!mb6eK7?dC2vdQ+y&{$3LzAly@2FrRd6}!ixdY(F$jd zL&NM-SnG1vMY{9Cw_%uFLpi%=_Qi_FToqR^Jh2zy;yy88x=G*9QZBz3P`s9Lw_-`< z>GH47)FOALE2c7A8tXe$YPGk%HOjkYecdN=|0l(HN#RM?8tuNiODlMaboOkqt^e?t z+jL4}3igxSIq2QLxw8ED8!x-93B5 zZsrZYwl^S%zK5Xue6XI$sfk?W#v-o8S2lfjyLvN-p+n0$=?p`we}VKOi`CaMOlR(}n(cgY(f8vsh?M!aSoMS(dKoJ_E9#e4J`mn=wBC-^8~+vBzoP?} zCd+%qr?i$71ef;)MV#NIvaV`9i?u0nz#}PCQ;|rqCFVU8TEX@KPepe|9hE;;yheMr zuAA+=cB5zeo#r{Gp4RsDELqptNjXNZ)h~Ye?U69+KWXLo)lBK-*UEamX+)d)_wigc zSh}v>ziSH)7#`!a6tvp4<(!)w>!=v`UwGG-NdE8aFK`$y3-p~zM+~~3;ohr{rt9p0 z9nQAQhi;|>mq9u1I$W?R)qJ3HPFvJ?B6he($rjG@Tz@?40jXWlF)Q~0NrKbHrz@k}Sa$Dj z*`arJ!o_79?w!_g5h4XW>9TX6v;0!u%)H;V%Cm8E7MS~)1`A-uNo`dBhNg{VAJC0h z&aQp^VT>JC{J=Nz3k4$Yp3HB3@a*(uujyj($I|)jKrBCBH($hGOsGC{v%Ha6-uBmu z;VkW-7yVvETLnTBH=DXSrn@fIc*E5KGUcBT-obclYWRTOx^Q*PK87em^W-vtNH9_l zIO}!#%n^HaN6=IT?YYt~-JOXartG0Nmj-$dr zORZ-dyEFKFq+`fdYz6E;!%o+t8$8 z-tekUGhxNNc1*Xd*9kouladIs)1XPl<6b1JTRkwq?8^q0AE@%L58!}b+(b?`CVc3w zr#75z<>XHK>8D^GVWm0#GvEW2%i=!TUhu&M^yZ}br=(*PCfjQZ!=UsV_@UV=3FRs) zo#>H@dFJPl4QF~=1=^Bc{XQ@mo;F-60OV|%w0BHUCSL)qt{EIkuzDs*Q0njAN+sU} zY@XY-QDsI}|Kw|<&n0`OT)4rCa%>F#B03fPwAJ 
zn$)EQ7D&DfQsp3VeM3{HM^W)*bX1`W7$>VDUD3j{B^tAFI={}DjoCe{#{_i=SjCyftC&^_JKvcZ_~@77U;j&2R8YQ z)PVE2Y%sK%oFP9I$g+w@7P%GdE})xz@cjUGdYxvl93oi<{#nM`uInEPzt_aqC5V-#dmw=)2?NM|GYd~i87m|n$?g_OWi74-UOAeT z<{otuy>?mWT_84gX9+w8T)6fCJZ6a=Tg-cGqr{WgA3!GIG)MTv4t-+MtAs8?UC0@u zEVriZAg)@0@S3oY*^pC&QTI4o-TCyk`&8Qlg@v0$55A}!5QLLUS&j&C!6yKo#=dOL z#DCaU8;&0?t0DeuApY!=oJl<@K*5h6`(cZzo=n{k2%%txHSZ9K%z-Z_0Y~Z>SUyV* zgAZ*W0D)bGPz-!nZ@b8Q9<@MV3G6@L9xXS7osZs^yLp=SWnhFEtC7AQ^*9fv zC<&D%#{(m;QiBWj)myMs4{_5ZX>^A&-fzitw1)GLbekh)tqiZ0L^LSEnIUS8M2h3L zl+}X^T^C56OrC$r@k(;3p#3cLjW!=OwCcj(GmLat^*UhRs6upLrPmN6l0RECq+7v0 z_k2uqC!(hy+3qlhAwD@mStXKALfd9v%c&Vc{Y~Hd&EUAZpb9=t|XC+6Nmr z!Y%0BKc2xQLBUOd?%UsV>Id=N{X&enPt6=%0IzFj803zY9#ONjeiZUJ0x$AHFFJ{z zgR!?nk)wg*x&CtS1uf%*1PJI>emw1W3Rd_~bp`utgW~Jf3aU z>eit}UY~y*m{T`tOQu=8H2g+^!Rq-UCg6g;$X6TW_{TNFZ(%E0w?Mod15=zV4*no{ z506A)(UpAmtKi28&5-JVt$s*qmm-@&^d8&oG#v@Kuge5Pb>$FSSuVvd_tOJ>+SpUX z$H#B`Il?#W1y^`rd(pyw&f~3cflvn@E?Nk;=bu7rd$T2zKYu0cJi`>frMkx~f<<+Y zp+hdKCP>1Mly03SMJ__ZWgQ;l#`fmD^}+U{i}Z7fULkR`$FL!J8u zMNNt`cfVHAB+U>jJ6`(W=yl>gkyL3}4=Y%l@>27&zc0^ORb4=a31R4HKex6@5A6Sy zJUE~sTpyHe-ON^#;Z)z#zvXvA;qpl!3g{YwkQYXnseL7Ydonlsj$GK2)hJ$q>7GDt zKmHdrDFUe={7-i#B0BG%0`LmnKcmpK;r$u&!RC2?56M6|-rpWXRo>sqC^&lF-z;=q z_#b)N{|`S~$>qoBE0_KvOg{{}UV2L3{M4ZXrw(h_oIDKwhpkasqq1uCnpH~5d)6pv zD5+|!SuLjo|4<705dUc00}hUdj@Wws?*}-yXG_2X%SYaD>WIT(ms1BEoc?@9O=FER W?qhOY}MbX_iQ8K%>M&w(q3%< literal 0 HcmV?d00001 diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml index 0e283dfdfc93ca..a81cf39ce386ff 100644 --- a/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps.yaml @@ -13,7 +13,7 @@ bootstrap: mcps_location: "bootstrap_mcps/root-user.yaml" - name: data-platforms - version: v1 + version: v2 blocking: true async: false mcps_location: "bootstrap_mcps/data-platforms.yaml" diff --git 
a/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml index 0b3d815c710980..2230d552ed4c0e 100644 --- a/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps/data-platforms.yaml @@ -119,6 +119,16 @@ displayName: Hive type: FILE_SYSTEM logoUrl: "/assets/platforms/hivelogo.png" +- entityUrn: urn:li:dataPlatform:hudi + entityType: dataPlatform + aspectName: dataPlatformInfo + changeType: UPSERT + aspect: + datasetNameDelimiter: "." + name: hudi + displayName: Hudi + type: FILE_SYSTEM + logoUrl: "/assets/platforms/hudilogo.png" - entityUrn: urn:li:dataPlatform:iceberg entityType: dataPlatform aspectName: dataPlatformInfo From 49b6284ebfa6fae65bf463e0eb3218b9793bb1f2 Mon Sep 17 00:00:00 2001 From: Steffen Grohsschmiedt Date: Wed, 4 Dec 2024 01:16:44 +0100 Subject: [PATCH 08/28] fix(airflow): fix AthenaOperator extraction (#11857) Co-authored-by: Harshal Sheth --- .../airflow-plugin/setup.py | 2 +- .../src/datahub_airflow_plugin/_extractors.py | 24 +- .../tests/integration/dags/athena_operator.py | 43 ++ .../goldens/v2_athena_operator.json | 672 ++++++++++++++++++ .../v2_athena_operator_no_dag_listener.json | 672 ++++++++++++++++++ .../tests/integration/test_plugin.py | 29 + 6 files changed, 1440 insertions(+), 2 deletions(-) create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/dags/athena_operator.py create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_athena_operator.json create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_athena_operator_no_dag_listener.json diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 0d5ceefd989dca..02a0bbb6022e04 100644 --- 
a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -96,7 +96,7 @@ def get_long_description(): *plugins["datahub-kafka"], f"acryl-datahub[testing-utils]{_self_pin}", # Extra requirements for loading our test dags. - "apache-airflow[snowflake]>=2.0.2", + "apache-airflow[snowflake,amazon]>=2.0.2", # A collection of issues we've encountered: # - Connexion's new version breaks Airflow: # See https://github.com/apache/airflow/issues/35234. diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py index de0d4f8711f531..28d5775f61f542 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py @@ -50,7 +50,6 @@ def __init__(self): "BigQueryOperator", "BigQueryExecuteQueryOperator", # Athena also does something similar. - "AthenaOperator", "AWSAthenaOperator", # Additional types that OL doesn't support. This is only necessary because # on older versions of Airflow, these operators don't inherit from SQLExecuteQueryOperator. 
@@ -59,6 +58,8 @@ def __init__(self): for operator in _sql_operator_overrides: self.task_to_extractor.extractors[operator] = GenericSqlExtractor + self.task_to_extractor.extractors["AthenaOperator"] = AthenaOperatorExtractor + self.task_to_extractor.extractors[ "BigQueryInsertJobOperator" ] = BigQueryInsertJobOperatorExtractor @@ -276,6 +277,27 @@ def extract(self) -> Optional[TaskMetadata]: ) +class AthenaOperatorExtractor(BaseExtractor): + def extract(self) -> Optional[TaskMetadata]: + from airflow.providers.amazon.aws.operators.athena import ( + AthenaOperator, # type: ignore + ) + + operator: "AthenaOperator" = self.operator + sql = operator.query + if not sql: + self.log.warning("No query found in AthenaOperator") + return None + + return _parse_sql_into_task_metadata( + self, + sql, + platform="athena", + default_database=None, + default_schema=self.operator.database, + ) + + def _snowflake_default_schema(self: "SnowflakeExtractor") -> Optional[str]: if hasattr(self.operator, "schema") and self.operator.schema is not None: return self.operator.schema diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/athena_operator.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/athena_operator.py new file mode 100644 index 00000000000000..96cdacbbad37dd --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/athena_operator.py @@ -0,0 +1,43 @@ +from datetime import datetime + +from airflow import DAG +from airflow.providers.amazon.aws.operators.athena import AthenaOperator + +ATHENA_COST_TABLE = "costs" +ATHENA_PROCESSED_TABLE = "processed_costs" + + +def _fake_athena_execute(*args, **kwargs): + pass + + +with DAG( + "athena_operator", + start_date=datetime(2023, 1, 1), + schedule_interval=None, + catchup=False, +) as dag: + # HACK: We don't want to send real requests to Athena. As a workaround, + # we can simply monkey-patch the operator. 
+ AthenaOperator.execute = _fake_athena_execute # type: ignore + + transform_cost_table = AthenaOperator( + aws_conn_id="my_aws", + task_id="transform_cost_table", + database="athena_db", + query=""" + CREATE OR REPLACE TABLE {{ params.out_table_name }} AS + SELECT + id, + month, + total_cost, + area, + total_cost / area as cost_per_area + FROM {{ params.in_table_name }} + """, + params={ + "in_table_name": ATHENA_COST_TABLE, + "out_table_name": ATHENA_PROCESSED_TABLE, + }, + output_location="s3://athena-results-bucket/", + ) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_athena_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_athena_operator.json new file mode 100644 index 00000000000000..baa738fef7b5fd --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_athena_operator.json @@ -0,0 +1,672 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "description": "None", + "doc_md": "None", + "fileloc": "", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=athena_operator", + "name": "athena_operator", + "env": "PROD" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + 
"source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "athena_operator" + } + ] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.22.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM 
costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=athena_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + 
}, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena", + "name": "athena_db.costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena", + "name": "athena_db.processed_costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "", + "start_date": "", + "end_date": "", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "AthenaOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/dags/athena_operator/grid?dag_run_id=manual_run_test&task_id=transform_cost_table&map_index=-1&tab=logs", + "orchestrator": "airflow", + "dag_id": "athena_operator", + "task_id": "transform_cost_table" + }, + "externalUrl": "http://airflow.example.com/dags/athena_operator/grid?dag_run_id=manual_run_test&task_id=transform_cost_table&map_index=-1&tab=logs", + "name": "athena_operator_transform_cost_table_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1732719433576, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)" + ] + } + } +}, 
+{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena", + "name": "athena_db.costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena", + "name": "athena_db.processed_costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1732719433576, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1732719433736, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1732719433736 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { 
+ "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.22.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=athena_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + 
"upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena", + "name": "athena_db.costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": 
"urn:li:dataPlatform:athena", + "name": "athena_db.processed_costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1732719433747, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_athena_operator_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_athena_operator_no_dag_listener.json new file mode 100644 index 00000000000000..c53825a9979e3d --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_athena_operator_no_dag_listener.json @@ -0,0 +1,672 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "description": "None", + "doc_md": 
"None", + "fileloc": "", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=athena_operator", + "name": "athena_operator", + "env": "PROD" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,athena_operator,prod)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "athena_operator" + } + ] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + 
"aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.22.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=athena_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena", + "name": "athena_db.costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena", + "name": "athena_db.processed_costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "", + "start_date": "", + "end_date": "", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "AthenaOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=athena_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "athena_operator", + "task_id": "transform_cost_table" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=athena_operator&map_index=-1", + "name": "athena_operator_transform_cost_table_manual_run_test", + "type": 
"BATCH_AD_HOC", + "created": { + "time": 1733121901482, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena", + "name": "athena_db.costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena", + "name": "athena_db.processed_costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1733121901482, + 
"partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1733121901625, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1733121901625 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.22.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=athena_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": 
{ + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),area)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena", + "name": "athena_db.costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,athena_db.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena", + "name": "athena_db.processed_costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,athena_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:9cd4fbcec3a50a4988ffc5841beaf0ad", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1733121901675, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "COMPLETE", + "result": { 
+ "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py index 3becf10703df6c..75bb43af1a43dd 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -111,6 +111,24 @@ def _wait_for_dag_finish( raise NotReadyError(f"DAG has not finished yet: {dag_run['state']}") +@tenacity.retry( + reraise=True, + wait=tenacity.wait_fixed(1), + stop=tenacity.stop_after_delay(90), + retry=tenacity.retry_if_exception_type(NotReadyError), +) +def _wait_for_dag_to_load(airflow_instance: AirflowInstance, dag_id: str) -> None: + print("Checking if DAG was loaded") + res = airflow_instance.session.get( + url=f"{airflow_instance.airflow_url}/api/v1/dags", + timeout=5, + ) + res.raise_for_status() + + if len(list(filter(lambda x: x["dag_id"] == dag_id, res.json()["dags"]))) == 0: + raise NotReadyError("DAG was not loaded yet") + + def _dump_dag_logs(airflow_instance: AirflowInstance, dag_id: str) -> None: # Get the dag run info res = airflow_instance.session.get( @@ -206,6 +224,15 @@ def _run_airflow( "insecure_mode": "true", }, ).get_uri(), + "AIRFLOW_CONN_MY_AWS": Connection( + conn_id="my_aws", + conn_type="aws", + extra={ + "region_name": "us-east-1", + "aws_access_key_id": "AKIAIOSFODNN7EXAMPLE", + "aws_secret_access_key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + }, + ).get_uri(), "AIRFLOW_CONN_MY_SQLITE": Connection( conn_id="my_sqlite", conn_type="sqlite", @@ -327,6 +354,7 @@ class DagTestCase: DagTestCase("sqlite_operator", v2_only=True), DagTestCase("custom_operator_dag", v2_only=True), DagTestCase("datahub_emitter_operator_jinja_template_dag", v2_only=True), + DagTestCase("athena_operator", v2_only=True), ] @@ -398,6 +426,7 @@ def test_airflow_plugin( 
tmp_path, dags_folder=DAGS_FOLDER, is_v1=is_v1 ) as airflow_instance: print(f"Running DAG {dag_id}...") + _wait_for_dag_to_load(airflow_instance, dag_id) subprocess.check_call( [ "airflow", From df9755c9483d9d46603c82b122bbece71dad89be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Wed, 4 Dec 2024 10:06:25 +0100 Subject: [PATCH 09/28] feat(tableau): review reporting and debug traces (#12015) Co-authored-by: Harshal Sheth --- .../ingestion/source/tableau/tableau.py | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 0eafdb4ad23ba0..f3ad5ea706f7ca 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -289,16 +289,12 @@ def make_tableau_client(self, site: str) -> Server: server.auth.sign_in(authentication) return server except ServerResponseError as e: + message = f"Unable to login (invalid/expired credentials or missing permissions): {str(e)}" if isinstance(authentication, PersonalAccessTokenAuth): # Docs on token expiry in Tableau: # https://help.tableau.com/current/server/en-us/security_personal_access_tokens.htm#token-expiry - logger.info( - "Error authenticating with Tableau. Note that Tableau personal access tokens " - "expire if not used for 15 days or if over 1 year old" - ) - raise ValueError( - f"Unable to login (invalid/expired credentials or missing permissions): {str(e)}" - ) from e + message = f"Error authenticating with Tableau. 
Note that Tableau personal access tokens expire if not used for 15 days or if over 1 year old: {str(e)}" + raise ValueError(message) from e except Exception as e: raise ValueError( f"Unable to login (check your Tableau connection and credentials): {str(e)}" @@ -722,6 +718,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: title="Failed to Retrieve Tableau Metadata", message="Unable to retrieve metadata from tableau.", context=str(md_exception), + exc=md_exception, ) def close(self) -> None: @@ -826,6 +823,7 @@ def _populate_usage_stat_registry(self) -> None: if not view.id: continue self.tableau_stat_registry[view.id] = UsageStat(view_count=view.total_views) + logger.info(f"Got Tableau stats for {len(self.tableau_stat_registry)} assets") logger.debug("Tableau stats %s", self.tableau_stat_registry) def _populate_database_server_hostname_map(self) -> None: @@ -876,7 +874,7 @@ def form_path(project_id: str) -> List[str]: ancestors = [cur_proj.name] while cur_proj.parent_id is not None: if cur_proj.parent_id not in all_project_map: - self.report.report_warning( + self.report.warning( "project-issue", f"Parent project {cur_proj.parent_id} not found. We need Site Administrator Explorer permissions.", ) @@ -974,8 +972,11 @@ def _init_datasource_registry(self) -> None: self.datasource_project_map[ds.id] = ds.project_id except Exception as e: self.report.get_all_datasources_query_failed = True - logger.info(f"Get all datasources query failed due to error {e}") - logger.debug("Error stack trace", exc_info=True) + self.report.warning( + title="Unexpected Query Error", + message="Get all datasources query failed due to error", + exc=e, + ) def _init_workbook_registry(self) -> None: if self.server is None: @@ -1141,7 +1142,6 @@ def get_connection_object_page( ) if node_limit_errors: - logger.debug(f"Node Limit Error. 
query_data {query_data}") self.report.warning( title="Tableau Data Exceed Predefined Limit", message="The numbers of record in result set exceeds a predefined limit. Increase the tableau " @@ -1257,9 +1257,10 @@ def emit_workbooks(self) -> Iterable[MetadataWorkUnit]: wrk_id: Optional[str] = workbook.get(c.ID) prj_name: Optional[str] = workbook.get(c.PROJECT_NAME) - logger.debug( - f"Skipping workbook {wrk_name}({wrk_id}) as it is project {prj_name}({project_luid}) not " - f"present in project registry" + self.report.warning( + title="Skipping Missing Workbook", + message="Skipping workbook as its project is not present in project registry", + context=f"workbook={wrk_name}({wrk_id}), project={prj_name}({project_luid})", ) continue @@ -1453,7 +1454,7 @@ def get_upstream_tables( c.COLUMNS_CONNECTION ].get("totalCount") if not is_custom_sql and not num_tbl_cols: - logger.debug( + logger.warning( f"Skipping upstream table with id {table[c.ID]}, no columns: {table}" ) continue @@ -1469,7 +1470,12 @@ def get_upstream_tables( table, default_schema_map=self.config.default_schema_map ) except Exception as e: - logger.info(f"Failed to generate upstream reference for {table}: {e}") + self.report.warning( + title="Potentially Missing Lineage Issue", + message="Failed to generate upstream reference", + exc=e, + context=f"table={table}", + ) continue table_urn = ref.make_dataset_urn( @@ -1917,10 +1923,12 @@ def _query_published_datasource_for_project_luid(self, ds_luid: str) -> None: self.datasource_project_map[ds_result.id] = ds_result.project_id except Exception as e: self.report.num_get_datasource_query_failures += 1 - logger.warning( - f"Failed to get datasource project_luid for {ds_luid} due to error {e}" + self.report.warning( + title="Unexpected Query Error", + message="Failed to get datasource details", + exc=e, + context=f"ds_luid={ds_luid}", ) - logger.debug("Error stack trace", exc_info=True) def _get_workbook_project_luid(self, wb: dict) -> Optional[str]: if 
wb.get(c.LUID) and self.workbook_project_map.get(wb[c.LUID]): From 2b42b29d2fbfb12cfb68a0578b63993bcd182c07 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 4 Dec 2024 04:07:09 -0500 Subject: [PATCH 10/28] fix(ingest/tableau): make `sites.get_by_id` call optional (#12024) --- .../ingestion/source/tableau/tableau.py | 34 ++++++++++++++----- .../tableau/test_tableau_ingest.py | 2 ++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index f3ad5ea706f7ca..197e73dca7141b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -68,6 +68,7 @@ CapabilityReport, MetadataWorkUnitProcessor, Source, + StructuredLogLevel, TestableSource, TestConnectionReport, ) @@ -696,6 +697,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: config=self.config, ctx=self.ctx, site=site, + site_id=site.id, report=self.report, server=self.server, platform=self.platform, @@ -703,11 +705,19 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: logger.info(f"Ingesting assets of site '{site.content_url}'.") yield from site_source.ingest_tableau_site() else: - site = self.server.sites.get_by_id(self.server.site_id) + site = None + with self.report.report_exc( + title="Unable to fetch site details. Site hierarchy may be incomplete and external urls may be missing.", + message="This usually indicates missing permissions. 
Ensure that you have all necessary permissions.", + level=StructuredLogLevel.WARN, + ): + site = self.server.sites.get_by_id(self.server.site_id) + site_source = TableauSiteSource( config=self.config, ctx=self.ctx, site=site, + site_id=self.server.site_id, report=self.report, server=self.server, platform=self.platform, @@ -740,7 +750,8 @@ def __init__( self, config: TableauConfig, ctx: PipelineContext, - site: SiteItem, + site: Optional[SiteItem], + site_id: Optional[str], report: TableauSourceReport, server: Server, platform: str, @@ -749,9 +760,16 @@ def __init__( self.report = report self.server: Server = server self.ctx: PipelineContext = ctx - self.site: SiteItem = site self.platform = platform + self.site: Optional[SiteItem] = site + if site_id is not None: + self.site_id: str = site_id + else: + assert self.site is not None, "site or site_id is required" + assert self.site.id is not None, "site_id is required when site is provided" + self.site_id = self.site.id + self.database_tables: Dict[str, DatabaseTable] = {} self.tableau_stat_registry: Dict[str, UsageStat] = {} self.tableau_project_registry: Dict[str, TableauProject] = {} @@ -805,7 +823,7 @@ def dataset_browse_prefix(self) -> str: def _re_authenticate(self): tableau_auth: Union[ TableauAuth, PersonalAccessTokenAuth - ] = self.config.get_tableau_auth(self.site.content_url) + ] = self.config.get_tableau_auth(self.site_id) self.server.auth.sign_in(tableau_auth) @property @@ -3189,10 +3207,10 @@ def emit_project_in_topological_order( else: # This is a root Tableau project since the parent_project_id is None. # For a root project, either the site is the parent, or the platform is the default parent. - if self.config.add_site_container and self.site and self.site.id: + if self.config.add_site_container: # The site containers have already been generated by emit_site_container, so we # don't need to emit them again here. 
- parent_project_key = self.gen_site_key(self.site.id) + parent_project_key = self.gen_site_key(self.site_id) yield from gen_containers( container_key=project_key, @@ -3209,12 +3227,12 @@ def emit_project_in_topological_order( yield from emit_project_in_topological_order(project) def emit_site_container(self): - if not self.site or not self.site.id: + if not self.site: logger.warning("Can not ingest site container. No site information found.") return yield from gen_containers( - container_key=self.gen_site_key(self.site.id), + container_key=self.gen_site_key(self.site_id), name=self.site.name or "Default", sub_types=[c.SITE], ) diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 6c45b8a47de412..38a53b323876d1 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -1028,6 +1028,7 @@ def check_lineage_metadata( ctx=context, platform="tableau", site=SiteItem(name="Site 1", content_url="site1"), + site_id="site1", report=TableauSourceReport(), server=Server("https://test-tableau-server.com"), ) @@ -1248,6 +1249,7 @@ def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_gra config=mock.MagicMock(), ctx=mock.MagicMock(), site=mock.MagicMock(), + site_id=None, server=mock_sdk.return_value, report=reporter, ) From fcc8b367c1c71e777710b3298f1d0c0c0a362e1e Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 4 Dec 2024 16:24:05 +0530 Subject: [PATCH 11/28] feat(cli): add platform filter for undo soft delete (#12012) --- .../src/datahub/cli/delete_cli.py | 41 +++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/delete_cli.py b/metadata-ingestion/src/datahub/cli/delete_cli.py index 8b852513e03c0f..a640f941b75276 100644 --- a/metadata-ingestion/src/datahub/cli/delete_cli.py +++ 
b/metadata-ingestion/src/datahub/cli/delete_cli.py @@ -214,14 +214,47 @@ def references(urn: str, dry_run: bool, force: bool) -> None: @delete.command() -@click.option("--urn", required=True, type=str, help="the urn of the entity") -def undo_by_filter(urn: str) -> None: +@click.option("--urn", required=False, type=str, help="the urn of the entity") +@click.option( + "-p", + "--platform", + required=False, + type=str, + help="Platform filter (e.g. snowflake)", +) +@click.option( + "-b", + "--batch-size", + required=False, + default=3000, + type=int, + help="Batch size when querying for entities to un-soft delete." + "Maximum 10000. Large batch sizes may cause timeouts.", +) +def undo_by_filter( + urn: Optional[str], platform: Optional[str], batch_size: int +) -> None: """ - Undo a soft deletion of an entity + Undo soft deletion by filters """ graph = get_default_graph() logger.info(f"Using {graph}") - graph.set_soft_delete_status(urn=urn, delete=False) + if urn: + graph.set_soft_delete_status(urn=urn, delete=False) + else: + urns = list( + graph.get_urns_by_filter( + platform=platform, + query="*", + status=RemovedStatusFilter.ONLY_SOFT_DELETED, + batch_size=batch_size, + ) + ) + logger.info(f"Going to un-soft delete {len(urns)} urns") + urns_iter = progressbar.progressbar(urns, redirect_stdout=True) + for urn in urns_iter: + assert urn + graph.set_soft_delete_status(urn=urn, delete=False) @delete.command(no_args_is_help=True) From 02815259e0dd0544a9b060f15a61f3ed05820682 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:28:40 -0600 Subject: [PATCH 12/28] feat(mcp): add kafka batch processing mode option (#4449) (#12021) Co-authored-by: RyanHolstien --- .../metadata/client/JavaEntityClient.java | 106 +++++++++------- .../client/SystemJavaEntityClient.java | 5 +- .../metadata/client/JavaEntityClientTest.java | 3 +- .../SampleDataFixtureConfiguration.java | 3 +- 
.../SearchLineageFixtureConfiguration.java | 3 +- .../MetadataChangeProposalsProcessor.java | 80 ++---------- ...BatchMetadataChangeProposalsProcessor.java | 116 ++++++++++++++++++ ...adataChangeProposalProcessorCondition.java | 5 +- ...adataChangeProposalProcessorCondition.java | 16 +++ .../kafka/util/KafkaListenerUtil.java | 96 +++++++++++++++ .../entityclient/JavaEntityClientFactory.java | 4 +- 11 files changed, 312 insertions(+), 125 deletions(-) create mode 100644 metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java create mode 100644 metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/config/batch/BatchMetadataChangeProposalProcessorCondition.java create mode 100644 metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/util/KafkaListenerUtil.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index fa9109689caad4..29faa3955ea662 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -18,6 +18,7 @@ import com.linkedin.entity.Entity; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.EntityClientConfig; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.aspect.EnvelopedAspectArray; @@ -97,7 +98,7 @@ public class JavaEntityClient implements EntityClient { private final TimeseriesAspectService timeseriesAspectService; private final RollbackService rollbackService; private final EventProducer eventProducer; - private final int batchGetV2Size; + private final EntityClientConfig entityClientConfig; @Override @Nullable @@ -132,7 +133,7 @@ public Map batchGetV2( Map responseMap = new 
HashMap<>(); - Iterators.partition(urns.iterator(), Math.max(1, batchGetV2Size)) + Iterators.partition(urns.iterator(), Math.max(1, entityClientConfig.getBatchGetV2Size())) .forEachRemaining( batch -> { try { @@ -159,7 +160,8 @@ public Map batchGetVersionedV2( Map responseMap = new HashMap<>(); - Iterators.partition(versionedUrns.iterator(), Math.max(1, batchGetV2Size)) + Iterators.partition( + versionedUrns.iterator(), Math.max(1, entityClientConfig.getBatchGetV2Size())) .forEachRemaining( batch -> { try { @@ -760,48 +762,62 @@ public List batchIngestProposals( : Constants.UNKNOWN_ACTOR; final AuditStamp auditStamp = AuditStampUtils.createAuditStamp(actorUrnStr); - AspectsBatch batch = - AspectsBatchImpl.builder() - .mcps( - metadataChangeProposals, - auditStamp, - opContext.getRetrieverContext().get(), - opContext.getValidationContext().isAlternateValidation()) - .build(); - - List results = entityService.ingestProposal(opContext, batch, async); - entitySearchService.appendRunId(opContext, results); - - Map, List> resultMap = - results.stream() - .collect( - Collectors.groupingBy( - result -> - Pair.of( - result.getRequest().getUrn(), result.getRequest().getAspectName()))); - - // Preserve ordering - return batch.getItems().stream() - .map( - requestItem -> { - // Urns generated - List urnsForRequest = - resultMap - .getOrDefault( - Pair.of(requestItem.getUrn(), requestItem.getAspectName()), List.of()) - .stream() - .map(IngestResult::getUrn) - .filter(Objects::nonNull) - .distinct() - .collect(Collectors.toList()); - - // Update runIds - urnsForRequest.forEach( - urn -> tryIndexRunId(opContext, urn, requestItem.getSystemMetadata())); - - return urnsForRequest.isEmpty() ? 
null : urnsForRequest.get(0).toString(); - }) - .collect(Collectors.toList()); + List updatedUrns = new ArrayList<>(); + Iterators.partition( + metadataChangeProposals.iterator(), Math.max(1, entityClientConfig.getBatchGetV2Size())) + .forEachRemaining( + batch -> { + AspectsBatch aspectsBatch = + AspectsBatchImpl.builder() + .mcps( + batch, + auditStamp, + opContext.getRetrieverContext().get(), + opContext.getValidationContext().isAlternateValidation()) + .build(); + + List results = + entityService.ingestProposal(opContext, aspectsBatch, async); + entitySearchService.appendRunId(opContext, results); + + Map, List> resultMap = + results.stream() + .collect( + Collectors.groupingBy( + result -> + Pair.of( + result.getRequest().getUrn(), + result.getRequest().getAspectName()))); + + // Preserve ordering + updatedUrns.addAll( + aspectsBatch.getItems().stream() + .map( + requestItem -> { + // Urns generated + List urnsForRequest = + resultMap + .getOrDefault( + Pair.of(requestItem.getUrn(), requestItem.getAspectName()), + List.of()) + .stream() + .map(IngestResult::getUrn) + .filter(Objects::nonNull) + .distinct() + .collect(Collectors.toList()); + + // Update runIds + urnsForRequest.forEach( + urn -> + tryIndexRunId(opContext, urn, requestItem.getSystemMetadata())); + + return urnsForRequest.isEmpty() + ? 
null + : urnsForRequest.get(0).toString(); + }) + .collect(Collectors.toList())); + }); + return updatedUrns; } @SneakyThrows diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java index ab68abc69bce7c..eda9b3a880228f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClientCache; +import com.linkedin.entity.client.EntityClientConfig; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig; import com.linkedin.metadata.entity.DeleteEntityService; @@ -43,7 +44,7 @@ public SystemJavaEntityClient( RollbackService rollbackService, EventProducer eventProducer, EntityClientCacheConfig cacheConfig, - int batchGetV2Size) { + EntityClientConfig entityClientConfig) { super( entityService, deleteEntityService, @@ -54,7 +55,7 @@ public SystemJavaEntityClient( timeseriesAspectService, rollbackService, eventProducer, - batchGetV2Size); + entityClientConfig); this.operationContextMap = CacheBuilder.newBuilder().maximumSize(500).build(); this.entityClientCache = buildEntityClientCache(SystemJavaEntityClient.class, cacheConfig); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java b/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java index 7b1fccafbb9e63..4d977d179f91e4 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java @@ -12,6 +12,7 @@ import com.linkedin.common.urn.UrnUtils; import 
com.linkedin.data.template.RequiredFieldNotPresentException; import com.linkedin.domain.Domains; +import com.linkedin.entity.client.EntityClientConfig; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.batch.AspectsBatch; @@ -90,7 +91,7 @@ private JavaEntityClient getJavaEntityClient() { _timeseriesAspectService, rollbackService, _eventProducer, - 1); + EntityClientConfig.builder().batchGetV2Size(1).build()); } @Test diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index e47cdf80281c9a..d5aa7e9c51983a 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -7,6 +7,7 @@ import static org.mockito.Mockito.when; import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.EntityClientConfig; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; @@ -330,6 +331,6 @@ private EntityClient entityClientHelper( null, null, null, - 1); + EntityClientConfig.builder().batchGetV2Size(1).build()); } } diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index 889473d32d1a35..b7b698c73ddac3 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -4,6 +4,7 @@ import static 
io.datahubproject.test.search.SearchTestUtils.getGraphQueryConfiguration; import com.linkedin.entity.client.EntityClient; +import com.linkedin.entity.client.EntityClientConfig; import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.config.DataHubAppConfiguration; import com.linkedin.metadata.config.MetadataChangeProposalConfig; @@ -276,6 +277,6 @@ protected EntityClient entityClient( null, null, null, - 1); + EntityClientConfig.builder().batchGetV2Size(1).build()); } } diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java index ef87afdef46cb7..4e356f5fb3670a 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java @@ -15,26 +15,21 @@ import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entityclient.RestliEntityClientFactory; import com.linkedin.metadata.EventUtils; -import com.linkedin.metadata.dao.throttle.ThrottleControl; import com.linkedin.metadata.dao.throttle.ThrottleSensor; import com.linkedin.metadata.kafka.config.MetadataChangeProposalProcessorCondition; +import com.linkedin.metadata.kafka.util.KafkaListenerUtil; import com.linkedin.metadata.utils.metrics.MetricUtils; -import com.linkedin.mxe.FailedMetadataChangeProposal; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.Topics; import io.datahubproject.metadata.context.OperationContext; -import java.io.IOException; import java.util.Optional; -import javax.annotation.Nonnull; import javax.annotation.PostConstruct; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.avro.generic.GenericRecord; import 
org.apache.avro.generic.IndexedRecord; -import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.producer.Producer; -import org.apache.kafka.clients.producer.ProducerRecord; import org.slf4j.MDC; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; @@ -43,7 +38,6 @@ import org.springframework.kafka.annotation.EnableKafka; import org.springframework.kafka.annotation.KafkaListener; import org.springframework.kafka.config.KafkaListenerEndpointRegistry; -import org.springframework.kafka.listener.MessageListenerContainer; import org.springframework.stereotype.Component; @Slf4j @@ -80,38 +74,7 @@ public class MetadataChangeProposalsProcessor { @PostConstruct public void registerConsumerThrottle() { - if (kafkaThrottle != null - && provider - .getMetadataChangeProposal() - .getThrottle() - .getComponents() - .getMceConsumer() - .isEnabled()) { - log.info("MCE Consumer Throttle Enabled"); - kafkaThrottle.addCallback( - (throttleEvent) -> { - Optional container = - Optional.ofNullable(registry.getListenerContainer(mceConsumerGroupId)); - if (container.isEmpty()) { - log.warn( - "Expected container was missing: {} throttle is not possible.", - mceConsumerGroupId); - } else { - if (throttleEvent.isThrottled()) { - container.ifPresent(MessageListenerContainer::pause); - return ThrottleControl.builder() - // resume consumer after sleep - .callback( - (resumeEvent) -> container.ifPresent(MessageListenerContainer::resume)) - .build(); - } - } - - return ThrottleControl.NONE; - }); - } else { - log.info("MCE Consumer Throttle Disabled"); - } + KafkaListenerUtil.registerThrottle(kafkaThrottle, provider, registry, mceConsumerGroupId); } @KafkaListener( @@ -132,7 +95,9 @@ public void consume(final ConsumerRecord consumerRecord) consumerRecord.serializedValueSize(), consumerRecord.timestamp()); - log.debug("Record {}", record); + 
if (log.isDebugEnabled()) { + log.debug("Record {}", record); + } MetadataChangeProposal event = new MetadataChangeProposal(); try { @@ -148,45 +113,18 @@ public void consume(final ConsumerRecord consumerRecord) MDC.put( MDC_CHANGE_TYPE, Optional.ofNullable(changeType).map(ChangeType::toString).orElse("")); - log.debug("MetadataChangeProposal {}", event); - // TODO: Get this from the event itself. + if (log.isDebugEnabled()) { + log.debug("MetadataChangeProposal {}", event); + } String urn = entityClient.ingestProposal(systemOperationContext, event, false); log.info("Successfully processed MCP event urn: {}", urn); } catch (Throwable throwable) { log.error("MCP Processor Error", throwable); log.error("Message: {}", record); - sendFailedMCP(event, throwable); + KafkaListenerUtil.sendFailedMCP(event, throwable, fmcpTopicName, kafkaProducer); } } finally { MDC.clear(); } } - - private void sendFailedMCP(@Nonnull MetadataChangeProposal event, @Nonnull Throwable throwable) { - final FailedMetadataChangeProposal failedMetadataChangeProposal = - createFailedMCPEvent(event, throwable); - try { - final GenericRecord genericFailedMCERecord = - EventUtils.pegasusToAvroFailedMCP(failedMetadataChangeProposal); - log.debug("Sending FailedMessages to topic - {}", fmcpTopicName); - log.info( - "Error while processing FMCP: FailedMetadataChangeProposal - {}", - failedMetadataChangeProposal); - kafkaProducer.send(new ProducerRecord<>(fmcpTopicName, genericFailedMCERecord)); - } catch (IOException e) { - log.error( - "Error while sending FailedMetadataChangeProposal: Exception - {}, FailedMetadataChangeProposal - {}", - e.getStackTrace(), - failedMetadataChangeProposal); - } - } - - @Nonnull - private FailedMetadataChangeProposal createFailedMCPEvent( - @Nonnull MetadataChangeProposal event, @Nonnull Throwable throwable) { - final FailedMetadataChangeProposal fmcp = new FailedMetadataChangeProposal(); - fmcp.setError(ExceptionUtils.getStackTrace(throwable)); - 
fmcp.setMetadataChangeProposal(event); - return fmcp; - } } diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java new file mode 100644 index 00000000000000..fed93628fe4d79 --- /dev/null +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java @@ -0,0 +1,116 @@ +package com.linkedin.metadata.kafka.batch; + +import com.codahale.metrics.Histogram; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import com.linkedin.entity.client.SystemEntityClient; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.gms.factory.entityclient.RestliEntityClientFactory; +import com.linkedin.metadata.EventUtils; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; +import com.linkedin.metadata.kafka.config.batch.BatchMetadataChangeProposalProcessorCondition; +import com.linkedin.metadata.kafka.util.KafkaListenerUtil; +import com.linkedin.metadata.utils.metrics.MetricUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.Topics; +import io.datahubproject.metadata.context.OperationContext; +import java.util.ArrayList; +import java.util.List; +import javax.annotation.PostConstruct; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.producer.Producer; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Conditional; +import org.springframework.context.annotation.Import; +import 
org.springframework.kafka.annotation.EnableKafka; +import org.springframework.kafka.annotation.KafkaListener; +import org.springframework.kafka.config.KafkaListenerEndpointRegistry; +import org.springframework.stereotype.Component; + +@Slf4j +@Component +@Import({RestliEntityClientFactory.class}) +@Conditional(BatchMetadataChangeProposalProcessorCondition.class) +@EnableKafka +@RequiredArgsConstructor +public class BatchMetadataChangeProposalsProcessor { + private static final String CONSUMER_GROUP_ID_VALUE = + "${METADATA_CHANGE_PROPOSAL_KAFKA_CONSUMER_GROUP_ID:generic-mce-consumer-job-client}"; + + private final OperationContext systemOperationContext; + private final SystemEntityClient entityClient; + private final Producer kafkaProducer; + + @Qualifier("kafkaThrottle") + private final ThrottleSensor kafkaThrottle; + + private final KafkaListenerEndpointRegistry registry; + private final ConfigurationProvider provider; + + private final Histogram kafkaLagStats = + MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag")); + + @Value( + "${FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME:" + + Topics.FAILED_METADATA_CHANGE_PROPOSAL + + "}") + private String fmcpTopicName; + + @Value(CONSUMER_GROUP_ID_VALUE) + private String mceConsumerGroupId; + + @PostConstruct + public void registerConsumerThrottle() { + KafkaListenerUtil.registerThrottle(kafkaThrottle, provider, registry, mceConsumerGroupId); + } + + @KafkaListener( + id = CONSUMER_GROUP_ID_VALUE, + topics = "${METADATA_CHANGE_PROPOSAL_TOPIC_NAME:" + Topics.METADATA_CHANGE_PROPOSAL + "}", + containerFactory = "kafkaEventConsumer", + batch = "true") + public void consume(final List> consumerRecords) { + try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "consume").time()) { + List metadataChangeProposals = + new ArrayList<>(consumerRecords.size()); + for (ConsumerRecord consumerRecord : consumerRecords) { + kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); 
+ final GenericRecord record = consumerRecord.value(); + + log.info( + "Got MCP event key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}", + consumerRecord.key(), + consumerRecord.topic(), + consumerRecord.partition(), + consumerRecord.offset(), + consumerRecord.serializedValueSize(), + consumerRecord.timestamp()); + + MetadataChangeProposal event = new MetadataChangeProposal(); + try { + event = EventUtils.avroToPegasusMCP(record); + } catch (Throwable throwable) { + log.error("MCP Processor Error", throwable); + log.error("Message: {}", record); + KafkaListenerUtil.sendFailedMCP(event, throwable, fmcpTopicName, kafkaProducer); + } + metadataChangeProposals.add(event); + } + + try { + List urns = + entityClient.batchIngestProposals( + systemOperationContext, metadataChangeProposals, false); + log.info("Successfully processed MCP event urns: {}", urns); + } catch (Exception e) { + // Java client should never throw this + log.error("Exception in batch ingest", e); + } + } + } +} diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/config/MetadataChangeProposalProcessorCondition.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/config/MetadataChangeProposalProcessorCondition.java index 1cdb05b04e0ac9..554684d5e8fe77 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/config/MetadataChangeProposalProcessorCondition.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/config/MetadataChangeProposalProcessorCondition.java @@ -9,7 +9,8 @@ public class MetadataChangeProposalProcessorCondition implements Condition { @Override public boolean matches(ConditionContext context, AnnotatedTypeMetadata metadata) { Environment env = context.getEnvironment(); - return "true".equals(env.getProperty("MCE_CONSUMER_ENABLED")) - || "true".equals(env.getProperty("MCP_CONSUMER_ENABLED")); + return ("true".equals(env.getProperty("MCE_CONSUMER_ENABLED")) + 
|| "true".equals(env.getProperty("MCP_CONSUMER_ENABLED"))) + && !Boolean.parseBoolean(env.getProperty("MCP_CONSUMER_BATCH_ENABLED")); } } diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/config/batch/BatchMetadataChangeProposalProcessorCondition.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/config/batch/BatchMetadataChangeProposalProcessorCondition.java new file mode 100644 index 00000000000000..296e37c7a90695 --- /dev/null +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/config/batch/BatchMetadataChangeProposalProcessorCondition.java @@ -0,0 +1,16 @@ +package com.linkedin.metadata.kafka.config.batch; + +import org.springframework.context.annotation.Condition; +import org.springframework.context.annotation.ConditionContext; +import org.springframework.core.env.Environment; +import org.springframework.core.type.AnnotatedTypeMetadata; + +public class BatchMetadataChangeProposalProcessorCondition implements Condition { + @Override + public boolean matches(ConditionContext context, AnnotatedTypeMetadata metadata) { + Environment env = context.getEnvironment(); + return ("true".equals(env.getProperty("MCE_CONSUMER_ENABLED")) + || "true".equals(env.getProperty("MCP_CONSUMER_ENABLED"))) + && Boolean.parseBoolean(env.getProperty("MCP_CONSUMER_BATCH_ENABLED")); + } +} diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/util/KafkaListenerUtil.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/util/KafkaListenerUtil.java new file mode 100644 index 00000000000000..874a45c995e911 --- /dev/null +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/util/KafkaListenerUtil.java @@ -0,0 +1,96 @@ +package com.linkedin.metadata.kafka.util; + +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.EventUtils; +import com.linkedin.metadata.dao.throttle.ThrottleControl; +import 
com.linkedin.metadata.dao.throttle.ThrottleSensor; +import com.linkedin.mxe.FailedMetadataChangeProposal; +import com.linkedin.mxe.MetadataChangeProposal; +import java.io.IOException; +import java.util.Optional; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.springframework.kafka.config.KafkaListenerEndpointRegistry; +import org.springframework.kafka.listener.MessageListenerContainer; + +@Slf4j +public class KafkaListenerUtil { + + private KafkaListenerUtil() {} + + public static void registerThrottle( + ThrottleSensor kafkaThrottle, + ConfigurationProvider provider, + KafkaListenerEndpointRegistry registry, + String mceConsumerGroupId) { + if (kafkaThrottle != null + && provider + .getMetadataChangeProposal() + .getThrottle() + .getComponents() + .getMceConsumer() + .isEnabled()) { + log.info("MCE Consumer Throttle Enabled"); + kafkaThrottle.addCallback( + (throttleEvent) -> { + Optional container = + Optional.ofNullable(registry.getListenerContainer(mceConsumerGroupId)); + if (container.isEmpty()) { + log.warn( + "Expected container was missing: {} throttle is not possible.", + mceConsumerGroupId); + } else { + if (throttleEvent.isThrottled()) { + container.ifPresent(MessageListenerContainer::pause); + return ThrottleControl.builder() + // resume consumer after sleep + .callback( + (resumeEvent) -> container.ifPresent(MessageListenerContainer::resume)) + .build(); + } + } + + return ThrottleControl.NONE; + }); + } else { + log.info("MCE Consumer Throttle Disabled"); + } + } + + public static void sendFailedMCP( + @Nonnull MetadataChangeProposal event, + @Nonnull Throwable throwable, + String fmcpTopicName, + Producer kafkaProducer) { + final FailedMetadataChangeProposal 
failedMetadataChangeProposal = + createFailedMCPEvent(event, throwable); + try { + final GenericRecord genericFailedMCERecord = + EventUtils.pegasusToAvroFailedMCP(failedMetadataChangeProposal); + log.debug("Sending FailedMessages to topic - {}", fmcpTopicName); + log.info( + "Error while processing FMCP: FailedMetadataChangeProposal - {}", + failedMetadataChangeProposal); + kafkaProducer.send(new ProducerRecord<>(fmcpTopicName, genericFailedMCERecord)); + } catch (IOException e) { + log.error( + "Error while sending FailedMetadataChangeProposal: Exception - {}, FailedMetadataChangeProposal - {}", + e.getStackTrace(), + failedMetadataChangeProposal); + } + } + + @Nonnull + public static FailedMetadataChangeProposal createFailedMCPEvent( + @Nonnull MetadataChangeProposal event, @Nonnull Throwable throwable) { + final FailedMetadataChangeProposal fmcp = new FailedMetadataChangeProposal(); + fmcp.setError(ExceptionUtils.getStackTrace(throwable)); + fmcp.setMetadataChangeProposal(event); + return fmcp; + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java index e99978a26d6cf5..e783b4e1963d0a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java @@ -50,7 +50,7 @@ public EntityClient entityClient( _timeseriesAspectService, rollbackService, _eventProducer, - entityClientConfig.getBatchGetV2Size()); + entityClientConfig); } @Bean("systemEntityClient") @@ -79,6 +79,6 @@ public SystemEntityClient systemEntityClient( rollbackService, _eventProducer, entityClientCacheConfig, - entityClientConfig.getBatchGetV2Size()); + entityClientConfig); } } From 9334b2e324a26e28096c57ddbdcd744aca0a4988 Mon Sep 17 00:00:00 2001 From: 
Aseem Bansal Date: Wed, 4 Dec 2024 21:47:21 +0530 Subject: [PATCH 13/28] chore: update label for team (#12032) --- .github/workflows/pr-labeler.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-labeler.yml b/.github/workflows/pr-labeler.yml index 7da20ece44f6d6..de7ad21b3e67bb 100644 --- a/.github/workflows/pr-labeler.yml +++ b/.github/workflows/pr-labeler.yml @@ -29,7 +29,6 @@ jobs: "swaroopjagadish", "treff7es", "yoonhyejin", - "eboneil", "gabe-lyons", "hsheth2", "jjoyce0510", @@ -37,16 +36,17 @@ jobs: "pedro93", "RyanHolstien", "sakethvarma397", - "Kunal-kankriya", "purnimagarg1", - "dushayntAW", "sagar-salvi-apptware", "kushagra-apptware", "Salman-Apptware", "mayurinehate", "noggi", "skrydal", - "kevinkarchacryl" + "kevinkarchacryl", + "sgomezvillamor", + "acrylJonny", + "chakru-r" ]'), github.actor ) From cbae728cf97e2227ab8e3c140e2f35098c973363 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Wed, 4 Dec 2024 11:30:53 -0800 Subject: [PATCH 14/28] fix(ui): Adding overflow handling (also goes to oss) (#12022) Co-authored-by: John Joyce --- .../ingest/source/executions/reporting/StructuredReportItem.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/datahub-web-react/src/app/ingest/source/executions/reporting/StructuredReportItem.tsx b/datahub-web-react/src/app/ingest/source/executions/reporting/StructuredReportItem.tsx index d15f30bc03211c..1cd4349f37d949 100644 --- a/datahub-web-react/src/app/ingest/source/executions/reporting/StructuredReportItem.tsx +++ b/datahub-web-react/src/app/ingest/source/executions/reporting/StructuredReportItem.tsx @@ -16,6 +16,7 @@ const StyledCollapse = styled(Collapse)<{ color: string }>` .ant-collapse-header { display: flex; align-items: center; + overflow: auto; } .ant-collapse-item { From ca46c022d5c05f00ba32f37b20691b1155b3e2c8 Mon Sep 17 00:00:00 2001 From: Alice-sky <1835063592@qq.com> Date: Thu, 5 Dec 2024 04:19:18 +0800 Subject: [PATCH 15/28] fix(ingest/pulsar): handle 
missing/invalid schema objects (#11945) Co-authored-by: Alice --- .../src/datahub/ingestion/source/pulsar.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py index 790c1f918cdfd2..15ee995b2d5fdc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py +++ b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py @@ -78,7 +78,17 @@ class PulsarSchema: def __init__(self, schema): self.schema_version = schema.get("version") - avro_schema = json.loads(schema.get("data")) + schema_data = schema.get("data") + if not schema_data: + logger.warning("Schema data is empty or None. Using default empty schema.") + schema_data = "{}" + + try: + avro_schema = json.loads(schema_data) + except json.JSONDecodeError as e: + logger.error(f"Invalid JSON schema: {schema_data}. Error: {str(e)}") + avro_schema = {} + self.schema_name = avro_schema.get("namespace") + "." 
+ avro_schema.get("name") self.schema_description = avro_schema.get("doc") self.schema_type = schema.get("type") From 97e328260f02c4e9b24ae29321bd6e99390388b8 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Wed, 4 Dec 2024 17:12:46 -0500 Subject: [PATCH 16/28] fix(filters) Fix issues with structured properties filters (#11946) --- .../models/StructuredPropertyUtils.java | 2 +- .../request/AggregationQueryBuilder.java | 21 +++- .../metadata/search/utils/ESUtils.java | 25 ++++- .../request/AggregationQueryBuilderTest.java | 104 ++++++++++++++++++ .../metadata/search/utils/ESUtilsTest.java | 43 ++++++++ 5 files changed, 187 insertions(+), 8 deletions(-) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java index 41ef9c25a0f3eb..e9ee7789550c6c 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java @@ -178,7 +178,7 @@ public static String toElasticsearchFieldName( /** * Return an elasticsearch type from structured property type * - * @param fieldName filter or facet field name + * @param fieldName filter or facet field name - must match actual FQN of structured prop * @param aspectRetriever aspect retriever * @return elasticsearch type */ diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java index 39f69ed1716abd..60ca7649331a00 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java @@ -379,7 +379,7 @@ private void 
addCriteriaFiltersToAggregationMetadata( } } - private void addCriterionFiltersToAggregationMetadata( + public void addCriterionFiltersToAggregationMetadata( @Nonnull final Criterion criterion, @Nonnull final List aggregationMetadata, @Nullable AspectRetriever aspectRetriever) { @@ -422,6 +422,17 @@ private void addCriterionFiltersToAggregationMetadata( value -> addMissingAggregationValueToAggregationMetadata(value, originalAggMetadata)); } + } else if (aggregationMetadataMap.containsKey(criterion.getField())) { + /* + * If we already have aggregations for the facet field (original field name), simply inject any missing values counts into the set. + * If there are no results for a particular facet value, it will NOT be in the original aggregation set returned by + * Elasticsearch. + */ + AggregationMetadata originalAggMetadata = aggregationMetadataMap.get(criterion.getField()); + criterion + .getValues() + .forEach( + value -> addMissingAggregationValueToAggregationMetadata(value, originalAggMetadata)); } else { /* * If we do not have ANY aggregation for the facet field, then inject a new aggregation metadata object for the @@ -429,10 +440,14 @@ private void addCriterionFiltersToAggregationMetadata( * If there are no results for a particular facet, it will NOT be in the original aggregation set returned by * Elasticsearch. */ + // Simply replace suffix from original field when there are no aggregations for it. Prevents + // bug where ES mappings for field are different from how we map the field back to UI + // (ie. 
Structured Properties with dots in them) + String facetField = ESUtils.replaceSuffix(criterion.getField()); aggregationMetadata.add( buildAggregationMetadata( - finalFacetField, - getFacetToDisplayNames().getOrDefault(finalFacetField, finalFacetField), + facetField, + getFacetToDisplayNames().getOrDefault(facetField, facetField), new LongMap( criterion.getValues().stream().collect(Collectors.toMap(i -> i, i -> 0L))), new FilterValueArray( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 9698a1c10d8b54..17bbbaf059dec4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -448,9 +448,20 @@ public static String toParentField( urnDefinition.getFirst(), urnDefinition.getSecond())) .orElse(filterField); + return replaceSuffix(fieldName); + } + + /** + * Strip subfields from filter field + * + * @param fieldName name of the field + * @return normalized field name without subfields + */ + @Nonnull + public static String replaceSuffix(@Nonnull final String fieldName) { for (String subfield : SUBFIELDS) { String SUFFIX = "." 
+ subfield; - if (filterField.endsWith(SUFFIX)) { + if (fieldName.endsWith(SUFFIX)) { return fieldName.replace(SUFFIX, ""); } } @@ -710,7 +721,8 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues( final Map> searchableFieldTypes, @Nonnull AspectRetriever aspectRetriever, boolean enableCaseInsensitiveSearch) { - Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever); + Set fieldTypes = + getFieldTypes(searchableFieldTypes, fieldName, criterion, aspectRetriever); if (fieldTypes.size() > 1) { log.warn( "Multiple field types for field name {}, determining best fit for set: {}", @@ -753,12 +765,16 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues( private static Set getFieldTypes( Map> searchableFields, String fieldName, + @Nonnull final Criterion criterion, @Nullable AspectRetriever aspectRetriever) { final Set finalFieldTypes; if (fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { + // use criterion field here for structured props since fieldName has dots replaced with + // underscores finalFieldTypes = - StructuredPropertyUtils.toElasticsearchFieldType(fieldName, aspectRetriever); + StructuredPropertyUtils.toElasticsearchFieldType( + replaceSuffix(criterion.getField()), aspectRetriever); } else { Set fieldTypes = searchableFields.getOrDefault(fieldName.split("\\.")[0], Collections.emptySet()); @@ -782,7 +798,8 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion( Condition condition, boolean isTimeseries, AspectRetriever aspectRetriever) { - Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever); + Set fieldTypes = + getFieldTypes(searchableFieldTypes, fieldName, criterion, aspectRetriever); // Determine criterion value, range query only accepts single value so take first value in // values if multiple diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java 
b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java index cef463802a6b14..3969223981ec3f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java @@ -3,6 +3,7 @@ import static com.linkedin.metadata.Constants.DATA_TYPE_URN_PREFIX; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static com.linkedin.metadata.utils.SearchUtil.*; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anySet; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; @@ -12,23 +13,36 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.DataMap; +import com.linkedin.data.template.LongMap; import com.linkedin.data.template.SetMode; +import com.linkedin.data.template.StringArray; import com.linkedin.entity.Aspect; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.search.AggregationMetadata; +import com.linkedin.metadata.search.FilterValue; +import com.linkedin.metadata.search.FilterValueArray; import com.linkedin.metadata.search.elasticsearch.query.request.AggregationQueryBuilder; import com.linkedin.r2.RemoteInvocationException; import com.linkedin.structured.StructuredPropertyDefinition; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; +import 
java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import org.mockito.Mockito; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.testng.Assert; @@ -598,4 +612,94 @@ public void testMissingAggregation() { .equals( MISSING_SPECIAL_TYPE + AGGREGATION_SPECIAL_TYPE_DELIMITER + "test"))); } + + @Test + public void testAddFiltersToMetadataWithStructuredPropsNoResults() { + final Urn propertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:test_me.one"); + + SearchConfiguration config = new SearchConfiguration(); + config.setMaxTermBucketSize(25); + + AggregationQueryBuilder builder = + new AggregationQueryBuilder( + config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of())); + + Criterion criterion = + new Criterion() + .setField("structuredProperties.test_me.one") + .setValues(new StringArray("test123")) + .setCondition(Condition.EQUAL); + + AspectRetriever mockAspectRetriever = getMockAspectRetriever(propertyUrn); + + final List aggregationMetadataList = new ArrayList<>(); + builder.addCriterionFiltersToAggregationMetadata( + criterion, aggregationMetadataList, mockAspectRetriever); + + // ensure we add the correct structured prop aggregation here + Assert.assertEquals(aggregationMetadataList.size(), 1); + // Assert.assertEquals(aggregationMetadataList.get(0).getEntity(), propertyUrn); + Assert.assertEquals( + aggregationMetadataList.get(0).getName(), "structuredProperties.test_me.one"); + Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().size(), 1); + Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().get("test123"), 0); + } + + @Test + public void testAddFiltersToMetadataWithStructuredPropsWithAggregations() { + final Urn propertyUrn = 
UrnUtils.getUrn("urn:li:structuredProperty:test_me.one"); + + final AggregationMetadata aggregationMetadata = new AggregationMetadata(); + aggregationMetadata.setName("structuredProperties.test_me.one"); + FilterValue filterValue = + new FilterValue().setValue("test123").setFiltered(false).setFacetCount(1); + aggregationMetadata.setFilterValues(new FilterValueArray(filterValue)); + LongMap aggregations = new LongMap(); + aggregations.put("test123", 1L); + aggregationMetadata.setAggregations(aggregations); + + SearchConfiguration config = new SearchConfiguration(); + config.setMaxTermBucketSize(25); + + AggregationQueryBuilder builder = + new AggregationQueryBuilder( + config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of())); + + Criterion criterion = + new Criterion() + .setField("structuredProperties.test_me.one") + .setValues(new StringArray("test123")) + .setCondition(Condition.EQUAL); + + AspectRetriever mockAspectRetriever = getMockAspectRetriever(propertyUrn); + + final List aggregationMetadataList = new ArrayList<>(); + aggregationMetadataList.add(aggregationMetadata); + builder.addCriterionFiltersToAggregationMetadata( + criterion, aggregationMetadataList, mockAspectRetriever); + + Assert.assertEquals(aggregationMetadataList.size(), 1); + Assert.assertEquals( + aggregationMetadataList.get(0).getName(), "structuredProperties.test_me.one"); + Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().size(), 1); + Assert.assertEquals(aggregationMetadataList.get(0).getAggregations().get("test123"), 1); + } + + private AspectRetriever getMockAspectRetriever(Urn propertyUrn) { + AspectRetriever mockAspectRetriever = Mockito.mock(AspectRetriever.class); + Map> mockResult = new HashMap<>(); + Map aspectMap = new HashMap<>(); + DataMap definition = new DataMap(); + definition.put("qualifiedName", "test_me.one"); + definition.put("valueType", "urn:li:dataType:datahub.string"); + Aspect definitionAspect = new Aspect(definition); + 
aspectMap.put(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, definitionAspect); + mockResult.put(propertyUrn, aspectMap); + Set urns = new HashSet<>(); + urns.add(propertyUrn); + Mockito.when(mockAspectRetriever.getLatestAspectObjects(eq(urns), any())) + .thenReturn(mockResult); + + return mockAspectRetriever; + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java index 54a9e7d8b47bda..4f2bda39ad2117 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java @@ -45,6 +45,7 @@ public static void setup() throws RemoteInvocationException, URISyntaxException Urn abFghTenUrn = Urn.createFromString("urn:li:structuredProperty:ab.fgh.ten"); Urn underscoresAndDotsUrn = Urn.createFromString("urn:li:structuredProperty:under.scores.and.dots_make_a_mess"); + Urn dateWithDotsUrn = Urn.createFromString("urn:li:structuredProperty:date_here.with_dot"); // legacy aspectRetriever = mock(AspectRetriever.class); @@ -64,6 +65,18 @@ public static void setup() throws RemoteInvocationException, URISyntaxException STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, new Aspect(structPropAbFghTenDefinition.data())))); + StructuredPropertyDefinition dateWithDotsDefinition = new StructuredPropertyDefinition(); + dateWithDotsDefinition.setVersion(null, SetMode.REMOVE_IF_NULL); + dateWithDotsDefinition.setValueType(Urn.createFromString(DATA_TYPE_URN_PREFIX + "date")); + dateWithDotsDefinition.setQualifiedName("date_here.with_dot"); + when(aspectRetriever.getLatestAspectObjects(eq(Set.of(dateWithDotsUrn)), anySet())) + .thenReturn( + Map.of( + dateWithDotsUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(dateWithDotsDefinition.data())))); + StructuredPropertyDefinition structPropUnderscoresAndDotsDefinition = new StructuredPropertyDefinition(); 
structPropUnderscoresAndDotsDefinition.setVersion(null, SetMode.REMOVE_IF_NULL); @@ -895,6 +908,36 @@ public void testGetQueryBuilderFromNamespacedStructPropEqualsValueV1() { Assert.assertEquals(result.toString(), expected); } + @Test + public void testGetQueryBuilderFromDatesWithDots() { + + final Criterion singleValueCriterion = + buildCriterion( + "structuredProperties.date_here.with_dot", Condition.GREATER_THAN, "1731974400000"); + + OperationContext opContext = mock(OperationContext.class); + when(opContext.getAspectRetriever()).thenReturn(aspectRetriever); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), opContext, QueryFilterRewriteChain.EMPTY); + // structuredProperties.date_here_with_dot should not have .keyword at the end since this field + // type is type long for dates + String expected = + "{\n" + + " \"range\" : {\n" + + " \"structuredProperties.date_here_with_dot\" : {\n" + + " \"from\" : 1731974400000,\n" + + " \"to\" : null,\n" + + " \"include_lower\" : false,\n" + + " \"include_upper\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"structuredProperties.date_here.with_dot\"\n" + + " }\n" + + " }\n" + + "}"; + Assert.assertEquals(result.toString(), expected); + } + @Test public void testGetQueryBuilderFromStructPropExists() { final Criterion singleValueCriterion = buildExistsCriterion("structuredProperties.ab.fgh.ten"); From 65f44efbad04417d6cc761bde5f81bbe0cb986db Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 4 Dec 2024 17:50:56 -0500 Subject: [PATCH 17/28] fix(ingest): avoid bad IPython version (#12035) --- metadata-ingestion/setup.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index c6d55fb5bcc56e..5ae5438e212c5b 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -142,6 +142,15 @@ # datahub does not depend on traitlets directly but great expectations does. 
# https://github.com/ipython/traitlets/issues/741 "traitlets!=5.2.2", + # GE depends on IPython - we have no direct dependency on it. + # IPython 8.22.0 added a dependency on traitlets 5.13.x, but only declared a + # version requirement of traitlets>5. + # See https://github.com/ipython/ipython/issues/14352. + # This issue was fixed by https://github.com/ipython/ipython/pull/14353, + # which first appeared in IPython 8.22.1. + # As such, we just need to avoid that version in order to get the + # dependencies that we need. IPython probably should've yanked 8.22.0. + "IPython!=8.22.0", "greenlet", *cachetools_lib, } From 8d15df0c112b412dd89e4dd474534493749cdcb8 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 5 Dec 2024 12:41:43 +0530 Subject: [PATCH 18/28] feat(ingest/kafka): additional validation for oauth_db signature (#11996) --- .../configuration/kafka_consumer_config.py | 32 ++++++++++++++++++- .../tests/integration/kafka/oauth.py | 16 ++++++++++ .../tests/integration/kafka/test_kafka.py | 31 +++++++++++++++++- 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/kafka_consumer_config.py b/metadata-ingestion/src/datahub/configuration/kafka_consumer_config.py index cac6bb4996391f..f08c78cadc0b2b 100644 --- a/metadata-ingestion/src/datahub/configuration/kafka_consumer_config.py +++ b/metadata-ingestion/src/datahub/configuration/kafka_consumer_config.py @@ -1,3 +1,4 @@ +import inspect import logging from typing import Any, Dict, Optional @@ -34,5 +35,34 @@ def _resolve_oauth_callback(self) -> None: "oauth_cb must be a string representing python function reference " "in the format :." 
) + + call_back_fn = import_path(call_back) + self._validate_call_back_fn_signature(call_back_fn) + # Set the callback - self._config[CallableConsumerConfig.CALLBACK_ATTRIBUTE] = import_path(call_back) + self._config[CallableConsumerConfig.CALLBACK_ATTRIBUTE] = call_back_fn + + def _validate_call_back_fn_signature(self, call_back_fn: Any) -> None: + sig = inspect.signature(call_back_fn) + + num_positional_args = len( + [ + param + for param in sig.parameters.values() + if param.kind + in ( + inspect.Parameter.POSITIONAL_ONLY, + inspect.Parameter.POSITIONAL_OR_KEYWORD, + ) + and param.default == inspect.Parameter.empty + ] + ) + + has_variadic_args = any( + param.kind == inspect.Parameter.VAR_POSITIONAL + for param in sig.parameters.values() + ) + + assert num_positional_args == 1 or ( + has_variadic_args and num_positional_args <= 1 + ), "oauth_cb function must accept single positional argument." diff --git a/metadata-ingestion/tests/integration/kafka/oauth.py b/metadata-ingestion/tests/integration/kafka/oauth.py index 28cfee521d6c0f..81a91fcd5e4069 100644 --- a/metadata-ingestion/tests/integration/kafka/oauth.py +++ b/metadata-ingestion/tests/integration/kafka/oauth.py @@ -12,3 +12,19 @@ def create_token(*args: Any, **kwargs: Any) -> Tuple[str, int]: "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJjbGllbnRfaWQiOiJrYWZrYV9jbGllbnQiLCJleHAiOjE2OTg3NjYwMDB9.dummy_sig_abcdef123456", 3600, ) + + +def create_token_no_args() -> Tuple[str, int]: + logger.warning(MESSAGE) + return ( + "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJjbGllbnRfaWQiOiJrYWZrYV9jbGllbnQiLCJleHAiOjE2OTg3NjYwMDB9.dummy_sig_abcdef123456", + 3600, + ) + + +def create_token_only_kwargs(**kwargs: Any) -> Tuple[str, int]: + logger.warning(MESSAGE) + return ( + "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJjbGllbnRfaWQiOiJrYWZrYV9jbGllbnQiLCJleHAiOjE2OTg3NjYwMDB9.dummy_sig_abcdef123456", + 3600, + ) diff --git a/metadata-ingestion/tests/integration/kafka/test_kafka.py 
b/metadata-ingestion/tests/integration/kafka/test_kafka.py index 7462f177684b7e..bf0ec1845a66c2 100644 --- a/metadata-ingestion/tests/integration/kafka/test_kafka.py +++ b/metadata-ingestion/tests/integration/kafka/test_kafka.py @@ -5,9 +5,10 @@ import yaml from freezegun import freeze_time +from datahub.configuration.common import ConfigurationError from datahub.ingestion.api.source import SourceCapability from datahub.ingestion.run.pipeline import Pipeline -from datahub.ingestion.source.kafka.kafka import KafkaSource +from datahub.ingestion.source.kafka.kafka import KafkaSource, KafkaSourceConfig from tests.integration.kafka import oauth # type: ignore from tests.test_helpers import mce_helpers, test_connection_helpers from tests.test_helpers.click_helpers import run_datahub_cmd @@ -157,3 +158,31 @@ def test_kafka_oauth_callback( assert checks["consumer_oauth_callback"], "Consumer oauth callback not found" assert checks["admin_polling"], "Admin polling was not initiated" assert checks["admin_oauth_callback"], "Admin oauth callback not found" + + +def test_kafka_source_oauth_cb_signature(): + with pytest.raises( + ConfigurationError, + match=("oauth_cb function must accept single positional argument."), + ): + KafkaSourceConfig.parse_obj( + { + "connection": { + "bootstrap": "foobar:9092", + "consumer_config": {"oauth_cb": "oauth:create_token_no_args"}, + } + } + ) + + with pytest.raises( + ConfigurationError, + match=("oauth_cb function must accept single positional argument."), + ): + KafkaSourceConfig.parse_obj( + { + "connection": { + "bootstrap": "foobar:9092", + "consumer_config": {"oauth_cb": "oauth:create_token_only_kwargs"}, + } + } + ) From 3c388a56a5d320d9d8a2a3aef02e6794285cf85c Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Thu, 5 Dec 2024 09:49:44 +0100 Subject: [PATCH 19/28] fix(ingest/gc): Adding test and more checks to gc source (#12027) --- .../source/gc/dataprocess_cleanup.py | 70 +++++++---- metadata-ingestion/tests/unit/test_gc.py | 109 
++++++++++++++++++ 2 files changed, 156 insertions(+), 23 deletions(-) create mode 100644 metadata-ingestion/tests/unit/test_gc.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py index 90641b7059ca40..3e51b7da9e8be1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py @@ -208,22 +208,28 @@ def fetch_dpis(self, job_urn: str, batch_size: int) -> List[dict]: dpis = [] start = 0 while True: - job_query_result = self.ctx.graph.execute_graphql( - DATA_PROCESS_INSTANCES_QUERY, - {"dataJobUrn": job_urn, "start": start, "count": batch_size}, - ) - job_data = job_query_result.get("dataJob") - if not job_data: - raise ValueError(f"Error getting job {job_urn}") - - runs_data = job_data.get("runs") - if not runs_data: - raise ValueError(f"Error getting runs for {job_urn}") - - runs = runs_data.get("runs") - dpis.extend(runs) - start += batch_size - if len(runs) < batch_size: + try: + job_query_result = self.ctx.graph.execute_graphql( + DATA_PROCESS_INSTANCES_QUERY, + {"dataJobUrn": job_urn, "start": start, "count": batch_size}, + ) + job_data = job_query_result.get("dataJob") + if not job_data: + logger.error(f"Error getting job {job_urn}") + break + + runs_data = job_data.get("runs") + if not runs_data: + logger.error(f"Error getting runs for {job_urn}") + break + + runs = runs_data.get("runs") + dpis.extend(runs) + start += batch_size + if len(runs) < batch_size: + break + except Exception as e: + logger.error(f"Exception while fetching DPIs for job {job_urn}: {e}") break return dpis @@ -243,8 +249,12 @@ def keep_last_n_dpi( futures[future] = dpi for future in as_completed(futures): - deleted_count_last_n += 1 - futures[future]["deleted"] = True + try: + future.result() + deleted_count_last_n += 1 + futures[future]["deleted"] = True + except Exception 
as e: + logger.error(f"Exception while deleting DPI: {e}") if deleted_count_last_n % self.config.batch_size == 0: logger.info(f"Deleted {deleted_count_last_n} DPIs from {job.urn}") @@ -279,7 +289,7 @@ def delete_dpi_from_datajobs(self, job: DataJobEntity) -> None: dpis = self.fetch_dpis(job.urn, self.config.batch_size) dpis.sort( key=lambda x: x["created"]["time"] - if x["created"] and x["created"]["time"] + if "created" in x and "time" in x["created"] else 0, reverse=True, ) @@ -314,15 +324,23 @@ def remove_old_dpis( if dpi.get("deleted"): continue - if dpi["created"]["time"] < retention_time * 1000: + if ( + "created" not in dpi + or "time" not in dpi["created"] + or dpi["created"]["time"] < retention_time * 1000 + ): future = executor.submit( self.delete_entity, dpi["urn"], "dataprocessInstance" ) futures[future] = dpi for future in as_completed(futures): - deleted_count_retention += 1 - futures[future]["deleted"] = True + try: + future.result() + deleted_count_retention += 1 + futures[future]["deleted"] = True + except Exception as e: + logger.error(f"Exception while deleting DPI: {e}") if deleted_count_retention % self.config.batch_size == 0: logger.info( @@ -378,8 +396,11 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: dataFlows[flow.urn] = flow scroll_id: Optional[str] = None + previous_scroll_id: Optional[str] = None + dataJobs: Dict[str, List[DataJobEntity]] = defaultdict(list) deleted_jobs: int = 0 + while True: result = self.ctx.graph.execute_graphql( DATAJOB_QUERY, @@ -426,9 +447,11 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: else: dataJobs[datajob_entity.flow_urn].append(datajob_entity) - if not scroll_id: + if not scroll_id or previous_scroll_id == scroll_id: break + previous_scroll_id = scroll_id + logger.info(f"Deleted {deleted_jobs} DataJobs") # Delete empty dataflows if needed if self.config.delete_empty_data_flows: @@ -443,4 +466,5 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: if 
deleted_jobs % self.config.batch_size == 0: logger.info(f"Deleted {deleted_data_flows} DataFlows") logger.info(f"Deleted {deleted_data_flows} DataFlows") + return [] diff --git a/metadata-ingestion/tests/unit/test_gc.py b/metadata-ingestion/tests/unit/test_gc.py new file mode 100644 index 00000000000000..5429c85dd608dc --- /dev/null +++ b/metadata-ingestion/tests/unit/test_gc.py @@ -0,0 +1,109 @@ +import unittest +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.source.gc.dataprocess_cleanup import ( + DataJobEntity, + DataProcessCleanup, + DataProcessCleanupConfig, + DataProcessCleanupReport, +) + + +class TestDataProcessCleanup(unittest.TestCase): + def setUp(self): + self.ctx = PipelineContext(run_id="test_run") + self.ctx.graph = MagicMock() + self.config = DataProcessCleanupConfig() + self.report = DataProcessCleanupReport() + self.cleanup = DataProcessCleanup( + self.ctx, self.config, self.report, dry_run=True + ) + + @patch( + "datahub.ingestion.source.gc.dataprocess_cleanup.DataProcessCleanup.fetch_dpis" + ) + def test_delete_dpi_from_datajobs(self, mock_fetch_dpis): + job = DataJobEntity( + urn="urn:li:dataJob:1", + flow_urn="urn:li:dataFlow:1", + lastIngested=int(datetime.now(timezone.utc).timestamp()), + jobId="job1", + dataPlatformInstance="urn:li:dataPlatformInstance:1", + total_runs=10, + ) + mock_fetch_dpis.return_value = [ + { + "urn": f"urn:li:dataprocessInstance:{i}", + "created": { + "time": int(datetime.now(timezone.utc).timestamp() + i) * 1000 + }, + } + for i in range(10) + ] + self.cleanup.delete_dpi_from_datajobs(job) + self.assertEqual(5, self.report.num_aspects_removed) + + @patch( + "datahub.ingestion.source.gc.dataprocess_cleanup.DataProcessCleanup.fetch_dpis" + ) + def test_delete_dpi_from_datajobs_without_dpis(self, mock_fetch_dpis): + job = DataJobEntity( + urn="urn:li:dataJob:1", + flow_urn="urn:li:dataFlow:1", + 
lastIngested=int(datetime.now(timezone.utc).timestamp()), + jobId="job1", + dataPlatformInstance="urn:li:dataPlatformInstance:1", + total_runs=10, + ) + mock_fetch_dpis.return_value = [] + self.cleanup.delete_dpi_from_datajobs(job) + self.assertEqual(0, self.report.num_aspects_removed) + + @patch( + "datahub.ingestion.source.gc.dataprocess_cleanup.DataProcessCleanup.fetch_dpis" + ) + def test_delete_dpi_from_datajobs_without_dpi_created_time(self, mock_fetch_dpis): + job = DataJobEntity( + urn="urn:li:dataJob:1", + flow_urn="urn:li:dataFlow:1", + lastIngested=int(datetime.now(timezone.utc).timestamp()), + jobId="job1", + dataPlatformInstance="urn:li:dataPlatformInstance:1", + total_runs=10, + ) + mock_fetch_dpis.return_value = [ + {"urn": f"urn:li:dataprocessInstance:{i}"} for i in range(10) + ] + [ + { + "urn": "urn:li:dataprocessInstance:11", + "created": {"time": int(datetime.now(timezone.utc).timestamp() * 1000)}, + } + ] + self.cleanup.delete_dpi_from_datajobs(job) + self.assertEqual(10, self.report.num_aspects_removed) + + def test_fetch_dpis(self): + assert self.cleanup.ctx.graph + self.cleanup.ctx.graph = MagicMock() + self.cleanup.ctx.graph.execute_graphql.return_value = { + "dataJob": { + "runs": { + "runs": [ + { + "urn": "urn:li:dataprocessInstance:1", + "created": { + "time": int(datetime.now(timezone.utc).timestamp()) + }, + } + ] + } + } + } + dpis = self.cleanup.fetch_dpis("urn:li:dataJob:1", 10) + self.assertEqual(len(dpis), 1) + + +if __name__ == "__main__": + unittest.main() From 3f3f777c063bb10c3ae8cca1bd13652dd3d23b3d Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 5 Dec 2024 07:44:20 -0600 Subject: [PATCH 20/28] fix(graph-edge): fix graph edge delete exception (#12025) --- .github/workflows/build-and-test.yml | 2 + .../models/extractor/FieldExtractor.java | 29 ++++- .../service/UpdateGraphIndicesService.java | 70 +++++----- .../UpdateGraphIndicesServiceTest.java | 122 
++++++++++++++++++ .../service/UpdateIndicesServiceTest.java | 84 ++++++++++++ 5 files changed, 268 insertions(+), 39 deletions(-) create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/service/UpdateIndicesServiceTest.java diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 412c962cb6e36f..a5889b2d2f92de 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -75,6 +75,8 @@ jobs: path: | ~/.cache/uv key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }} + - name: Install dependencies + run: ./metadata-ingestion/scripts/install_deps.sh - name: Set up JDK 17 uses: actions/setup-java@v4 with: diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java index bef7782d8f7c9a..f4dc2ec2f0cd56 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java @@ -14,7 +14,7 @@ import java.util.Optional; import java.util.function.Function; import java.util.stream.Collectors; -import javax.annotation.Nonnull; +import javax.annotation.Nullable; /** Extracts fields from a RecordTemplate based on the appropriate {@link FieldSpec}. 
*/ public class FieldExtractor { @@ -30,15 +30,34 @@ private static long getNumArrayWildcards(PathSpec pathSpec) { // Extract the value of each field in the field specs from the input record public static Map> extractFields( - @Nonnull RecordTemplate record, List fieldSpecs) { - return extractFields(record, fieldSpecs, MAX_VALUE_LENGTH); + @Nullable RecordTemplate record, List fieldSpecs) { + return extractFields(record, fieldSpecs, false); } public static Map> extractFields( - @Nonnull RecordTemplate record, List fieldSpecs, int maxValueLength) { + @Nullable RecordTemplate record, List fieldSpecs, boolean requiredFieldExtract) { + return extractFields(record, fieldSpecs, MAX_VALUE_LENGTH, requiredFieldExtract); + } + + public static Map> extractFields( + @Nullable RecordTemplate record, List fieldSpecs, int maxValueLength) { + return extractFields(record, fieldSpecs, maxValueLength, false); + } + + public static Map> extractFields( + @Nullable RecordTemplate record, + List fieldSpecs, + int maxValueLength, + boolean requiredFieldExtract) { final Map> extractedFields = new HashMap<>(); for (T fieldSpec : fieldSpecs) { - Optional value = RecordUtils.getFieldValue(record, fieldSpec.getPath()); + if (requiredFieldExtract && record == null) { + throw new IllegalArgumentException( + "Field extraction is required and the RecordTemplate is null"); + } + Optional value = + Optional.ofNullable(record) + .flatMap(maybeRecord -> RecordUtils.getFieldValue(maybeRecord, fieldSpec.getPath())); if (!value.isPresent()) { extractedFields.put(fieldSpec, Collections.emptyList()); } else { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java index ef7f681a81539d..efe073fc00dfdc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java @@ -190,7 +190,10 @@ private void handleDeleteChangeEvent( urn.getEntityType(), event.getAspectName())); } - RecordTemplate aspect = event.getRecordTemplate(); + final RecordTemplate aspect = + event.getPreviousRecordTemplate() != null + ? event.getPreviousRecordTemplate() + : event.getRecordTemplate(); Boolean isDeletingKey = event.getAspectName().equals(entitySpec.getKeyAspectName()); if (!aspectSpec.isTimeseries()) { @@ -280,8 +283,8 @@ private Pair, HashMap>> getEdgesAndRelationshipTypes @Nonnull final RecordTemplate aspect, @Nonnull final MetadataChangeLog event, final boolean isNewAspectVersion) { - final List edgesToAdd = new ArrayList<>(); - final HashMap> urnToRelationshipTypesBeingAdded = new HashMap<>(); + final List edges = new ArrayList<>(); + final HashMap> urnToRelationshipTypes = new HashMap<>(); // we need to manually set schemaField <-> schemaField edges for fineGrainedLineage and // inputFields @@ -289,36 +292,28 @@ private Pair, HashMap>> getEdgesAndRelationshipTypes if (aspectSpec.getName().equals(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) { UpstreamLineage upstreamLineage = new UpstreamLineage(aspect.data()); updateFineGrainedEdgesAndRelationships( - urn, - upstreamLineage.getFineGrainedLineages(), - edgesToAdd, - urnToRelationshipTypesBeingAdded); + urn, upstreamLineage.getFineGrainedLineages(), edges, urnToRelationshipTypes); } else if (aspectSpec.getName().equals(Constants.INPUT_FIELDS_ASPECT_NAME)) { final InputFields inputFields = new InputFields(aspect.data()); - updateInputFieldEdgesAndRelationships( - urn, inputFields, edgesToAdd, urnToRelationshipTypesBeingAdded); + updateInputFieldEdgesAndRelationships(urn, inputFields, edges, urnToRelationshipTypes); } else if (aspectSpec.getName().equals(Constants.DATA_JOB_INPUT_OUTPUT_ASPECT_NAME)) { DataJobInputOutput dataJobInputOutput = new DataJobInputOutput(aspect.data()); 
updateFineGrainedEdgesAndRelationships( - urn, - dataJobInputOutput.getFineGrainedLineages(), - edgesToAdd, - urnToRelationshipTypesBeingAdded); + urn, dataJobInputOutput.getFineGrainedLineages(), edges, urnToRelationshipTypes); } Map> extractedFields = - FieldExtractor.extractFields(aspect, aspectSpec.getRelationshipFieldSpecs()); + FieldExtractor.extractFields(aspect, aspectSpec.getRelationshipFieldSpecs(), true); for (Map.Entry> entry : extractedFields.entrySet()) { - Set relationshipTypes = - urnToRelationshipTypesBeingAdded.getOrDefault(urn, new HashSet<>()); + Set relationshipTypes = urnToRelationshipTypes.getOrDefault(urn, new HashSet<>()); relationshipTypes.add(entry.getKey().getRelationshipName()); - urnToRelationshipTypesBeingAdded.put(urn, relationshipTypes); + urnToRelationshipTypes.put(urn, relationshipTypes); final List newEdges = GraphIndexUtils.extractGraphEdges(entry, aspect, urn, event, isNewAspectVersion); - edgesToAdd.addAll(newEdges); + edges.addAll(newEdges); } - return Pair.of(edgesToAdd, urnToRelationshipTypesBeingAdded); + return Pair.of(edges, urnToRelationshipTypes); } /** Process snapshot and update graph index */ @@ -433,7 +428,7 @@ private void deleteGraphData( @Nonnull final OperationContext opContext, @Nonnull final Urn urn, @Nonnull final AspectSpec aspectSpec, - @Nonnull final RecordTemplate aspect, + @Nullable final RecordTemplate aspect, @Nonnull final Boolean isKeyAspect, @Nonnull final MetadataChangeLog event) { if (isKeyAspect) { @@ -441,21 +436,28 @@ private void deleteGraphData( return; } - Pair, HashMap>> edgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); - - final HashMap> urnToRelationshipTypesBeingAdded = - edgeAndRelationTypes.getSecond(); - if (!urnToRelationshipTypesBeingAdded.isEmpty()) { - for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { - graphService.removeEdgesFromNode( - opContext, - entry.getKey(), - new ArrayList<>(entry.getValue()), - 
createRelationshipFilter( - new Filter().setOr(new ConjunctiveCriterionArray()), - RelationshipDirection.OUTGOING)); + if (aspect != null) { + Pair, HashMap>> edgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); + + final HashMap> urnToRelationshipTypesBeingRemoved = + edgeAndRelationTypes.getSecond(); + if (!urnToRelationshipTypesBeingRemoved.isEmpty()) { + for (Map.Entry> entry : urnToRelationshipTypesBeingRemoved.entrySet()) { + graphService.removeEdgesFromNode( + opContext, + entry.getKey(), + new ArrayList<>(entry.getValue()), + createRelationshipFilter( + new Filter().setOr(new ConjunctiveCriterionArray()), + RelationshipDirection.OUTGOING)); + } } + } else { + log.warn( + "Insufficient information to perform graph delete. Missing deleted aspect {} for entity {}", + aspectSpec.getName(), + urn); } } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/service/UpdateGraphIndicesServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/service/UpdateGraphIndicesServiceTest.java index 03e381a9059da6..dd02b1fdc9d78d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/service/UpdateGraphIndicesServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/service/UpdateGraphIndicesServiceTest.java @@ -1,6 +1,11 @@ package com.linkedin.metadata.service; +import static com.linkedin.metadata.Constants.CONTAINER_ENTITY_NAME; +import static com.linkedin.metadata.search.utils.QueryUtils.createRelationshipFilter; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.nullable; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.times; @@ -8,9 +13,11 @@ import static org.mockito.Mockito.verifyNoInteractions; import static org.testng.Assert.assertEquals; +import 
com.google.common.collect.ImmutableList; import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.container.Container; import com.linkedin.dataset.DatasetProperties; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; @@ -21,6 +28,14 @@ import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.LineageRegistry; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.utils.GenericRecordUtils; @@ -29,6 +44,8 @@ import com.linkedin.mxe.MetadataChangeLog; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.List; +import javax.annotation.Nonnull; import org.mockito.ArgumentCaptor; import org.opensearch.index.query.QueryBuilder; import org.opensearch.script.Script; @@ -180,4 +197,109 @@ public void testStatusNoOpEvent() { verifyNoInteractions(mockWriteDAO); } + + @Test + public void testMissingAspectGraphDelete() { + // Test deleting a null aspect + test.handleChangeEvent( + TEST_OP_CONTEXT, + new MetadataChangeLog() + .setChangeType(ChangeType.DELETE) + .setEntityType(TEST_URN.getEntityType()) + .setEntityUrn(TEST_URN) + 
.setAspectName(Constants.CONTAINER_ASPECT_NAME)); + + // For missing aspects, verify no writes + verifyNoInteractions(mockWriteDAO); + } + + @Test + public void testNodeGraphDelete() { + Urn containerUrn = UrnUtils.getUrn("urn:li:container:foo"); + + // Test deleting container entity + test.handleChangeEvent( + TEST_OP_CONTEXT, + new MetadataChangeLog() + .setChangeType(ChangeType.DELETE) + .setEntityType(CONTAINER_ENTITY_NAME) + .setEntityUrn(containerUrn) + .setAspectName(Constants.CONTAINER_KEY_ASPECT_NAME)); + + // Delete all outgoing edges of this entity + verify(mockWriteDAO, times(1)) + .deleteByQuery( + eq(TEST_OP_CONTEXT), + nullable(String.class), + eq(createUrnFilter(containerUrn)), + nullable(String.class), + eq(new Filter().setOr(new ConjunctiveCriterionArray())), + eq(List.of()), + eq(new RelationshipFilter().setDirection(RelationshipDirection.OUTGOING))); + + // Delete all incoming edges of this entity + verify(mockWriteDAO, times(1)) + .deleteByQuery( + eq(TEST_OP_CONTEXT), + nullable(String.class), + eq(createUrnFilter(containerUrn)), + nullable(String.class), + eq(new Filter().setOr(new ConjunctiveCriterionArray())), + eq(List.of()), + eq(new RelationshipFilter().setDirection(RelationshipDirection.INCOMING))); + + // Delete all edges where this entity is a lifecycle owner + verify(mockWriteDAO, times(1)) + .deleteByQuery( + eq(TEST_OP_CONTEXT), + nullable(String.class), + eq(new Filter().setOr(new ConjunctiveCriterionArray())), + nullable(String.class), + eq(new Filter().setOr(new ConjunctiveCriterionArray())), + eq(List.of()), + eq(new RelationshipFilter().setDirection(RelationshipDirection.INCOMING)), + eq(containerUrn.toString())); + } + + @Test + public void testContainerDelete() { + Urn containerUrn = UrnUtils.getUrn("urn:li:container:foo"); + + // Test deleting a container aspect + test.handleChangeEvent( + TEST_OP_CONTEXT, + new MetadataChangeLog() + .setChangeType(ChangeType.DELETE) + .setEntityType(TEST_URN.getEntityType()) + 
.setEntityUrn(TEST_URN) + .setAspectName(Constants.CONTAINER_ASPECT_NAME) + .setPreviousAspectValue( + GenericRecordUtils.serializeAspect(new Container().setContainer(containerUrn)))); + + // For container aspects, verify that only edges are removed in both cases + verify(mockWriteDAO, times(1)) + .deleteByQuery( + eq(TEST_OP_CONTEXT), + nullable(String.class), + eq(createUrnFilter(TEST_URN)), + nullable(String.class), + eq(new Filter().setOr(new ConjunctiveCriterionArray())), + eq(List.of("IsPartOf")), + eq( + createRelationshipFilter( + new Filter().setOr(new ConjunctiveCriterionArray()), + RelationshipDirection.OUTGOING))); + } + + private static Filter createUrnFilter(@Nonnull final Urn urn) { + Filter filter = new Filter(); + CriterionArray criterionArray = new CriterionArray(); + Criterion criterion = buildCriterion("urn", Condition.EQUAL, urn.toString()); + criterionArray.add(criterion); + filter.setOr( + new ConjunctiveCriterionArray( + ImmutableList.of(new ConjunctiveCriterion().setAnd(criterionArray)))); + + return filter; + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/service/UpdateIndicesServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/service/UpdateIndicesServiceTest.java new file mode 100644 index 00000000000000..43f8cc0ef191d6 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/service/UpdateIndicesServiceTest.java @@ -0,0 +1,84 @@ +package com.linkedin.metadata.service; + +import static com.linkedin.metadata.Constants.CONTAINER_ASPECT_NAME; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.verify; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.models.AspectSpec; +import 
com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.search.EntitySearchService; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; +import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; +import com.linkedin.metadata.systemmetadata.SystemMetadataService; +import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import com.linkedin.metadata.utils.SystemMetadataUtils; +import com.linkedin.mxe.MetadataChangeLog; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class UpdateIndicesServiceTest { + + @Mock private UpdateGraphIndicesService updateGraphIndicesService; + @Mock private EntitySearchService entitySearchService; + @Mock private TimeseriesAspectService timeseriesAspectService; + @Mock private SystemMetadataService systemMetadataService; + @Mock private SearchDocumentTransformer searchDocumentTransformer; + @Mock private EntityIndexBuilders entityIndexBuilders; + + private OperationContext operationContext; + private UpdateIndicesService updateIndicesService; + + @BeforeMethod + public void setup() { + MockitoAnnotations.openMocks(this); + operationContext = TestOperationContexts.systemContextNoSearchAuthorization(); + updateIndicesService = + new UpdateIndicesService( + updateGraphIndicesService, + entitySearchService, + timeseriesAspectService, + systemMetadataService, + searchDocumentTransformer, + entityIndexBuilders, + "MD5"); + } + + @Test + public void testContainerHandleDeleteEvent() throws Exception { + Urn urn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)"); + EntitySpec entitySpec = operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME); + AspectSpec aspectSpec = 
entitySpec.getAspectSpec(CONTAINER_ASPECT_NAME); + + // Create test data + MetadataChangeLog event = new MetadataChangeLog(); + event.setChangeType(ChangeType.DELETE); + event.setEntityUrn(urn); + event.setAspectName(CONTAINER_ASPECT_NAME); + event.setEntityType(urn.getEntityType()); + event.setSystemMetadata(SystemMetadataUtils.createDefaultSystemMetadata()); + + // Execute Delete + updateIndicesService.handleChangeEvent(operationContext, event); + + // Verify + verify(systemMetadataService).deleteAspect(urn.toString(), CONTAINER_ASPECT_NAME); + verify(searchDocumentTransformer) + .transformAspect( + eq(operationContext), + eq(urn), + nullable(RecordTemplate.class), + eq(aspectSpec), + eq(true)); + verify(updateGraphIndicesService).handleChangeEvent(operationContext, event); + } +} From 48b5a6221c8a203268e8905de3238d9d47411f75 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 5 Dec 2024 11:32:31 -0500 Subject: [PATCH 21/28] feat(ingest): add urn validation test files (#12036) --- .../tests/unit/urns/invalid_urns.txt | 40 +++++++++ .../tests/unit/urns/test_corp_group_urn.py | 10 --- .../tests/unit/urns/test_corpuser_urn.py | 10 --- .../tests/unit/urns/test_data_flow_urn.py | 8 -- .../tests/unit/urns/test_data_job_urn.py | 15 ---- .../urns/test_data_process_instance_urn.py | 10 --- .../tests/unit/urns/test_dataset_urn.py | 20 ----- .../tests/unit/urns/test_domain_urn.py | 8 -- .../tests/unit/urns/test_notebook_urn.py | 10 --- .../tests/unit/urns/test_tag_urn.py | 8 -- .../tests/unit/urns/test_urn.py | 88 +++++++++++-------- .../tests/unit/urns/valid_urns.txt | 24 +++++ 12 files changed, 115 insertions(+), 136 deletions(-) create mode 100644 metadata-ingestion/tests/unit/urns/invalid_urns.txt create mode 100644 metadata-ingestion/tests/unit/urns/valid_urns.txt diff --git a/metadata-ingestion/tests/unit/urns/invalid_urns.txt b/metadata-ingestion/tests/unit/urns/invalid_urns.txt new file mode 100644 index 00000000000000..9ce2c99a1a4ee8 --- /dev/null +++ 
b/metadata-ingestion/tests/unit/urns/invalid_urns.txt @@ -0,0 +1,40 @@ +# Basic URN format tests +urn:li:abc +urn:li:abc: +urn:li:abc:() +urn:li:abc:(abc,) +urn:li:corpuser:abc) + +# Reserved characters +urn:li:corpuser:foo␟bar +urn:li:tag:a,b,c + +# CorpUser URN tests +urn:li:corpuser:(part1,part2) + +# Dataset URN tests +urn:li:dataset:(urn:li:user:abc,dataset,prod) +urn:li:dataset:(urn:li:user:abc,dataset) +urn:li:dataset:(urn:li:user:abc,dataset,invalidEnv) + +# DataFlow URN tests +urn:li:dataFlow:(airflow,flow_id) + +# DataJob URN tests +urn:li:dataJob:(urn:li:user:abc,job_id) +urn:li:dataJob:(urn:li:dataFlow:(airflow,flow_id,prod)) + +# Domain URN tests +urn:li:domain:(part1,part2) + +# Tag URN tests +urn:li:tag:(part1,part2) + +# Notebook URN tests +urn:li:notebook:(part1,part2,part3) + +# CorpGroup URN tests +urn:li:corpGroup:(part1,part2) + +# DataProcessInstance URN tests +urn:li:dataProcessInstance:(part1,part2) diff --git a/metadata-ingestion/tests/unit/urns/test_corp_group_urn.py b/metadata-ingestion/tests/unit/urns/test_corp_group_urn.py index 1897a0e8686f09..4e55e78255d1c1 100644 --- a/metadata-ingestion/tests/unit/urns/test_corp_group_urn.py +++ b/metadata-ingestion/tests/unit/urns/test_corp_group_urn.py @@ -3,7 +3,6 @@ import pytest from datahub.utilities.urns.corp_group_urn import CorpGroupUrn -from datahub.utilities.urns.error import InvalidUrnError @pytest.mark.filterwarnings("ignore::DeprecationWarning") @@ -17,12 +16,3 @@ def test_parse_urn(self) -> None: assert str(corp_group_urn) == corp_group_urn_str assert corp_group_urn == CorpGroupUrn(name="abc") assert corp_group_urn == CorpGroupUrn.create_from_id("abc") - - def test_invalid_urn(self) -> None: - with self.assertRaises(InvalidUrnError): - CorpGroupUrn.create_from_string( - "urn:li:abc:(urn:li:dataPlatform:abc,def,prod)" - ) - - with self.assertRaises(InvalidUrnError): - CorpGroupUrn.create_from_string("urn:li:corpGroup:(part1,part2)") diff --git 
a/metadata-ingestion/tests/unit/urns/test_corpuser_urn.py b/metadata-ingestion/tests/unit/urns/test_corpuser_urn.py index 7a2a4f4ff4493c..e4a11b4f404c6e 100644 --- a/metadata-ingestion/tests/unit/urns/test_corpuser_urn.py +++ b/metadata-ingestion/tests/unit/urns/test_corpuser_urn.py @@ -3,7 +3,6 @@ import pytest from datahub.utilities.urns.corpuser_urn import CorpuserUrn -from datahub.utilities.urns.error import InvalidUrnError @pytest.mark.filterwarnings("ignore::DeprecationWarning") @@ -17,12 +16,3 @@ def test_parse_urn(self) -> None: assert str(corpuser_urn) == corpuser_urn_str assert corpuser_urn == CorpuserUrn("abc") assert corpuser_urn == CorpuserUrn.create_from_id("abc") - - def test_invalid_urn(self) -> None: - with self.assertRaises(InvalidUrnError): - CorpuserUrn.create_from_string( - "urn:li:abc:(urn:li:dataPlatform:abc,def,prod)" - ) - - with self.assertRaises(InvalidUrnError): - CorpuserUrn.create_from_string("urn:li:corpuser:(part1,part2)") diff --git a/metadata-ingestion/tests/unit/urns/test_data_flow_urn.py b/metadata-ingestion/tests/unit/urns/test_data_flow_urn.py index 524411121d418b..edb5563c5b22e3 100644 --- a/metadata-ingestion/tests/unit/urns/test_data_flow_urn.py +++ b/metadata-ingestion/tests/unit/urns/test_data_flow_urn.py @@ -3,7 +3,6 @@ import pytest from datahub.utilities.urns.data_flow_urn import DataFlowUrn -from datahub.utilities.urns.error import InvalidUrnError @pytest.mark.filterwarnings("ignore::DeprecationWarning") @@ -16,10 +15,3 @@ def test_parse_urn(self) -> None: assert data_flow_urn.get_env() == "prod" assert data_flow_urn.__str__() == "urn:li:dataFlow:(airflow,def,prod)" assert data_flow_urn == DataFlowUrn("airflow", "def", "prod") - - def test_invalid_urn(self) -> None: - with self.assertRaises(InvalidUrnError): - DataFlowUrn.create_from_string("urn:li:abc:(airflow,def,prod)") - - with self.assertRaises(InvalidUrnError): - DataFlowUrn.create_from_string("urn:li:dataFlow:(airflow,flow_id)") diff --git 
a/metadata-ingestion/tests/unit/urns/test_data_job_urn.py b/metadata-ingestion/tests/unit/urns/test_data_job_urn.py index bf039cd2a91f96..484e5a474c0cd2 100644 --- a/metadata-ingestion/tests/unit/urns/test_data_job_urn.py +++ b/metadata-ingestion/tests/unit/urns/test_data_job_urn.py @@ -4,7 +4,6 @@ from datahub.utilities.urns.data_flow_urn import DataFlowUrn from datahub.utilities.urns.data_job_urn import DataJobUrn -from datahub.utilities.urns.error import InvalidUrnError @pytest.mark.filterwarnings("ignore::DeprecationWarning") @@ -22,17 +21,3 @@ def test_parse_urn(self) -> None: assert data_job_urn == DataJobUrn( "urn:li:dataFlow:(airflow,flow_id,prod)", "job_id" ) - - def test_invalid_urn(self) -> None: - with self.assertRaises(InvalidUrnError): - DataJobUrn.create_from_string( - "urn:li:abc:(urn:li:dataFlow:(airflow,flow_id,prod),job_id)" - ) - - with self.assertRaises(InvalidUrnError): - DataJobUrn.create_from_string("urn:li:dataJob:(urn:li:user:abc,job_id)") - - with self.assertRaises(InvalidUrnError): - DataJobUrn.create_from_string( - "urn:li:dataJob:(urn:li:dataFlow:(airflow,flow_id,prod))" - ) diff --git a/metadata-ingestion/tests/unit/urns/test_data_process_instance_urn.py b/metadata-ingestion/tests/unit/urns/test_data_process_instance_urn.py index a86f8dd99416ff..f9087b19b13c32 100644 --- a/metadata-ingestion/tests/unit/urns/test_data_process_instance_urn.py +++ b/metadata-ingestion/tests/unit/urns/test_data_process_instance_urn.py @@ -3,7 +3,6 @@ import pytest from datahub.utilities.urns.data_process_instance_urn import DataProcessInstanceUrn -from datahub.utilities.urns.error import InvalidUrnError @pytest.mark.filterwarnings("ignore::DeprecationWarning") @@ -20,12 +19,3 @@ def test_parse_urn(self) -> None: assert dataprocessinstance_urn == DataProcessInstanceUrn("abc") assert dataprocessinstance_urn == DataProcessInstanceUrn.create_from_id("abc") assert "abc" == dataprocessinstance_urn.get_dataprocessinstance_id() - - def test_invalid_urn(self) -> 
None: - with self.assertRaises(InvalidUrnError): - DataProcessInstanceUrn.create_from_string("urn:li:abc:dataProcessInstance") - - with self.assertRaises(InvalidUrnError): - DataProcessInstanceUrn.create_from_string( - "urn:li:dataProcessInstance:(part1,part2)" - ) diff --git a/metadata-ingestion/tests/unit/urns/test_dataset_urn.py b/metadata-ingestion/tests/unit/urns/test_dataset_urn.py index 53065143a6ae4f..1be5cd59152009 100644 --- a/metadata-ingestion/tests/unit/urns/test_dataset_urn.py +++ b/metadata-ingestion/tests/unit/urns/test_dataset_urn.py @@ -4,7 +4,6 @@ from datahub.utilities.urns.data_platform_urn import DataPlatformUrn from datahub.utilities.urns.dataset_urn import DatasetUrn -from datahub.utilities.urns.error import InvalidUrnError @pytest.mark.filterwarnings("ignore::DeprecationWarning") @@ -20,22 +19,3 @@ def test_parse_urn(self) -> None: assert dataset_urn.get_env() == "PROD" assert dataset_urn.__str__() == dataset_urn_str assert dataset_urn == DatasetUrn("urn:li:dataPlatform:abc", "def", "prod") - - def test_invalid_urn(self) -> None: - with self.assertRaises(InvalidUrnError): - DatasetUrn.create_from_string( - "urn:li:abc:(urn:li:dataPlatform:abc,def,prod)" - ) - - with self.assertRaises(InvalidUrnError): - DatasetUrn.create_from_string( - "urn:li:dataset:(urn:li:user:abc,dataset,prod)" - ) - - with self.assertRaises(InvalidUrnError): - DatasetUrn.create_from_string("urn:li:dataset:(urn:li:user:abc,dataset)") - - with self.assertRaises(InvalidUrnError): - DatasetUrn.create_from_string( - "urn:li:dataset:(urn:li:user:abc,dataset,invalidEnv)" - ) diff --git a/metadata-ingestion/tests/unit/urns/test_domain_urn.py b/metadata-ingestion/tests/unit/urns/test_domain_urn.py index 843a5bf40f5c63..aa5050ce1c030e 100644 --- a/metadata-ingestion/tests/unit/urns/test_domain_urn.py +++ b/metadata-ingestion/tests/unit/urns/test_domain_urn.py @@ -3,7 +3,6 @@ import pytest from datahub.utilities.urns.domain_urn import DomainUrn -from datahub.utilities.urns.error 
import InvalidUrnError @pytest.mark.filterwarnings("ignore::DeprecationWarning") @@ -17,10 +16,3 @@ def test_parse_urn(self) -> None: assert str(domain_urn) == domain_urn_str assert domain_urn == DomainUrn("abc") assert domain_urn == DomainUrn.create_from_id("abc") - - def test_invalid_urn(self) -> None: - with self.assertRaises(InvalidUrnError): - DomainUrn.create_from_string("urn:li:abc:domain") - - with self.assertRaises(InvalidUrnError): - DomainUrn.create_from_string("urn:li:domain:(part1,part2)") diff --git a/metadata-ingestion/tests/unit/urns/test_notebook_urn.py b/metadata-ingestion/tests/unit/urns/test_notebook_urn.py index 3ec580f02142b7..6d4dd2ee6fa8c0 100644 --- a/metadata-ingestion/tests/unit/urns/test_notebook_urn.py +++ b/metadata-ingestion/tests/unit/urns/test_notebook_urn.py @@ -2,7 +2,6 @@ import pytest -from datahub.utilities.urns.error import InvalidUrnError from datahub.utilities.urns.notebook_urn import NotebookUrn @@ -16,12 +15,3 @@ def test_parse_urn(self) -> None: assert str(notebook_urn) == notebook_urn_str assert notebook_urn == NotebookUrn("querybook", "123") - - def test_invalid_urn(self) -> None: - with self.assertRaises(InvalidUrnError): - NotebookUrn.create_from_string( - "urn:li:abc:(urn:li:dataPlatform:abc,def,prod)" - ) - - with self.assertRaises(InvalidUrnError): - NotebookUrn.create_from_string("urn:li:notebook:(part1,part2,part3)") diff --git a/metadata-ingestion/tests/unit/urns/test_tag_urn.py b/metadata-ingestion/tests/unit/urns/test_tag_urn.py index fa3664bcc02180..5f4c9077e28294 100644 --- a/metadata-ingestion/tests/unit/urns/test_tag_urn.py +++ b/metadata-ingestion/tests/unit/urns/test_tag_urn.py @@ -2,7 +2,6 @@ import pytest -from datahub.utilities.urns.error import InvalidUrnError from datahub.utilities.urns.tag_urn import TagUrn @@ -17,10 +16,3 @@ def test_parse_urn(self) -> None: assert str(tag_urn) == tag_urn_str assert tag_urn == TagUrn("abc") assert tag_urn == TagUrn.create_from_id("abc") - - def 
test_invalid_urn(self) -> None: - with self.assertRaises(InvalidUrnError): - TagUrn.create_from_string("urn:li:abc:tag_id") - - with self.assertRaises(InvalidUrnError): - TagUrn.create_from_string("urn:li:tag:(part1,part2)") diff --git a/metadata-ingestion/tests/unit/urns/test_urn.py b/metadata-ingestion/tests/unit/urns/test_urn.py index 73badb3d1b4234..0c362473c0cf18 100644 --- a/metadata-ingestion/tests/unit/urns/test_urn.py +++ b/metadata-ingestion/tests/unit/urns/test_urn.py @@ -1,16 +1,17 @@ +import logging +import pathlib +from typing import List + import pytest -from datahub.metadata.urns import ( - CorpUserUrn, - DashboardUrn, - DataPlatformUrn, - DatasetUrn, - Urn, -) +from datahub.metadata.urns import CorpUserUrn, DatasetUrn, Urn from datahub.utilities.urns.error import InvalidUrnError pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning") +_CURRENT_DIR = pathlib.Path(__file__).parent +logger = logging.getLogger(__name__) + def test_parse_urn() -> None: simple_urn_str = "urn:li:dataPlatform:abc" @@ -40,38 +41,12 @@ def test_url_encode_urn() -> None: ) -def test_invalid_urn() -> None: - with pytest.raises(InvalidUrnError): - Urn.from_string("urn:li:abc") - - with pytest.raises(InvalidUrnError): - Urn.from_string("urn:li:abc:") - - with pytest.raises(InvalidUrnError): - Urn.from_string("urn:li:abc:()") - - with pytest.raises(InvalidUrnError): - Urn.from_string("urn:li:abc:(abc,)") - - with pytest.raises(InvalidUrnError): - Urn.from_string("urn:li:corpuser:abc)") - - def test_urn_colon() -> None: - # Colon characters are valid in urns, and should not mess up parsing. 
- - urn = Urn.from_string( - "urn:li:dashboard:(looker,dashboards.thelook::customer_lookup)" - ) - assert isinstance(urn, DashboardUrn) - - assert DataPlatformUrn.from_string("urn:li:dataPlatform:abc:def") - assert DatasetUrn.from_string( - "urn:li:dataset:(urn:li:dataPlatform:abc:def,table_name,PROD)" - ) - assert Urn.from_string("urn:li:corpuser:foo:bar@example.com") + # There's a bunch of other, simpler tests for special characters in the valid_urns test. + # This test ensures that the type dispatch and fields work fine here. # I'm not sure why you'd ever want this, but technically it's a valid urn. + urn = Urn.from_string("urn:li:corpuser::") assert isinstance(urn, CorpUserUrn) assert urn.username == ":" @@ -85,9 +60,48 @@ def test_urn_coercion() -> None: assert urn == Urn.from_string(urn.urn()) -def test_urn_type_dispatch() -> None: +def test_urn_type_dispatch_1() -> None: urn = Urn.from_string("urn:li:dataset:(urn:li:dataPlatform:abc,def,PROD)") assert isinstance(urn, DatasetUrn) with pytest.raises(InvalidUrnError, match="Passed an urn of type corpuser"): DatasetUrn.from_string("urn:li:corpuser:foo") + + +def test_urn_type_dispatch_2() -> None: + urn = "urn:li:dataJob:(urn:li:dataFlow:(airflow,flow_id,prod),job_id)" + assert Urn.from_string(urn).urn() == urn + + with pytest.raises(InvalidUrnError, match="Passed an urn of type dataJob"): + CorpUserUrn.from_string(urn) + + +def _load_urns(file_name: pathlib.Path) -> List[str]: + urns = [ + line.strip() + for line in file_name.read_text().splitlines() + if line.strip() and not line.startswith("#") + ] + assert len(urns) > 0, f"No urns found in {file_name}" + return urns + + +def test_valid_urns() -> None: + valid_urns_file = _CURRENT_DIR / "valid_urns.txt" + valid_urns = _load_urns(valid_urns_file) + + for valid_urn in valid_urns: + logger.info(f"Testing valid URN: {valid_urn}") + parsed_urn = Urn.from_string(valid_urn) + assert parsed_urn.urn() == valid_urn + + +def test_invalid_urns() -> None: + 
invalid_urns_file = _CURRENT_DIR / "invalid_urns.txt" + invalid_urns = _load_urns(invalid_urns_file) + + # Test each invalid URN + for invalid_urn in invalid_urns: + with pytest.raises(InvalidUrnError): + logger.info(f"Testing invalid URN: {invalid_urn}") + Urn.from_string(invalid_urn) diff --git a/metadata-ingestion/tests/unit/urns/valid_urns.txt b/metadata-ingestion/tests/unit/urns/valid_urns.txt new file mode 100644 index 00000000000000..23205ec9a7235b --- /dev/null +++ b/metadata-ingestion/tests/unit/urns/valid_urns.txt @@ -0,0 +1,24 @@ +# Unknown entity types become generic urns +urn:li:abc:foo +urn:li:abc:(foo,bar) +urn:li:abc:(urn:li:dataPlatform:abc,def,prod) + +# A bunch of pretty normal urns +urn:li:corpuser:foo +urn:li:corpGroup:bar +urn:li:dataset:(urn:li:dataPlatform:abc,def/ghi,prod) +urn:li:dataFlow:(airflow,def,prod) +urn:li:dataJob:(urn:li:dataFlow:(airflow,flow_id,prod),job_id) +urn:li:tag:abc +urn:li:chart:(looker,chart_name) +urn:li:dashboard:(looker,dashboard_name) +urn:li:dataProcessInstance:abc +urn:li:domain:abc +urn:li:notebook:(querybook,123) + +# Urns with colons and other special characters +urn:li:tag:dbt:bar +urn:li:tag:: +urn:li:dashboard:(looker,dashboards.thelook::customer_lookup) +urn:li:dataPlatform:abc:def +urn:li:corpuser:foo:bar@example.com From 14fe8891a3c653e17fbfe86bd5fbf80df82e8cbb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 11:40:10 -0600 Subject: [PATCH 22/28] chore(deps): bump cross-spawn from 7.0.3 to 7.0.6 in /datahub-web-react (#11978) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- datahub-web-react/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock index 9dc563c958dd19..ddda98d7f83268 100644 --- a/datahub-web-react/yarn.lock +++ b/datahub-web-react/yarn.lock @@ -5043,9 +5043,9 @@ 
cross-inspect@1.0.0: tslib "^2.4.0" cross-spawn@^7.0.2: - version "7.0.3" - resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6" - integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== + version "7.0.6" + resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.6.tgz#8a58fe78f00dcd70c370451759dfbfaf03e8ee9f" + integrity sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA== dependencies: path-key "^3.1.0" shebang-command "^2.0.0" From cb7d68779455cad537710e1fd8bed4a03f6b0405 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 5 Dec 2024 11:45:04 -0600 Subject: [PATCH 23/28] fix(datahub-client): prevent unneeded classes in datahub-client jar (#12037) --- entity-registry/build.gradle | 2 +- .../java/datahub-event/build.gradle | 1 + metadata-models/build.gradle | 15 +++++++++------ .../openapi-servlet/models/build.gradle | 1 + 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/entity-registry/build.gradle b/entity-registry/build.gradle index e5baa95967f304..ee5ece4049399e 100644 --- a/entity-registry/build.gradle +++ b/entity-registry/build.gradle @@ -8,7 +8,7 @@ apply from: "../gradle/coverage/java-coverage.gradle" dependencies { implementation spec.product.pegasus.data - implementation spec.product.pegasus.generator + compileOnly spec.product.pegasus.generator api project(path: ':metadata-models') api project(path: ':metadata-models', configuration: "dataTemplate") api externalDependency.classGraph diff --git a/metadata-integration/java/datahub-event/build.gradle b/metadata-integration/java/datahub-event/build.gradle index 24e119c6229369..3dca2eb0a40c9f 100644 --- a/metadata-integration/java/datahub-event/build.gradle +++ b/metadata-integration/java/datahub-event/build.gradle @@ -18,6 +18,7 @@ dependencies { implementation 
externalDependency.jacksonDataBind runtimeOnly externalDependency.jna + compileOnly externalDependency.swaggerAnnotations compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok // VisibleForTesting diff --git a/metadata-models/build.gradle b/metadata-models/build.gradle index e9379163ecaecc..2d0b433d69013e 100644 --- a/metadata-models/build.gradle +++ b/metadata-models/build.gradle @@ -9,12 +9,15 @@ plugins { apply from: '../gradle/coverage/java-coverage.gradle' dependencies { - api spec.product.pegasus.data - constraints { - implementation('org.apache.commons:commons-text:1.10.0') { - because 'Vulnerability Issue' - } + constraints { + implementation('org.apache.commons:commons-text:1.10.0') { + because 'Vulnerability Issue' } + } + + api(spec.product.pegasus.data) { + exclude group: 'javax.servlet', module: 'javax.servlet-api' + } api project(':li-utils') api project(path: ':li-utils', configuration: "dataTemplate") dataModel project(':li-utils') @@ -26,7 +29,7 @@ dependencies { compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok - api externalDependency.swaggerAnnotations + compileOnly externalDependency.swaggerAnnotations compileOnly externalDependency.jacksonCore compileOnly externalDependency.jacksonDataBind diff --git a/metadata-service/openapi-servlet/models/build.gradle b/metadata-service/openapi-servlet/models/build.gradle index e4100b2d094e04..d75e656e5ecd6c 100644 --- a/metadata-service/openapi-servlet/models/build.gradle +++ b/metadata-service/openapi-servlet/models/build.gradle @@ -10,6 +10,7 @@ dependencies { implementation externalDependency.jacksonDataBind implementation externalDependency.httpClient + compileOnly externalDependency.swaggerAnnotations compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok From c4ada540c4b1460377dd47e8dd6234314c0c7d4c Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 5 
Dec 2024 16:02:58 -0600 Subject: [PATCH 24/28] fix(entity-service): no-op batches (#12047) --- .../java/com/linkedin/metadata/Constants.java | 1 + .../metadata/entity/EntityServiceImpl.java | 188 ++++++++++-------- .../entity/EbeanEntityServiceTest.java | 99 ++++++++- 3 files changed, 201 insertions(+), 87 deletions(-) diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 077e0e2b666be1..9c608187342e8c 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -51,6 +51,7 @@ public class Constants { // App sources public static final String UI_SOURCE = "ui"; public static final String SYSTEM_UPDATE_SOURCE = "systemUpdate"; + public static final String METADATA_TESTS_SOURCE = "metadataTests"; /** Entities */ public static final String CORP_USER_ENTITY_NAME = "corpuser"; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index a0a55cf505cf35..bf3481205fb5ab 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -855,6 +855,7 @@ private List ingestAspectsToLocalDB( if (inputBatch.containsDuplicateAspects()) { log.warn(String.format("Batch contains duplicates: %s", inputBatch)); + MetricUtils.counter(EntityServiceImpl.class, "batch_with_duplicate").inc(); } return aspectDao @@ -928,6 +929,7 @@ private List ingestAspectsToLocalDB( // No changes, return if (changeMCPs.isEmpty()) { + MetricUtils.counter(EntityServiceImpl.class, "batch_empty").inc(); return Collections.emptyList(); } @@ -935,6 +937,7 @@ private List ingestAspectsToLocalDB( ValidationExceptionCollection exceptions = AspectsBatch.validatePreCommit(changeMCPs, opContext.getRetrieverContext().get()); if 
(!exceptions.isEmpty()) { + MetricUtils.counter(EntityServiceImpl.class, "batch_validation_exception").inc(); throw new ValidationException(collectMetrics(exceptions).toString()); } @@ -972,10 +975,13 @@ This condition is specifically for an older conditional write ingestAspectIfNotP */ if (overwrite || databaseAspect == null) { result = - ingestAspectToLocalDB(txContext, writeItem, databaseSystemAspect) - .toBuilder() - .request(writeItem) - .build(); + Optional.ofNullable( + ingestAspectToLocalDB( + txContext, writeItem, databaseSystemAspect)) + .map( + optResult -> + optResult.toBuilder().request(writeItem).build()) + .orElse(null); } else { RecordTemplate oldValue = databaseSystemAspect.getRecordTemplate(); @@ -996,49 +1002,56 @@ This condition is specifically for an older conditional write ingestAspectIfNotP return result; }) + .filter(Objects::nonNull) .collect(Collectors.toList()); - // commit upserts prior to retention or kafka send, if supported by impl - if (txContext != null) { - txContext.commitAndContinue(); - } - long took = TimeUnit.NANOSECONDS.toMillis(ingestToLocalDBTimer.stop()); - if (took > DB_TIMER_LOG_THRESHOLD_MS) { - log.info("Ingestion of aspects batch to database took {} ms", took); - } + if (!upsertResults.isEmpty()) { + // commit upserts prior to retention or kafka send, if supported by impl + if (txContext != null) { + txContext.commitAndContinue(); + } + long took = TimeUnit.NANOSECONDS.toMillis(ingestToLocalDBTimer.stop()); + if (took > DB_TIMER_LOG_THRESHOLD_MS) { + log.info("Ingestion of aspects batch to database took {} ms", took); + } - // Retention optimization and tx - if (retentionService != null) { - List retentionBatch = - upsertResults.stream() - // Only consider retention when there was a previous version - .filter( - result -> - batchAspects.containsKey(result.getUrn().toString()) - && batchAspects - .get(result.getUrn().toString()) - .containsKey(result.getRequest().getAspectName())) - .filter( - result -> { - 
RecordTemplate oldAspect = result.getOldValue(); - RecordTemplate newAspect = result.getNewValue(); - // Apply retention policies if there was an update to existing aspect - // value - return oldAspect != newAspect - && oldAspect != null - && retentionService != null; - }) - .map( - result -> - RetentionService.RetentionContext.builder() - .urn(result.getUrn()) - .aspectName(result.getRequest().getAspectName()) - .maxVersion(Optional.of(result.getMaxVersion())) - .build()) - .collect(Collectors.toList()); - retentionService.applyRetentionWithPolicyDefaults(opContext, retentionBatch); + // Retention optimization and tx + if (retentionService != null) { + List retentionBatch = + upsertResults.stream() + // Only consider retention when there was a previous version + .filter( + result -> + batchAspects.containsKey(result.getUrn().toString()) + && batchAspects + .get(result.getUrn().toString()) + .containsKey(result.getRequest().getAspectName())) + .filter( + result -> { + RecordTemplate oldAspect = result.getOldValue(); + RecordTemplate newAspect = result.getNewValue(); + // Apply retention policies if there was an update to existing + // aspect + // value + return oldAspect != newAspect + && oldAspect != null + && retentionService != null; + }) + .map( + result -> + RetentionService.RetentionContext.builder() + .urn(result.getUrn()) + .aspectName(result.getRequest().getAspectName()) + .maxVersion(Optional.of(result.getMaxVersion())) + .build()) + .collect(Collectors.toList()); + retentionService.applyRetentionWithPolicyDefaults(opContext, retentionBatch); + } else { + log.warn("Retention service is missing!"); + } } else { - log.warn("Retention service is missing!"); + MetricUtils.counter(EntityServiceImpl.class, "batch_empty_transaction").inc(); + log.warn("Empty transaction detected. {}", inputBatch); } return upsertResults; @@ -2506,7 +2519,7 @@ private Map getEnvelopedAspects( * @param databaseAspect The aspect as it exists in the database. 
* @return result object */ - @Nonnull + @Nullable private UpdateAspectResult ingestAspectToLocalDB( @Nullable TransactionContext txContext, @Nonnull final ChangeMCP writeItem, @@ -2520,6 +2533,9 @@ private UpdateAspectResult ingestAspectToLocalDB( .setLastRunId(writeItem.getSystemMetadata().getRunId(GetMode.NULL), SetMode.IGNORE_NULL); // 2. Compare the latest existing and new. + final RecordTemplate databaseValue = + databaseAspect == null ? null : databaseAspect.getRecordTemplate(); + final EntityAspect.EntitySystemAspect previousBatchAspect = (EntityAspect.EntitySystemAspect) writeItem.getPreviousSystemAspect(); final RecordTemplate previousValue = @@ -2528,7 +2544,7 @@ private UpdateAspectResult ingestAspectToLocalDB( // 3. If there is no difference between existing and new, we just update // the lastObserved in system metadata. RunId should stay as the original runId if (previousValue != null - && DataTemplateUtil.areEqual(previousValue, writeItem.getRecordTemplate())) { + && DataTemplateUtil.areEqual(databaseValue, writeItem.getRecordTemplate())) { SystemMetadata latestSystemMetadata = previousBatchAspect.getSystemMetadata(); latestSystemMetadata.setLastObserved(writeItem.getSystemMetadata().getLastObserved()); @@ -2564,45 +2580,49 @@ private UpdateAspectResult ingestAspectToLocalDB( } // 4. Save the newValue as the latest version - log.debug( - "Ingesting aspect with name {}, urn {}", writeItem.getAspectName(), writeItem.getUrn()); - String newValueStr = EntityApiUtils.toJsonAspect(writeItem.getRecordTemplate()); - long versionOfOld = - aspectDao.saveLatestAspect( - txContext, - writeItem.getUrn().toString(), - writeItem.getAspectName(), - previousBatchAspect == null ? null : EntityApiUtils.toJsonAspect(previousValue), - previousBatchAspect == null ? null : previousBatchAspect.getCreatedBy(), - previousBatchAspect == null - ? null - : previousBatchAspect.getEntityAspect().getCreatedFor(), - previousBatchAspect == null ? 
null : previousBatchAspect.getCreatedOn(), - previousBatchAspect == null ? null : previousBatchAspect.getSystemMetadataRaw(), - newValueStr, - writeItem.getAuditStamp().getActor().toString(), - writeItem.getAuditStamp().hasImpersonator() - ? writeItem.getAuditStamp().getImpersonator().toString() - : null, - new Timestamp(writeItem.getAuditStamp().getTime()), - EntityApiUtils.toJsonAspect(writeItem.getSystemMetadata()), - writeItem.getNextAspectVersion()); - - // metrics - aspectDao.incrementWriteMetrics( - writeItem.getAspectName(), 1, newValueStr.getBytes(StandardCharsets.UTF_8).length); - - return UpdateAspectResult.builder() - .urn(writeItem.getUrn()) - .oldValue(previousValue) - .newValue(writeItem.getRecordTemplate()) - .oldSystemMetadata( - previousBatchAspect == null ? null : previousBatchAspect.getSystemMetadata()) - .newSystemMetadata(writeItem.getSystemMetadata()) - .operation(MetadataAuditOperation.UPDATE) - .auditStamp(writeItem.getAuditStamp()) - .maxVersion(versionOfOld) - .build(); + if (!DataTemplateUtil.areEqual(databaseValue, writeItem.getRecordTemplate())) { + log.debug( + "Ingesting aspect with name {}, urn {}", writeItem.getAspectName(), writeItem.getUrn()); + String newValueStr = EntityApiUtils.toJsonAspect(writeItem.getRecordTemplate()); + long versionOfOld = + aspectDao.saveLatestAspect( + txContext, + writeItem.getUrn().toString(), + writeItem.getAspectName(), + previousBatchAspect == null ? null : EntityApiUtils.toJsonAspect(previousValue), + previousBatchAspect == null ? null : previousBatchAspect.getCreatedBy(), + previousBatchAspect == null + ? null + : previousBatchAspect.getEntityAspect().getCreatedFor(), + previousBatchAspect == null ? null : previousBatchAspect.getCreatedOn(), + previousBatchAspect == null ? null : previousBatchAspect.getSystemMetadataRaw(), + newValueStr, + writeItem.getAuditStamp().getActor().toString(), + writeItem.getAuditStamp().hasImpersonator() + ? 
writeItem.getAuditStamp().getImpersonator().toString() + : null, + new Timestamp(writeItem.getAuditStamp().getTime()), + EntityApiUtils.toJsonAspect(writeItem.getSystemMetadata()), + writeItem.getNextAspectVersion()); + + // metrics + aspectDao.incrementWriteMetrics( + writeItem.getAspectName(), 1, newValueStr.getBytes(StandardCharsets.UTF_8).length); + + return UpdateAspectResult.builder() + .urn(writeItem.getUrn()) + .oldValue(previousValue) + .newValue(writeItem.getRecordTemplate()) + .oldSystemMetadata( + previousBatchAspect == null ? null : previousBatchAspect.getSystemMetadata()) + .newSystemMetadata(writeItem.getSystemMetadata()) + .operation(MetadataAuditOperation.UPDATE) + .auditStamp(writeItem.getAuditStamp()) + .maxVersion(versionOfOld) + .build(); + } + + return null; } private static boolean shouldAspectEmitChangeLog(@Nonnull final AspectSpec aspectSpec) { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index f2ed2fddba7654..a1000fd02abfe1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -1,8 +1,10 @@ package com.linkedin.metadata.entity; +import static com.linkedin.metadata.Constants.APP_SOURCE; import static com.linkedin.metadata.Constants.CORP_USER_ENTITY_NAME; import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.METADATA_TESTS_SOURCE; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static org.mockito.Mockito.mock; import static org.testng.Assert.assertEquals; @@ -19,6 +21,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.DataTemplateUtil; import com.linkedin.data.template.RecordTemplate; +import 
com.linkedin.data.template.StringMap; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.identity.CorpUserInfo; import com.linkedin.metadata.AspectGenerationUtils; @@ -61,6 +64,7 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.stream.Collectors; import java.util.stream.IntStream; +import java.util.stream.Stream; import org.apache.commons.lang3.tuple.Triple; import org.testng.Assert; import org.testng.annotations.BeforeMethod; @@ -534,8 +538,8 @@ public void testBatchPatchWithTrailingNoOp() throws Exception { opContext, DATASET_ENTITY_NAME, entityUrn, GLOBAL_TAGS_ASPECT_NAME); assertEquals( envelopedAspect.getSystemMetadata().getVersion(), - "2", - "Expected version 2. 1 - Initial, + 1 batch operation (1 add, 1 remove)"); + "3", + "Expected version 3. 1 - Initial, + 1 add, 1 remove"); assertEquals( new GlobalTags(envelopedAspect.getValue().data()) .getTags().stream().map(TagAssociation::getTag).collect(Collectors.toSet()), @@ -649,7 +653,7 @@ public void testBatchPatchAdd() throws Exception { EnvelopedAspect envelopedAspect = _entityServiceImpl.getLatestEnvelopedAspect( opContext, DATASET_ENTITY_NAME, entityUrn, GLOBAL_TAGS_ASPECT_NAME); - assertEquals(envelopedAspect.getSystemMetadata().getVersion(), "3", "Expected version 3"); + assertEquals(envelopedAspect.getSystemMetadata().getVersion(), "4", "Expected version 4"); assertEquals( new GlobalTags(envelopedAspect.getValue().data()) .getTags().stream().map(TagAssociation::getTag).collect(Collectors.toSet()), @@ -657,6 +661,95 @@ public void testBatchPatchAdd() throws Exception { "Expected all tags"); } + @Test + public void testBatchPatchAddDuplicate() throws Exception { + Urn entityUrn = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:snowflake,testBatchPatchAdd,PROD)"); + List initialTags = + List.of( + TagUrn.createFromString("urn:li:tag:__default_large_table"), + TagUrn.createFromString("urn:li:tag:__default_low_queries"), + 
TagUrn.createFromString("urn:li:tag:__default_low_changes"), + TagUrn.createFromString("urn:li:tag:!10TB+ tables")) + .stream() + .map(tag -> new TagAssociation().setTag(tag)) + .collect(Collectors.toList()); + TagUrn tag2 = TagUrn.createFromString("urn:li:tag:$ 1TB+"); + + SystemMetadata systemMetadata = AspectGenerationUtils.createSystemMetadata(); + + SystemMetadata patchSystemMetadata = new SystemMetadata(); + patchSystemMetadata.setLastObserved(systemMetadata.getLastObserved() + 1); + patchSystemMetadata.setProperties(new StringMap(Map.of(APP_SOURCE, METADATA_TESTS_SOURCE))); + + ChangeItemImpl initialAspectTag1 = + ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .recordTemplate(new GlobalTags().setTags(new TagAssociationArray(initialTags))) + .systemMetadata(systemMetadata.copy()) + .auditStamp(TEST_AUDIT_STAMP) + .build(TestOperationContexts.emptyAspectRetriever(null)); + + PatchItemImpl patchAdd2 = + PatchItemImpl.builder() + .urn(entityUrn) + .entitySpec(_testEntityRegistry.getEntitySpec(DATASET_ENTITY_NAME)) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .aspectSpec( + _testEntityRegistry + .getEntitySpec(DATASET_ENTITY_NAME) + .getAspectSpec(GLOBAL_TAGS_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + .arrayPrimaryKeys(Map.of("properties", List.of("tag"))) + .patch(List.of(tagPatchOp(PatchOperationType.ADD, tag2))) + .build() + .getJsonPatch()) + .systemMetadata(patchSystemMetadata) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(_testEntityRegistry); + + // establish base entity + _entityServiceImpl.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext().get()) + .items(List.of(initialAspectTag1)) + .build(), + false, + true); + + _entityServiceImpl.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext().get()) + .items(List.of(patchAdd2, patchAdd2)) // duplicate + .build(), + false, + true); + + 
// List aspects urns + ListUrnsResult batch = _entityServiceImpl.listUrns(opContext, entityUrn.getEntityType(), 0, 1); + + assertEquals(batch.getStart().intValue(), 0); + assertEquals(batch.getCount().intValue(), 1); + assertEquals(batch.getTotal().intValue(), 1); + assertEquals(batch.getEntities().size(), 1); + assertEquals(entityUrn.toString(), batch.getEntities().get(0).toString()); + + EnvelopedAspect envelopedAspect = + _entityServiceImpl.getLatestEnvelopedAspect( + opContext, DATASET_ENTITY_NAME, entityUrn, GLOBAL_TAGS_ASPECT_NAME); + assertEquals(envelopedAspect.getSystemMetadata().getVersion(), "3", "Expected version 3"); + assertEquals( + new GlobalTags(envelopedAspect.getValue().data()) + .getTags().stream().map(TagAssociation::getTag).collect(Collectors.toSet()), + Stream.concat(initialTags.stream().map(TagAssociation::getTag), Stream.of(tag2)) + .collect(Collectors.toSet()), + "Expected all tags"); + } + @Test public void dataGeneratorThreadingTest() { DataGenerator dataGenerator = new DataGenerator(opContext, _entityServiceImpl); From b755c6856de65884d7982626574ac9752c8e774f Mon Sep 17 00:00:00 2001 From: Maggie Hays Date: Thu, 5 Dec 2024 16:17:57 -0600 Subject: [PATCH 25/28] docs(compliance-forms) update guide for creating form via UI (#11936) Co-authored-by: yoonhyejin <0327jane@gmail.com> --- docs-website/sidebars.js | 20 +- docs/api/tutorials/forms.md | 10 +- .../compliance-forms/complete-a-form.md | 177 +++++++++++++++++ .../compliance-forms/create-a-form.md | 186 ++++++++++++++++++ .../compliance-forms/overview.md | 46 +++++ .../feature-guides/documentation-forms.md | 113 ----------- docs/features/feature-guides/properties.md | 2 +- 7 files changed, 435 insertions(+), 119 deletions(-) create mode 100644 docs/features/feature-guides/compliance-forms/complete-a-form.md create mode 100644 docs/features/feature-guides/compliance-forms/create-a-form.md create mode 100644 docs/features/feature-guides/compliance-forms/overview.md delete mode 100644 
docs/features/feature-guides/documentation-forms.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 3a9d6e10ea8d42..6ae50215c8166f 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -149,6 +149,25 @@ module.exports = { type: "doc", id: "docs/glossary/business-glossary", }, + { + label: "Compliance Forms", + type: "category", + collapsed: true, + items: [ + { + type: "doc", + id: "docs/features/feature-guides/compliance-forms/overview", + }, + { + type: "doc", + id: "docs/features/feature-guides/compliance-forms/create-a-form", + }, + { + type: "doc", + id: "docs/features/feature-guides/compliance-forms/complete-a-form", + }, + ], + }, { label: "Data Contract", type: "doc", @@ -164,7 +183,6 @@ module.exports = { type: "doc", id: "docs/features/dataset-usage-and-query-history", }, - "docs/features/feature-guides/documentation-forms", { label: "Domains", type: "doc", diff --git a/docs/api/tutorials/forms.md b/docs/api/tutorials/forms.md index cf51f1579f1c8a..30dd4db7d8f111 100644 --- a/docs/api/tutorials/forms.md +++ b/docs/api/tutorials/forms.md @@ -1,13 +1,15 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# Documentation Forms +# Compliance Forms -## Why Would You Use Documentation Forms? +## Why Would You Use Compliance Forms? -Documentation Forms are a way for end-users to fill out all mandatory attributes associated with a data asset. The form will be dynamically generated based on the definitions provided by administrators and stewards and matching rules. +**DataHub Compliance Forms** streamline the process of documenting, annotating, and classifying your most critical Data Assets through a collaborative, crowdsourced approach. -Learn more about forms in the [Documentation Forms Feature Guide](../../../docs/features/feature-guides/documentation-forms.md). 
+With Compliance Forms, you can execute large-scale compliance initiatives by assigning tasks (e.g., documentation, tagging, or classification requirements) to the appropriate stakeholders — data owners, stewards, and subject matter experts. + +Learn more about forms in the [Compliance Forms Feature Guide](../../../docs/features/feature-guides/compliance-forms/overview.md). ### Goal Of This Guide This guide will show you how to diff --git a/docs/features/feature-guides/compliance-forms/complete-a-form.md b/docs/features/feature-guides/compliance-forms/complete-a-form.md new file mode 100644 index 00000000000000..285c722179e4d7 --- /dev/null +++ b/docs/features/feature-guides/compliance-forms/complete-a-form.md @@ -0,0 +1,177 @@ +--- +title: Complete a Form +--- + +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# Complete a DataHub Compliance Form + + +Once a Compliance Form has been published (see [Create a Compliance Form](create-a-form.md)), Assignees will receive notifications in their Task Center prompting them to complete the Form for each Asset they are responsible for. + +This guide provides an example of completing a Compliance Form, covering: + +1. Accessing a Form from an Asset Page or the Task Center +2. Completing a Form for a single Asset or multiple Assets (DataHub Cloud only) +3. Understanding different Form Question completion states + +The example uses the **Governance Initiative 2024**, a Verification Form with 3 Required Questions: + +

+ Sample Compliance Form +

+ +## Access a Compliance Form + +Once you have been assigned to complete a Compliance Form, you will see a **Complete Documentation** or **Complete Verification** option on the right-hand side of an Asset Page: + +

+ Open Compliance Form from Asset Page +

+ +**DataHub Cloud** users can find all outstanding Compliance Form requests by navigating to the **Task Center**: + +

+ Open Compliance Form from Task Center +

+ +## Complete a Form for a Single Asset + +When filling out a Compliance Form for a single Asset, you'll see a list of Questions tailored to that Asset, with clear labels showing which ones are required. Here's how it works: + +- **Question Details:** Each Question specifies if it's required or optional. Required Questions must be completed to submit the Form. +- **Pre-Populated Metadata:** If metadata already exists for a Question, it will appear pre-filled. You can confirm the existing value or make updates as needed. +- **Assignee Contributions:** If another Assignee has already provided a response, their name and the time of submission will be displayed. This gives you visibility into previous input, though you can still update the response. + +:::tip +For Verification Forms, after addressing all required Questions, you'll be prompted to provide final sign-off. This ensures all responses are complete and accurate, marking the Form ready for submission. +::: + +Once you complete all required responses, the sidebar will update with the status of the Asset: + +- **Documented**: All required Questions are completed, Verification is not needed +- **Verified**: All required Questions are completed and Verified + +Here's what the **Governance Initiative 2024** Verification Form looks like for `dogs_in_movies` after responding to all Required Questions: + +

+ Asset Ready to Verify +

+ +And here's the `dogs_in_movies` sidebar after Verifying all responses: + +

+ Asset is Verified +

+ +### Navigate to the Next Asset + +To continue working through the Compliance Forms assigned to you, **use the navigation arrows located in the top-right corner**. These arrows will take you to the next Asset that is still pending Form completion or Verification. Only Assets that require action will appear in this flow, allowing you to focus on the remaining tasks without unnecessary steps. + +## Complete a Form Question for Multiple Assets + +When you want to provide the same response for a question to multiple assets, you can apply it in bulk by selecting the **By Question** option in the top-right corner. This allows you to navigate through the Form question-by-question and apply the same response to multiple assets. + +:::note +Completing Form Questions for multiple Assets is only supported for DataHub Cloud. +::: + +### Example: Apply a Response in Bulk + +Let's look at an example. Imagine we are trying to provide the same answer to a Question for all Assets in a Snowflake schema called `DEMO_DB`. Here's how we'd do it: + +1. **Filter Assets**: Filter down to all datasets in the `DEMO_DB` Snowflake schema. +2. **Set a Response**: For the selected Question, provide a response. In this case, we'll set the Deletion Date to be `2024-12-31`. +3. **Apply to All Selected Assets**: Use the bulk application feature to apply this response to all filtered Assets. + +

+ Apply Response to Multiple Assets +

+ +After setting the response, toggle through each Question, providing the necessary responses to combinations of Assets. + +### Verification for Multiple Assets + +For Verification Forms, as you complete Questions, you will see the number of assets eligible for Verification in the top-right corner. This makes it easy to track which Assets have met the requirements. + +

+ Multiple Assets ready to Verify +

+ +When you are ready to bulk Verify Assets, you will be prompted to confirm that all responses are complete and accurate before proceeding. + +

+ Final Bulk Verification +

+ +### Switch Between Completion Modes + +You can easily toggle between the **Complete By Asset** and **Complete By Question** views as needed, ensuring flexibility while completing and verifying the Compliance Forms. + +## Understanding Different Form Question Completion States + +When completing a Compliance Form, you may encounter various types of Questions, each with unique completion states based on existing metadata or prior responses from other Assignees. This section highlights examples of various completion states to help you understand how Questions can be answered, confirmed, or updated when completing a Form. + +**_1. What is the primary use case for this asset?_** + +This required Question is asking the Assignee to provide Documentation on how the Asset should be used. Note that there is no text populated in the description, meaning the Asset does not have any documentation at all. + +

+ Sample Compliance Form +

+ +**_2. When will this asset be deleted?_** + +You may notice that this question has a pre-populated value. When metadata has been populated from a source _outside_ of a Form, users will have the option to update and save the value, or, simply **Confirm** that the value is accurate. + +

+ Sample Compliance Form +

+ +**_3. Who is the Data Steward of this Asset?_** + +Here's an example where a different Form Assignee has already provided an answer through the Compliance Form 3 days ago. All Assignees will still have the option to update the response, but this allows users to see how other Form Assignees have already answered the questions. + +

+ Sample Compliance Form +

+ + +## FAQ and Troubleshooting + +**Why don’t I see any Compliance Forms in the Task Center or on an Asset Page?** + +If you don’t see any Compliance Forms, check with the Form author to ensure your DataHub user account has been assigned to complete a Form for one or more Assets. Forms can be assigned to Asset Owners, specific DataHub Users, or a combination of both. \ No newline at end of file diff --git a/docs/features/feature-guides/compliance-forms/create-a-form.md b/docs/features/feature-guides/compliance-forms/create-a-form.md new file mode 100644 index 00000000000000..e97aaaa581777d --- /dev/null +++ b/docs/features/feature-guides/compliance-forms/create-a-form.md @@ -0,0 +1,186 @@ +--- +title: Create a Form +--- + +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# Create a DataHub Compliance Form + + +This guide will walk you through creating and assigning Compliance Forms, including: + +1. Creating a new Compliance Form +2. Building **Questions** for the Compliance Form +3. Assigning **Assets** for the Compliance Form +4. Selecting **Assignees** for the Compliance Form +5. Publishing a Compliance Form + +:::note +Managing Compliance Forms via the DataHub UI is only available in DataHub Cloud. If you are using DataHub Core, please refer to the [Compliance Forms API Guide](../../../api/tutorials/forms.md). +::: + +### Prerequisites + +In order to create, edit, or remove Compliance Forms, you must have the **Manage Compliance Forms** Platform privilege. + +### Step 1: Create a new Compliance Form + +From the navigation bar, head to **Govern** > **Compliance Forms**. Click **+ Create** to start building your Form. + +

+ View of all Compliance Forms +

+ +First up, provide the following details: + +1. **Name:** Select a unique and descriptive name for your Compliance Form that clearly communicates its purpose, such as **"PII Certification Q4 2024"**. + + _**Pro Tip:** This name will be displayed to Assignees when they are assigned tasks, so make it clear and detailed to ensure it conveys the intent of the Form effectively._ + +2. **Description:** Craft a concise yet informative description that explains the purpose of the Compliance Form. Include key details such as the importance of the initiative, its objectives, and the expected completion timeline. This helps Assignees understand the context and significance of their role in the process. + + _**Example:** "This Compliance Form is designed to ensure all datasets containing PII are reviewed and verified by Q4 2024. Completing this Form is critical for compliance with organizational and regulatory requirements."_ + +3. **Type:** Specify the collection type for the Form, based on your compliance requirements: + - **Completion:** The Form is considered complete once all required questions are answered for the selected Assets. We recommend this option for basic requirement completion use cases. + + - **Verification:** The Form is considered complete only when all required questions are answered for the selected Assets **and** an Assignee has explicitly "verified" the responses. We recommend this option when final sign-off by Assignees is necessary, ensuring they acknowledge the accuracy and validity of their responses. + +4. Next, click **Add Question** to begin building the requirements for your Form. + +

+ Create a new Compliance Form +

+ +### Step 2: Build Questions for your Form + +Next, define the Questions for your Compliance Form. These are used to collect required information about selected assets, and must be completed by an Assignee in order for the Form to be considered complete. + +There are 5 different question types to choose from: + +* **Ownership:** Request one or more owners to be assigned to selected assets. Optionally restrict responses to a specific set of valid users, groups, and ownership types. + * _E.g. Who is responsible for ensuring the accuracy of this Dataset?_ +* **Domain:** Assign a Domain to the Asset, with the option to predefine the set of allowed Domains. + * _E.g. Which Domain does this Dashboard belong to? Sales, Marketing, Finance._ +* **Documentation:** Provide Documentation about the Asset and/or Column. + * _E.g. What is the primary use case of this Dataset? What caveats should others be aware of?_ +* **Glossary Terms:** Assign one or more Glossary Terms to the Asset and/or Column, with the option to predefine the set of allowed Glossary Terms. + * _E.g. What types of personally identifiable information (PII) are included in this Asset? Email, Address, SSN, etc._ +* **Structured Properties:** Apply custom properties to an Asset and/or Column. + * _E.g. What date will this Dataset be deprecated and deleted?_ + +When creating a Question, use a clear and concise Title that is easy for Assignees to understand. In the Description, include additional context or instructions to guide their responses. Both the Title and Description will be visible to Assignees when completing the Form, so make sure to provide any specific hints or details they may need to answer the Question accurately and confidently. + +

+ Create a new Compliance Form prompt +

+ +### Step 3: Assign Assets to your Compliance Form + +Now that you have defined the Questions you want Assignees to complete, it's time to assign the in-scope Assets for this exercise. + +In the **Assign Assets** section, you can easily target the specific set of Assets that are relevant for this Form with the following steps: + +1. Add a Condition or Group of Conditions +2. Choose the appropriate filter type, such as: + * Asset Type (Dataset, Chart, etc.) + * Platform (Snowflake, dbt, etc.) + * Domain (Sales, Marketing, Finance, etc.) + * Assigned Owners + * Assigned Glossary Terms +3. Decide whether **All**, **Any**, or **None** of the filters should apply +4. Preview the relevant Assets to confirm you have applied the appropriate filters + +For example, you can apply filters to focus on all **Snowflake Datasets** that are also associated with the **Finance Domain**. This allows you to break down your compliance initiatives into manageable chunks, so you don't have to go after your entire data ecosystem in one go. + +

+ Assign assets to a Compliance Form +

+ +### Step 4: Select Assignees to complete your Compliance Form + +With the Questions and assigned Assets defined, the next step is to select the Assignees—the Users and/or Groups responsible for completing the Form. + +In the **Add Recipients** section, decide who is responsible for completing the Form: + +* **Asset Owners:** Any User who is assigned as an Owner of one of the in-scope Assets will be able to complete the Form. This is useful for larger initiatives when you may not know the full set of Users. +* **Specific Users and/or Groups:** Select a specific set of Users and/or Groups within DataHub. This is useful when Ownership of the Assets may be poorly defined. + +

+ Assign recipients to a Compliance Form +

+ +### Step 5: Publish your Form + +After defining the Questions, assigning Assets, and selecting the Assignees, your Form is ready to be published. Once published, Assignees will be notified to complete the Form for the Assets they are responsible for. + + +To publish a Form, simply click **Publish**. + +:::caution +Once you have published a Form, you **cannot** change or add Questions. You can, however, change the set of Assets and/or Assignees for the Form. +::: + +Not ready for primetime just yet? No worries! You also have the option to **Save Draft**. + +

+ Publish a Compliance Form +

+ +## FAQ and Troubleshooting + +**Does answering a Compliance Form Question update the selected Asset?** + +Yes! Compliance Forms serve as a powerful tool for gathering and updating key attributes for your mission-critical Data Assets at scale. When a Question is answered, the response directly updates the corresponding attributes of the selected Asset. + +**How does a Compliance Form interact with existing metadata?** + +If an Asset already has existing metadata that is also referenced in a Form Question, Assignees will have the option to confirm the existing value, overwrite the value, or append additional details. + +_You can find more details and examples in the [Complete a Form](complete-a-form.md#understanding-different-form-question-completion-states) guide._ + +**What is the difference between Completion and Verification Forms?** + +Both Form types are a way to configure a set of optional and/or required Questions for DataHub users to complete. When using Verification Forms, users will be presented with a final verification step once all required questions have been completed; you can think of this as a final acknowledgment of the accuracy of information submitted. + +**Can I assign multiple Forms to a single Asset?** + +You sure can! Please keep in mind that an Asset will only be considered Documented or Verified if all required questions are completed on all assigned Forms. + +**How will DataHub Users know that a Compliance Form has been assigned to them?** + +They have to check the Inbox on the navigation bar. There are no off-platform notifications for Compliance Forms at this time. + +**How do I track the progress of Form completion?** + +Great question. We are working on Compliance Forms Analytics that will directly show you the progress of your initiative across the selected Assets. Stay tuned! 
+ +### API Tutorials + +- [API Guides on Compliance Forms](../../../api/tutorials/forms.md) + +### Related Features + +- [DataHub Properties](../../feature-guides/properties.md) + +## Next Steps + +Now that you have created a DataHub Compliance Form, you're ready to [Complete a Compliance Form](complete-a-form.md). \ No newline at end of file diff --git a/docs/features/feature-guides/compliance-forms/overview.md b/docs/features/feature-guides/compliance-forms/overview.md new file mode 100644 index 00000000000000..86a6d8cc6dadfb --- /dev/null +++ b/docs/features/feature-guides/compliance-forms/overview.md @@ -0,0 +1,46 @@ +--- +title: Overview +--- + +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# About DataHub Compliance Forms + + +**DataHub Compliance Forms** streamline the process of documenting, annotating, and classifying your most critical Data Assets through a collaborative, crowdsourced approach. + +With Compliance Forms, you can execute large-scale compliance initiatives by assigning tasks (e.g., documentation, tagging, or classification requirements) to the appropriate stakeholders — data owners, stewards, and subject matter experts. + +## What are Compliance Forms? + +A **Compliance Form** is a flexible and centrally managed tool that enables your data governance or compliance teams to define, enforce, and monitor requirements for specific Data Assets or Columns. + +A Compliance Form consists of: + +1. **Assets:** The Data Assets or Columns for which the Form must be completed. These represent the scope of the compliance initiative. + +2. **Questions:** The set of requirements or conditions that must be completed for each asset. Questions are a vehicle to collect key attributes for your data assets. These can range from simple to complex, with questions that require differing types of answers to complete. Examples include Descriptions, Domains, Owners, Tags, Glossary Terms, and custom Structured Properties. + +3. 
**Assignees:** The users or groups responsible for completing the Form (e.g., asset owners, domain experts, or stewards). + +Once a Compliance Form is defined, it can be published. When a form is published, the assignees who are required to complete the requirements will be notified via the Inbox of the tasks that they must complete. In addition, analytics will begin to be gathered about the assets that are meeting or violating the requirements in the form so you can understand your initiative's progress over time. + +### Why Use Compliance Forms? + +Compliance Forms enable organizations to: +- Standardize documentation and metadata across critical Data Assets. +- Crowdsource compliance-related tasks to domain experts who are best equipped to provide accurate information. +- Scale governance initiatives efficiently while maintaining accuracy and accountability. + +By leveraging Compliance Forms, organizations can ensure consistent metadata quality and foster collaboration between data experts and governance teams. + +

+ Sample Compliance Form +

+ +## Next Steps + +Now that you understand the basics of DataHub Compliance Forms, you're ready to [Create a Compliance Form](create-a-form.md). \ No newline at end of file diff --git a/docs/features/feature-guides/documentation-forms.md b/docs/features/feature-guides/documentation-forms.md deleted file mode 100644 index 2edeb8ce302d77..00000000000000 --- a/docs/features/feature-guides/documentation-forms.md +++ /dev/null @@ -1,113 +0,0 @@ -import FeatureAvailability from '@site/src/components/FeatureAvailability'; - -# About DataHub Documentation Forms - - -DataHub Documentation Forms streamline the process of setting documentation requirements and delegating annotation responsibilities to the relevant data asset owners, stewards, and subject matter experts. - -Forms are highly configurable, making it easy to ask the right questions of the right people, for a specific set of assets. - -## What are Documentation Forms? - -You can think of Documentation Forms as a survey for your data assets: a set of questions that must be answered in order for an asset to be considered properly documented. - -Verification Forms are an extension of Documentation Forms, requiring a final verification, or sign-off, on all responses before the asset can be considered Verified. This is useful for compliance and/or governance annotation initiatives where you want assignees to provide a final acknowledgement that the information provided is correct. - -## Creating and Assigning Documentation Forms - -Documentation Forms are defined via YAML with the following details: - -- Name and Description to help end-users understand the scope and use case -- Form Type, either Documentation or Verification - - Verification Forms require a final signoff, i.e. 
Verification, of all required questions before the Form can be considered complete -- Form Questions (aka "prompts") for end-users to complete - - Questions can be assigned at the asset-level and/or the field-level - - Asset-level questions can be configured to be required; by default, all questions are optional -- Assigned Assets, defined by: - - A set of specific asset URNs, OR - - Assets related to a set of filters, such as Type (Datasets, Dashboards, etc.), Platform (Snowflake, Looker, etc.), Domain (Product, Marketing, etc.), or Container (Schema, Folder, etc.) -- Optional: Form Assignees - - Optionally assign specific DataHub users/groups to complete the Form for all relevant assets - - If omitted, any Owner of an Asset can complete Forms assigned to that Asset - -Here's an example of defining a Documentation Form via YAML: -```yaml -- id: 123456 - # urn: "urn:li:form:123456" # optional if id is provided - type: VERIFICATION # Supported Types: DOCUMENTATION, VERIFICATION - name: "Metadata Initiative 2024" - description: "How we want to ensure the most important data assets in our organization have all of the most important and expected pieces of metadata filled out" - prompts: # Questions for Form assignees to complete - - id: "123" - title: "Data Retention Time" - description: "Apply Retention Time structured property to form" - type: STRUCTURED_PROPERTY - structured_property_id: io.acryl.privacy.retentionTime - required: True # optional; default value is False - entities: # Either pass a list of urns or a group of filters. 
This example shows a list of urns - urns: - - urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD) - # optionally assign the form to a specific set of users and/or groups - # when omitted, form will be assigned to Asset owners - actors: - users: - - urn:li:corpuser:jane@email.com # note: these should be URNs - - urn:li:corpuser:john@email.com - groups: - - urn:li:corpGroup:team@email.com # note: these should be URNs - -``` - -:::note -Documentation Forms currently only support defining Structured Properties as Form Questions -::: - - - - - -## Additional Resources - -### Videos - -**Asset Verification in DataHub Cloud** - -

- -

- -## FAQ and Troubleshooting - -**What is the difference between Documentation and Verification Forms?** - -Both form types are a way to configure a set of optional and/or required questions for DataHub users to complete. When using Verification Forms, users will be presented with a final verification step once all required questions have been completed; you can think of this as a final acknowledgement of the accuracy of information submitted. - -**Who is able to complete Forms in DataHub?** - -By default, any owner of an Asset will be able to respond to questions assigned via a Form. - -When assigning a Form to an Asset, you can optionally assign specific DataHub users/groups to fill them out. - -**Can I assign multiple Forms to a single asset?** - -You sure can! Please keep in mind that an Asset will only be considered Documented or Verified if all required questions are completed on all assiged Forms. - -### API Tutorials - -- [API Guides on Documentation Form](../../../docs/api/tutorials/forms.md) - -:::note -You must create a Structured Property before including it in a Documentation Form. -To learn more about creating Structured Properties via CLI, please see the [Create Structured Properties](/docs/api/tutorials/structured-properties.md) tutorial. 
-::: - -### Related Features - -- [DataHub Properties](/docs/features/feature-guides/properties.md) \ No newline at end of file diff --git a/docs/features/feature-guides/properties.md b/docs/features/feature-guides/properties.md index 0d961b9ceac4ff..abdb736ad2a429 100644 --- a/docs/features/feature-guides/properties.md +++ b/docs/features/feature-guides/properties.md @@ -155,4 +155,4 @@ Please see the following API guides related to Custom and Structured Properties: ### Related Features -- [Documentation Forms](/docs/features/feature-guides/documentation-forms.md) \ No newline at end of file +- [Compliance Forms](compliance-forms/overview.md) \ No newline at end of file From 1ed55f417606477071f7843be87caff0ff1ea4c8 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Thu, 5 Dec 2024 17:50:56 -0800 Subject: [PATCH 26/28] feat(snowflake): adding oauth token bypass to snowflake (#12048) --- .../source/snowflake/snowflake_connection.py | 28 ++++++++++ .../unit/snowflake/test_snowflake_source.py | 54 +++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py index 397606400d389c..2239338972d9be 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py @@ -43,6 +43,7 @@ "EXTERNAL_BROWSER_AUTHENTICATOR": EXTERNAL_BROWSER_AUTHENTICATOR, "KEY_PAIR_AUTHENTICATOR": KEY_PAIR_AUTHENTICATOR, "OAUTH_AUTHENTICATOR": OAUTH_AUTHENTICATOR, + "OAUTH_AUTHENTICATOR_TOKEN": OAUTH_AUTHENTICATOR, } _SNOWFLAKE_HOST_SUFFIX = ".snowflakecomputing.com" @@ -104,6 +105,10 @@ class SnowflakeConnectionConfig(ConfigModel): description="Connect args to pass to Snowflake SqlAlchemy driver", exclude=True, ) + token: Optional[str] = pydantic.Field( + default=None, + description="OAuth token from external identity 
provider. Not recommended for most use cases because it will not be able to refresh once expired.", + ) def get_account(self) -> str: assert self.account_id @@ -148,6 +153,18 @@ def authenticator_type_is_valid(cls, v, values): logger.info(f"using authenticator type '{v}'") return v + @pydantic.validator("token", always=True) + def validate_token_oauth_config(cls, v, values): + auth_type = values.get("authentication_type") + if auth_type == "OAUTH_AUTHENTICATOR_TOKEN": + if not v: + raise ValueError("Token required for OAUTH_AUTHENTICATOR_TOKEN.") + elif v is not None: + raise ValueError( + "Token can only be provided when using OAUTH_AUTHENTICATOR_TOKEN" + ) + return v + @staticmethod def _check_oauth_config(oauth_config: Optional[OAuthConfiguration]) -> None: if oauth_config is None: @@ -333,6 +350,17 @@ def get_native_connection(self) -> NativeSnowflakeConnection: application=_APPLICATION_NAME, **connect_args, ) + elif self.authentication_type == "OAUTH_AUTHENTICATOR_TOKEN": + return snowflake.connector.connect( + user=self.username, + account=self.account_id, + authenticator="oauth", + token=self.token, # Token generated externally and provided directly to the recipe + warehouse=self.warehouse, + role=self.role, + application=_APPLICATION_NAME, + **connect_args, + ) elif self.authentication_type == "OAUTH_AUTHENTICATOR": return self.get_oauth_connection() elif self.authentication_type == "KEY_PAIR_AUTHENTICATOR": diff --git a/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py b/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py index 161dfa2b4e78f3..3284baf103e5af 100644 --- a/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py +++ b/metadata-ingestion/tests/unit/snowflake/test_snowflake_source.py @@ -130,6 +130,60 @@ def test_snowflake_oauth_happy_paths(): ) +def test_snowflake_oauth_token_happy_path(): + assert SnowflakeV2Config.parse_obj( + { + "account_id": "test", + "authentication_type": "OAUTH_AUTHENTICATOR_TOKEN", 
+ "token": "valid-token", + "username": "test-user", + "oauth_config": None, + } + ) + + +def test_snowflake_oauth_token_without_token(): + with pytest.raises( + ValidationError, match="Token required for OAUTH_AUTHENTICATOR_TOKEN." + ): + SnowflakeV2Config.parse_obj( + { + "account_id": "test", + "authentication_type": "OAUTH_AUTHENTICATOR_TOKEN", + "username": "test-user", + } + ) + + +def test_snowflake_oauth_token_with_wrong_auth_type(): + with pytest.raises( + ValueError, + match="Token can only be provided when using OAUTH_AUTHENTICATOR_TOKEN.", + ): + SnowflakeV2Config.parse_obj( + { + "account_id": "test", + "authentication_type": "OAUTH_AUTHENTICATOR", + "token": "some-token", + "username": "test-user", + } + ) + + +def test_snowflake_oauth_token_with_empty_token(): + with pytest.raises( + ValidationError, match="Token required for OAUTH_AUTHENTICATOR_TOKEN." + ): + SnowflakeV2Config.parse_obj( + { + "account_id": "test", + "authentication_type": "OAUTH_AUTHENTICATOR_TOKEN", + "token": "", + "username": "test-user", + } + ) + + default_config_dict: Dict[str, Any] = { "username": "user", "password": "password", From 2fe21329fa34e8c3f9946870966756c64351c9f4 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:43:05 +0530 Subject: [PATCH 27/28] fix(ingest): avoid shell entities during view lineage generation (#12044) --- .../ingestion/source/sql/sql_common.py | 2 + .../ingestion/source/sql/sql_report.py | 1 + .../datahub/ingestion/source/unity/source.py | 2 + .../datahub/sql_parsing/sqlglot_lineage.py | 8 +- .../golden_test_ingest_with_database.json | 194 ++++++++---------- .../golden_mces_mssql_no_db_to_file.json | 54 +---- .../golden_mces_mssql_no_db_with_filter.json | 28 +-- .../golden_mces_mssql_to_file.json | 32 +-- 8 files changed, 118 insertions(+), 203 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py 
b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 41ffcb95a7cc43..64aa8cfc6ef6c7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -1197,6 +1197,8 @@ def _run_sql_parser( ) else: self.report.num_view_definitions_parsed += 1 + if raw_lineage.out_tables != [view_urn]: + self.report.num_view_definitions_view_urn_mismatch += 1 return view_definition_lineage_helper(raw_lineage, view_urn) def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_report.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_report.py index c1f722b5d1e783..c445ce44a91449 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_report.py @@ -48,6 +48,7 @@ class SQLSourceReport( query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None num_view_definitions_parsed: int = 0 + num_view_definitions_view_urn_mismatch: int = 0 num_view_definitions_failed_parsing: int = 0 num_view_definitions_failed_column_parsing: int = 0 view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 9a6cde78cf10d3..f758746193cd83 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -974,6 +974,8 @@ def _run_sql_parser( ) else: self.report.num_view_definitions_parsed += 1 + if raw_lineage.out_tables != [view_urn]: + self.report.num_view_definitions_view_urn_mismatch += 1 return view_definition_lineage_helper(raw_lineage, view_urn) def get_view_lineage(self) -> Iterable[MetadataWorkUnit]: diff --git 
a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py index 9adb792a4be518..4ff68574bf20e6 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py @@ -1243,13 +1243,19 @@ def infer_output_schema(result: SqlParsingResult) -> Optional[List[SchemaFieldCl def view_definition_lineage_helper( result: SqlParsingResult, view_urn: str ) -> SqlParsingResult: - if result.query_type is QueryType.SELECT: + if result.query_type is QueryType.SELECT or ( + result.out_tables and result.out_tables != [view_urn] + ): # Some platforms (e.g. postgres) store only <select statement> from view definition # `create view V as <select statement>` . For such view definitions, `result.out_tables` and # `result.column_lineage[].downstream` are empty in `sqlglot_lineage` response, whereas upstream # details and downstream column details are extracted correctly. # Here, we inject view V's urn in `result.out_tables` and `result.column_lineage[].downstream` # to get complete lineage result. + + # Some platforms(e.g. mssql) may have slightly different view name in view definition than + # actual view name used elsewhere. Therefore we overwrite downstream table for such cases as well. 
+ result.out_tables = [view_urn] if result.column_lineage: for col_result in result.column_lineage: diff --git a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json index cbcadde6feb213..abd9b2350638a2 100644 --- a/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json +++ b/metadata-ingestion/tests/integration/oracle/golden_test_ingest_with_database.json @@ -17,7 +17,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -33,7 +33,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -49,7 +49,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -67,7 +67,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -83,7 +83,23 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:937a38ee28b69ecae38665c5e842d0ad", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0e497517e191d344b0c403231bc708d0" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -106,7 +122,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": 
"oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -122,7 +138,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -138,7 +154,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -156,23 +172,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:937a38ee28b69ecae38665c5e842d0ad", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:0e497517e191d344b0c403231bc708d0" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -193,7 +193,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -209,7 +209,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -272,7 +272,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -290,7 +290,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -315,7 +315,7 @@ }, "systemMetadata": { "lastObserved": 
1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -331,7 +331,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -394,7 +394,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -412,7 +412,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -437,7 +437,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -453,7 +453,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -519,7 +519,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -537,7 +537,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -555,7 +555,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -580,7 +580,23 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", 
+ "entityUrn": "urn:li:container:1965527855ae77f259a8ddea2b8eed2f", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0e497517e191d344b0c403231bc708d0" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -603,7 +619,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -619,7 +635,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -635,7 +651,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -653,23 +669,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:1965527855ae77f259a8ddea2b8eed2f", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:0e497517e191d344b0c403231bc708d0" - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -690,7 +690,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -706,7 +706,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -769,7 +769,7 @@ }, 
"systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -787,7 +787,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -812,7 +812,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -828,7 +828,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -891,7 +891,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -909,7 +909,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -934,7 +934,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -950,7 +950,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -1016,7 +1016,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -1034,7 +1034,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } 
}, @@ -1052,7 +1052,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, @@ -1077,13 +1077,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -1106,7 +1106,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD),MOCK_COLUMN1)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD),MOCK_COLUMN1)" ], "confidenceScore": 1.0 }, @@ -1117,7 +1117,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD),MOCK_COLUMN2)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema1.view1,PROD),MOCK_COLUMN2)" ], "confidenceScore": 1.0 } @@ -1126,13 +1126,13 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -1155,7 +1155,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD),MOCK_COLUMN1)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD),MOCK_COLUMN1)" ], "confidenceScore": 1.0 }, @@ -1166,7 +1166,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD),MOCK_COLUMN2)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:oracle,OraDoc.schema2.view1,PROD),MOCK_COLUMN2)" ], "confidenceScore": 1.0 } @@ -1175,39 +1175,7 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema1.mock_view,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:oracle,oradoc.schema2.mock_view,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "oracle-2022_02_03-07_00_00", + "runId": "oracle-2022_02_03-07_00_00-uzcdxn", "lastRunId": "no-run-id-provided" } } diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index 54821347fd28b8..72dcda25c1296c 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", + "job_id": 
"c2d77890-83ba-435f-879b-1c77fa38dd47", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 12:58:03.260000", - "date_modified": "2024-11-22 12:58:03.440000", + "date_created": "2024-12-05 16:44:43.910000", + "date_modified": "2024-12-05 16:44:44.043000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 12:58:03.137000", - "date_modified": "2024-11-22 12:58:03.137000" + "date_created": "2024-12-05 16:44:43.800000", + "date_modified": "2024-12-05 16:44:43.800000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2310,8 +2310,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 12:58:03.140000", - "date_modified": "2024-11-22 12:58:03.140000" + "date_created": "2024-12-05 16:44:43.803000", + "date_modified": "2024-12-05 16:44:43.803000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", @@ -4883,7 +4883,7 @@ }, { "entityType": "dataset", - 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.PersonsView,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -4908,7 +4908,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -4931,7 +4931,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),firstname)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD),firstname)" ], "confidenceScore": 1.0 }, @@ -4942,7 +4942,7 @@ ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),lastname)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD),lastname)" ], "confidenceScore": 1.0 } @@ -5034,37 +5034,5 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 1d702214fedf79..0df89ff1eb94d7 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "2a055367-5e6a-4162-b3a9-dd60f52c79a8", + "job_id": "c2d77890-83ba-435f-879b-1c77fa38dd47", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-26 07:22:19.640000", - "date_modified": "2024-11-26 07:22:19.773000", + "date_created": "2024-12-05 16:44:43.910000", + "date_modified": "2024-12-05 16:44:44.043000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-26 07:22:19.510000", - "date_modified": "2024-11-26 07:22:19.510000" + "date_created": "2024-12-05 16:44:43.800000", + "date_modified": "2024-12-05 16:44:43.800000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2630,7 +2630,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.PersonsView,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -2716,21 +2716,5 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", - "changeType": "UPSERT", - "aspectName": 
"status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index 3836e587ef8cf4..b67ebfb206883a 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", + "job_id": "c2d77890-83ba-435f-879b-1c77fa38dd47", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 12:58:03.260000", - "date_modified": "2024-11-22 12:58:03.440000", + "date_created": "2024-12-05 16:44:43.910000", + "date_modified": "2024-12-05 16:44:44.043000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 12:58:03.137000", - "date_modified": "2024-11-22 12:58:03.137000" + "date_created": "2024-12-05 16:44:43.800000", + "date_modified": "2024-12-05 16:44:43.800000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2310,8 +2310,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) 
as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 12:58:03.140000", - "date_modified": "2024-11-22 12:58:03.140000" + "date_created": "2024-12-05 16:44:43.803000", + "date_modified": "2024-12-05 16:44:43.803000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", @@ -2658,7 +2658,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.PersonsView,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -2760,21 +2760,5 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file From eef2077a55c45ff5024f6b28a35a682909a44c80 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 6 Dec 2024 16:57:07 +0530 Subject: [PATCH 28/28] fix(logs): add actor urn on unauthorised (#12030) --- .../authentication/filter/AuthenticationFilter.java | 11 +++++++---- .../auth/authentication/AuthServiceController.java | 6 ++++-- .../metadata/resources/entity/AspectResource.java | 6 +++--- .../metadata/resources/entity/EntityResource.java | 8 ++++---- 
.../linkedin/metadata/resources/usage/UsageStats.java | 5 +++-- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java index 0a54677eb6149b..30f98180f80180 100644 --- a/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java +++ b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java @@ -98,11 +98,12 @@ public void doFilter(ServletRequest request, ServletResponse response, FilterCha } if (authentication != null) { + String actorUrnStr = authentication.getActor().toUrnStr(); // Successfully authenticated. log.debug( - String.format( - "Successfully authenticated request for Actor with type: %s, id: %s", - authentication.getActor().getType(), authentication.getActor().getId())); + "Successfully authenticated request for Actor with type: {}, id: {}", + authentication.getActor().getType(), + authentication.getActor().getId()); AuthenticationContext.setAuthentication(authentication); chain.doFilter(request, response); } else { @@ -110,7 +111,9 @@ public void doFilter(ServletRequest request, ServletResponse response, FilterCha log.debug( "Failed to authenticate request. 
Received 'null' Authentication value from authenticator chain."); ((HttpServletResponse) response) - .sendError(HttpServletResponse.SC_UNAUTHORIZED, "Unauthorized to perform this action."); + .sendError( + HttpServletResponse.SC_UNAUTHORIZED, + "Unauthorized to perform this action due to expired auth."); return; } AuthenticationContext.remove(); diff --git a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java index de2582af00a932..5d4542cf0826e8 100644 --- a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java +++ b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java @@ -138,7 +138,9 @@ CompletableFuture<ResponseEntity<String>> generateSessionTokenForUser( } log.info("Attempting to generate session token for user {}", userId.asText()); - final String actorId = AuthenticationContext.getAuthentication().getActor().getId(); + Authentication authentication = AuthenticationContext.getAuthentication(); + final String actorId = authentication.getActor().getId(); + final String actorUrn = authentication.getActor().toUrnStr(); return CompletableFuture.supplyAsync( () -> { // 1. Verify that only those authorized to generate a token (datahub system) are able to. 
@@ -164,7 +166,7 @@ CompletableFuture> generateSessionTokenForUser( } throw HttpClientErrorException.create( HttpStatus.UNAUTHORIZED, - "Unauthorized to perform this action.", + actorUrn + " unauthorized to perform this action.", new HttpHeaders(), null, null); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index a8b9c34ab66ae6..6033ead36f10ec 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -281,12 +281,13 @@ private Task ingestProposals( boolean asyncBool) throws URISyntaxException { Authentication authentication = AuthenticationContext.getAuthentication(); + String actorUrnStr = authentication.getActor().toUrnStr(); Set entityTypes = metadataChangeProposals.stream() .map(MetadataChangeProposal::getEntityType) .collect(Collectors.toSet()); final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), getContext(), + systemOperationContext, RequestContext.builder().buildRestli(actorUrnStr, getContext(), ACTION_INGEST_PROPOSAL, entityTypes), _authorizer, authentication, true); // Ingest Authorization Checks @@ -299,9 +300,8 @@ private Task ingestProposals( .map(ex -> String.format("HttpStatus: %s Urn: %s", ex.getSecond(), ex.getFirst().getEntityUrn())) .collect(Collectors.joining(", ")); throw new RestLiServiceException( - HttpStatus.S_403_FORBIDDEN, "User is unauthorized to modify entity: " + errorMessages); + HttpStatus.S_403_FORBIDDEN, "User " + actorUrnStr + " is unauthorized to modify entity: " + errorMessages); } - String actorUrnStr = authentication.getActor().toUrnStr(); final 
AuditStamp auditStamp = new AuditStamp().setTime(_clock.millis()).setActor(Urn.createFromString(actorUrnStr)); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 6c5576f2e5d9f4..0c374c29cf958a 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -274,7 +274,7 @@ public Task ingest( String actorUrnStr = authentication.getActor().toUrnStr(); final Urn urn = com.datahub.util.ModelUtils.getUrnFromSnapshotUnion(entity.getValue()); final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), getContext(), + systemOperationContext, RequestContext.builder().buildRestli(actorUrnStr, getContext(), ACTION_INGEST, urn.getEntityType()), authorizer, authentication, true); if (!isAPIAuthorizedEntityUrns( @@ -282,7 +282,7 @@ public Task ingest( CREATE, List.of(urn))) { throw new RestLiServiceException( - HttpStatus.S_403_FORBIDDEN, "User is unauthorized to edit entity " + urn); + HttpStatus.S_403_FORBIDDEN, "User " + actorUrnStr + " is unauthorized to edit entity " + urn); } try { @@ -320,7 +320,7 @@ public Task batchIngest( .map(Entity::getValue) .map(com.datahub.util.ModelUtils::getUrnFromSnapshotUnion).collect(Collectors.toList()); final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), + systemOperationContext, RequestContext.builder().buildRestli(actorUrnStr, getContext(), ACTION_BATCH_INGEST, urns.stream().map(Urn::getEntityType).collect(Collectors.toList())), authorizer, authentication, true); 
@@ -328,7 +328,7 @@ public Task batchIngest( opContext, CREATE, urns)) { throw new RestLiServiceException( - HttpStatus.S_403_FORBIDDEN, "User is unauthorized to edit entities."); + HttpStatus.S_403_FORBIDDEN, "User " + actorUrnStr + " is unauthorized to edit entities."); } for (Entity entity : entities) { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java index a0c3d460951605..426eff20c9c6eb 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java @@ -104,9 +104,10 @@ public Task batchIngest(@ActionParam(PARAM_BUCKETS) @Nonnull UsageAggregat () -> { final Authentication auth = AuthenticationContext.getAuthentication(); + String actorUrnStr = auth.getActor().toUrnStr(); Set urns = Arrays.stream(buckets).sequential().map(UsageAggregation::getResource).collect(Collectors.toSet()); final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + systemOperationContext, RequestContext.builder().buildRestli(actorUrnStr, getContext(), ACTION_BATCH_INGEST, urns.stream().map(Urn::getEntityType).collect(Collectors.toList())), _authorizer, auth, true); @@ -115,7 +116,7 @@ public Task batchIngest(@ActionParam(PARAM_BUCKETS) @Nonnull UsageAggregat UPDATE, urns)) { throw new RestLiServiceException( - HttpStatus.S_403_FORBIDDEN, "User is unauthorized to edit entities."); + HttpStatus.S_403_FORBIDDEN, "User " + actorUrnStr + " is unauthorized to edit entities."); } for (UsageAggregation agg : buckets) {