diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 47be16c..5f82f28 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ ] dependencies = [ - "decentriq_platform >=0.26.3", + "decentriq_platform >=0.33.0", "curies", "pandas[excel,spss]", "pyarrow", @@ -123,6 +123,8 @@ cov-check = [ ] compile = "pip-compile -o requirements.txt pyproject.toml" # TODO: use uv +# uv venv +# uv pip install . # uv pip compile pyproject.toml -o requirements.txt # uv run ruff format # uv run ruff check src --fix diff --git a/backend/requirements.txt b/backend/requirements.txt index 11a9b2f..b7bb152 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -31,7 +31,6 @@ cbor2==5.6.2 # via decentriq-platform certifi==2023.11.17 # via - # decentriq-platform # httpcore # httpx # requests @@ -45,7 +44,7 @@ click==8.1.7 # via uvicorn coloredlogs==15.0.1 # via onnxruntime -cryptography==42.0.5 +cryptography==42.0.8 # via # decentriq-platform # pyopenssl @@ -54,10 +53,12 @@ curies==0.7.9 # via cohort-explorer-backend (pyproject.toml) dataclasses-json==0.6.7 # via langchain-community -decentriq-dcr-compiler==0.7.1 +decentriq-dcr-compiler==0.13.0 # via decentriq-platform -decentriq-platform==0.27.2 +decentriq-platform==0.33.0 # via cohort-explorer-backend (pyproject.toml) +decentriq-transparency-verification==0.1.0.dev2 + # via decentriq-platform defusedxml==0.7.1 # via odfpy diskcache==5.6.3 @@ -123,6 +124,7 @@ httpx==0.27.0 # cohort-explorer-backend (pyproject.toml) # groq # langsmith + # ollama # openai # qdrant-client huggingface-hub==0.25.1 @@ -136,9 +138,10 @@ humanfriendly==10.0 # via coloredlogs hyperframe==6.0.1 # via h2 -idna==3.6 +idna==3.10 # via # anyio + # decentriq-platform # httpx # requests # yarl @@ -166,6 +169,7 @@ langchain-core==0.2.41 # langchain-community # langchain-groq # langchain-huggingface + # langchain-ollama # langchain-openai # langchain-qdrant # langchain-text-splitters @@ -174,6 +178,8 @@ 
langchain-groq==0.1.10 # via cohort-explorer-backend (pyproject.toml) langchain-huggingface==0.0.3 # via cohort-explorer-backend (pyproject.toml) +langchain-ollama==0.1.3 + # via cohort-explorer-backend (pyproject.toml) langchain-openai==0.1.20 # via # cohort-explorer-backend (pyproject.toml) @@ -224,40 +230,42 @@ numpy==1.26.4 # rank-bm25 # scikit-learn # scipy -# # transformers -# nvidia-cublas-cu12==12.1.3.1 + # transformers +nvidia-cublas-cu12==12.1.3.1 # via # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch -# nvidia-cuda-cupti-cu12==12.1.105 -# # via torch -# nvidia-cuda-nvrtc-cu12==12.1.105 -# # via torch -# nvidia-cuda-runtime-cu12==12.1.105 -# # via torch -# nvidia-cudnn-cu12==9.1.0.70 -# # via torch -# nvidia-cufft-cu12==11.0.2.54 -# # via torch -# nvidia-curand-cu12==10.3.2.106 -# # via torch -# nvidia-cusolver-cu12==11.4.5.107 -# # via torch -# nvidia-cusparse-cu12==12.1.0.106 -# # via -# # nvidia-cusolver-cu12 -# # torch -# nvidia-nccl-cu12==2.20.5 -# # via torch -# nvidia-nvjitlink-cu12==12.6.77 -# # via -# # nvidia-cusolver-cu12 -# # nvidia-cusparse-cu12 -# nvidia-nvtx-cu12==12.1.105 +nvidia-cuda-cupti-cu12==12.1.105 + # via torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via torch +nvidia-cuda-runtime-cu12==12.1.105 + # via torch +nvidia-cudnn-cu12==9.1.0.70 + # via torch +nvidia-cufft-cu12==11.0.2.54 + # via torch +nvidia-curand-cu12==10.3.2.106 + # via torch +nvidia-cusolver-cu12==11.4.5.107 + # via torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # nvidia-cusolver-cu12 + # torch +nvidia-nccl-cu12==2.20.5 + # via torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 # via torch odfpy==1.4.1 # via pandas +ollama==0.3.3 + # via langchain-ollama onnx==1.17.0 # via fastembed onnxruntime==1.19.2 @@ -374,7 +382,7 @@ regex==2024.9.11 # via # tiktoken # transformers -requests==2.31.0 +requests==2.32.3 # via # curies # decentriq-platform @@ -431,8 +439,6 @@ sqlalchemy==2.0.35 # via # 
langchain # langchain-community -sqloxide==0.1.43 - # via decentriq-platform starlette==0.36.3 # via fastapi sympy==1.13.3 @@ -473,7 +479,7 @@ transformers==4.43.4 # adapters # langchain-huggingface # sentence-transformers -# triton==3.0.0 +triton==3.0.0 # via torch typing-extensions==4.9.0 # via @@ -496,8 +502,9 @@ typing-inspect==0.9.0 # via dataclasses-json tzdata==2024.1 # via pandas -urllib3==2.2.1 +urllib3==1.26.19 # via + # decentriq-platform # qdrant-client # requests uvicorn==0.27.1 diff --git a/backend/src/decentriq.py b/backend/src/decentriq.py index b56ccf2..d1339aa 100644 --- a/backend/src/decentriq.py +++ b/backend/src/decentriq.py @@ -1,4 +1,5 @@ from copy import deepcopy +import json from typing import Any import decentriq_platform as dq @@ -107,16 +108,11 @@ def pandas_script_merge_cohorts(merged_cohorts: dict[str, list[str]], all_cohort return merge_script -@router.post( - "/create-dcr", - name="Create Data Clean Room for computing", - response_description="Upload result", -) -async def create_compute_dcr( +async def get_compute_dcr_definition( cohorts_request: dict[str, Any], - user: Any = Depends(get_current_user), -) -> dict[str, Any]: - """Create a Data Clean Room for computing with the cohorts requested using Decentriq SDK""" + user: Any, + client: Any, +) -> Any: # users = [user["email"]] # TODO: cohorts_request could also be a dict of union of cohorts to merge # {"cohorts": {"cohort_id": ["var1", "var2"], "merged_cohort3": {"cohort1": ["weight", "sex"], "cohort2": ["gender", "patient weight"]}}} @@ -143,8 +139,6 @@ async def create_compute_dcr( else: raise HTTPException(status_code=400, detail=f"Invalid structure for cohort {cohort_id}") - # Establish connection to Decentriq - client = dq.create_client(settings.decentriq_email, settings.decentriq_token) # Creation of a Data Clean Room (DCR) data_nodes = [] @@ -231,13 +225,51 @@ async def create_compute_dcr( ) # Build and publish DCR - dcr_definition = builder.build() + return 
builder.build(), dcr_title + + + +@router.post( + "/create-compute-dcr", + name="Create Data Clean Room for computing", + response_description="Upload result", +) +async def create_compute_dcr( + cohorts_request: dict[str, Any], + user: Any = Depends(get_current_user), +) -> dict[str, Any]: + """Create a Data Clean Room for computing with the cohorts requested using Decentriq SDK""" + # Establish connection to Decentriq + client = dq.create_client(settings.decentriq_email, settings.decentriq_token) + + dcr_definition, dcr_title = await get_compute_dcr_definition(cohorts_request, user, client) + dcr = client.publish_analytics_dcr(dcr_definition) dcr_url = f"https://platform.decentriq.com/datarooms/p/{dcr.id}" return { "message": f"Data Clean Room available for compute at {dcr_url}", "dcr_url": dcr_url, "dcr_title": dcr_title, - "merge_script": pandas_script, + # "merge_script": pandas_script, **cohorts_request, } + + +@router.post( + "/get-compute-dcr-definition", + name="Get the Data Clean Room definition for computing as JSON", + response_description="Upload result", +) +async def api_get_compute_dcr_definition( + cohorts_request: dict[str, Any], + user: Any = Depends(get_current_user), +) -> Any: + """Build the Data Clean Room definition for the requested cohorts and return it as JSON, without publishing it to Decentriq""" + # Establish connection to Decentriq + client = dq.create_client(settings.decentriq_email, settings.decentriq_token) + + dcr_definition, _dcr_title = await get_compute_dcr_definition(cohorts_request, user, client) + + # return dcr_definition.model_dump_json(by_alias=True) + # return json.dumps(dcr_definition.high_level) + return dcr_definition.high_level diff --git a/frontend/src/components/Nav.tsx b/frontend/src/components/Nav.tsx index 72904cc..078ab81 100644 --- a/frontend/src/components/Nav.tsx +++ b/frontend/src/components/Nav.tsx @@ -62,7 +62,7 @@ export function Nav() { // Replace with actual API endpoint and required request format // console.log('Sending request to 
Decentriq', dataCleanRoom); try { - const response = await fetch(`${apiUrl}/create-dcr`, { + const response = await fetch(`${apiUrl}/create-compute-dcr`, { method: 'POST', credentials: 'include', headers: { @@ -71,13 +71,57 @@ export function Nav() { body: JSON.stringify(dataCleanRoom) }); const res = await response.json(); - // console.log(res); - setPublishedDCR(res); + console.log(res); + // setPublishedDCR(res); + setPublishedDCR((<> +

✅ Data Clean Room{' '} + + {res['dcr_title']} + {' '} + published in Decentriq. +

+

You can now access it in Decentriq to request compute.

+ )) setIsLoading(false); // Handle response } catch (error) { console.error('Error sending cohorts:', error); setIsLoading(false); + // TODO: Handle error + } + }; + + const getDCRDefinitionFile = async () => { + setIsLoading(true); + // Replace with actual API endpoint and required request format + // console.log('Sending request to Decentriq', dataCleanRoom); + try { + const response = await fetch(`${apiUrl}/get-compute-dcr-definition`, { + method: 'POST', + credentials: 'include', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify(dataCleanRoom) + }); + const res = await response.json(); + const blob = new Blob([JSON.stringify(res, null, 2)], { type: 'application/json' }); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = 'dcr_definition.json'; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + setPublishedDCR(( +

✅ Data Clean Room definition file generated

+ )) + setIsLoading(false); + // Handle response + } catch (error) { + console.error('Error getting DCR definition file:', error); + setIsLoading(false); // Handle error } }; @@ -178,16 +222,21 @@ export function Nav() { - A list of autocomplete using the dataCleanRoom.cohorts Once the first is selected we only show the cohorts with same number of variables? */} -
- - - +
+ {/*
*/} + + + + + {/*
*/}
{/* TODO: {isLoading &&
} */} {isLoading && ( @@ -199,14 +248,7 @@ export function Nav() { {publishedDCR && (
-

- ✅ Data Clean Room{' '} - - {publishedDCR['dcr_title']} - {' '} - published in Decentriq. -

-

You can now access it in Decentriq to request compute.

+ {publishedDCR}
)}