Skip to content

Commit

Permalink
fix count check
Browse files Browse the repository at this point in the history
  • Loading branch information
vemonet committed Apr 4, 2024
1 parent 7db9372 commit acfe93b
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 48 deletions.
33 changes: 18 additions & 15 deletions backend/src/decentriq.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ async def create_compute_dcr(
# We generate a pandas script to automatically prepare the data from the cohort based on known metadata
pandas_script = "import pandas as pd\nimport decentriq_util\n\n"

# TODO: DONT FILTER COLUMNS IN SCHEMA
# 1 prepare script per data node
for cohort_id, requested_vars in cohorts_request["cohorts"].items():
cohort_meta = deepcopy(all_cohorts[cohort_id])
df_var = f"df_{cohort_id.replace(' ', '_').replace('-', '_').replace('(', '').replace(')', '')}"
Expand All @@ -136,10 +138,12 @@ async def create_compute_dcr(
if len(requested_vars) <= len(cohort_meta.variables):
# Add filter variables to pandas script
pandas_script += f"{df_var} = {df_var}[{requested_vars}]\n"
# NOTE: this block used to drop the metadata of every variable the user did not select.
# We don't want that anymore, so it is disabled.
# Get all cohort and variables metadata for selected variables
for var in all_cohorts[cohort_id].variables:
if var not in requested_vars:
del cohort_meta.variables[var]
# for var in all_cohorts[cohort_id].variables:
# if var not in requested_vars:
# del cohort_meta.variables[var]
selected_cohorts[cohort_id] = cohort_meta
elif isinstance(requested_vars, dict):
# Merge operation, need to be implemented on the frontend
Expand Down Expand Up @@ -168,32 +172,31 @@ async def create_compute_dcr(
.with_airlock()
)

# builder = dq.DataRoomBuilder(f"iCare4CVD DCR compute {dcr_count}", enclave_specs=enclave_specs)

preview_nodes = []
# Convert cohort variables to decentriq schema
for cohort_id, cohort in selected_cohorts.items():
# Create data node for cohort
data_node_id = cohort_id.replace(" ", "-")
# builder.add_node_definition(RawDataNodeDefinition(name=data_node_id, is_required=True))
# TODO: providing schema is broken in new SDK
builder.add_node_definition(TableDataNodeDefinition(name=data_node_id, columns=get_cohort_schema(cohort), is_required=True))
data_nodes.append(data_node_id)

# Add airlock node to make it easy to access small part of the dataset
preview_node_id = f"preview-{data_node_id}"
builder.add_node_definition(PreviewComputeNodeDefinition(
name=preview_node_id,
dependency=data_node_id,
quota_bytes=1048576, # 1MB
))
preview_nodes.append(preview_node_id)

# Add python data preparation script
builder.add_node_definition(
PythonComputeNodeDefinition(name="prepare-data", script=pandas_script, dependencies=data_nodes)
)

# Add users permissions
builder.add_participant(user["email"], data_owner_of=[data_node_id], analyst_of=["prepare-data"])

# Add airlock node to make it easy to access small part of the dataset
builder.add_node_definition(PreviewComputeNodeDefinition(
name="preview-data",
dependency="prepare-data",
quota_bytes=52428800, # 50MB
))

builder.add_participant(user["email"], data_owner_of=data_nodes, analyst_of=["prepare-data", *preview_nodes])

# Build and publish DCR
dcr_definition = builder.build()
Expand Down
4 changes: 2 additions & 2 deletions backend/src/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,8 @@ def load_cohort_dict_file(dict_path: str, cohort_id: str, user_email: str) -> Da
errors = []
for i, row in df.iterrows():
# Check if required columns are present
if not row["VARIABLE NAME"] or not row["VARIABLE LABEL"] or not row["VAR TYPE"]:
errors.append(f"Row {i+2} is missing required data: variable_name, variable_label, or var_type")
if not row["VARIABLE NAME"] or not row["VARIABLE LABEL"] or not row["VAR TYPE"] or not row["count"]:
errors.append(f"Row {i+2} is missing required data: VARIABLE NAME, VARIABLE LABEL, VAR TYPE, or count")
if row["VAR TYPE"] not in ACCEPTED_DATATYPES:
errors.append(
f"Row {i+2} for variable `{row['VARIABLE NAME']}` is using a wrong datatype: `{row['VAR TYPE']}`. It should be one of: {', '.join(ACCEPTED_DATATYPES)}"
Expand Down
63 changes: 32 additions & 31 deletions frontend/src/pages/upload.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ export default function UploadPage() {
{/* Upload cohort metadata file */}
<div className="flex items-center">
<label htmlFor="metadataFile" className="block text-sm">
<div role="alert" className="alert alert-success">
<div role="alert" className="alert">
<svg
xmlns="http://www.w3.org/2000/svg"
className="stroke-current shrink-0 h-6 w-6"
Expand Down Expand Up @@ -160,36 +160,37 @@ export default function UploadPage() {
<input type="file" id="metadataFile" className="mt-2" onChange={handleMetadataFileChange} required />

{/* Upload data file */}
<div className="flex items-center">
<label htmlFor="dataFile" className="block text-sm">
<div role="alert" className="alert alert-warning">
<svg
xmlns="http://www.w3.org/2000/svg"
className="stroke-current shrink-0 h-6 w-6"
fill="none"
viewBox="0 0 24 24"
>
<path
strokeLinecap="round"
strokeLinejoin="round"
strokeWidth="2"
d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"
/>
</svg>
<span>
<b>Optional</b> sensible data: upload the actual cohort patients data file, if you wish to let the
server upload the data to Decentriq for you
</span>
</div>
</label>
{dataFile && (
<button type="button" onClick={clearDataFile} className="ml-2 btn btn-xs btn-neutral">
<TrashIcon />
</button>
)}
</div>
<input type="file" id="dataFile" className="mt-2" onChange={handleDataFilesChange} />

{metadataFile && <>
<div className="flex items-center">
<label htmlFor="dataFile" className="block text-sm">
<div role="alert" className="alert">
<svg
xmlns="http://www.w3.org/2000/svg"
className="stroke-current shrink-0 h-6 w-6"
fill="none"
viewBox="0 0 24 24"
>
<path
strokeLinecap="round"
strokeLinejoin="round"
strokeWidth="2"
d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"
/>
</svg>
<span>
<b>Optional</b> sensible data: if you want Maastricht University to store the cohort data
on their server for helping researchers to understand it, you can upload it here (no obligations)
</span>
</div>
</label>
{dataFile && (
<button type="button" onClick={clearDataFile} className="ml-2 btn btn-xs btn-neutral">
<TrashIcon />
</button>
)}
</div>
<input type="file" id="dataFile" className="mt-2" onChange={handleDataFilesChange} />
</>}
<div>
<button type="submit" className="btn btn-sm btn-info mt-6 text-slate-900 font-normal">
<Upload className="w-4 h-4" />
Expand Down

0 comments on commit acfe93b

Please sign in to comment.