Skip to content

Commit

Permalink
[ENH] Implemented pipeline_version and pipeline_name query fields (
Browse files Browse the repository at this point in the history
…#345)

* Added query parameters for pipeline name and version

* Updated query model

* Updated SPARQL query template

Added nipoppy namespace

* Updated crud.get function

* Updated `query_string` in `create_query` function

* Updated the `default_neurobagel_query`

* Refactored `query_string` in `create_query` function

* Updated test fixtures

* Added tests for `pipeline_version` and `pipeline_name`

* Updated `np` namespace

* Refactored the pipeline information representation in the response

* Updated test fixture

* Fixed the test

* Fixed the pipeline info at the subject level

* Addressed requested changes

* Leftover cleanup

* Addressed second wave of requested changes
  • Loading branch information
rmanaem authored Oct 9, 2024
1 parent 38b58f7 commit a93d476
Show file tree
Hide file tree
Showing 7 changed files with 255 additions and 5 deletions.
64 changes: 64 additions & 0 deletions app/api/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
"dataset_uuid",
"dataset_name",
"dataset_portal_uri",
"pipeline_version",
"pipeline_name",
]


Expand Down Expand Up @@ -100,6 +102,8 @@ async def get(
min_num_phenotypic_sessions: int,
assessment: str,
image_modal: str,
pipeline_name: str,
pipeline_version: str,
) -> list[CohortQueryResponse]:
"""
Sends SPARQL queries to the graph API via httpx POST requests for subject-session or dataset metadata
Expand All @@ -125,6 +129,10 @@ async def get(
Non-imaging assessment completed by subjects.
image_modal : str
Imaging modality of subject scans.
pipeline_name : str
Name of pipeline run on subject scans.
pipeline_version : str
Version of pipeline run on subject scans.
Returns
-------
Expand All @@ -142,6 +150,8 @@ async def get(
min_num_imaging_sessions=min_num_imaging_sessions,
assessment=assessment,
image_modal=image_modal,
pipeline_version=pipeline_version,
pipeline_name=pipeline_name,
)
)

Expand All @@ -158,6 +168,7 @@ async def get(

response_obj = []
dataset_cols = ["dataset_uuid", "dataset_name"]
dataset_available_pipeline_info = {}
if not results_df.empty:
for (dataset_uuid, dataset_name), group in results_df.groupby(
by=dataset_cols
Expand Down Expand Up @@ -189,6 +200,50 @@ async def get(
)
)

pipeline_grouped_data = (
group.groupby(
[
"sub_id",
"session_id",
"session_type",
"pipeline_name",
],
dropna=True,
)
.agg(
{
"pipeline_version": lambda x: list(
x.dropna().unique()
)
}
)
.reset_index()
)

session_completed_pipeline_data = (
pipeline_grouped_data.groupby(
["sub_id", "session_id", "session_type"]
)
.apply(
lambda x: dict(
zip(x["pipeline_name"], x["pipeline_version"])
)
)
.reset_index(name="completed_pipelines")
)

subject_data = pd.merge(
subject_data.reset_index(drop=True),
session_completed_pipeline_data,
on=["sub_id", "session_id", "session_type"],
how="left",
)

# ensure that for sessions missing completed pipeline info, completed_pipelines is still a dict rather than null/nan
subject_data["completed_pipelines"] = subject_data[
"completed_pipelines"
].apply(lambda x: x if isinstance(x, dict) else {})

# TODO: Revisit this as there may be a more elegant solution.
# The following code replaces columns with all NaN values with values of None, to ensure they show up in the final JSON as `null`.
# This is needed as the above .agg() seems to turn NaN into None for object-type columns (which have some non-missing values)
Expand All @@ -204,6 +259,14 @@ async def get(

subject_data = list(subject_data.to_dict("records"))

dataset_available_pipeline_info = (
group.groupby("pipeline_name", dropna=True)[
"pipeline_version"
]
.apply(lambda x: list(x.dropna().unique()))
.to_dict()
)

response_obj.append(
CohortQueryResponse(
dataset_uuid=dataset_uuid,
Expand All @@ -224,6 +287,7 @@ async def get(
group["image_modal"].notna()
].unique()
),
available_pipelines=dataset_available_pipeline_info,
)
)

Expand Down
6 changes: 6 additions & 0 deletions app/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pydantic import BaseModel, constr, root_validator

CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$"
VERSION_REGEX = r"^([A-Za-z0-9-]+)\.(\d+)\.([A-Za-z0-9-]+)$"


class QueryModel(BaseModel):
Expand All @@ -22,6 +23,9 @@ class QueryModel(BaseModel):
min_num_phenotypic_sessions: int = Query(default=None, ge=0)
assessment: constr(regex=CONTROLLED_TERM_REGEX) = None
image_modal: constr(regex=CONTROLLED_TERM_REGEX) = None
pipeline_name: constr(regex=CONTROLLED_TERM_REGEX) = None
# TODO: Check back if validating using a regex is too restrictive
pipeline_version: constr(regex=VERSION_REGEX) = None

@root_validator()
def check_maxage_ge_minage(cls, values):
Expand Down Expand Up @@ -67,6 +71,7 @@ class SessionResponse(BaseModel):
assessment: list
image_modal: list
session_file_path: Optional[str]
completed_pipelines: dict


class CohortQueryResponse(BaseModel):
Expand All @@ -81,6 +86,7 @@ class CohortQueryResponse(BaseModel):
num_matching_subjects: int
subject_data: Union[list[SessionResponse], str]
image_modals: list
available_pipelines: dict


class DataElementURI(str, Enum):
Expand Down
2 changes: 2 additions & 0 deletions app/api/routers/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ async def get_query(
query.min_num_phenotypic_sessions,
query.assessment,
query.image_modal,
query.pipeline_name,
query.pipeline_version,
)

return response
48 changes: 44 additions & 4 deletions app/api/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"ncit": "http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#",
"nidm": "http://purl.org/nidash/nidm#",
"snomed": "http://purl.bioontology.org/ontology/SNOMEDCT/",
"np": "https://github.com/nipoppy/pipeline-catalog/tree/main/processing/",
}

# Store domains in named tuples
Expand All @@ -61,6 +62,8 @@
IS_CONTROL = Domain("subject_group", "nb:isSubjectGroup")
ASSESSMENT = Domain("assessment", "nb:hasAssessment")
IMAGE_MODAL = Domain("image_modal", "nb:hasContrastType")
PIPELINE_NAME = Domain("pipeline_name", "nb:hasPipelineName")
PIPELINE_VERSION = Domain("pipeline_version", "nb:hasPipelineVersion")
PROJECT = Domain("project", "nb:hasSamples")


Expand Down Expand Up @@ -115,6 +118,8 @@ def create_query(
min_num_phenotypic_sessions: Optional[int] = None,
assessment: Optional[str] = None,
image_modal: Optional[str] = None,
pipeline_name: Optional[str] = None,
pipeline_version: Optional[str] = None,
) -> str:
"""
Creates a SPARQL query using a query template and filters it using the input parameters.
Expand All @@ -139,6 +144,10 @@ def create_query(
Non-imaging assessment completed by subjects, by default None.
image_modal : str, optional
Imaging modality of subject scans, by default None.
pipeline_name : str, optional
Name of pipeline run on subject scans, by default None.
pipeline_version : str, optional
Version of pipeline run on subject scans, by default None.
Returns
-------
Expand Down Expand Up @@ -203,13 +212,28 @@ def create_query(
imaging_session_level_filters = ""
if image_modal is not None:
imaging_session_level_filters += (
"\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})."
"\n"
+ f"{create_bound_filter(IMAGE_MODAL.var)} && ?{IMAGE_MODAL.var} = {image_modal})."
)

if pipeline_name is not None:
imaging_session_level_filters += (
"\n"
+ f"{create_bound_filter(PIPELINE_NAME.var)} && (?{PIPELINE_NAME.var} = {pipeline_name})."
)

    # Also applies when a user specified the pipeline version but not the name
if pipeline_version is not None:
imaging_session_level_filters += (
"\n"
            + f'{create_bound_filter(PIPELINE_VERSION.var)} && ?{PIPELINE_VERSION.var} = "{pipeline_version}").'  # Wrap the version in quotes to avoid implicit type conversion in the graph
)

query_string = textwrap.dedent(
f"""
SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex
?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions ?session_id ?session_type ?assessment ?image_modal ?session_file_path
?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions
?session_id ?session_type ?assessment ?image_modal ?session_file_path ?pipeline_version ?pipeline_name
WHERE {{
?dataset_uuid a nb:Dataset;
nb:hasLabel ?dataset_name;
Expand Down Expand Up @@ -244,14 +268,30 @@ def create_query(
{phenotypic_session_level_filters}
}} GROUP BY ?subject
}}
OPTIONAL {{
?session nb:hasCompletedPipeline ?pipeline.
?pipeline nb:hasPipelineVersion ?pipeline_version.
?pipeline nb:hasPipelineName ?pipeline_name.
}}
{{
SELECT ?subject (count(distinct ?imaging_session) as ?num_matching_imaging_sessions)
WHERE {{
?subject a nb:Subject.
OPTIONAL {{
?subject nb:hasSession ?imaging_session.
?imaging_session a nb:ImagingSession;
nb:hasAcquisition/nb:hasContrastType ?image_modal.
?imaging_session a nb:ImagingSession.
OPTIONAL {{
?imaging_session nb:hasAcquisition ?acquisition.
?acquisition nb:hasContrastType ?image_modal.
}}
OPTIONAL {{
?imaging_session nb:hasCompletedPipeline ?pipeline.
?pipeline nb:hasPipelineVersion ?pipeline_version;
nb:hasPipelineName ?pipeline_name.
}}
}}
{imaging_session_level_filters}
}} GROUP BY ?subject
Expand Down
12 changes: 11 additions & 1 deletion docs/default_neurobagel_query.rq
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ PREFIX nidm: <http://purl.org/nidash/nidm#>
PREFIX snomed: <http://purl.bioontology.org/ontology/SNOMEDCT/>

SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex
?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions ?session_id ?session_type ?assessment ?image_modal ?session_file_path
?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions ?session_id ?session_type ?assessment ?image_modal ?session_file_path ?pipeline_name ?pipeline_version
WHERE {
?dataset_uuid a nb:Dataset;
nb:hasLabel ?dataset_name;
Expand Down Expand Up @@ -41,6 +41,11 @@ WHERE {

} GROUP BY ?subject
}
OPTIONAL {
?session nb:hasCompletedPipeline ?pipeline.
?pipeline nb:hasPipelineVersion ?pipeline_version.
?pipeline nb:hasPipelineName ?pipeline_name.
}
{
SELECT ?subject (count(distinct ?imaging_session) as ?num_matching_imaging_sessions)
WHERE {
Expand All @@ -50,6 +55,11 @@ WHERE {
?imaging_session a nb:ImagingSession;
nb:hasAcquisition/nb:hasContrastType ?image_modal.
}
OPTIONAL {
?imaging_session nb:hasCompletedPipeline ?pipeline.
?pipeline nb:hasPipelineVersion ?pipeline_version.
?pipeline nb:hasPipelineName ?pipeline_name.
}

} GROUP BY ?subject
}
Expand Down
13 changes: 13 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ def test_data():
"http://purl.org/nidash/nidm#T1Weighted",
"http://purl.org/nidash/nidm#T2Weighted",
],
"available_pipelines": {
"freesurfer": ["7.3.2", "2.8.2", "8.7.0-rc"]
},
},
{
"dataset_uuid": "http://neurobagel.org/vocab/67890",
Expand All @@ -86,6 +89,10 @@ def test_data():
"http://purl.org/nidash/nidm#FlowWeighted",
"http://purl.org/nidash/nidm#T1Weighted",
],
"available_pipelines": {
"freesurfer": ["7.3.2", "2.1.2"],
"fmriprep": ["23.1.3", "22.1.4", "v2.0.1"],
},
},
]

Expand Down Expand Up @@ -178,6 +185,8 @@ async def _mock_get_with_exception(
min_num_phenotypic_sessions,
assessment,
image_modal,
pipeline_version,
pipeline_name,
):
raise request.param

Expand Down Expand Up @@ -206,6 +215,8 @@ async def _mock_get(
min_num_phenotypic_sessions,
assessment,
image_modal,
pipeline_version,
pipeline_name,
):
return request.param

Expand All @@ -226,6 +237,8 @@ async def _mock_successful_get(
min_num_phenotypic_sessions,
assessment,
image_modal,
pipeline_version,
pipeline_name,
):
return test_data

Expand Down
Loading

0 comments on commit a93d476

Please sign in to comment.