Skip to content

Commit

Permalink
store label & doc fields as prospective provenance
Browse files (browse the repository at this point in the history)
TODO: fix intent list

add/amend tests
  • Loading branch information
mr-c committed Dec 18, 2023
1 parent 8edabf8 commit fad5e8e
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 32 deletions.
69 changes: 38 additions & 31 deletions cwltool/cwlprov/provenance_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,14 @@
)
from .writablebagfile import create_job, write_bag_file # change this later

# from schema_salad.utils import convert_to_dict


if TYPE_CHECKING:
from .ro import ResearchObject

_attributes_type = Dict[Union[str, Identifier], Any]


def copy_job_order(job: Union[Process, JobsType], job_order_object: CWLObjectType) -> CWLObjectType:
"""Create copy of job object for provenance."""
Expand Down Expand Up @@ -235,13 +240,13 @@ def evaluate(
"""Evaluate the nature of job."""
if not hasattr(process, "steps"):
# record provenance of independent commandline tool executions
self.prospective_prov(job)
self.prospective_prov(job, process)
customised_job = copy_job_order(job, job_order_object)
self.used_artefacts(customised_job, self.workflow_run_uri)
create_job(research_obj, customised_job)
elif hasattr(job, "workflow"):
# record provenance of workflow executions
self.prospective_prov(job)
self.prospective_prov(job, process)
customised_job = copy_job_order(job, job_order_object)
self.used_artefacts(customised_job, self.workflow_run_uri)
# if CWLPROV['prov'].uri in job_order_object: # maybe move this to another place
Expand Down Expand Up @@ -306,8 +311,7 @@ def _add_nested_annotations(
) -> ProvEntity:
"""Propagate input data annotations to provenance."""
# Change https:// into http:// first
schema2_uri = "https://schema.org/"
if schema2_uri in annotation_key:
if (schema2_uri := "https://schema.org/") in annotation_key:
annotation_key = SCHEMA[annotation_key.replace(schema2_uri, "")].uri

if not isinstance(annotation_value, (MutableSequence, MutableMapping)):
Expand Down Expand Up @@ -377,9 +381,9 @@ def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, st
self.document.specializationOf(file_entity, entity)

# Identify all schema annotations
schema_annotations = dict(
[(v, value[v]) for v in value.keys() if v.startswith("https://schema.org")]
)
schema_annotations = {
v: value[v] for v in value.keys() if v.startswith("https://schema.org")

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization (severity: High)

The string "https://schema.org" may be at an arbitrary position in the sanitized URL.
}

# Transfer SCHEMA annotations to provenance
for s in schema_annotations:
Expand Down Expand Up @@ -509,9 +513,9 @@ def declare_directory(self, value: CWLObjectType) -> ProvEntity:
coll_b.add_attributes(coll_b_attribs)

# Identify all schema annotations
schema_annotations = dict(
[(v, value[v]) for v in value.keys() if v.startswith("https://schema.org")]
)
schema_annotations = {
v: value[v] for v in value.keys() if v.startswith("https://schema.org")

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization (severity: High)

The string "https://schema.org" may be at an arbitrary position in the sanitized URL.
}

# Transfer SCHEMA annotations to provenance
for s in schema_annotations:
Expand Down Expand Up @@ -571,7 +575,7 @@ def declare_artefact(self, value: Any) -> ProvEntity:
self.research_object.add_uri(entity.identifier.uri)
return entity

if isinstance(value, (str, str)):
if isinstance(value, str):
(entity, _) = self.declare_string(value)
return entity

Expand Down Expand Up @@ -734,35 +738,38 @@ def generate_output_prov(
entity, process_run_id, timestamp, None, {"prov:role": role}
)

def prospective_prov(self, job: JobsType) -> None:
def prospective_prov(self, job: JobsType, process: Process) -> None:
"""Create prospective prov recording as wfdesc prov:Plan."""
prov_items: _attributes_type = {
PROV_TYPE: WFDESC["Workflow"] if isinstance(job, WorkflowJob) else WFDESC["Process"],
"prov:type": PROV["Plan"],
"prov:label": "Prospective provenance",
}
if "doc" in process.tool:
prov_items["schema:description"] = process.tool["doc"]
if "label" in process.tool:
prov_items["schema:name"] = process.tool["label"]
# # TypeError: unhashable type: 'list'
# if "intent" in process.tool:
# prov_items["schema:featureList"] = convert_to_dict(process.tool["intent"])
self.document.entity("wf:main", prov_items)
if not isinstance(job, WorkflowJob):
# direct command line tool execution
self.document.entity(
"wf:main",
{
PROV_TYPE: WFDESC["Process"],
"prov:type": PROV["Plan"],
"prov:label": "Prospective provenance",
},
)
return

self.document.entity(
"wf:main",
{
PROV_TYPE: WFDESC["Workflow"],
"prov:type": PROV["Plan"],
"prov:label": "Prospective provenance",
},
)

for step in job.steps:
stepnametemp = "wf:main/" + str(step.name)[5:]
stepname = urllib.parse.quote(stepnametemp, safe=":/,#")
provstep_items: _attributes_type = {
PROV_TYPE: WFDESC["Process"],
"prov:type": PROV["Plan"],
}
if "doc" in step.tool:
provstep_items["schema:description"] = step.tool["doc"]
if "label" in step.tool:
provstep_items["schema:name"] = step.tool["label"]
provstep = self.document.entity(
stepname,
{PROV_TYPE: WFDESC["Process"], "prov:type": PROV["Plan"]},
provstep_items,
)
self.document.entity(
"wf:main",
Expand Down
2 changes: 1 addition & 1 deletion cwltool/singularity.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ def add_writable_file_volume(
if self.inplace_update:
try:
os.link(os.path.realpath(volume.resolved), host_outdir_tgt)
except os.error:
except OSError:

Check warning on line 372 in cwltool/singularity.py

View check run for this annotation

Codecov / codecov/patch

cwltool/singularity.py#L372

Added line #L372 was not covered by tests
shutil.copy(volume.resolved, host_outdir_tgt)
else:
shutil.copy(volume.resolved, host_outdir_tgt)
Expand Down

0 comments on commit fad5e8e

Please sign in to comment.