From 1dce8cf75b68c969696a025d3591b9bd0f4b7565 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 10 Sep 2024 15:57:06 +0200 Subject: [PATCH 1/2] Add test for keeping picked dataset's extension in place --- lib/galaxy/managers/datasets.py | 2 +- lib/galaxy/model/__init__.py | 1 + lib/galaxy/schema/schema.py | 3 ++ .../webapps/galaxy/services/datasets.py | 6 ++- lib/galaxy_test/base/populators.py | 11 ++-- test/integration/test_workflow_invocation.py | 50 +++++++++++++++++++ 6 files changed, 67 insertions(+), 6 deletions(-) diff --git a/lib/galaxy/managers/datasets.py b/lib/galaxy/managers/datasets.py index 3a23a25e19a1..0bbb4f4cbaf6 100644 --- a/lib/galaxy/managers/datasets.py +++ b/lib/galaxy/managers/datasets.py @@ -648,7 +648,7 @@ def add_serializers(self): "file_size": lambda item, key, **context: self.serializers["size"](item, key, **context), "nice_size": lambda item, key, **context: item.get_size(nice_size=True, calculate_size=False), # common to lddas and hdas - from mapping.py - "copied_from_history_dataset_association_id": self.serialize_id, + "copied_from_history_dataset_association_id": lambda item, key, **context: item.id, "copied_from_library_dataset_dataset_association_id": self.serialize_id, "info": lambda item, key, **context: item.info.strip() if isinstance(item.info, str) else item.info, "blurb": lambda item, key, **context: item.blurb, diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 3be16a84c066..81f32d9d58d8 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -5273,6 +5273,7 @@ def copy_from(self, other_hda, new_dataset=None, include_tags=True, include_meta if include_tags and self.history: self.copy_tags_from(self.user, other_hda) self.dataset = new_dataset or other_hda.dataset + self.copied_from_history_dataset_association_id = other_hda.id if old_dataset: old_dataset.full_delete() diff --git a/lib/galaxy/schema/schema.py b/lib/galaxy/schema/schema.py index 25fb1763e44d..d16facb1d76d 100644 --- a/lib/galaxy/schema/schema.py +++ b/lib/galaxy/schema/schema.py @@ -902,6 +902,9 @@ class HDADetailed(HDASummary, WithModelClass): description="The list of sources associated with this dataset.", ), ] + copied_from_history_dataset_association_id: Annotated[ + Optional[EncodedDatabaseIdField], Field(None, description="ID of HDA this HDA was copied from.") + ] class HDAExtended(HDADetailed): diff --git a/lib/galaxy/webapps/galaxy/services/datasets.py b/lib/galaxy/webapps/galaxy/services/datasets.py index 25dc26b5bb09..4f4ad0d46090 100644 --- a/lib/galaxy/webapps/galaxy/services/datasets.py +++ b/lib/galaxy/webapps/galaxy/services/datasets.py @@ -403,7 +403,11 @@ def show( # Default: return dataset as dict. if hda_ldda == DatasetSourceType.hda: return self.hda_serializer.serialize_to_view( - dataset, view=serialization_params.view or "detailed", user=trans.user, trans=trans + dataset, + view=serialization_params.view or "detailed", + keys=serialization_params.keys, + user=trans.user, + trans=trans, ) else: dataset_dict = dataset.to_dict() diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index 0347fbdfadb2..4d1c46ca5309 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -1009,14 +1009,17 @@ def get_history_dataset_source_transform_actions(self, history_id: str, **kwd) - assert isinstance(transform, list) return {t["action"] for t in transform} - def get_history_dataset_details(self, history_id: str, **kwds) -> Dict[str, Any]: + def get_history_dataset_details(self, history_id: str, keys: Optional[str] = None, **kwds) -> Dict[str, Any]: dataset_id = self.__history_content_id(history_id, **kwds) - details_response = self.get_history_dataset_details_raw(history_id, dataset_id) + details_response = self.get_history_dataset_details_raw(history_id, dataset_id, keys=keys) details_response.raise_for_status() return details_response.json() - def get_history_dataset_details_raw(self, history_id: str, dataset_id: str) -> Response: - details_response = self._get_contents_request(history_id, f"/datasets/{dataset_id}") + def get_history_dataset_details_raw(self, history_id: str, dataset_id: str, keys: Optional[str] = None) -> Response: + data = None + if keys: + data = {"keys": keys} + details_response = self._get_contents_request(history_id, f"/datasets/{dataset_id}", data=data) return details_response def get_history_dataset_extra_files(self, history_id: str, **kwds) -> list: diff --git a/test/integration/test_workflow_invocation.py b/test/integration/test_workflow_invocation.py index 72276bf77979..3ef39b06779c 100644 --- a/test/integration/test_workflow_invocation.py +++ b/test/integration/test_workflow_invocation.py @@ -69,6 +69,56 @@ def test_run_workflow_optional_data_skips_step(self) -> None: if step["workflow_step_label"] == "cat1": assert sum(1 for j in step["jobs"] if j["state"] == "skipped") == 1 + def test_pick_value_preserves_datatype_and_inheritance_chain(self): + self.install_repository("iuc", "pick_value", "b19e21af9c52") + with self.dataset_populator.test_history() as history_id: + summary = self.workflow_populator.run_workflow( + """ +class: GalaxyWorkflow +inputs: + input_dataset: data +outputs: + the_output: + outputSource: pick_value/data_param +steps: + skip_step: + tool_id: job_properties + when: $(false) + pick_value: + tool_id: toolshed.g2.bx.psu.edu/repos/iuc/pick_value/pick_value/0.2.0 + in: + style_cond|type_cond|pick_from_0|value: + source: skip_step/out_file1 + style_cond|type_cond|pick_from_1|value: + source: input_dataset + tool_state: + style_cond: + pick_style: first + type_cond: + param_type: data + pick_from: + - value: + __class__: RuntimeValue + - value: + __class__: RuntimeValue +test_data: + input_dataset: + value: 1.txt + type: File +""", + history_id=history_id, + ) + invocation = self.workflow_populator.get_invocation(summary.invocation_id) + output = self.dataset_populator.get_history_dataset_details( + history_id, + content_id=invocation["outputs"]["the_output"]["id"], + # copied_from_history_dataset_association_id is not in any of the default serializers + keys="state,extension,copied_from_history_dataset_association_id", + ) + assert output["state"] == "ok" + assert output["extension"] == "txt" + assert output["copied_from_history_dataset_association_id"] + def test_run_workflow_optional_data_provided_runs_step(self) -> None: self.install_repository("iuc", "map_param_value", "5ac8a4bf7a8d") with self.dataset_populator.test_history() as history_id: From ee63214516e4248fc4de403da137635dcc0ed7a1 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 10 Sep 2024 15:58:09 +0200 Subject: [PATCH 2/2] Fix wrong extension on pick data output This fixes https://github.com/galaxyproject/galaxy/issues/18795 --- client/src/api/schema/schema.ts | 10 ++++++++++ lib/galaxy/tools/__init__.py | 12 +++++++++--- test/unit/app/managers/test_HDAManager.py | 2 +- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts index 9abd677c7b27..1577170c9663 100644 --- a/client/src/api/schema/schema.ts +++ b/client/src/api/schema/schema.ts @@ -5704,6 +5704,11 @@ export interface components { * @description TODO */ api_type?: "file" | null; + /** + * Copied From History Dataset Association Id + * @description ID of HDA this HDA was copied from. + */ + copied_from_history_dataset_association_id?: string | null; /** Copied From Ldda Id */ copied_from_ldda_id?: string | null; /** @@ -5948,6 +5953,11 @@ export interface components { * @enum {string} */ api_type?: "file"; + /** + * Copied From History Dataset Association Id + * @description ID of HDA this HDA was copied from. + */ + copied_from_history_dataset_association_id?: string | null; /** Copied From Ldda Id */ copied_from_ldda_id?: string | null; /** diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 570008c7c64b..d1f50b945c28 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2880,7 +2880,9 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw # if change_datatype PJA is associated with expression tool output the new output already has # the desired datatype, so we use it. If the extension is "data" there's no change_dataset PJA and # we want to use the existing extension. - new_ext = output.extension if output.extension != "data" else copy_object.extension + new_ext = ( + output.extension if output.extension not in ("data", "expression.json") else copy_object.extension + ) require_metadata_regeneration = copy_object.extension != new_ext output.copy_from(copy_object, include_metadata=not require_metadata_regeneration) output.extension = new_ext @@ -2895,8 +2897,12 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw else: # TODO: move exec_after_process into metadata script so this doesn't run on the headnode ? output.init_meta() - output.set_meta() - output.set_metadata_success_state() + try: + output.set_meta() + output.set_metadata_success_state() + except Exception: + output.state = model.HistoryDatasetAssociation.states.FAILED_METADATA + log.exception("Exception occured while setting metdata") def parse_environment_variables(self, tool_source): """Setup environment variable for inputs file.""" diff --git a/test/unit/app/managers/test_HDAManager.py b/test/unit/app/managers/test_HDAManager.py index a868c7ce5ca3..aac4e2273c4b 100644 --- a/test/unit/app/managers/test_HDAManager.py +++ b/test/unit/app/managers/test_HDAManager.py @@ -412,7 +412,7 @@ def test_serializers(self): assert isinstance(serialized["file_size"], int) assert isinstance(serialized["nice_size"], str) # TODO: these should be tested w/copy - self.assertNullableEncodedId(serialized["copied_from_history_dataset_association_id"]) + assert isinstance(serialized["copied_from_history_dataset_association_id"], int) self.assertNullableEncodedId(serialized["copied_from_library_dataset_dataset_association_id"]) self.assertNullableBasestring(serialized["info"]) self.assertNullableBasestring(serialized["blurb"])