diff --git a/lib/galaxy/tool_util/verify/interactor.py b/lib/galaxy/tool_util/verify/interactor.py index b51e2a33ad5a..c31dba70175e 100644 --- a/lib/galaxy/tool_util/verify/interactor.py +++ b/lib/galaxy/tool_util/verify/interactor.py @@ -95,11 +95,11 @@ class ValidToolTestDict(TypedDict): output_collections: List[Dict[str, Any]] stdout: NotRequired[AssertionList] stderr: NotRequired[AssertionList] - expect_exit_code: NotRequired[int] + expect_exit_code: NotRequired[Optional[Union[str, int]]] expect_failure: NotRequired[bool] expect_test_failure: NotRequired[bool] - maxseconds: NotRequired[int] - num_outputs: NotRequired[int] + maxseconds: NotRequired[Optional[int]] + num_outputs: NotRequired[Optional[Union[str, int]]] command_line: NotRequired[AssertionList] command_version: NotRequired[AssertionList] required_files: NotRequired[List[Any]] diff --git a/lib/galaxy/tool_util/verify/parse.py b/lib/galaxy/tool_util/verify/parse.py new file mode 100644 index 000000000000..f232c3d882cc --- /dev/null +++ b/lib/galaxy/tool_util/verify/parse.py @@ -0,0 +1,495 @@ +import logging +import os +from typing import ( + Any, + Iterable, + List, + Optional, + Tuple, + Union, +) + +from galaxy.tool_util.parser.interface import ( + InputSource, + ToolSource, + ToolSourceTest, + ToolSourceTestInputs, + ToolSourceTests, +) +from galaxy.tool_util.parser.util import ( + boolean_is_checked, + boolean_true_and_false_values, + parse_tool_version_with_defaults, +) +from galaxy.tool_util.verify.interactor import ( + InvalidToolTestDict, + ToolTestDescription, + ValidToolTestDict, +) +from galaxy.util import ( + string_as_bool, + string_as_bool_or_none, + unicodify, +) + +log = logging.getLogger(__name__) + +RequiredFilesT = List[Tuple[str, dict]] +RequiredDataTablesT = List[str] +RequiredLocFileT = List[str] + + +def parse_tool_test_descriptions( + tool_source: ToolSource, tool_guid: Optional[str] = None +) -> Iterable[ToolTestDescription]: + """ + Build ToolTestDescription objects for each test description. + """ + raw_tests_dict: ToolSourceTests = tool_source.parse_tests_to_dict() + tests: List[ToolTestDescription] = [] + for i, raw_test_dict in enumerate(raw_tests_dict.get("tests", [])): + test = _description_from_tool_source(tool_source, raw_test_dict, i, tool_guid) + tests.append(test) + return tests + + +def _description_from_tool_source( + tool_source: ToolSource, raw_test_dict: ToolSourceTest, test_index: int, tool_guid: Optional[str] +) -> ToolTestDescription: + required_files: RequiredFilesT = [] + required_data_tables: RequiredDataTablesT = [] + required_loc_files: RequiredLocFileT = [] + + num_outputs = raw_test_dict.get("expect_num_outputs", None) + if num_outputs: + num_outputs = int(num_outputs) + maxseconds = raw_test_dict.get("maxseconds", None) + if maxseconds is not None: + maxseconds = int(maxseconds) + + tool_id = tool_guid or tool_source.parse_id() + assert tool_id + tool_version = parse_tool_version_with_defaults(tool_id, tool_source) + + processed_test_dict: Union[ValidToolTestDict, InvalidToolTestDict] + try: + processed_inputs = _process_raw_inputs( + tool_source, + input_sources(tool_source), + raw_test_dict["inputs"], + required_files, + required_data_tables, + required_loc_files, + ) + processed_test_dict = ValidToolTestDict( + { + "inputs": processed_inputs, + "outputs": raw_test_dict["outputs"], + "output_collections": raw_test_dict["output_collections"], + "num_outputs": num_outputs, + "command_line": raw_test_dict.get("command", None), + "command_version": raw_test_dict.get("command_version", None), + "stdout": raw_test_dict.get("stdout", None), + "stderr": raw_test_dict.get("stderr", None), + "expect_exit_code": raw_test_dict.get("expect_exit_code", None), + "expect_failure": raw_test_dict.get("expect_failure", False), + "expect_test_failure": raw_test_dict.get("expect_test_failure", False), + "required_files": required_files, + "required_data_tables": required_data_tables, + "required_loc_files": required_loc_files, + "tool_id": tool_id, + "tool_version": tool_version, + "test_index": test_index, + "maxseconds": maxseconds, + "error": False, + } + ) + except Exception as e: + processed_test_dict = InvalidToolTestDict( + { + "tool_id": tool_id, + "tool_version": tool_version, + "test_index": test_index, + "inputs": {}, + "error": True, + "exception": unicodify(e), + "maxseconds": maxseconds, + } + ) + + return ToolTestDescription(processed_test_dict) + + +def _process_raw_inputs( + tool_source: ToolSource, + input_sources: List[InputSource], + raw_inputs: ToolSourceTestInputs, + required_files: RequiredFilesT, + required_data_tables: RequiredDataTablesT, + required_loc_files: RequiredLocFileT, + parent_context: Optional[Union["ParamContext", "RootParamContext"]] = None, +): + """ + Recursively expand flat list of inputs into "tree" form of flat list + (| using to nest to new levels) structure and expand dataset + information as proceeding to populate self.required_files. + """ + parent_context = parent_context or RootParamContext() + expanded_inputs = {} + for input_source in input_sources: + input_type = input_source.parse_input_type() + name = input_source.get("name") + if input_type == "conditional": + cond_context = ParamContext(name=name, parent_context=parent_context) + test_param_input_source = input_source.parse_test_input_source() + case_name = test_param_input_source.get("name") + case_context = ParamContext(name=case_name, parent_context=cond_context) + raw_input_dict = case_context.extract_value(raw_inputs) + case_value = raw_input_dict["value"] if raw_input_dict else None + case_when, case_input_sources = _matching_case_for_value( + tool_source, input_source, test_param_input_source, case_value + ) + if case_input_sources: + for input_source in case_input_sources.parse_input_sources(): + input_name = input_source.get("name") + case_inputs = _process_raw_inputs( + tool_source, + [input_source], + raw_inputs, + required_files, + required_data_tables, + required_loc_files, + parent_context=cond_context, + ) + expanded_inputs.update(case_inputs) + expanded_case_value = split_if_str(case_when) + if case_value is not None: + # A bit tricky here - we are growing inputs with value + # that may be implicit (i.e. not defined by user just + # a default defined in tool). So we do not want to grow + # expanded_inputs and risk repeat block viewing this + # as a new instance with value defined and hence enter + # an infinite loop - hence the "case_value is not None" + # check. + processed_value = _process_simple_value( + test_param_input_source, expanded_case_value, required_data_tables, required_loc_files + ) + expanded_inputs[case_context.for_state()] = processed_value + elif input_type == "section": + context = ParamContext(name=name, parent_context=parent_context) + page_source = input_source.parse_nested_inputs_source() + for section_input_source in page_source.parse_input_sources(): + expanded_input = _process_raw_inputs( + tool_source, + [section_input_source], + raw_inputs, + required_files, + required_data_tables, + required_loc_files, + parent_context=context, + ) + if expanded_input: + expanded_inputs.update(expanded_input) + elif input_type == "repeat": + repeat_index = 0 + while True: + context = ParamContext(name=name, index=repeat_index, parent_context=parent_context) + updated = False + page_source = input_source.parse_nested_inputs_source() + for r_value in page_source.parse_input_sources(): + expanded_input = _process_raw_inputs( + tool_source, + [r_value], + raw_inputs, + required_files, + required_data_tables, + required_loc_files, + parent_context=context, + ) + if expanded_input: + expanded_inputs.update(expanded_input) + updated = True + if not updated: + break + repeat_index += 1 + else: + context = ParamContext(name=name, parent_context=parent_context) + raw_input_dict = context.extract_value(raw_inputs) + param_type = input_source.get("type") + if raw_input_dict: + name = raw_input_dict["name"] + param_value = raw_input_dict["value"] + param_extra = raw_input_dict["attributes"] + location = param_extra.get("location") + param_value = split_if_str(param_value) + if param_type == "data": + if location and input_source.get_bool("multiple", False): + # We get the input/s from the location which can be a list of urls separated by commas + locations = split_if_str(location) + param_value = [] + for location in locations: + v = os.path.basename(location) + param_value.append(v) + # param_extra should contain only the corresponding location + extra = dict(param_extra) + extra["location"] = location + _add_uploaded_dataset(context.for_state(), v, extra, input_source, required_files) + else: + if not isinstance(param_value, list): + param_value = [param_value] + for v in param_value: + _add_uploaded_dataset(context.for_state(), v, param_extra, input_source, required_files) + processed_value = param_value + elif param_type == "collection": + assert "collection" in param_extra + collection_def = param_extra["collection"] + for input_dict in collection_def.collect_inputs(): + name = input_dict["name"] + value = input_dict["value"] + attributes = input_dict["attributes"] + require_file(name, value, attributes, required_files) + processed_value = collection_def + else: + processed_value = _process_simple_value( + input_source, param_value, required_data_tables, required_loc_files + ) + expanded_inputs[context.for_state()] = processed_value + return expanded_inputs + + +def input_sources(tool_source: ToolSource) -> List[InputSource]: + input_sources = [] + pages_source = tool_source.parse_input_pages() + if pages_source.inputs_defined: + for page_source in pages_source.page_sources: + for input_source in page_source.parse_input_sources(): + input_sources.append(input_source) + return input_sources + + +class ParamContext: + def __init__(self, name, index=None, parent_context=None): + self.parent_context = parent_context + self.name = name + self.index = None if index is None else int(index) + + def for_state(self): + name = self.name if self.index is None else "%s_%d" % (self.name, self.index) + if parent_for_state := self.parent_context.for_state(): + return f"{parent_for_state}|{name}" + else: + return name + + def __str__(self): + return f"Context[for_state={self.for_state()}]" + + def param_names(self): + for parent_context_param in self.parent_context.param_names(): + if self.index is not None: + yield "%s|%s_%d" % (parent_context_param, self.name, self.index) + else: + yield f"{parent_context_param}|{self.name}" + if self.index is not None: + yield "%s_%d" % (self.name, self.index) + else: + yield self.name + + def extract_value(self, raw_inputs): + for param_name in self.param_names(): + value = self.__raw_param_found(param_name, raw_inputs) + if value: + return value + return None + + def __raw_param_found(self, param_name, raw_inputs): + index = None + for i, raw_input_dict in enumerate(raw_inputs): + if raw_input_dict["name"] == param_name: + index = i + if index is not None: + raw_input_dict = raw_inputs[index] + del raw_inputs[index] + return raw_input_dict + else: + return None + + +class RootParamContext: + def __init__(self): + pass + + def for_state(self): + return "" + + def param_names(self): + return [] + + def get_index(self): + return 0 + + +def _process_simple_value( + param: InputSource, + param_value: Any, + required_data_tables: RequiredDataTablesT, + required_loc_files: RequiredLocFileT, +): + input_type = param.get("type") + if input_type == "select": + # Tests may specify values as either raw value or the value + # as they appear in the list - the API doesn't and shouldn't + # accept the text value - so we need to convert the text + # into the form value. + def process_param_value(param_value): + found_value = False + value_for_text = None + static_options = param.parse_static_options() + for text, opt_value, _ in static_options: + if param_value == opt_value: + found_value = True + if value_for_text is None and param_value == text: + value_for_text = opt_value + dynamic_options = param.parse_dynamic_options() + if dynamic_options and not input_type == "drill_down": + data_table_name = dynamic_options.get_data_table_name() + index_file_name = dynamic_options.get_index_file_name() + if data_table_name: + required_data_tables.append(data_table_name) + elif index_file_name: + required_loc_files.append(index_file_name) + if not found_value and value_for_text is not None: + processed_value = value_for_text + else: + processed_value = param_value + return processed_value + + # Do replacement described above for lists or singleton + # values. + if isinstance(param_value, list): + processed_value = list(map(process_param_value, param_value)) + else: + processed_value = process_param_value(param_value) + elif input_type == "boolean": + # Like above, tests may use the tool define values of simply + # true/false. + processed_value = _process_bool_param_value(param, param_value) + else: + processed_value = param_value + return processed_value + + +def _matching_case_for_value(tool_source: ToolSource, cond: InputSource, test_param: InputSource, declared_value: Any): + tool_id = tool_source.parse_id() + cond_name = cond.get("name") + + assert test_param.parse_input_type() == "param" + test_param_type = test_param.get("type") + + if test_param_type == "boolean": + if declared_value is None: + # No explicit value for param in test case, determine from default + query_value = boolean_is_checked(test_param) + else: + query_value = _process_bool_param_value(test_param, declared_value) + + def matches_declared_value(case_value): + return _process_bool_param_value(test_param, case_value) == query_value + + elif test_param_type == "select": + static_options = test_param.parse_static_options() + if declared_value is not None: + # Test case supplied explicit value to check against. + + def matches_declared_value(case_value): + return case_value == declared_value + + elif static_options: + # No explicit value in test case, not much to do if options are dynamic but + # if static options are available can find the one specified as default or + # fallback on top most option (like GUI). + + for name, _, selected in static_options: + if selected: + default_option = name + else: + first_option = static_options[0] + first_option_value = first_option[1] + default_option = first_option_value + + def matches_declared_value(case_value): + return case_value == default_option + + else: + # No explicit value for this param and cannot determine a + # default - give up. Previously this would just result in a key + # error exception. + msg = f"Failed to find test parameter value specification required for conditional {cond_name}" + raise Exception(msg) + else: + msg = f"Invalid conditional test type found {test_param_type}" + raise Exception(msg) + + # Check the tool's defined cases against predicate to determine + # selected or default. + for case_when, case_input_sources in cond.parse_when_input_sources(): + if matches_declared_value(case_when): + return case_when, case_input_sources + else: + msg_template = "%s - Failed to find case matching value (%s) for test parameter specification for conditional %s. Remainder of test behavior is unspecified." + msg = msg_template % (tool_id, declared_value, cond_name) + log.info(msg) + return None + + +def _add_uploaded_dataset(name: str, value: Any, extra, input_parameter: InputSource, required_files: RequiredFilesT): + if value is None: + assert input_parameter.parse_optional(), f"{name} is not optional. You must provide a valid filename." + return value + return require_file(name, value, extra, required_files) + + +def require_file(name, value, extra, required_files): + if (value, extra) not in required_files: + required_files.append((value, extra)) # these files will be uploaded + name_change = [att for att in extra.get("edit_attributes", []) if att.get("type") == "name"] + if name_change: + name_change = name_change[-1].get("value") # only the last name change really matters + value = name_change # change value for select to renamed uploaded file for e.g. composite dataset + else: + for end in [".zip", ".gz"]: + if value.endswith(end): + value = value[: -len(end)] + break + value = os.path.basename(value) # if uploading a file in a path other than root of test-data + return value + + +def _process_bool_param_value(input_source: InputSource, param_value: Any) -> Any: + truevalue, falsevalue = boolean_true_and_false_values(input_source) + optional = input_source.parse_optional() + return process_bool_param_value(truevalue, falsevalue, optional, param_value) + + +def process_bool_param_value(truevalue: str, falsevalue: str, optional: bool, param_value: Any) -> Any: + was_list = False + if isinstance(param_value, list): + was_list = True + param_value = param_value[0] + + if truevalue == param_value: + processed_value = True + elif falsevalue == param_value: + processed_value = False + else: + if optional: + processed_value = string_as_bool_or_none(param_value) + else: + processed_value = string_as_bool(param_value) + return [processed_value] if was_list else processed_value + + +def split_if_str(value): + split = isinstance(value, str) + if split: + value = value.split(",") + return value diff --git a/lib/galaxy/tools/test.py b/lib/galaxy/tools/test.py index 41921dd4eaea..7447c17bd1d9 100644 --- a/lib/galaxy/tools/test.py +++ b/lib/galaxy/tools/test.py @@ -15,6 +15,15 @@ ToolTestDescription, ValidToolTestDict, ) +from galaxy.tool_util.verify.parse import ( + ParamContext, + process_bool_param_value, + RequiredDataTablesT, + RequiredFilesT, + RequiredLocFileT, + RootParamContext, + split_if_str, +) from galaxy.util import ( string_as_bool, string_as_bool_or_none, @@ -38,9 +47,9 @@ def parse_tests(tool, tests_source) -> Iterable[ToolTestDescription]: def description_from_tool_object(tool, test_index, raw_test_dict) -> ToolTestDescription: - required_files: List[Tuple[str, dict]] = [] - required_data_tables: List[str] = [] - required_loc_files: List[str] = [] + required_files: RequiredFilesT = [] + required_data_tables: RequiredDataTablesT = [] + required_loc_files: RequiredLocFileT = [] num_outputs = raw_test_dict.get("expect_num_outputs", None) if num_outputs: @@ -126,7 +135,7 @@ def _process_raw_inputs( ) expanded_inputs.update(case_inputs) if not value.type == "text": - expanded_case_value = _split_if_str(case.value) + expanded_case_value = split_if_str(case.value) if case_value is not None: # A bit tricky here - we are growing inputs with value # that may be implicit (i.e. not defined by user just @@ -185,11 +194,11 @@ def _process_raw_inputs( param_extra = raw_input_dict["attributes"] location = param_extra.get("location") if not value.type == "text": - param_value = _split_if_str(param_value) + param_value = split_if_str(param_value) if isinstance(value, galaxy.tools.parameters.basic.DataToolParameter): if location and value.multiple: # We get the input/s from the location which can be a list of urls separated by commas - locations = _split_if_str(location) + locations = split_if_str(location) param_value = [] for location in locations: v = os.path.basename(location) @@ -320,29 +329,9 @@ def _add_uploaded_dataset(name, value, extra, input_parameter, required_files): return require_file(name, value, extra, required_files) -def _split_if_str(value): - split = isinstance(value, str) - if split: - value = value.split(",") - return value - - def _process_bool_param_value(param, param_value): assert isinstance(param, galaxy.tools.parameters.basic.BooleanToolParameter) - was_list = False - if isinstance(param_value, list): - was_list = True - param_value = param_value[0] - if param.truevalue == param_value: - processed_value = True - elif param.falsevalue == param_value: - processed_value = False - else: - if param.optional: - processed_value = string_as_bool_or_none(param_value) - else: - processed_value = string_as_bool(param_value) - return [processed_value] if was_list else processed_value + return process_bool_param_value(param.truevalue, param.falsevalue, param.optional, param_value) def require_file(name, value, extra, required_files): @@ -359,64 +348,3 @@ def require_file(name, value, extra, required_files): break value = os.path.basename(value) # if uploading a file in a path other than root of test-data return value - - -class ParamContext: - def __init__(self, name, index=None, parent_context=None): - self.parent_context = parent_context - self.name = name - self.index = None if index is None else int(index) - - def for_state(self): - name = self.name if self.index is None else "%s_%d" % (self.name, self.index) - if parent_for_state := self.parent_context.for_state(): - return f"{parent_for_state}|{name}" - else: - return name - - def __str__(self): - return f"Context[for_state={self.for_state()}]" - - def param_names(self): - for parent_context_param in self.parent_context.param_names(): - if self.index is not None: - yield "%s|%s_%d" % (parent_context_param, self.name, self.index) - else: - yield f"{parent_context_param}|{self.name}" - if self.index is not None: - yield "%s_%d" % (self.name, self.index) - else: - yield self.name - - def extract_value(self, raw_inputs): - for param_name in self.param_names(): - value = self.__raw_param_found(param_name, raw_inputs) - if value: - return value - return None - - def __raw_param_found(self, param_name, raw_inputs): - index = None - for i, raw_input_dict in enumerate(raw_inputs): - if raw_input_dict["name"] == param_name: - index = i - if index is not None: - raw_input_dict = raw_inputs[index] - del raw_inputs[index] - return raw_input_dict - else: - return None - - -class RootParamContext: - def __init__(self): - pass - - def for_state(self): - return "" - - def param_names(self): - return [] - - def get_index(self): - return 0 diff --git a/test/unit/app/tools/test_test_parsing.py b/test/unit/app/tools/test_test_parsing.py index 00391e647176..84dc14831332 100644 --- a/test/unit/app/tools/test_test_parsing.py +++ b/test/unit/app/tools/test_test_parsing.py @@ -11,16 +11,42 @@ from galaxy.app_unittest_utils import tools_support from galaxy.tool_util.unittest_utils import functional_test_tool_path +from galaxy.tool_util.verify.parse import parse_tool_test_descriptions from galaxy.tools.test import parse_tests from galaxy.util.unittest import TestCase # Not the whole response, just some keys and such to test... -SIMPLE_CONSTRUCTS_EXPECTATIONS = [ - (["inputs", "p1|p1use"], [True]), +SIMPLE_CONSTRUCTS_EXPECTATIONS_0 = [ (["inputs", "booltest"], [True]), (["inputs", "inttest"], ["12456"]), + (["inputs", "floattest"], ["6.789"]), + (["inputs", "p1|p1use"], [True]), (["inputs", "files_0|file"], ["simple_line.txt"]), (["outputs", 0, "name"], "out_file1"), + (["required_files", 0, 0], "simple_line.txt"), + (["required_files", 0, 1, "value"], "simple_line.txt"), +] +# this test case covers specifying boolean parameters by string truevalue/falsevalue +SIMPLE_CONSTRUCTS_EXPECTATIONS_1 = [ + (["inputs", "booltest"], [True]), + (["inputs", "p1|p1use"], [True]), + (["inputs", "files_0|file"], ["simple_line.txt"]), + (["outputs", 0, "name"], "out_file1"), +] +SECTION_EXPECTATIONS = [ + (["inputs", "int|inttest"], ["12456"]), + (["inputs", "float|floattest"], ["6.789"]), +] +MIN_REPEAT_EXPECTATIONS = [ + (["inputs", "queries_0|input"], ["simple_line.txt"]), + (["inputs", "queries_1|input"], ["simple_line.txt"]), + (["inputs", "queries2_0|input2"], ["simple_line_alternative.txt"]), +] +DBKEY_FILTER_INPUT_EXPECTATIONS = [ + (["inputs", "inputs"], ["simple_line.txt"]), + (["inputs", "index"], ["hg19_value"]), + (["required_files", 0, 1, "dbkey"], "hg19"), + (["required_data_tables", 0], "test_fasta_indexes"), ] @@ -33,19 +59,48 @@ def setUp(self): def tearDown(self): self.tear_down_app() - def test_state_parsing(self): + def _parse_tests(self): + return parse_tests(self.tool, self.tool_source) + + def test_simple_state_parsing(self): self._init_tool_for_path(functional_test_tool_path("simple_constructs.xml")) - test_dicts = parse_tests(self.tool, self.tool_source) - self._verify_each(test_dicts[0].to_dict(), SIMPLE_CONSTRUCTS_EXPECTATIONS) + test_dicts = self._parse_tests() + self._verify_each(test_dicts[0].to_dict(), SIMPLE_CONSTRUCTS_EXPECTATIONS_0) + self._verify_each(test_dicts[1].to_dict(), SIMPLE_CONSTRUCTS_EXPECTATIONS_1) + + def test_section_state_parsing(self): + self._init_tool_for_path(functional_test_tool_path("section.xml")) + test_dicts = self._parse_tests() + # without and with section tags in the tests... + self._verify_each(test_dicts[0].to_dict(), SECTION_EXPECTATIONS) + self._verify_each(test_dicts[1].to_dict(), SECTION_EXPECTATIONS) + + def test_repeat_state_parsing(self): + self._init_tool_for_path(functional_test_tool_path("min_repeat.xml")) + test_dicts = self._parse_tests() + # without and with section tags in the tests... + self._verify_each(test_dicts[0].to_dict(), MIN_REPEAT_EXPECTATIONS) + + def test_dynamic_options_data_table_parsing(self): + self._init_tool_for_path(functional_test_tool_path("dbkey_filter_input.xml")) + test_dicts = self._parse_tests() print(test_dicts[0].to_dict()) - assert False + self._verify_each(test_dicts[0].to_dict(), DBKEY_FILTER_INPUT_EXPECTATIONS) def _verify_each(self, target_dict: dict, expectations: List[Any]): for path, expectation in expectations: + exception = target_dict.get("exception") + assert not exception, f"Test failed to generate with exception {exception}" self._verify(target_dict, path, expectation) def _verify(self, target_dict: dict, expectation_path: List[str], expectation: Any): rest = target_dict for path_part in expectation_path: rest = rest[path_part] - assert rest == expectation + assert rest == expectation, f"{rest} != {expectation} for {expectation_path}" + + +class TestToolSourceTestParsing(TestTestParsing): + + def _parse_tests(self): + return parse_tool_test_descriptions(self.tool_source)