conda-incubator · schuylermartin45 · Apr 30, 2024 · Apr 29, 2024 · Apr 29, 2024 · Apr 29, 2024
@@ -46,6 +46,7 @@ class ExitCode(IntEnum):
     PARSE_EXCEPTION = 102
     RENDER_EXCEPTION = 103
     READ_EXCEPTION = 104
+    PRE_PROCESS_EXCEPTION = 105
 
 
 @dataclass
@@ -74,13 +75,38 @@ def set_return_code(self) -> None:
             self.code = ExitCode.RENDER_WARNINGS
 
 
+def _record_unrecoverable_failure(
+    conversion_result: ConversionResult,
+    exit_code: ExitCode,
+    e_msg: str,
+    print_output: bool,
+    e: Optional[Exception] = None,
+) -> ConversionResult:
+    """
+    Convenience function that streamlines the process of recording an unrecoverable conversion failure.
+    :param conversion_result: Conversion result instance to use. This is passed into aggregate any other messages that
+        could be logged prior to reaching this fatal error case.
+    :param exit_code: Exit code to return for this error case.
+    :param e_msg: Error message to display, if enabled.
+    :param print_output: Prints the recipe to STDERR if the output file is not specified and this flag is `True`.
+    :param e: (Optional) Exception instance to capture, if applicable
+    :returns: The final `conversion_result` instance that should be returned immediately.
+    """
+    print_err(e_msg, print_enabled=print_output)
+    if e is not None:
+        print_err(e, print_enabled=print_output)
+    conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
+    conversion_result.code = exit_code
+    return conversion_result
+
+
 def convert_file(file_path: Path, output: Optional[Path], print_output: bool) -> ConversionResult:
     """
     Converts a single recipe file to the new format, tracking results.
     :param file_path: Path to the recipe file to convert
     :param output: If specified, the file contents are written to this file path. Otherwise, the file is dumped to
         STDOUT IF `print_output` is set to `True`.
-    :param print_output: Prints the recipe to STDOUT if the output file is not specified and this flag is `True`.
+    :param print_output: Prints the recipe to STDOUT/STDERR if the output file is not specified and this flag is `True`.
     :returns: A struct containing the results of the conversion process, including debugging metadata.
     """
     conversion_result = ConversionResult(
@@ -89,43 +115,52 @@ def convert_file(file_path: Path, output: Optional[Path], print_output: bool) ->
 
     recipe_content = None
     try:
-        with open(file_path, "r", encoding="utf-8") as file:
-            recipe_content = file.read()
-    except IOError:
-        pass
-
-    if recipe_content is None:
-        conversion_result.code = ExitCode.READ_EXCEPTION
-        e_msg = f"EXCEPTION: Failed to read: {file_path}"
-        print_err(e_msg, print_enabled=print_output)
-        conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
-        return conversion_result
+        recipe_content = Path(file_path).read_text(encoding="utf-8")
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        return _record_unrecoverable_failure(
+            conversion_result, ExitCode.READ_EXCEPTION, f"EXCEPTION: Failed to read: {file_path}", print_output, e
+        )
 
+    # Pre-process the recipe
+    try:
+        recipe_content = RecipeParserConvert.pre_process_recipe_text(recipe_content)
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        return _record_unrecoverable_failure(
+            conversion_result,
+            ExitCode.PRE_PROCESS_EXCEPTION,
+            "EXCEPTION: An exception occurred while pre-processing the recipe file",
+            print_output,
+            e,
+        )
+
+    # Parse the recipe
     parser: RecipeParserConvert
     try:
         parser = RecipeParserConvert(recipe_content)
     except Exception as e:  # pylint: disable=broad-exception-caught
-        e_msg = "EXCEPTION: An exception occurred while parsing the recipe file"
-        print_err(e_msg, print_enabled=print_output)
-        print_err(e, print_enabled=print_output)
-        conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
-        conversion_result.code = ExitCode.PARSE_EXCEPTION
-        return conversion_result
+        return _record_unrecoverable_failure(
+            conversion_result,
+            ExitCode.PARSE_EXCEPTION,
+            "EXCEPTION: An exception occurred while parsing the recipe file",
+            print_output,
+            e,
+        )
 
+    # Convert the recipe
     try:
         conversion_result.content, conversion_result.msg_tbl = parser.render_to_new_recipe_format()
     except Exception as e:  # pylint: disable=broad-exception-caught
-        e_msg = "EXCEPTION: An exception occurred while converting to the new recipe file"
-        print_err(e_msg, print_enabled=print_output)
-        print_err(e, print_enabled=print_output)
-        conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
-        conversion_result.code = ExitCode.RENDER_EXCEPTION
-        return conversion_result
+        return _record_unrecoverable_failure(
+            conversion_result,
+            ExitCode.RENDER_EXCEPTION,
+            "EXCEPTION: An exception occurred while converting to the new recipe file",
+            print_output,
+            e,
+        )
 
     # Print or dump the results to a file. Printing is disabled for bulk operations.
-    if output is None:
-        print_out(conversion_result.content, print_enabled=print_output)
-    else:
+    print_out(conversion_result.content, print_enabled=print_output and (output is None))
+    if output is not None:
         print_err(
             "WARNING: File is not called `recipe.yaml`.",
             print_enabled=print_output and os.path.basename(output) != "recipe.yaml",

@@ -91,7 +91,12 @@ class Regex:
     # Pattern to detect Jinja variable names and functions
     _JINJA_VAR_FUNCTION_PATTERN: Final[str] = r"[a-zA-Z_][a-zA-Z0-9_\|\'\"\(\)\, =\.\-]*"
 
-    # Jinja regular expressions
+    ## Pre-process conversion tooling regular expressions ##
+    # Finds `environ[]` used by a some recipe files. Requires a whitespace character to prevent matches with
+    # `os.environ[]`, which is very rare.
+    PRE_PROCESS_ENVIRON: Final[re.Pattern[str]] = re.compile(r"\s+environ\[(\"|')(.*)(\"|')\]")
+
+    ## Jinja regular expressions ##
     JINJA_SUB: Final[re.Pattern[str]] = re.compile(r"{{\s*" + _JINJA_VAR_FUNCTION_PATTERN + r"\s*}}")
     JINJA_FUNCTION_LOWER: Final[re.Pattern[str]] = re.compile(r"\|\s*lower")
     JINJA_LINE: Final[re.Pattern[str]] = re.compile(r"({%.*%}|{#.*#})\n")

@@ -402,6 +402,41 @@ def _upgrade_multi_output(self, base_package_paths: list[str]) -> None:
             # found at the top-level. So for consistency, we sort on that ordering.
             self._sort_subtree_keys(output_path, TOP_LEVEL_KEY_SORT_ORDER)
 
+    @staticmethod
+    def pre_process_recipe_text(content: str) -> str:
+        """
+        Takes the content of a recipe file and performs manipulations prior to the parsing stage. This should be
+        used sparingly for solving conversion issues.
+
+        Ideally the pre-processor phase is only used when:
+          - There is no other feasible way to solve a conversion issue.
+          - There is a proof-of-concept fix that would be easier to develop as a pre-processor step that could be
+            refactored into the parser later.
+          - The number of recipes afflicted by an issue does not justify the engineering effort required to handle
+            the issue in the parsing phase.
+        :param content: Recipe file contents to pre-process
+        :returns: Pre-processed recipe file contents
+        """
+        # Convert the old JINJA `environ[""]` variable usage to the new `get.env("")` syntax.
+        # NOTE:
+        #   - This is mostly used by Bioconda recipes and R-based-packages in the `license_file` field.
+        #   - From our search, it looks like we never deal with more than one set of outer quotes within the brackets
+        replacements: list[tuple[str, str]] = []
+        for groups in cast(list[str], Regex.PRE_PROCESS_ENVIRON.findall(content)):
+            # Each match should return ["<quote char>", "<key>", "<quote_char>"]
+            quote_char = groups[0]
+            key = groups[1]
+            replacements.append(
+                (
+                    f"environ[{quote_char}{key}{quote_char}]",
+                    f"env.get({quote_char}{key}{quote_char})",
+                )
+            )
+        for old, new in replacements:
+            content = content.replace(old, new, 1)
+
+        return content
+
     def render_to_new_recipe_format(self) -> tuple[str, MessageTable]:
         """
         Takes the current recipe representation and renders it to the new format WITHOUT modifying the current recipe

@@ -7,10 +7,30 @@
 
 import pytest
 
+from conda_recipe_manager.parser.recipe_parser_convert import RecipeParserConvert
 from conda_recipe_manager.parser.types import MessageCategory
 from tests.file_loading import TEST_FILES_PATH, load_file, load_recipe_convert
 
 
+@pytest.mark.parametrize(
+    "input_file,expected_file",
+    [
+        ("simple-recipe_environ.yaml", "pre-processed-simple-recipe_environ.yaml"),
+        ("simple-recipe.yaml", "simple-recipe.yaml"),  # Unchanged file
+    ],
+)
+def test_pre_process_recipe_text(input_file: str, expected_file: str) -> None:
+    """
+    Validates the pre-processor phase of the conversion process. A recipe file should come in
+    as a string and return a modified string, if applicable.
+    :param input_file: Test input recipe file name
+    :param expected_file: Name of the file containing the expected output of a test instance
+    """
+    assert RecipeParserConvert.pre_process_recipe_text(load_file(f"{TEST_FILES_PATH}/{input_file}")) == load_file(
+        f"{TEST_FILES_PATH}/{expected_file}"
+    )
+
+
 @pytest.mark.parametrize(
     "file_base,errors,warnings",
     [

@@ -0,0 +1,53 @@
+{% set zz_non_alpha_first = 42 %}
+{% set name = "types-toml" %}
+{% set version = "0.10.8.6" %}
+
+package:
+  name: {{ name|lower }}  # [unix]
+
+build:
+  number: 0
+  skip: true  # [py<37]
+  is_true: true
+
+# Comment above a top-level structure
+requirements:
+  empty_field1:
+  host:
+    - setuptools  # [unix]
+    - fakereq  # [unix] selector with comment
+  empty_field2:  # [unix and win] # selector with comment with comment symbol
+  run:
+    - python  # not a selector
+  empty_field3:
+
+about:
+  summary: This is a small recipe for testing
+  description: |
+    This is a PEP '561 type stub package for the toml package.
+    It can be used by type-checking tools like mypy, pyright,
+    pytype, PyCharm, etc. to check code that uses toml.
+  license: Apache-2.0 AND MIT
+
+multi_level:
+  list_1:
+    - foo
+    # Ensure a comment in a list is supported
+    - bar
+  list_2:
+    - cat
+    - {{ env.get('baz') }}
+    - mat
+  list_3:
+    - ls
+    - sl
+    - cowsay
+
+test_var_usage:
+  foo: {{ version }}
+  bar:
+    - baz
+    - {{ env.get("foobar") }}
+    - blah
+    - This {{ name }} is silly
+    - last
@@ -0,0 +1,53 @@
+{% set zz_non_alpha_first = 42 %}
+{% set name = "types-toml" %}
+{% set version = "0.10.8.6" %}
+
+package:
+  name: {{ name|lower }}  # [unix]
+
+build:
+  number: 0
+  skip: true  # [py<37]
+  is_true: true
+
+# Comment above a top-level structure
+requirements:
+  empty_field1:
+  host:
+    - setuptools  # [unix]
+    - fakereq  # [unix] selector with comment
+  empty_field2:  # [unix and win] # selector with comment with comment symbol
+  run:
+    - python  # not a selector
+  empty_field3:
+
+about:
+  summary: This is a small recipe for testing
+  description: |
+    This is a PEP '561 type stub package for the toml package.
+    It can be used by type-checking tools like mypy, pyright,
+    pytype, PyCharm, etc. to check code that uses toml.
+  license: Apache-2.0 AND MIT
+
+multi_level:
+  list_1:
+    - foo
+    # Ensure a comment in a list is supported
+    - bar
+  list_2:
+    - cat
+    - {{ environ['baz'] }}
+    - mat
+  list_3:
+    - ls
+    - sl
+    - cowsay
+
+test_var_usage:
+  foo: {{ version }}
+  bar:
+    - baz
+    - {{ environ["foobar"] }}
+    - blah
+    - This {{ name }} is silly
+    - last