Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes Issue #18 by adding a new conversion pre-processor #39

Merged
merged 6 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 62 additions & 27 deletions conda_recipe_manager/commands/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class ExitCode(IntEnum):
PARSE_EXCEPTION = 102
RENDER_EXCEPTION = 103
READ_EXCEPTION = 104
PRE_PROCESS_EXCEPTION = 105


@dataclass
Expand Down Expand Up @@ -74,13 +75,38 @@ def set_return_code(self) -> None:
self.code = ExitCode.RENDER_WARNINGS


def _record_unrecoverable_failure(
conversion_result: ConversionResult,
exit_code: ExitCode,
e_msg: str,
print_output: bool,
e: Optional[Exception] = None,
) -> ConversionResult:
"""
Convenience function that streamlines the process of recording an unrecoverable conversion failure.
:param conversion_result: Conversion result instance to use. This is passed into aggregate any other messages that
could be logged prior to reaching this fatal error case.
:param exit_code: Exit code to return for this error case.
:param e_msg: Error message to display, if enabled.
:param print_output: Prints the recipe to STDERR if the output file is not specified and this flag is `True`.
:param e: (Optional) Exception instance to capture, if applicable
:returns: The final `conversion_result` instance that should be returned immediately.
"""
print_err(e_msg, print_enabled=print_output)
if e is not None:
print_err(e, print_enabled=print_output)
conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
conversion_result.code = exit_code
return conversion_result


def convert_file(file_path: Path, output: Optional[Path], print_output: bool) -> ConversionResult:
"""
Converts a single recipe file to the new format, tracking results.
:param file_path: Path to the recipe file to convert
:param output: If specified, the file contents are written to this file path. Otherwise, the file is dumped to
STDOUT IF `print_output` is set to `True`.
:param print_output: Prints the recipe to STDOUT if the output file is not specified and this flag is `True`.
:param print_output: Prints the recipe to STDOUT/STDERR if the output file is not specified and this flag is `True`.
:returns: A struct containing the results of the conversion process, including debugging metadata.
"""
conversion_result = ConversionResult(
Expand All @@ -89,43 +115,52 @@ def convert_file(file_path: Path, output: Optional[Path], print_output: bool) ->

recipe_content = None
try:
with open(file_path, "r", encoding="utf-8") as file:
recipe_content = file.read()
except IOError:
pass

if recipe_content is None:
conversion_result.code = ExitCode.READ_EXCEPTION
e_msg = f"EXCEPTION: Failed to read: {file_path}"
print_err(e_msg, print_enabled=print_output)
conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
return conversion_result
recipe_content = Path(file_path).read_text(encoding="utf-8")
except Exception as e: # pylint: disable=broad-exception-caught
return _record_unrecoverable_failure(
conversion_result, ExitCode.READ_EXCEPTION, f"EXCEPTION: Failed to read: {file_path}", print_output, e
)

# Pre-process the recipe
try:
recipe_content = RecipeParserConvert.pre_process_recipe_text(recipe_content)
except Exception as e: # pylint: disable=broad-exception-caught
return _record_unrecoverable_failure(
conversion_result,
ExitCode.PRE_PROCESS_EXCEPTION,
"EXCEPTION: An exception occurred while pre-processing the recipe file",
print_output,
e,
)

# Parse the recipe
parser: RecipeParserConvert
try:
parser = RecipeParserConvert(recipe_content)
except Exception as e: # pylint: disable=broad-exception-caught
e_msg = "EXCEPTION: An exception occurred while parsing the recipe file"
print_err(e_msg, print_enabled=print_output)
print_err(e, print_enabled=print_output)
conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
conversion_result.code = ExitCode.PARSE_EXCEPTION
return conversion_result
return _record_unrecoverable_failure(
conversion_result,
ExitCode.PARSE_EXCEPTION,
"EXCEPTION: An exception occurred while parsing the recipe file",
print_output,
e,
)

# Convert the recipe
try:
conversion_result.content, conversion_result.msg_tbl = parser.render_to_new_recipe_format()
except Exception as e: # pylint: disable=broad-exception-caught
e_msg = "EXCEPTION: An exception occurred while converting to the new recipe file"
print_err(e_msg, print_enabled=print_output)
print_err(e, print_enabled=print_output)
conversion_result.msg_tbl.add_message(MessageCategory.EXCEPTION, e_msg)
conversion_result.code = ExitCode.RENDER_EXCEPTION
return conversion_result
return _record_unrecoverable_failure(
conversion_result,
ExitCode.RENDER_EXCEPTION,
"EXCEPTION: An exception occurred while converting to the new recipe file",
print_output,
e,
)

# Print or dump the results to a file. Printing is disabled for bulk operations.
if output is None:
print_out(conversion_result.content, print_enabled=print_output)
else:
print_out(conversion_result.content, print_enabled=print_output and (output is None))
if output is not None:
print_err(
"WARNING: File is not called `recipe.yaml`.",
print_enabled=print_output and os.path.basename(output) != "recipe.yaml",
Expand Down
7 changes: 6 additions & 1 deletion conda_recipe_manager/parser/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,12 @@ class Regex:
# Pattern to detect Jinja variable names and functions
_JINJA_VAR_FUNCTION_PATTERN: Final[str] = r"[a-zA-Z_][a-zA-Z0-9_\|\'\"\(\)\, =\.\-]*"

# Jinja regular expressions
## Pre-process conversion tooling regular expressions ##
# Finds `environ[]` used by a some recipe files. Requires a whitespace character to prevent matches with
# `os.environ[]`, which is very rare.
PRE_PROCESS_ENVIRON: Final[re.Pattern[str]] = re.compile(r"\s+environ\[(\"|')(.*)(\"|')\]")

## Jinja regular expressions ##
JINJA_SUB: Final[re.Pattern[str]] = re.compile(r"{{\s*" + _JINJA_VAR_FUNCTION_PATTERN + r"\s*}}")
JINJA_FUNCTION_LOWER: Final[re.Pattern[str]] = re.compile(r"\|\s*lower")
JINJA_LINE: Final[re.Pattern[str]] = re.compile(r"({%.*%}|{#.*#})\n")
Expand Down
35 changes: 35 additions & 0 deletions conda_recipe_manager/parser/recipe_parser_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,41 @@ def _upgrade_multi_output(self, base_package_paths: list[str]) -> None:
# found at the top-level. So for consistency, we sort on that ordering.
self._sort_subtree_keys(output_path, TOP_LEVEL_KEY_SORT_ORDER)

@staticmethod
def pre_process_recipe_text(content: str) -> str:
"""
Takes the content of a recipe file and performs manipulations prior to the parsing stage. This should be
used sparingly for solving conversion issues.

Ideally the pre-processor phase is only used when:
- There is no other feasible way to solve a conversion issue.
- There is a proof-of-concept fix that would be easier to develop as a pre-processor step that could be
refactored into the parser later.
- The number of recipes afflicted by an issue does not justify the engineering effort required to handle
the issue in the parsing phase.
:param content: Recipe file contents to pre-process
:returns: Pre-processed recipe file contents
"""
# Convert the old JINJA `environ[""]` variable usage to the new `get.env("")` syntax.
# NOTE:
# - This is mostly used by Bioconda recipes and R-based-packages in the `license_file` field.
# - From our search, it looks like we never deal with more than one set of outer quotes within the brackets
replacements: list[tuple[str, str]] = []
for groups in cast(list[str], Regex.PRE_PROCESS_ENVIRON.findall(content)):
# Each match should return ["<quote char>", "<key>", "<quote_char>"]
quote_char = groups[0]
key = groups[1]
replacements.append(
(
f"environ[{quote_char}{key}{quote_char}]",
f"env.get({quote_char}{key}{quote_char})",
)
)
for old, new in replacements:
content = content.replace(old, new, 1)

return content

def render_to_new_recipe_format(self) -> tuple[str, MessageTable]:
"""
Takes the current recipe representation and renders it to the new format WITHOUT modifying the current recipe
Expand Down
20 changes: 20 additions & 0 deletions tests/parser/test_recipe_parser_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,30 @@

import pytest

from conda_recipe_manager.parser.recipe_parser_convert import RecipeParserConvert
from conda_recipe_manager.parser.types import MessageCategory
from tests.file_loading import TEST_FILES_PATH, load_file, load_recipe_convert


@pytest.mark.parametrize(
"input_file,expected_file",
[
("simple-recipe_environ.yaml", "pre-processed-simple-recipe_environ.yaml"),
("simple-recipe.yaml", "simple-recipe.yaml"), # Unchanged file
],
)
def test_pre_process_recipe_text(input_file: str, expected_file: str) -> None:
"""
Validates the pre-processor phase of the conversion process. A recipe file should come in
as a string and return a modified string, if applicable.
:param input_file: Test input recipe file name
:param expected_file: Name of the file containing the expected output of a test instance
"""
assert RecipeParserConvert.pre_process_recipe_text(load_file(f"{TEST_FILES_PATH}/{input_file}")) == load_file(
f"{TEST_FILES_PATH}/{expected_file}"
)


@pytest.mark.parametrize(
"file_base,errors,warnings",
[
Expand Down
53 changes: 53 additions & 0 deletions tests/test_aux_files/pre-processed-simple-recipe_environ.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{% set zz_non_alpha_first = 42 %}
{% set name = "types-toml" %}
{% set version = "0.10.8.6" %}

package:
name: {{ name|lower }} # [unix]

build:
number: 0
skip: true # [py<37]
is_true: true

# Comment above a top-level structure
requirements:
empty_field1:
host:
- setuptools # [unix]
- fakereq # [unix] selector with comment
empty_field2: # [unix and win] # selector with comment with comment symbol
run:
- python # not a selector
empty_field3:

about:
summary: This is a small recipe for testing
description: |
This is a PEP '561 type stub package for the toml package.
It can be used by type-checking tools like mypy, pyright,
pytype, PyCharm, etc. to check code that uses toml.
license: Apache-2.0 AND MIT

multi_level:
list_1:
- foo
# Ensure a comment in a list is supported
- bar
list_2:
- cat
- {{ env.get('baz') }}
- mat
list_3:
- ls
- sl
- cowsay

test_var_usage:
foo: {{ version }}
bar:
- baz
- {{ env.get("foobar") }}
- blah
- This {{ name }} is silly
- last
53 changes: 53 additions & 0 deletions tests/test_aux_files/simple-recipe_environ.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{% set zz_non_alpha_first = 42 %}
{% set name = "types-toml" %}
{% set version = "0.10.8.6" %}

package:
name: {{ name|lower }} # [unix]

build:
number: 0
skip: true # [py<37]
is_true: true

# Comment above a top-level structure
requirements:
empty_field1:
host:
- setuptools # [unix]
- fakereq # [unix] selector with comment
empty_field2: # [unix and win] # selector with comment with comment symbol
run:
- python # not a selector
empty_field3:

about:
summary: This is a small recipe for testing
description: |
This is a PEP '561 type stub package for the toml package.
It can be used by type-checking tools like mypy, pyright,
pytype, PyCharm, etc. to check code that uses toml.
license: Apache-2.0 AND MIT

multi_level:
list_1:
- foo
# Ensure a comment in a list is supported
- bar
list_2:
- cat
- {{ environ['baz'] }}
- mat
list_3:
- ls
- sl
- cowsay

test_var_usage:
foo: {{ version }}
bar:
- baz
- {{ environ["foobar"] }}
- blah
- This {{ name }} is silly
- last