Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove tree from PythonSequentialLinter #3535

Open
wants to merge 55 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
b53e0fd
Let append tree return None
JCZuurmond Jan 15, 2025
108dd7b
Test bidirectionality of appended trees
JCZuurmond Jan 15, 2025
8a22d90
Rename append_tree to attach tree
JCZuurmond Jan 15, 2025
6c0ebcf
Clean test for attaching child tree
JCZuurmond Jan 15, 2025
fd2c761
Rewrite test for module propagation
JCZuurmond Jan 15, 2025
2c46a01
Rewrite test for not implemented error
JCZuurmond Jan 15, 2025
ca30dcf
Test append globals
JCZuurmond Jan 15, 2025
1d22084
Narrow not implemented test
JCZuurmond Jan 15, 2025
48ddf4e
Test appending globals during attach tree
JCZuurmond Jan 15, 2025
1211dda
Refactor `append_globals` to `extend_globals`
JCZuurmond Jan 15, 2025
043fc6b
Test appending nodes sets parent on node
JCZuurmond Jan 15, 2025
b0e39ef
Test appending nodes adds nodes to end of body
JCZuurmond Jan 15, 2025
51358f2
Move append_nodes method up
JCZuurmond Jan 15, 2025
460e88f
Rename append_nodes to attach_nodes
JCZuurmond Jan 15, 2025
357a2e5
Narrow raising not implemented error test
JCZuurmond Jan 15, 2025
d9ffd91
Add docstring for attach nodes
JCZuurmond Jan 15, 2025
a3c023b
Change defining sources in test
JCZuurmond Jan 16, 2025
1e7a4b9
Update constructing sources
JCZuurmond Jan 17, 2025
249857e
Test PythonLinter with dummy advices
JCZuurmond Jan 16, 2025
ba04281
Test linting unparsable python code
JCZuurmond Jan 16, 2025
18e9c98
Test sequential linter with dummy advices
JCZuurmond Jan 16, 2025
fb8d6ee
Test linting print(1) sets no globals
JCZuurmond Jan 16, 2025
96ef1ec
Test linting with one global
JCZuurmond Jan 16, 2025
e5365ef
Test linting with two globals
JCZuurmond Jan 16, 2025
aed3b12
Test linting separate code sources separates globals
JCZuurmond Jan 16, 2025
ee148eb
Test appending globals sets global
JCZuurmond Jan 16, 2025
84b5f69
Remove SquentialLinter.make_tree
JCZuurmond Jan 16, 2025
d142f7c
Refactor globals linter to fetch globals from body nodes
JCZuurmond Jan 16, 2025
00aad28
Sort globals for consistent testing
JCZuurmond Jan 16, 2025
4c1e79e
Test dummy DFSA Python collector
JCZuurmond Jan 16, 2025
f860a67
Test dummy used table Python collector
JCZuurmond Jan 16, 2025
906ba87
Delete dead code `PythonSequentialLinter.process_child_cell`
JCZuurmond Jan 16, 2025
feb0a8c
Format imports
JCZuurmond Jan 16, 2025
73991ae
Remove Tree from python sequential linter
JCZuurmond Jan 16, 2025
4cf6d8a
Fix type hinting for classmethod with child classes
JCZuurmond Jan 16, 2025
04fda6f
Let tree loading return failure
JCZuurmond Jan 16, 2025
7e16589
Connect cells using parents
JCZuurmond Jan 16, 2025
d2db4b6
Pass inherited tree to notebook linter
JCZuurmond Jan 17, 2025
14b8c45
Format
JCZuurmond Jan 17, 2025
5c28fc9
Disable test that does not reflect a realistic scenario
JCZuurmond Jan 17, 2025
1b8f4b5
Pass run cell's tree as parent to the notebook it is running
JCZuurmond Jan 17, 2025
b7998bc
Do not append child nodes to parents body
JCZuurmond Jan 17, 2025
70aa5bf
Use type over Type from type hinting
JCZuurmond Jan 17, 2025
c81e6b1
Rename attach_nodes to attach_child_nodes in Python analyzer
JCZuurmond Jan 17, 2025
3fb49a7
Delete test for unrealistic scenario
JCZuurmond Jan 17, 2025
1ff9891
Rename method to parse trees
JCZuurmond Jan 17, 2025
273a40d
Add tests for notebook linter
JCZuurmond Jan 17, 2025
7728719
Test for a table migration deprecation advice to be given
JCZuurmond Jan 17, 2025
772684c
Test for notebook cells to consider only code above
JCZuurmond Jan 17, 2025
2f4c7fa
Test inverse of previous commit
JCZuurmond Jan 17, 2025
5441ec5
Test inverse of reading table from other cell
JCZuurmond Jan 17, 2025
d6c0afc
Format
JCZuurmond Jan 17, 2025
435f6c8
Let PythonSequentialLinter inherit correctly
JCZuurmond Jan 17, 2025
fd8f04d
Remove PythonSequentialLinter initialization from NotebookLinter init
JCZuurmond Jan 17, 2025
7209aac
Test NotebookLinter to lint parse failure
JCZuurmond Jan 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/databricks/labs/ucx/source_code/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, BinaryIO, TextIO
from typing import Any, BinaryIO, TextIO, TypeVar

from astroid import NodeNG # type: ignore
from sqlglot import Expression, parse as parse_sql
Expand Down Expand Up @@ -40,6 +40,9 @@
logger = logging.getLogger(__name__)


T = TypeVar("T", bound="Advice")


@dataclass
class Advice:
code: str
Expand All @@ -66,7 +69,7 @@ def for_path(self, path: Path) -> LocatedAdvice:
return LocatedAdvice(self, path)

@classmethod
def from_node(cls, *, code: str, message: str, node: NodeNG) -> Advice:
def from_node(cls: type[T], *, code: str, message: str, node: NodeNG) -> T:
# Astroid lines are 1-based.
return cls(
code=code,
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/source_code/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ def append(self, context: InheritedContext, copy_found: bool) -> InheritedContex
if tree is None:
return InheritedContext(self.tree, found, context.problems)
new_tree = self.tree or Tree.new_module()
new_tree.append_tree(tree)
new_tree.attach_child_tree(tree)
new_problems = itertools.chain(self.problems, context.problems)
return InheritedContext(new_tree, found, new_problems)

Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/source_code/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ def _collect_from_notebook(
logger.warning(maybe_tree.failure.message)
continue
assert maybe_tree.tree is not None
inherited_tree.append_tree(maybe_tree.tree)
inherited_tree.attach_child_tree(maybe_tree.tree)

def _collect_from_source(
self,
Expand Down
140 changes: 68 additions & 72 deletions src/databricks/labs/ucx/source_code/notebooks/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@
safe_read_text,
read_text,
Advice,
Advisory,
CurrentSessionState,
Failure,
Linter,
)

from databricks.labs.ucx.source_code.graph import (
Expand All @@ -36,7 +34,7 @@
UnresolvedPath,
)
from databricks.labs.ucx.source_code.notebooks.magic import MagicLine
from databricks.labs.ucx.source_code.python.python_ast import Tree, NodeBase, PythonSequentialLinter
from databricks.labs.ucx.source_code.python.python_ast import NodeBase, PythonLinter, Tree
from databricks.labs.ucx.source_code.notebooks.cells import (
CellLanguage,
Cell,
Expand Down Expand Up @@ -155,76 +153,74 @@ def __init__(
self._path_lookup = path_lookup
self._session_state = session_state
self._notebook: Notebook = notebook
# reuse Python linter across related files and notebook cells
# this is required in order to accumulate statements for improved inference
self._python_linter: PythonSequentialLinter = cast(PythonSequentialLinter, context.linter(Language.PYTHON))
if inherited_tree is not None:
self._python_linter.append_tree(inherited_tree)
self._inherited_tree = inherited_tree

self._python_trees: dict[PythonCell, Tree] = {} # the original trees to be linted

def lint(self) -> Iterable[Advice]:
has_failure = False
for advice in self._load_tree_from_notebook(self._notebook, True):
if isinstance(advice, Failure): # happens when a cell is unparseable
has_failure = True
yield advice
if has_failure:
failure = self._parse_trees(self._notebook, True, parent_tree=self._inherited_tree)
if failure:
yield failure
return
for cell in self._notebook.cells:
if not self._context.is_supported(cell.language.language):
try:
linter = self._context.linter(cell.language.language)
except ValueError: # Language is not supported (yet)
continue
if isinstance(cell, PythonCell):
linter = cast(PythonLinter, linter)
tree = self._python_trees[cell]
advices = self._python_linter.lint_tree(tree)
advices = linter.lint_tree(tree)
else:
linter = self._linter(cell.language.language)
advices = linter.lint(cell.original_code)
for advice in advices:
yield dataclasses.replace(
advice,
start_line=advice.start_line + cell.original_offset,
end_line=advice.end_line + cell.original_offset,
)
return

def _load_tree_from_notebook(self, notebook: Notebook, register_trees: bool) -> Iterable[Advice]:
def _parse_trees(self, notebook: Notebook, register_trees: bool, *, parent_tree: Tree | None) -> Failure | None:
for cell in notebook.cells:
failure = None
if isinstance(cell, RunCell):
yield from self._load_tree_from_run_cell(cell)
continue
if isinstance(cell, PythonCell):
yield from self._load_tree_from_python_cell(cell, register_trees)
continue

def _load_tree_from_python_cell(self, python_cell: PythonCell, register_trees: bool) -> Iterable[Advice]:
failure = self._load_tree_from_run_cell(cell, parent_tree=parent_tree)
elif isinstance(cell, PythonCell):
failure = self._load_tree_from_python_cell(cell, register_trees, parent_tree=parent_tree)
parent_tree = self._python_trees.get(cell)
if failure:
return failure
return None

def _load_tree_from_python_cell(
self, python_cell: PythonCell, register_trees: bool, *, parent_tree: Tree | None
) -> Failure | None:
maybe_tree = Tree.maybe_normalized_parse(python_cell.original_code)
if maybe_tree.failure:
yield maybe_tree.failure
tree = maybe_tree.tree
# a cell with only comments will not produce a tree
return maybe_tree.failure
assert maybe_tree.tree is not None
if parent_tree:
parent_tree.attach_child_tree(maybe_tree.tree)
if register_trees:
self._python_trees[python_cell] = tree or Tree.new_module()
if not tree:
return
yield from self._load_children_from_tree(tree)
# A cell with only comments will not produce a tree
self._python_trees[python_cell] = maybe_tree.tree or Tree.new_module()
if maybe_tree.tree:
return self._load_children_from_tree(maybe_tree.tree)
return None

def _load_children_from_tree(self, tree: Tree) -> Iterable[Advice]:
def _load_children_from_tree(self, tree: Tree) -> Failure | None:
assert isinstance(tree.node, Module)
# look for child notebooks (and sys.path changes that might affect their loading)
base_nodes: list[NodeBase] = []
base_nodes.extend(self._list_run_magic_lines(tree))
base_nodes.extend(SysPathChange.extract_from_tree(self._session_state, tree))
if len(base_nodes) == 0:
self._python_linter.append_tree(tree)
return
# append globals
globs = cast(Module, tree.node).globals
self._python_linter.append_globals(globs)
return None
# need to execute things in intertwined sequence so concat and sort them
nodes = list(cast(Module, tree.node).body)
base_nodes = sorted(base_nodes, key=lambda node: (node.node.lineno, node.node.col_offset))
yield from self._load_children_with_base_nodes(nodes, base_nodes)
# append remaining nodes
self._python_linter.append_nodes(nodes)
return self._load_children_with_base_nodes(nodes, base_nodes, parent_tree=tree)

@staticmethod
def _list_run_magic_lines(tree: Tree) -> Iterable[MagicLine]:
Expand All @@ -237,53 +233,64 @@ def _ignore_problem(_code: str, _message: str, _node: NodeNG) -> None:
if isinstance(command.as_magic(), RunCommand):
yield command

def _load_children_with_base_nodes(self, nodes: list[NodeNG], base_nodes: list[NodeBase]) -> Iterable[Advice]:
def _load_children_with_base_nodes(
self, nodes: list[NodeNG], base_nodes: list[NodeBase], *, parent_tree: Tree | None
) -> Failure | None:
for base_node in base_nodes:
yield from self._load_children_with_base_node(nodes, base_node)

def _load_children_with_base_node(self, nodes: list[NodeNG], base_node: NodeBase) -> Iterable[Advice]:
failure = self._load_children_with_base_node(nodes, base_node, parent_tree=parent_tree)
if failure:
return failure
return None

def _load_children_with_base_node(
self, nodes: list[NodeNG], base_node: NodeBase, *, parent_tree: Tree | None
) -> Failure | None:
while len(nodes) > 0:
node = nodes.pop(0)
self._python_linter.append_nodes([node])
if node.lineno < base_node.node.lineno:
continue
yield from self._load_children_from_base_node(base_node)
failure = self._load_children_from_base_node(base_node, parent_tree=parent_tree)
if failure:
return failure
return None

def _load_children_from_base_node(self, base_node: NodeBase) -> Iterable[Advice]:
def _load_children_from_base_node(self, base_node: NodeBase, *, parent_tree: Tree | None) -> Failure | None:
if isinstance(base_node, SysPathChange):
yield from self._mutate_path_lookup(base_node)
return
failure = self._mutate_path_lookup(base_node)
if failure:
return failure
if isinstance(base_node, MagicLine):
magic = base_node.as_magic()
assert isinstance(magic, RunCommand)
notebook = self._load_source_from_path(magic.notebook_path)
if notebook is None:
yield Advisory.from_node(
failure = Failure.from_node(
code='dependency-not-found',
message=f"Can't locate dependency: {magic.notebook_path}",
node=base_node.node,
)
return
yield from self._load_tree_from_notebook(notebook, False)
return
return failure
return self._parse_trees(notebook, False, parent_tree=parent_tree)
return None

def _mutate_path_lookup(self, change: SysPathChange) -> Iterable[Advice]:
def _mutate_path_lookup(self, change: SysPathChange) -> Failure | None:
if isinstance(change, UnresolvedPath):
yield Advisory.from_node(
return Failure.from_node(
code='sys-path-cannot-compute-value',
message=f"Can't update sys.path from {change.node.as_string()} because the expression cannot be computed",
node=change.node,
)
return
change.apply_to(self._path_lookup)
return None

def _load_tree_from_run_cell(self, run_cell: RunCell) -> Iterable[Advice]:
def _load_tree_from_run_cell(self, run_cell: RunCell, *, parent_tree: Tree | None = None) -> Failure | None:
path = run_cell.maybe_notebook_path()
if path is None:
return # malformed run cell already reported
return None # malformed run cell already reported
notebook = self._load_source_from_path(path)
if notebook is not None:
yield from self._load_tree_from_notebook(notebook, False)
return self._parse_trees(notebook, False, parent_tree=parent_tree)
return None

def _load_source_from_path(self, path: Path | None) -> Notebook | None:
if path is None:
Expand All @@ -302,11 +309,6 @@ def _load_source_from_path(self, path: Path | None) -> Notebook | None:
return None
return Notebook.parse(path, source, language)

def _linter(self, language: Language) -> Linter:
if language is Language.PYTHON:
return self._python_linter
return self._context.linter(language)

@staticmethod
def name() -> str:
return "notebook-linter"
Expand Down Expand Up @@ -417,8 +419,6 @@ def _lint_file(self) -> Iterable[Advice]:
else:
try:
linter = self._ctx.linter(language)
if self._inherited_tree is not None and isinstance(linter, PythonSequentialLinter):
linter.append_tree(self._inherited_tree)
yield from linter.lint(self._content)
except ValueError as err:
failure_message = f"Error while parsing content of {self._path.as_posix()}: {err}"
Expand All @@ -432,10 +432,6 @@ def _lint_notebook(self) -> Iterable[Advice]:
return
notebook = Notebook.parse(self._path, self._content, language)
notebook_linter = NotebookLinter(
self._ctx,
self._path_lookup,
self._session_state,
notebook,
self._inherited_tree,
self._ctx, self._path_lookup, self._session_state, notebook, self._inherited_tree
)
yield from notebook_linter.lint()
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ def build_inherited_context(self, child_path: Path) -> InheritedContext:
# append nodes
node_line = base_node.node.lineno
nodes = tree.nodes_between(last_line + 1, node_line - 1)
context.tree.append_nodes(nodes)
context.tree.attach_child_nodes(nodes)
globs = tree.globals_between(last_line + 1, node_line - 1)
context.tree.append_globals(globs)
context.tree.extend_globals(globs)
last_line = node_line
# process node
child_context = self._build_inherited_context_from_node(base_node, child_path)
Expand All @@ -86,9 +86,9 @@ def build_inherited_context(self, child_path: Path) -> InheritedContext:
assert context.tree is not None, "Tree should be initialized"
if last_line < line_count:
nodes = tree.nodes_between(last_line + 1, line_count)
context.tree.append_nodes(nodes)
context.tree.attach_child_nodes(nodes)
globs = tree.globals_between(last_line + 1, line_count)
context.tree.append_globals(globs)
context.tree.extend_globals(globs)
return context

def _parse_and_extract_nodes(self) -> tuple[Tree | None, list[NodeBase], Iterable[DependencyProblem]]:
Expand Down
Loading