Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Match on module aliases for auto import suggestions #730

Merged
merged 14 commits into from
Jan 30, 2024
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# **Upcoming release**

- ...
- #730 Match on module aliases for autoimport suggestions

# Release 1.12.0

Expand Down
9 changes: 7 additions & 2 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ Will be used if [tool.rope] is configured.

[tool.rope]
split_imports = true
autoimport.aliases = [
['dt', 'datetime'],
['mp', 'multiprocessing'],
]


config.py
---------
Expand Down Expand Up @@ -48,9 +53,9 @@ Additionally, you can run an executable function at startup of rope.
pytool.toml
-----------
If neither a config.py nor a pyproject.toml is present, rope will use a pytool.toml.
It follows the exact same syntax of the pyproject.toml.
It follows the exact same syntax as ``pyproject.toml``.

- Mac OS X: ``~/Library/Application Support/pytool.toml.``
- Mac OS X: ``~/Library/Application Support/pytool.toml``.
- Unix: ``~/.config/pytool.toml`` or in $XDG_CONFIG_HOME, if defined
- Windows: ``C:\Users\<username>\AppData\Local\pytool.toml``

Expand Down
1 change: 1 addition & 0 deletions rope/base/builtins.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""This module tries to support builtin types and functions."""

import inspect
import io

Expand Down
1 change: 1 addition & 0 deletions rope/base/fscommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
`MercurialCommands` for example.

"""

import os
import re
import shutil
Expand Down
1 change: 1 addition & 0 deletions rope/base/libutils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""A few useful functions for using rope as a library"""

import os.path

import rope.base.project
Expand Down
1 change: 1 addition & 0 deletions rope/base/oi/soi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package.

"""

import rope.base.builtins # Use full qualification for clarity.
from rope.base import arguments, evaluate, pynames, pyobjects, utils
from rope.base.oi.type_hinting.factory import get_type_hinting_factory
Expand Down
1 change: 1 addition & 0 deletions rope/base/oi/transform.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provides classes for persisting `PyObject`"""

import os
import re

Expand Down
1 change: 1 addition & 0 deletions rope/base/oi/type_hinting/providers/docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- https://groups.google.com/d/topic/rope-dev/LCFNN98vckI/discussion

"""

import re

from rope.base.oi.type_hinting import utils
Expand Down
1 change: 1 addition & 0 deletions rope/base/oi/type_hinting/providers/numpydocstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
https://github.com/davidhalter/jedi/blob/b489019f5bd5750051122b94cc767df47751ecb7/jedi/evaluate/docstrings.py
Thanks to @davidhalter for this utils under MIT License.
"""

import re

from rope.base.ast import literal_eval
Expand Down
27 changes: 26 additions & 1 deletion rope/base/prefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,30 @@
from rope.base.resources import Folder


@dataclass
class AutoimportPrefs:
    """Preferences that apply to rope's autoimport feature."""

    # NOTE(review): the three options below are commented out in the original;
    # presumably placeholders for future settings -- confirm before enabling.
    # underlined: bool = field(
    #     default=False, description="Cache underlined (private) modules")
    # memory: bool = field(default=None, description="Cache in memory instead of disk")
    # parallel: bool = field(default=True, description="Use multiple processes to parse")

    # (alias, module) pairs; each pair lets autoimport suggest
    # ``import <module> as <alias>``.
    aliases: List[Tuple[str, str]] = field(
        description=dedent("""
            Aliases for module names. For example, `[('np', 'numpy')]` makes rope recommend
            ``import numpy as np``.
        """),
        # A factory is used so every instance gets its own list.
        default_factory=lambda: [
            ("np", "numpy"),
            ("pd", "pandas"),
            ("plt", "matplotlib.pyplot"),
            ("sns", "seaborn"),
            ("tf", "tensorflow"),
            ("sk", "sklearn"),
            ("sm", "statsmodels"),
        ],
    )


@dataclass
class Prefs:
"""Class to store rope preferences."""
Expand Down Expand Up @@ -139,7 +163,6 @@ class Prefs:
appear in the importing namespace.
"""),
)

prefer_module_from_imports: bool = field(
default=False,
description=dedent("""
Expand Down Expand Up @@ -206,6 +229,8 @@ class Prefs:
Can only be set in config.py.
"""),
)
autoimport: AutoimportPrefs = field(
default_factory=AutoimportPrefs, description="Preferences for Autoimport")

def set(self, key: str, value: Any):
"""Set the value of `key` preference to `value`."""
Expand Down
1 change: 1 addition & 0 deletions rope/base/simplify.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

This module is here to help source code analysis.
"""

import re

from rope.base import codeanalyze, utils
Expand Down
5 changes: 4 additions & 1 deletion rope/base/versioning.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import dataclasses
import hashlib
import importlib.util
import json
Expand Down Expand Up @@ -31,7 +32,9 @@ def _get_prefs_data(project) -> str:
del prefs_data["project_opened"]
del prefs_data["callbacks"]
del prefs_data["dependencies"]
return json.dumps(prefs_data, sort_keys=True, indent=2)
return json.dumps(
prefs_data, sort_keys=True, indent=2, default=lambda o: o.__dict__
)


def _get_file_content(module_name: str) -> str:
Expand Down
1 change: 1 addition & 0 deletions rope/contrib/autoimport/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""AutoImport module for rope."""

from .pickle import AutoImport as _PickleAutoImport
from .sqlite import AutoImport as _SqliteAutoImport

Expand Down
8 changes: 8 additions & 0 deletions rope/contrib/autoimport/defs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Definitions of types for the Autoimport program."""

import pathlib
from enum import Enum
from typing import NamedTuple, Optional
Expand Down Expand Up @@ -92,6 +93,13 @@ class Package(NamedTuple):
type: PackageType


class Alias(NamedTuple):
    """An (alias, module name) pair destined for the autoimport database."""

    # Short name used in place of the module, e.g. ``np``.
    alias: str
    # Module name the alias refers to, e.g. ``numpy``.
    modname: str


class Name(NamedTuple):
"""A Name to be added to the database."""

Expand Down
29 changes: 25 additions & 4 deletions rope/contrib/autoimport/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,11 @@ def delete_from(self) -> FinalQuery:
class Model(ABC):
@property
@abstractmethod
def table_name(self) -> str:
...
def table_name(self) -> str: ...

@property
@abstractmethod
def schema(self) -> Dict[str, str]:
...
def schema(self) -> Dict[str, str]: ...

@classmethod
def create_table(cls, connection):
Expand All @@ -88,6 +86,29 @@ class Metadata(Model):
objects = Query(table_name, columns)


class Alias(Model):
table_name = "aliases"
schema = {
"alias": "TEXT",
"module": "TEXT",
}
columns = list(schema.keys())
objects = Query(table_name, columns)

@classmethod
def create_table(cls, connection):
super().create_table(connection)
connection.execute(
"CREATE INDEX IF NOT EXISTS aliases_alias_nocase ON aliases(alias COLLATE NOCASE)"
)

modules = Query(
"(SELECT DISTINCT aliases.*, package, source, type FROM aliases INNER JOIN names on aliases.module = names.module)",

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a DB expert, but the names table can comprise 10,000 - 100,000 rows, so I am wondering if we should run this inner join on every autoimport request (which can happen with every keystroke when rope is run inside of a language server).
Can we quickly test how much adding alias support slows down search?
Alternatively, I'd make sure aliases only contains the aliases to modules that exist in names

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The joins are pretty fast, I made a notebook to test it out.

The join time should be dominated by the Alias table, not the Names table, because the Names table has an index on the module column. Also, here we're including a where clause which makes the left side of the join even smaller. Most DB engines are pretty good about pushing down the filter past the join and sqlite3 seems to handle it well.

I thought about this a little bit before testing out this implementation. I see 3 main paths forward:

  • The join approach
  • Materialize the availability information in the Aliases table as a column, we'd need to be careful to always update the Aliases table whenever updating the cache. This would probably be the fastest approach, but more work.
  • Keep the aliases in memory as a list or dict. We'd basically be implementing the join logic manually, but it might be really fast if the # of Aliases is very small. Then again, if the # of Aliases is very small the join should also be very fast.

@tkrabel what do you think?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current approach has the benefit that we never have to do any updates on the aliases table. The names table is the source of truth for what is available to the user.
If you're happy with the performance, then let's go with the current approach.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MrBago thanks for testing the performance in a notebook. The notebook brings up something that is interesting/surprising to me, in that the module search_by_name_like query is much slower than what I was expecting. A prefix search using an index should not have been that slow.

That is an unrelated issue from this PR though, so I've created another ticket for that, #736, but with the fixed index the Alias query should hopefully become faster as well. 883ms for an inner join between a large table and a very small table doesn't smell right to me; that seems to indicate a full table scan as well.

I'll see if I can fix this tomorrow, but in the meantime, apologies, I'll be holding off on merging this PR until that is fixed, and then we can see the new performance impact.

columns + ["package", "source", "type"],
)
search_modules_with_alias = modules.where("alias LIKE (?)")


class Name(Model):
table_name = "names"
schema = {
Expand Down
1 change: 0 additions & 1 deletion rope/contrib/autoimport/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
sqlite-based storage backend (rope.contrib.autoimport.sqlite.AutoImport).
"""


import contextlib
import re

Expand Down
15 changes: 15 additions & 0 deletions rope/contrib/autoimport/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from rope.contrib.autoimport import models
from rope.contrib.autoimport.defs import (
ModuleFile,
Alias,
Name,
NameType,
Package,
Expand Down Expand Up @@ -330,6 +331,13 @@ def _search_module(
yield SearchResult(
f"import {module}", module, source, NameType.Module.value
)
for alias, module, source in self._execute(
models.Alias.search_modules_with_alias.select("alias", "module", "source"),
(name,),
):
yield SearchResult(
f"import {module} as {alias}", alias, source, NameType.Module.value
)

def get_modules(self, name) -> List[str]:
"""Get the list of modules that have global `name`."""
Expand Down Expand Up @@ -471,11 +479,14 @@ def clear_cache(self):
"""
with self.connection:
self._execute(models.Name.objects.drop_table())
self._execute(models.Alias.objects.drop_table())
self._execute(models.Package.objects.drop_table())
self._execute(models.Metadata.objects.drop_table())
models.Name.create_table(self.connection)
models.Alias.create_table(self.connection)
models.Package.create_table(self.connection)
models.Metadata.create_table(self.connection)
self.add_aliases(self.project.prefs.autoimport.aliases)
data = (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So if I understand this correctly, this will add the aliases into the database only when the database is created. IIUC, this would need to depend on the database being re-created when preference changes.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right. I didn't look at the different ways that prefs can change, we could add a method to clear the aliases table and reset it and invoke that when the prefs are updated.

versioning.calculate_version_hash(self.project),
json.dumps(versioning.get_version_hash_data(self.project)),
Expand Down Expand Up @@ -595,6 +606,10 @@ def _convert_name(name: Name) -> tuple:
name.name_type.value,
)

def add_aliases(self, aliases: Iterable[Alias]):
    """Insert `aliases` into the aliases table.

    Nothing is executed when `aliases` is falsy (for example ``None`` or
    an empty list).
    """
    if not aliases:
        return
    insert_query = models.Alias.objects.insert_into()
    self._executemany(insert_query, aliases)

def _add_names(self, names: Iterable[Name]):
if names is not None:
self._executemany(
Expand Down
1 change: 1 addition & 0 deletions rope/contrib/autoimport/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Utility functions for the autoimport code."""

import pathlib
import sys
from collections import OrderedDict
Expand Down
1 change: 1 addition & 0 deletions rope/contrib/finderrors.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* ... ;-)

"""

from rope.base import ast, evaluate, pyobjects


Expand Down
1 change: 1 addition & 0 deletions rope/contrib/fixmodnames.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
argument.

"""

from rope.base import taskhandle
from rope.contrib import changestack
from rope.refactor import rename
Expand Down
1 change: 1 addition & 0 deletions rope/refactor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
monitoring the progress of refactorings.

"""

from rope.refactor.importutils import ImportOrganizer # noqa
from rope.refactor.topackage import ModuleToPackage # noqa

Expand Down
1 change: 1 addition & 0 deletions rope/refactor/importutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
refactorings or as a separate task.

"""

import rope.base.codeanalyze
import rope.base.evaluate
from rope.base import libutils
Expand Down
6 changes: 3 additions & 3 deletions rope/refactor/inline.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,9 +406,9 @@ def _get_definition_params(self):
"Cannot inline functions with list and keyword arguments."
)
if self.pyfunction.get_kind() == "classmethod":
paramdict[
definition_info.args_with_defaults[0][0]
] = self.pyfunction.parent.get_name()
paramdict[definition_info.args_with_defaults[0][0]] = (
self.pyfunction.parent.get_name()
)
return paramdict

def get_function_name(self):
Expand Down
1 change: 1 addition & 0 deletions rope/refactor/move.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
based on inputs.

"""

from __future__ import annotations

import typing
Expand Down
1 change: 0 additions & 1 deletion rope/refactor/occurrences.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
arguments
"""


import contextlib
import re

Expand Down
1 change: 1 addition & 0 deletions rope/refactor/similarfinder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""This module can be used for finding similar code"""

import re

import rope.base.builtins # Use full qualification for clarity.
Expand Down
25 changes: 25 additions & 0 deletions ropetest/contrib/autoimporttest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest

from rope.contrib.autoimport import sqlite as autoimport
from rope.contrib.autoimport.defs import Alias
from ropetest import testutils


Expand Down Expand Up @@ -124,6 +125,30 @@ def test_search_module(self):
self.assertIn(import_statement, self.importer.search("os"))
self.assertIn(import_statement, self.importer.search("o"))

def test_search_alias(self):
    """An alias is suggested when its module is indexed, by exact or prefix search."""
    self.mod2.write("myvar = None\n")
    self.importer.update_resource(self.mod2)
    alias_rows = [
        ("noMatch", "does_not_exists_this"),
        ("hasMatch", "pkg.mod2"),
    ]
    self.importer.add_aliases(alias_rows)

    # "noMatch" points at a module this test never indexes, so no
    # suggestion is expected for it.
    unmatched = self.importer.search("noMatch", exact_match=True)
    self.assertEqual([], unmatched)

    expected = ("import pkg.mod2 as hasMatch", "hasMatch")
    exact_hits = self.importer.search("hasMatch", exact_match=True)
    self.assertIn(expected, exact_hits)
    # Prefixes of the alias should surface the same suggestion.
    for prefix in ("hasM", "h"):
        self.assertIn(expected, self.importer.search(prefix))

def test_alias_updated_from_prefs(self):
    """Aliases configured in the project prefs are searchable after clear_cache()."""
    self.mod2.write("myvar = None\n")
    configured_aliases = [("mod2_alias", "pkg.mod2")]
    self.project.prefs.autoimport.aliases = configured_aliases
    # The aliases are set before clear_cache() so the rebuilt cache sees them.
    self.importer.clear_cache()
    self.importer.update_resource(self.mod2)

    expected = ("import pkg.mod2 as mod2_alias", "mod2_alias")
    exact_hits = self.importer.search("mod2_alias", exact_match=True)
    self.assertIn(expected, exact_hits)
    prefix_hits = self.importer.search("mod2", exact_match=False)
    self.assertIn(expected, prefix_hits)

def test_search(self):
self.importer.update_module("typing")
import_statement = ("from typing import Dict", "Dict")
Expand Down
Loading