Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

setuptools plugin solution for variables #1121

Closed
wants to merge 15 commits into from
22 changes: 15 additions & 7 deletions dedupe/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pkgutil import extend_path

__path__ = extend_path(__path__, __name__)

from dedupe._init import * # noqa
from dedupe.api import ( # noqa: F401
Dedupe,
Gazetteer,
RecordLink,
StaticDedupe,
StaticGazetteer,
StaticRecordLink,
)
from dedupe.convenience import ( # noqa: F401
canonicalize,
console_label,
training_data_dedupe,
training_data_link,
)
from dedupe.serializer import read_training, write_training # noqa: F401
15 changes: 0 additions & 15 deletions dedupe/_init.py

This file was deleted.

33 changes: 25 additions & 8 deletions dedupe/datamodel.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import copyreg
import pkgutil
import sys
import types
from typing import TYPE_CHECKING, cast

Expand All @@ -12,10 +12,10 @@
from dedupe.variables.base import MissingDataType, Variable
from dedupe.variables.interaction import InteractionType

for _, module, _ in pkgutil.iter_modules( # type: ignore
dedupe.variables.__path__, "dedupe.variables."
):
__import__(module)
if sys.version_info >= (3, 8):
from importlib import metadata as importlib_metadata
else:
import importlib_metadata

if TYPE_CHECKING:
from typing import Generator, Iterable, Sequence
Expand All @@ -28,7 +28,14 @@
)
from dedupe.predicates import Predicate

VARIABLE_CLASSES = {k: v for k, v in FieldVariable.all_subclasses() if k}

def load_setuptools_entrypoints(group: str) -> list:
    """Load every installed entry point registered under ``group``.

    Walks the metadata of all installed distributions and, for each entry
    point whose group matches, imports and collects the object it refers to.

    Args:
        group: Entry-point group name to match (for example
            ``"dedupevariables"``).

    Returns:
        The loaded objects in discovery order; an empty list when no
        installed distribution declares an entry point in ``group``.
    """
    variables = []
    # distributions() is already iterable; consume it lazily instead of
    # materialising a throwaway list first.
    for dist in importlib_metadata.distributions():
        for ep in dist.entry_points:
            if ep.group == group:
                # ep.load() imports the target module and returns the
                # referenced attribute.
                variables.append(ep.load())
    return variables


class DataModel(object):
Expand Down Expand Up @@ -145,6 +152,16 @@ def __setstate__(self, d):
def typify_variables(
variable_definitions: Iterable[VariableDefinition],
) -> tuple[list[FieldVariable], list[Variable]]:

variable_types = {}
for variablename in dedupe.variables.__all__:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you copied this from my PR, I'm not positive that `__all__` is actually the correct solution; perhaps I'm abusing it and it is actually intended for something else.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its intended use is to control the behavior of `import *`, but I think this works fine.

variable = getattr(dedupe.variables, variablename)
variable_types[variable.type] = variable

plugin_vars = load_setuptools_entrypoints("dedupevariables")
for var in plugin_vars:
variable_types[var.type] = var

primary_variables: list[FieldVariable] = []
all_variables: list[Variable] = []
only_custom = True
Expand Down Expand Up @@ -181,11 +198,11 @@ def typify_variables(
]

try:
variable_class = VARIABLE_CLASSES[variable_type]
variable_class = variable_types[variable_type]
except KeyError:
raise KeyError(
"Field type %s not valid. Valid types include %s"
% (definition["type"], ", ".join(VARIABLE_CLASSES))
% (definition["type"], ", ".join(variable_types))
)

variable_object = variable_class(definition)
Expand Down
25 changes: 23 additions & 2 deletions dedupe/variables/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,24 @@
from pkgutil import extend_path
# flake8: noqa
from dedupe.variables.base import CustomType
from dedupe.variables.categorical_type import CategoricalType
from dedupe.variables.exact import ExactType
from dedupe.variables.exists import ExistsType
from dedupe.variables.interaction import InteractionType
from dedupe.variables.latlong import LatLongType
from dedupe.variables.price import PriceType
from dedupe.variables.set import SetType
from dedupe.variables.string import ShortStringType, StringType, TextType

__path__ = extend_path(__path__, __name__)
__all__ = [
"CustomType",
"CategoricalType",
"ExactType",
"ExistsType",
"InteractionType",
"LatLongType",
"PriceType",
"SetType",
"ShortStringType",
"StringType",
"TextType",
]
11 changes: 1 addition & 10 deletions dedupe/variables/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dedupe import predicates

if TYPE_CHECKING:
from typing import Any, ClassVar, Generator, Iterable, Optional, Sequence, Type
from typing import Any, ClassVar, Iterable, Sequence, Type

from dedupe._typing import Comparator, PredicateFunction, VariableDefinition

Expand Down Expand Up @@ -47,15 +47,6 @@ def __getstate__(self) -> dict[str, Any]:

return odict

@classmethod
def all_subclasses(
cls,
) -> Generator[tuple[Optional[str], Type["Variable"]], None, None]:
for q in cls.__subclasses__():
yield getattr(q, "type", None), q
for p in q.all_subclasses():
yield p


class DerivedType(Variable):
type = "Derived"
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "dedupe"
description = "A python library for accurate and scaleable data deduplication and entity-resolution"
version = "2.0.19"
version = "3.0.0"
readme = "README.md"
requires-python = ">=3.7"
license = {file = "LICENSE"}
Expand Down Expand Up @@ -51,7 +51,7 @@ MailingList = "https://groups.google.com/forum/#!forum/open-source-deduplication


[build-system]
requires = ["setuptools==63",
requires = ["setuptools",
"wheel",
"cython"]
build-backend = "setuptools.build_meta"
Expand Down