From d0351dac6cdf58176fc11cea246c995298742bc1 Mon Sep 17 00:00:00 2001 From: Forest Gregg Date: Fri, 9 Aug 2024 14:45:28 -0400 Subject: [PATCH] modernize with pyupgrade --- dedupe/api.py | 2 +- dedupe/blocking.py | 2 +- dedupe/clustering.py | 8 ++------ dedupe/convenience.py | 2 +- dedupe/core.py | 2 +- dedupe/datamodel.py | 2 +- dedupe/predicates.py | 8 ++++---- dedupe/variables/base.py | 4 ++-- 8 files changed, 13 insertions(+), 17 deletions(-) diff --git a/dedupe/api.py b/dedupe/api.py index 03b63554..50101c84 100644 --- a/dedupe/api.py +++ b/dedupe/api.py @@ -867,7 +867,7 @@ def blocks(self, data): con.execute("ROLLBACK") con.close() - def score(self, blocks: Blocks) -> Generator[Scores, None, None]: + def score(self, blocks: Blocks) -> Generator[Scores]: """ Scores groups of pairs of records. Yields structured numpy arrays representing pairs of records in the group and the associated diff --git a/dedupe/blocking.py b/dedupe/blocking.py index 69ed8b1b..7365139e 100644 --- a/dedupe/blocking.py +++ b/dedupe/blocking.py @@ -57,7 +57,7 @@ def __init__(self, predicates: Iterable[dedupe.predicates.Predicate]) -> None: def __call__( self, records: Iterable[Record], target: bool = False - ) -> Generator[tuple[str, RecordID], None, None]: + ) -> Generator[tuple[str, RecordID]]: """ Generate the predicates for records. Yields tuples of (predicate, record_id). 
diff --git a/dedupe/clustering.py b/dedupe/clustering.py index 82719417..f6e148ce 100644 --- a/dedupe/clustering.py +++ b/dedupe/clustering.py @@ -17,9 +17,7 @@ logger = logging.getLogger(__name__) -def connected_components( - edgelist: Scores, max_components: int -) -> Generator[Scores, None, None]: +def connected_components(edgelist: Scores, max_components: int) -> Generator[Scores]: if len(edgelist) == 0: raise StopIteration() @@ -51,9 +49,7 @@ def connected_components( edgelist._mmap.close() # type: ignore -def _connected_components( - edgelist: Scores, max_components: int -) -> Generator[Scores, None, None]: +def _connected_components(edgelist: Scores, max_components: int) -> Generator[Scores]: component_stops = union_find(edgelist) start = 0 diff --git a/dedupe/convenience.py b/dedupe/convenience.py index 9b6f5a95..9ec23eb0 100644 --- a/dedupe/convenience.py +++ b/dedupe/convenience.py @@ -161,7 +161,7 @@ def console_label(deduper: dedupe.api.ActiveMatching) -> None: # pragma: no cov for record in record_pair: for field in fields: - line = "{} : {}".format(field, record[field]) + line = f"{field} : {record[field]}" _print(line) _print() _print(f"{n_match}/10 positive, {n_distinct}/10 negative") diff --git a/dedupe/core.py b/dedupe/core.py index 710daf97..90c7aaf9 100644 --- a/dedupe/core.py +++ b/dedupe/core.py @@ -218,7 +218,7 @@ def scoreGazette( featurizer: FeaturizerFunction, classifier: Classifier, num_cores: int = 1, -) -> Generator[Scores, None, None]: +) -> Generator[Scores]: first, record_pairs = peek(record_pairs) if first is None: return # terminate iteration diff --git a/dedupe/datamodel.py b/dedupe/datamodel.py index 1b84b8af..c20f9bb5 100644 --- a/dedupe/datamodel.py +++ b/dedupe/datamodel.py @@ -86,7 +86,7 @@ def __len__(self) -> int: @property def _field_comparators( self, - ) -> Generator[tuple[str, Comparator, int, int], None, None]: + ) -> Generator[tuple[str, Comparator, int, int]]: start = 0 stop = 0 for var in self.field_variables: diff 
--git a/dedupe/predicates.py b/dedupe/predicates.py index 2abe2149..02182244 100644 --- a/dedupe/predicates.py +++ b/dedupe/predicates.py @@ -50,7 +50,7 @@ def __iter__(self): yield self def __repr__(self) -> str: - return "{}: {}".format(self.type, self.__name__) + return f"{self.type}: {self.__name__}" def __hash__(self) -> int: try: @@ -83,7 +83,7 @@ class SimplePredicate(Predicate): def __init__(self, func: PredicateFunction, field: str): self.func = func - self.__name__ = "({}, {})".format(func.__name__, field) + self.__name__ = f"({func.__name__}, {field})" self.field = field def __call__(self, record: RecordDict, **kwargs) -> frozenset[str]: @@ -107,7 +107,7 @@ class ExistsPredicate(Predicate): type = "ExistsPredicate" def __init__(self, field: str): - self.__name__ = "(Exists, {})".format(field) + self.__name__ = f"(Exists, {field})" self.field = field @staticmethod @@ -129,7 +129,7 @@ class IndexPredicate(Predicate): _cache: dict[Any, frozenset[str]] def __init__(self, threshold: float, field: str): - self.__name__ = "({}, {})".format(threshold, field) + self.__name__ = f"({threshold}, {field})" self.field = field self.threshold = threshold self.index = None diff --git a/dedupe/variables/base.py b/dedupe/variables/base.py index c7f256eb..fd54ba65 100644 --- a/dedupe/variables/base.py +++ b/dedupe/variables/base.py @@ -44,7 +44,7 @@ class DerivedType(Variable): type = "Derived" def __init__(self, name: str, var_type: str, **kwargs): - self.name = "({}: {})".format(str(name), str(var_type)) + self.name = f"({str(name)}: {str(var_type)})" super().__init__(**kwargs) @@ -59,7 +59,7 @@ def __init__(self, field: str, name: str | None = None, has_missing: bool = Fals self.field = field if name is None: - self.name = "({}: {})".format(self.field, self.type) + self.name = f"({self.field}: {self.type})" else: self.name = name