diff --git a/distances/_discounted_levenshtein.py b/distances/_discounted_levenshtein.py
index d2d5ed3..62603fc 100644
--- a/distances/_discounted_levenshtein.py
+++ b/distances/_discounted_levenshtein.py
@@ -175,7 +175,7 @@ def _alignment_matrix(
     else:
         discount_from = [1, 1]
 
-    d_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.float_)
+    d_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.float64)
     if backtrace:
         trace_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.int8)
     for i in range(1, src_len + 1):
diff --git a/distances/_editex.py b/distances/_editex.py
index fa1b660..d34dd28 100644
--- a/distances/_editex.py
+++ b/distances/_editex.py
@@ -23,7 +23,7 @@
 from typing import Any, Tuple, cast
 from unicodedata import normalize as unicode_normalize
 
-from numpy import float_ as np_float
+from numpy import float64 as np_float
 from numpy import zeros as np_zeros
 
 from ._distance import _Distance
diff --git a/distances/_levenshtein.py b/distances/_levenshtein.py
index c35f47d..42ee2bc 100644
--- a/distances/_levenshtein.py
+++ b/distances/_levenshtein.py
@@ -138,7 +138,7 @@ def _alignment_matrix(
     tar_len = len(tar)
     max_len = max(src_len, tar_len)
 
-    d_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.float_)
+    d_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.float64)
     if backtrace:
         trace_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.int8)
     for i in range(src_len + 1):
diff --git a/distances/_typo.py b/distances/_typo.py
index b7ea7d9..2b29db0 100644
--- a/distances/_typo.py
+++ b/distances/_typo.py
@@ -23,7 +23,7 @@
 from math import log
 from typing import Any, Dict, Tuple, cast
 
-from numpy import float_ as np_float
+from numpy import float64 as np_float
 from numpy import zeros as np_zeros
 
 from ._distance import _Distance
diff --git a/name_matching/distance_metrics.py b/name_matching/distance_metrics.py
index cc2146e..0d1a50b 100644
--- a/name_matching/distance_metrics.py
+++ b/name_matching/distance_metrics.py
@@ -1,34 +1,37 @@
-from distances import Indel, DiscountedLevenshtein, CormodeLZ, Tichy, IterativeSubString, BaulieuXIII, Clement, DiceAsymmetricI, KuhnsIII, Overlap, PearsonII, WeightedJaccard, WarrensIV, Bag, RougeL, RatcliffObershelp, NCDbz2, FuzzyWuzzyPartialString, FuzzyWuzzyTokenSort, FuzzyWuzzyTokenSet, Editex, Typo,LIG3, SSK, Levenshtein, DoubleMetaphone, RefinedSoundex, PhoneticDistance
+import distances as nm_dist
 from collections import defaultdict
 
-def make_distance_metrics(indel=False,
-                          discounted_levenshtein=False,
-                          tichy=False,
-                          cormodel_z=False,
-                          iterative_sub_string=False,
-                          baulieu_xiii=False,
-                          clement=False,
-                          dice_asymmetrici=False,
-                          kuhns_iii=False,
-                          overlap=False,
-                          pearson_ii=False,
-                          weighted_jaccard=False,
-                          warrens_iv=False,
-                          bag=False,
-                          rouge_l=False,
-                          ratcliff_obershelp=False,
-                          ncd_bz2=False,
-                          fuzzy_wuzzy_partial_string=False,
-                          fuzzy_wuzzy_token_sort=False,
-                          fuzzy_wuzzy_token_set=False,
-                          editex=False,
-                          typo=False,
-                          lig_3=False,
-                          ssk=False,
-                          refined_soundex=False,
-                          double_metaphone=False) -> dict:
+
+def make_distance_metrics(
+    indel=False,
+    discounted_levenshtein=False,
+    tichy=False,
+    cormodel_z=False,
+    iterative_sub_string=False,
+    baulieu_xiii=False,
+    clement=False,
+    dice_asymmetrici=False,
+    kuhns_iii=False,
+    overlap=False,
+    pearson_ii=False,
+    weighted_jaccard=False,
+    warrens_iv=False,
+    bag=False,
+    rouge_l=False,
+    ratcliff_obershelp=False,
+    ncd_bz2=False,
+    fuzzy_wuzzy_partial_string=False,
+    fuzzy_wuzzy_token_sort=False,
+    fuzzy_wuzzy_token_set=False,
+    editex=False,
+    typo=False,
+    lig_3=False,
+    ssk=False,
+    refined_soundex=False,
+    double_metaphone=False,
+) -> dict:
     r"""
-    A function which returns a dict containing the distance metrics that should be 
+    A function which returns a dict containing the distance metrics that should be
     used during the fuzzy string matching

     Levenshtein edit distance
@@ -70,69 +73,69 @@ def make_distance_metrics(indel=False,
     Parameters
     ----------
     indel: bool
-        Boolean indicating whether the Indel method should be used during the 
-        fuzzy name matching. The indel method is equal to a regular levenshtein 
-        distance with a twice as high substitution weight 
+        Boolean indicating whether the Indel method should be used during the
+        fuzzy name matching. The Indel method is equal to a regular Levenshtein
+        distance with a substitution weight that is twice as high
         default=False
     discounted_levenshtein: bool
-        Boolean indicating whether the DiscountedLevenshtein method should be used 
+        Boolean indicating whether the DiscountedLevenshtein method should be used
         during the fuzzy name matching. Equal to the regular levenshtein distance,
         only errors later in the string are counted at a discounted rate. To for
         instance limit the importance of suffix differences
         default=False
     tichy: bool
-        Boolean indicating whether the Tichy method should be used during the 
+        Boolean indicating whether the Tichy method should be used during the
         fuzzy name matching. This algorithm provides a shortest edit distance based
         on substring and add operations.
         default=False
     cormodel_z: bool
-        Boolean indicating whether the CormodeLZ method should be used during the 
-        fuzzy name matching. The CormodeLZ distance between strings x and y, is the 
-        minimum number of single characters or substrings of y or of the partially 
+        Boolean indicating whether the CormodeLZ method should be used during the
+        fuzzy name matching. The CormodeLZ distance between strings x and y is the
+        minimum number of single characters or substrings of y or of the partially
         built string which are required to produce x from left to right.
         default=False
     iterative_sub_string: bool
-        Boolean indicating whether the IterativeSubString method should be used 
-        during the fuzzy name matching. A method that counts the similarities 
+        Boolean indicating whether the IterativeSubString method should be used
+        during the fuzzy name matching. A method that counts the similarities
         between two strings substrings and subtracts the differences taking into
         account the winkler similarity between the string and the substring.
         default=False
     baulieu_xiii: bool
-        Boolean indicating whether the BaulieuXIII method should be used during 
+        Boolean indicating whether the BaulieuXIII method should be used during
         the fuzzy name matching. The Baulieu XIII distance between two strings is
         given by the following formula:
        (|X \ Y| + |Y \ X|) / ( |X ∩ Y| + |X \ Y| + |Y \ X| + |X ∩ Y| ∙ (|X ∩ Y| - 4)^2)
         default=False
     clement: bool
         Boolean indicating whether the Clement method should be used during the
-        fuzzy name matching. The Clement distance between two strings is given 
+        fuzzy name matching. The Clement distance between two strings is given
         by the following formula:
         (|X ∩ Y|/|X|)*(1-|X|/|N|) + (|(N \ X) \ Y|/|N \ X|) * (1-|N \ X|/|N|)
         default=False
     dice_asymmetrici: bool
-        Boolean indicating whether the DiceAsymmetricI method should be used during 
+        Boolean indicating whether the DiceAsymmetricI method should be used during
         the fuzzy name matching. The Dice asymmetric similarity is given be |X ∩ Y|/|X|
         default=False
     kuhns_iii: bool
-        Boolean indicating whether the KuhnsIII method should be used during the 
+        Boolean indicating whether the KuhnsIII method should be used during the
         fuzzy name matching
         default=False
     overlap: bool
-        Boolean indicating whether the Overlap method should be used during the 
+        Boolean indicating whether the Overlap method should be used during the
         fuzzy name matching. The overlap distance is given by:
         |X ∩ Y|/min(|X|,|Y|)
         default=True
     pearson_ii: bool
-        Boolean indicating whether the PearsonII method should be used during the 
-        fuzzy name matching. This algorithm is based on the Phi coefficient or the 
+        Boolean indicating whether the PearsonII method should be used during the
+        fuzzy name matching. This algorithm is based on the Phi coefficient or the
         mean square contingency
         default=False
     weighted_jaccard: bool
-        Boolean indicating whether the WeightedJaccard method should be used during 
-        the fuzzy name matching. This is the Jaccard distance only using a wheighing 
+        Boolean indicating whether the WeightedJaccard method should be used during
+        the fuzzy name matching. This is the Jaccard distance only using a weighting
         for the differences of 3.
         default=True
     warrens_iv: bool
-        Boolean indicating whether the WarrensIV method should be used during the 
+        Boolean indicating whether the WarrensIV method should be used during the
         fuzzy name matching
         default=False
     bag: bool
@@ -141,54 +144,54 @@ def make_distance_metrics(indel=False,
         a similarity tree structure.
         default=False
     rouge_l: bool
-        Boolean indicating whether the ROUGE-L method should be used during the 
+        Boolean indicating whether the ROUGE-L method should be used during the
         fuzzy name matching. The ROGUE-L method is a measure that counts the longest
         substring between to strings
         default=False
     ratcliff_obershelp: bool
-        Boolean indicating whether the RatcliffObershelp method should be used 
+        Boolean indicating whether the RatcliffObershelp method should be used
         during the fuzzy name matching. This method finds the longest common substring
-        and evaluates the longest common substrings to the right and the left of the 
+        and evaluates the longest common substrings to the right and the left of the
         original longest common substring
         default=True
     ncd_bz2: bool
-        Boolean indicating whether the NCDbz2 method should be used during the 
-        fuzzy name matching. Applies the Burrows-Wheeler transform to the strings and 
+        Boolean indicating whether the NCDbz2 method should be used during the
+        fuzzy name matching. Applies the Burrows-Wheeler transform to the strings and
         subsequently returns the normalised compression distance.
         default=False
     fuzzy_wuzzy_partial_string: bool
         Boolean indicating whether the FuzzyWuzzyPartialString method should be used
-        during the fuzzy name matching. This methods takes the length of the longest 
-        common substring and divides it over the minimum of the length of each of 
+        during the fuzzy name matching. This method takes the length of the longest
+        common substring and divides it over the minimum of the length of each of
         the two strings.
         default=False
     fuzzy_wuzzy_token_sort: bool
-        Boolean indicating whether the FuzzyWuzzyTokenSort method should be used 
+        Boolean indicating whether the FuzzyWuzzyTokenSort method should be used
         during the fuzzy name matching. This tokenizes the words in the string and
         sorts them, subsequently a hamming distance is calculated
         default=True
     fuzzy_wuzzy_token_set: bool
-        Boolean indicating whether the FuzzyWuzzyTokenSet method should be used 
-        during the fuzzy name matching. This method tokenizes the strings and 
-        find the largest intersection of the two substrings and divides it over 
+        Boolean indicating whether the FuzzyWuzzyTokenSet method should be used
+        during the fuzzy name matching. This method tokenizes the strings and
+        finds the largest intersection of the two substrings and divides it over
         the length of the shortest string
         default=False
     editex: bool
-        Boolean indicating whether the Editex method should be used during the 
+        Boolean indicating whether the Editex method should be used during the
         fuzzy name matching
         default=True
     typo: bool
-        Boolean indicating whether the Typo method should be used during the 
+        Boolean indicating whether the Typo method should be used during the
         fuzzy name matching. The typo distance is calculated based on the distance
         on a keyboard between edits.
         default=False
     lig_3: bool
-        Boolean indicating whether the LIG3 method should be used during the fuzzy 
+        Boolean indicating whether the LIG3 method should be used during the fuzzy
         name matching
         default=False
     ssk: bool
-        Boolean indicating whether the SSK method should be used during the fuzzy 
-        name matching. The ssk algorithm looks at the string kernel generated by all 
+        Boolean indicating whether the SSK method should be used during the fuzzy
+        name matching. The SSK algorithm looks at the string kernel generated by all
         the possible different subsequences present between the two strings.
         default=False
     refined_soundex: bool
@@ -205,62 +208,68 @@ def make_distance_metrics(indel=False,
     """
     distance_metrics = defaultdict(list)
     if indel:
-        distance_metrics['Levenshtein'].append(Indel())
+        distance_metrics["Levenshtein"].append(nm_dist.Indel())
     if discounted_levenshtein:
-        distance_metrics['Levenshtein'].append(
-            DiscountedLevenshtein())
+        distance_metrics["Levenshtein"].append(nm_dist.DiscountedLevenshtein())
     if cormodel_z:
-        distance_metrics['block'].append(CormodeLZ())
+        distance_metrics["block"].append(nm_dist.CormodeLZ())
     if tichy:
-        distance_metrics['block'].append(Tichy())
+        distance_metrics["block"].append(nm_dist.Tichy())
     if iterative_sub_string:
-        distance_metrics['Subsequence'].append(
-            IterativeSubString())
+        distance_metrics["Subsequence"].append(nm_dist.IterativeSubString())
     if baulieu_xiii:
-        distance_metrics['multiset'].append(BaulieuXIII())
+        distance_metrics["multiset"].append(nm_dist.BaulieuXIII())
     if clement:
-        distance_metrics['multiset'].append(Clement())
+        distance_metrics["multiset"].append(nm_dist.Clement())
     if dice_asymmetrici:
-        distance_metrics['multiset'].append(DiceAsymmetricI())
+        distance_metrics["multiset"].append(nm_dist.DiceAsymmetricI())
     if kuhns_iii:
-        distance_metrics['multiset'].append(KuhnsIII())
+        distance_metrics["multiset"].append(nm_dist.KuhnsIII())
     if overlap:
-        distance_metrics['multiset'].append(Overlap())
+        distance_metrics["multiset"].append(nm_dist.Overlap())
     if pearson_ii:
-        distance_metrics['multiset'].append(PearsonII())
+        distance_metrics["multiset"].append(nm_dist.PearsonII())
     if weighted_jaccard:
-        distance_metrics['multiset'].append(WeightedJaccard())
+        distance_metrics["multiset"].append(nm_dist.WeightedJaccard())
     if warrens_iv:
-        distance_metrics['multiset'].append(WarrensIV())
+        distance_metrics["multiset"].append(nm_dist.WarrensIV())
     if bag:
-        distance_metrics['multiset'].append(Bag())
+        distance_metrics["multiset"].append(nm_dist.Bag())
     if rouge_l:
-        distance_metrics['multiset'].append(RougeL())
+        distance_metrics["multiset"].append(nm_dist.RougeL())
     if ratcliff_obershelp:
-        distance_metrics['Subsequence'].append(
-            RatcliffObershelp())
+        distance_metrics["Subsequence"].append(nm_dist.RatcliffObershelp())
     if ncd_bz2:
-        distance_metrics['compression'].append(NCDbz2())
+        distance_metrics["compression"].append(nm_dist.NCDbz2())
     if fuzzy_wuzzy_partial_string:
-        distance_metrics['fuzzy'].append(
-            FuzzyWuzzyPartialString())
+        distance_metrics["fuzzy"].append(nm_dist.FuzzyWuzzyPartialString())
     if fuzzy_wuzzy_token_sort:
-        distance_metrics['fuzzy'].append(FuzzyWuzzyTokenSort())
+        distance_metrics["fuzzy"].append(nm_dist.FuzzyWuzzyTokenSort())
     if fuzzy_wuzzy_token_set:
-        distance_metrics['fuzzy'].append(FuzzyWuzzyTokenSet())
+        distance_metrics["fuzzy"].append(nm_dist.FuzzyWuzzyTokenSet())
     if editex:
-        distance_metrics['edit'].append(Editex())
+        distance_metrics["edit"].append(nm_dist.Editex())
     if typo:
-        distance_metrics['edit'].append(Typo())
+        distance_metrics["edit"].append(nm_dist.Typo())
     if lig_3:
-        distance_metrics['Levenshtein'].append(LIG3())
+        distance_metrics["Levenshtein"].append(nm_dist.LIG3())
     if ssk:
-        distance_metrics['Subsequence'].append(SSK())
+        distance_metrics["Subsequence"].append(nm_dist.SSK())
     if refined_soundex:
-        distance_metrics['phonetic'].append(PhoneticDistance(
-            transforms=RefinedSoundex(max_length=30), metric=Levenshtein(), encode_alpha=True))
+        distance_metrics["phonetic"].append(
+            nm_dist.PhoneticDistance(
+                transforms=nm_dist.RefinedSoundex(max_length=30),
+                metric=nm_dist.Levenshtein(),
+                encode_alpha=True,
+            )
+        )
     if double_metaphone:
-        distance_metrics['phonetic'].append(PhoneticDistance(
-            transforms=DoubleMetaphone(max_length=30), metric=Levenshtein(), encode_alpha=True))
+        distance_metrics["phonetic"].append(
+            nm_dist.PhoneticDistance(
+                transforms=nm_dist.DoubleMetaphone(max_length=30),
+                metric=nm_dist.Levenshtein(),
+                encode_alpha=True,
+            )
+        )
     return distance_metrics
diff --git a/name_matching/name_matcher.py b/name_matching/name_matcher.py
index ad2b97c..3030a44 100644
--- a/name_matching/name_matcher.py
+++ b/name_matching/name_matcher.py
@@ -1,10 +1,10 @@
-import unicodedata
-import functools
-import operator
-import re
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
+from operator import iconcat
+from functools import reduce
+from unicodedata import normalize
+from re import escape, sub
 from typing import Union, Tuple
 from itertools import compress
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -14,10 +14,9 @@
 class NameMatcher:
-
     """ A class for the name matching of data based on the strings in a single column. The NameMatcher
-    first applies a cosine similarity on the ngrams of the strings to get an approximate match followed 
+    first applies a cosine similarity on the ngrams of the strings to get an approximate match followed
     by a fuzzy matching based on a number of different algorithms.

     Parameters
@@ -46,17 +45,17 @@ class NameMatcher:
         group is returned.
         default=1
     legal_suffixes : bool
-        Boolean indicating whether the most common company legal terms should be excluded when calculating 
+        Boolean indicating whether the most common company legal terms should be excluded when calculating
         the final score. The terms are still included in determining the best match.
         default=False
     common_words : bool or list
-        Boolean indicating whether the most common words from the matching data should be excluded 
+        Boolean indicating whether the most common words from the matching data should be excluded
         when calculating the final score. The terms are still included in determining the best match.
         If common_words is given as a list, the words in the list are excluded from the calculation
         of the final score, downgrading matches that predominatly rely on these words.
         default=False
     cut_off_no_scoring_words: float
-        the cut off percentage of the occurrence of the most occurring word for which words are still included 
+        the cut off percentage of the occurrence of the most occurring word for which words are still included
         in the no_scoring_words set
         default=0.01
     lowercase : bool
@@ -71,8 +70,8 @@ class NameMatcher:
         ascii characters
         default=True
     : bool preprocess_split
-        Indicating whether during the preprocessing an additional step should be taken in which only 
-        the most common words out of a name are isolated and used in the matching process. The removing 
+        Indicating whether during the preprocessing an additional step should be taken in which only
+        the most common words out of a name are isolated and used in the matching process. The removing
         of the common words is only done for the n-grams cosine matching part.
         default=False
     verbose : bool
@@ -80,35 +79,42 @@ class NameMatcher:
         default=True
     distance_metrics: list
         A list of The distance metrics to be used during the fuzzy matching. For a list of possible distance
-        metrics see the distance_metrics.py file. By default the following metrics are used: overlap, weighted_jaccard, 
+        metrics see the distance_metrics.py file. By default the following metrics are used: overlap, weighted_jaccard,
         ratcliff_obershelp, fuzzy_wuzzy_token_sort and editex.
     row_numbers : bool
        Bool indicating whether the row number should be used as match_index rather than the original index as
-        was the default case before version 0.8.8 
+        was the default case before version 0.8.8
        default=False
     return_algorithms_score : bool
        Bool indicating whether the scores of all the algorithms should be returned instead of a combined score
        default=False
    """

-    def __init__(self,
-                 ngrams: tuple = (2, 3),
-                 top_n: int = 50,
-                 low_memory: bool = False,
-                 number_of_rows: int = 5000,
-                 number_of_matches: int = 1,
-                 lowercase: bool = True,
-                 punctuations: bool = True,
-                 remove_ascii: bool = True,
-                 legal_suffixes: bool = False,
-                 common_words: Union[bool, list] = False,
-                 cut_off_no_scoring_words: float = 0.01,
-                 preprocess_split: bool = False,
-                 verbose: bool = True,
-                 distance_metrics: Union[list, tuple] = ['overlap', 'weighted_jaccard', 'ratcliff_obershelp',
-                                                         'fuzzy_wuzzy_token_sort', 'editex'],
-                 row_numbers: bool = False,
-                 return_algorithms_score: bool = False):
+    def __init__(
+        self,
+        ngrams: tuple = (2, 3),
+        top_n: int = 50,
+        low_memory: bool = False,
+        number_of_rows: int = 5000,
+        number_of_matches: int = 1,
+        lowercase: bool = True,
+        punctuations: bool = True,
+        remove_ascii: bool = True,
+        legal_suffixes: bool = False,
+        common_words: Union[bool, list] = False,
+        cut_off_no_scoring_words: float = 0.01,
+        preprocess_split: bool = False,
+        verbose: bool = True,
+        distance_metrics: Union[list, tuple] = [
+            "overlap",
+            "weighted_jaccard",
+            "ratcliff_obershelp",
+            "fuzzy_wuzzy_token_sort",
+            "editex",
+        ],
+        row_numbers: bool = False,
+        return_algorithms_score: bool = False,
+    ):

         self._possible_matches = None
         self._preprocessed = False
@@ -117,8 +123,8 @@ def __init__(self,
         self._number_of_rows = number_of_rows
         self._low_memory = low_memory

-        self._column = ''
-        self._column_matching = ''
+        self._column = ""
+        self._column_matching = ""

         self._verbose = verbose
         self._number_of_matches = number_of_matches
@@ -129,7 +135,7 @@ def __init__(self,
         self._preprocess_punctuations = punctuations
         self._preprocess_ascii = remove_ascii
         self._postprocess_company_legal_id = legal_suffixes
-        
+
         if isinstance(common_words, bool):
             self._postprocess_common_words = common_words
             self._word_set = set()
@@ -137,33 +143,35 @@ def __init__(self,
             self._postprocess_common_words = False
             self._word_set = set(common_words)
         else:
-            raise TypeError('Please provide common_words as a list or a bool')
+            raise TypeError("Please provide common_words as a list or a bool")

         self._preprocess_split = preprocess_split
         self._cut_off = cut_off_no_scoring_words
         if self._postprocess_company_legal_id:
             self._word_set = self._make_no_scoring_words(
-                'legal', self._word_set, self._cut_off)
-        
+                "legal", self._word_set, self._cut_off
+            )
+
         self._original_indexes = not row_numbers
         self._original_index = None
         self.set_distance_metrics(distance_metrics)
         self._vec = TfidfVectorizer(
-            lowercase=False, analyzer="char", ngram_range=(ngrams))
+            lowercase=False, analyzer="char", ngram_range=(ngrams)
+        )
         self._n_grams_matching = None

     def set_distance_metrics(self, metrics: list) -> None:
         """
         A method to set which of the distance metrics should be employed during the
-        fuzzy matching. For very short explanations of most of the name matching 
+        fuzzy matching. For very short explanations of most of the name matching
         algorithms please see the make_distance_metrics function in distance_matrics.py

         Parameters
         ----------
-        metrics: list 
+        metrics: list
            The list with the distance metrics to be used during the name matching.
            The distance metrics can be chosen from the list below:
            indel
@@ -198,47 +206,56 @@ def set_distance_metrics(self, metrics: list) -> None:
         try:
             self._distance_metrics = make_distance_metrics(**input_metrics)
         except TypeError:
-            raise TypeError('Not all of the supplied distance metrics are available. Please check the' +
-                            'list of options in the make_distance_metrics function and adjust your list accordingly')
+            raise TypeError(
+                "Not all of the supplied distance metrics are available. Please check the "
+                + "list of options in the make_distance_metrics function and adjust your list accordingly"
+            )
         self._num_distance_metrics = sum(
-            [len(x) for x in self._distance_metrics.values()])
+            [len(x) for x in self._distance_metrics.values()]
+        )

-    def _select_top_words(self, word: str, word_counts: pd.Series, occurrence_count: int) -> str:
+    def _select_top_words(
+        self, word: str, word_counts: pd.Series, occurrence_count: int
+    ) -> str:
         """Remove the top words from the string word based on an occurrence_count threshold

         Parameters
         ----------
-        word: str 
+        word: str
            the string from which the words should be removed
        word_counts: pd.Series
            the words which should be removed with their counts as result from a value_counts
-        occurrence_count: int 
-            the multiplication factor of the minimum occurrences below which to select 
+        occurrence_count: int
+            the multiplication factor of the minimum occurrences below which to select

        Returns
        -------
        str
            The string word with the words with a too high word_counts removed
        """
-        compressed_list = list(compress(
-            word, (word_counts[word] < occurrence_count*word_counts[word].min()).values))
-
-        return ' '.join(compressed_list)
-
-    def _preprocess_reduce(self,
-                           to_be_matched: pd.DataFrame,
-                           occurrence_count: int = 3) -> pd.DataFrame:
+        compressed_list = list(
+            compress(
+                word,
+                (word_counts[word] < occurrence_count * word_counts[word].min()).values,
+            )
+        )
+
+        return " ".join(compressed_list)
+
+    def _preprocess_reduce(
+        self, to_be_matched: pd.DataFrame, occurrence_count: int = 3
+    ) -> pd.DataFrame:
         """Preprocesses and copies the data to obtain the data with reduced strings. The strings have all words
-        removed which appear more than 3x as often as the least common word in the string and returns an adjusted 
-        copy of the input 
+        removed which appear more than 3x as often as the least common word in the string and returns an adjusted
+        copy of the input

         Parameters
         ----------
-        to_be_matched: pd.DataFrame 
+        to_be_matched: pd.DataFrame
            A dataframe from which the most common words should be removed
        occurrence_count: int
            The number of occurrence a word can occur more then the least common word in the string for which it will
-            still be included in the process 
+            still be included in the process
            default=3

        Returns
@@ -246,21 +263,25 @@ def _preprocess_reduce(self,
        pd.DataFrame
            A dataframe that will contain the reduced strings
        """
-        individual_words = to_be_matched[self._column_matching].str.split(
-            expand=True).stack()
+        individual_words = (
+            to_be_matched[self._column_matching].str.split(expand=True).stack()
+        )
         word_counts = individual_words.value_counts()
         to_be_matched_new = to_be_matched.copy()
         name = to_be_matched[self._column_matching].str.split()
         to_be_matched_new[self._column_matching] = name.apply(
-            lambda word: self._select_top_words(word, word_counts, occurrence_count))
+            lambda word: self._select_top_words(word, word_counts, occurrence_count)
+        )

         return to_be_matched_new

-    def load_and_process_master_data(self,
-                                     column: str,
-                                     df_matching_data: pd.DataFrame,
-                                     start_processing: bool = True,
-                                     transform: bool = True) -> None:
+    def load_and_process_master_data(
+        self,
+        column: str,
+        df_matching_data: pd.DataFrame,
+        start_processing: bool = True,
+        transform: bool = True,
+    ) -> None:
         """Load the matching data into the NameMatcher and start the preprocessing.

         Parameters
@@ -269,43 +290,42 @@ def load_and_process_master_data(self,
            The column name of the dataframe which should be used for the matching
        df_matching_data: pd.DataFrame
            The dataframe which is used to match the data to.
-        start_processing : bool 
-            A boolean indicating whether to start the preprocessing step after loading the matching data 
+        start_processing : bool
+            A boolean indicating whether to start the preprocessing step after loading the matching data
            default: True
-        transform : bool 
-            A boolean indicating whether or not the data should be transformed after the vectoriser is initialised 
+        transform : bool
+            A boolean indicating whether or not the data should be transformed after the vectoriser is initialised
            default: True
        """
         self._column = column
-        self._df_matching_data = df_matching_data 
+        self._df_matching_data = df_matching_data
         self._original_index = df_matching_data.index
         if start_processing:
             self._process_matching_data(transform)

-    def _process_matching_data(self,
-                               transform: bool = True) -> None:
-        """Function to process the matching data. First the matching data is preprocessed and assigned to 
-        a variable within the NameMatcher. Next the data is used to initialise the TfidfVectorizer. 
+    def _process_matching_data(self, transform: bool = True) -> None:
+        """Function to process the matching data. First the matching data is preprocessed and assigned to
+        a variable within the NameMatcher. Next the data is used to initialise the TfidfVectorizer.

         Parameters
         ----------
-        transform : bool 
-            A boolean indicating whether or not the data should be transformed after the vectoriser is initialised 
+        transform : bool
+            A boolean indicating whether or not the data should be transformed after the vectoriser is initialised
            default: True
        """
-        self._df_matching_data = self.preprocess(
-            self._df_matching_data, self._column)
+        self._df_matching_data = self.preprocess(self._df_matching_data, self._column)
         if self._postprocess_common_words:
             self._word_set = self._make_no_scoring_words(
-                'common', self._word_set, self._cut_off)
+                "common", self._word_set, self._cut_off
+            )
         self._vectorise_data(transform)
         self._preprocessed = True

-    def match_names(self,
-                    to_be_matched: Union[pd.Series, pd.DataFrame],
-                    column_matching: str) -> Union[pd.Series, pd.DataFrame]:
-        """Performs the name matching operation on the to_be_matched data. First it does the preprocessing of the 
-        data to be matched as well as the matching data if this has not been performed. Subsequently based on 
+    def match_names(
+        self, to_be_matched: Union[pd.Series, pd.DataFrame], column_matching: str
+    ) -> Union[pd.Series, pd.DataFrame]:
+        """Performs the name matching operation on the to_be_matched data. First it does the preprocessing of the
+        data to be matched as well as the matching data if this has not been performed. Subsequently based on
         ngrams a cosine similarity is computed between the matching data and the data to be matched, to the top n
         matches fuzzy matching algorithms are performed to determine the best match and the quality of the match
@@ -319,63 +339,87 @@ def match_names(self,
         Returns
         -------
         Union[pd.Series, pd.DataFrame]
-            A series or dataframe depending on the input containing the match index from the matching_data dataframe. 
-            the name in the to_be_matched data, the name to which the datapoint was matched and a score between 0 
+            A series or dataframe depending on the input containing the match index from the matching_data dataframe.
+            the name in the to_be_matched data, the name to which the datapoint was matched and a score between 0
            (no match) and 100(perfect match) to indicate the quality of the matches
        """
-        if self._column == '':
+        if self._column == "":
             raise ValueError(
-                'Please first load the master data via the method: load_and_process_master_data')
+                "Please first load the master data via the method: load_and_process_master_data"
+            )
         if self._verbose:
             tqdm.pandas()
-            tqdm.write('preprocessing...\n')
+            tqdm.write("preprocessing...\n")
         self._column_matching = column_matching

         is_dataframe = True
         if isinstance(to_be_matched, pd.Series):
             is_dataframe = False
             to_be_matched = pd.DataFrame(
-                [to_be_matched.values], columns=to_be_matched.index.to_list())
+                [to_be_matched.values], columns=to_be_matched.index.to_list()
+            )
         if not self._preprocessed:
             self._process_matching_data()
         to_be_matched = self.preprocess(to_be_matched, self._column_matching)

         if self._verbose:
-            tqdm.write('preprocessing complete \n searching for matches...\n')
+            tqdm.write("preprocessing complete \n searching for matches...\n")

-        self._possible_matches = self._search_for_possible_matches(
-            to_be_matched)
+        self._possible_matches = self._search_for_possible_matches(to_be_matched)

         if self._preprocess_split:
-            self._possible_matches = np.hstack((self._search_for_possible_matches(
-                self._preprocess_reduce(to_be_matched)), self._possible_matches))
-        
+            self._possible_matches = np.hstack(
+                (
+                    self._search_for_possible_matches(
+                        self._preprocess_reduce(to_be_matched)
+                    ),
+                    self._possible_matches,
+                )
+            )
+
         if self._verbose:
-            tqdm.write('possible matches found \n fuzzy matching...\n')
-            data_matches = to_be_matched.progress_apply(lambda x: self.fuzzy_matches(
-                self._possible_matches[to_be_matched.index.get_loc(x.name), :], x), axis=1)
+            tqdm.write("possible matches found \n fuzzy matching...\n")
+            data_matches = to_be_matched.progress_apply(
+                lambda x: self.fuzzy_matches(
+                    self._possible_matches[to_be_matched.index.get_loc(x.name), :], x
+                ),
+                axis=1,
+            )
         else:
-            data_matches = to_be_matched.apply(lambda x: self.fuzzy_matches(
-                self._possible_matches[to_be_matched.index.get_loc(x.name), :], x), axis=1)
+            data_matches = to_be_matched.apply(
+                lambda x: self.fuzzy_matches(
+                    self._possible_matches[to_be_matched.index.get_loc(x.name), :], x
+                ),
+                axis=1,
+            )

         if self._return_algorithms_score:
             return data_matches
-        
+
         if self._number_of_matches == 1:
-            data_matches = data_matches.rename(columns={'match_name_0': 'match_name',
-                                                        'score_0': 'score', 'match_index_0': 'match_index'})
+            data_matches = data_matches.rename(
+                columns={
+                    "match_name_0": "match_name",
+                    "score_0": "score",
+                    "match_index_0": "match_index",
+                }
+            )

         if is_dataframe and self._original_indexes:
-            for col in data_matches.columns[data_matches.columns.str.contains('match_index')]:
-                data_matches[col] = self._original_index[data_matches[col].astype(int).fillna(0)]
+            for col in data_matches.columns[
+                data_matches.columns.str.contains("match_index")
+            ]:
+                data_matches[col] = self._original_index[
+                    data_matches[col].astype(int).fillna(0)
+                ]

         if self._verbose:
-            tqdm.write('done')
+            tqdm.write("done")
         return data_matches

-    def fuzzy_matches(self,
-                      possible_matches: np.array,
-                      to_be_matched: pd.Series) -> pd.Series:
-        """ A method which performs the fuzzy matching between the data in the to_be_matched series as well
+    def fuzzy_matches(
+        self, possible_matches: np.array, to_be_matched: pd.Series
+    ) -> pd.Series:
+        """A method which performs the fuzzy matching between the data in the to_be_matched series as well
         as the indicated indexes of the matching_data points which are possible matching candidates.

         Parameters
@@ -389,28 +433,34 @@ def fuzzy_matches(self,
         -------
         pd.Series
            A series containing the match index from the matching_data dataframe. the name in the to_be_matched data,
-            the name to which the datapoint was matched and a score between 0 (no match) and 100(perfect match) to 
+            the name to which the datapoint was matched and a score between 0 (no match) and 100 (perfect match) to
            indicate the quality of the matches
        """
         if len(possible_matches.shape) > 1:
             possible_matches = possible_matches[0]

-        indexes = np.array([[f'match_name_{num}', f'score_{num}', f'match_index_{num}']
-                            for num in range(self._number_of_matches)]).flatten()
-        match = pd.Series(index=np.append('original_name', indexes), dtype=object)
-        match['original_name'] = to_be_matched[self._column_matching]
+        indexes = np.array(
+            [
+                [f"match_name_{num}", f"score_{num}", f"match_index_{num}"]
+                for num in range(self._number_of_matches)
+            ]
+        ).flatten()
+        match = pd.Series(index=np.append("original_name", indexes), dtype=object)
+        match["original_name"] = to_be_matched[self._column_matching]
         list_possible_matches = self._df_matching_data.iloc[
-            possible_matches.flatten(), :][self._column].values
+            possible_matches.flatten(), :
+        ][self._column].values

         match_score = self._score_matches(
-            to_be_matched[self._column_matching], list_possible_matches)
+            to_be_matched[self._column_matching], list_possible_matches
+        )

         if self._return_algorithms_score:
             return match_score

         ind = self._rate_matches(match_score)

         for num, col_num in enumerate(ind):
-            match[f'match_name_{num}'] = list_possible_matches[col_num]
-            match[f'match_index_{num}'] = possible_matches[col_num]
+            match[f"match_name_{num}"] = list_possible_matches[col_num]
+            match[f"match_index_{num}"] = possible_matches[col_num]

         match = self._adjust_scores(match_score[ind, :], match)

@@ -419,9 +469,9 @@

         return match

-    def _score_matches(self,
-                       to_be_matched_instance: str,
-                       possible_matches: list) -> np.array:
+    def _score_matches(
+        self, to_be_matched_instance: str, possible_matches: list
+    ) -> np.array:
         """A method to score a name to_be_matched_instance to a list of possible matches. The scoring is done
         based on all the metrics which are enabled.

@@ -437,20 +487,19 @@ def _score_matches(self,
        np.array
            The score of each of the matches with respect to the different metrics which are assessed.
        """
-        match_score = np.zeros(
-            (len(possible_matches), self._num_distance_metrics))
+        match_score = np.zeros((len(possible_matches), self._num_distance_metrics))
         idx = 0
         for method_list in self._distance_metrics.values():
             for method in method_list:
                 match_score[:, idx] = np.array(
-                    [method.sim(to_be_matched_instance, s) for s in possible_matches])
+                    [method.sim(to_be_matched_instance, s) for s in possible_matches]
+                )
                 idx = idx + 1

         return match_score

-    def _rate_matches(self,
-                      match_score: np.array) -> np.array:
-        """Converts the match scores from the score_matches method to a list of indexes of the best scoring 
+    def _rate_matches(self, match_score: np.array) -> np.array:
+        """Converts the match scores from the score_matches method to a list of indexes of the best scoring
         matches limited to the _number_of_matches.

         Parameters
@@ -471,14 +520,16 @@ def _rate_matches(self,
             idx = 0
             for num, method_list in enumerate(self._distance_metrics.values()):
                 method_grouped_results = np.reshape(
-                    match_score[:, idx: idx + len(method_list)], (-1, len(method_list)))
+                    match_score[:, idx : idx + len(method_list)], (-1, len(method_list))
+                )
                 ind[num] = np.argmax(np.mean(method_grouped_results, axis=1))
                 idx = idx + len(method_list)
         elif self._number_of_matches == self._num_distance_metrics:
             ind = np.argmax(match_score, axis=1)
         else:
-            ind = np.argsort(np.mean(match_score, axis=1)
-                             )[-self._number_of_matches:][::-1]
+            ind = np.argsort(np.mean(match_score, axis=1))[-self._number_of_matches :][
+                ::-1
+            ]

         return np.array(ind, dtype=int)

@@ -498,7 +549,7 @@ def _get_alternative_names(self, match: pd.Series) -> list:
         alt_names = []
         for num in range(self._number_of_matches):
-            alt_names.append(str(match[f'match_name_{num}']))
+            alt_names.append(str(match[f"match_name_{num}"]))

         return alt_names

@@ -519,11 +570,11 @@ def _process_words(self, org_name: str, alt_names: list) -> Tuple[str, list]:
        """
         len_atl_names = len(alt_names)
         for word in self._word_set:
-            org_name = ' '.join(
-                re.sub(fr'\b{re.escape(word)}\b', '', org_name).split())
+            org_name = " ".join(sub(rf"\b{escape(word)}\b", "", org_name).split())
             for num in range(len_atl_names):
-                alt_names[num] = ' '.join(
-                    re.sub(fr'\b{re.escape(word)}\b', '', alt_names[num]).split())
+                alt_names[num] = " ".join(
+                    sub(rf"\b{escape(word)}\b", "", alt_names[num]).split()
+                )

         return org_name, alt_names

@@ -543,13 +594,12 @@ def _adjust_scores(self, match_score: np.array, match: pd.Series) -> pd.Series:
            The series with the possible matches and adjusted scores
        """
         for num in range(self._number_of_matches):
-            match[f'score_{num}'] = 100*np.mean(match_score[num, :])
+            match[f"score_{num}"] = 100 * np.mean(match_score[num, :])

         return match

-    def postprocess(self,
-                    match: pd.Series) -> pd.Series:
-        """Postprocesses the scores to exclude certain specific company words or the most 
+    def postprocess(self, match: pd.Series) -> pd.Series:
+        """Postprocesses the scores to exclude certain specific company words or the most
         common words. In this method only the scores are adjusted, the matches still stand.

         Parameters
@@ -563,7 +613,7 @@ def postprocess(self,
            A new version of the input series with updated scores
        """
         alt_names = self._get_alternative_names(match)
-        org_name = str(match['original_name'])
+        org_name = str(match["original_name"])

         org_name, alt_names = self._process_words(org_name, alt_names)

@@ -573,16 +623,15 @@ def postprocess(self,

         return match

-    def _vectorise_data(self,
-                        transform: bool = True):
+    def _vectorise_data(self, transform: bool = True):
         """Initialises the TfidfVectorizer, which generates ngrams and weights them based on the occurrance.
         Subsequently the matching data will be used to fit the vectoriser and the matching data might also be
         send to the transform_data function depending on the transform boolean.

         Parameters
         ----------
-        transform : bool 
-            A boolean indicating whether or not the data should be transformed after the vectoriser is initialised 
+        transform : bool
+            A boolean indicating whether or not the data should be transformed after the vectoriser is initialised
            default: True
        """
         self._vec.fit(self._df_matching_data[self._column].values.flatten())
@@ -590,21 +639,19 @@ def _vectorise_data(self,
         self.transform_data()

     def transform_data(self):
-        """A method which transforms the matching data based on the ngrams transformer. After the 
+        """A method which transforms the matching data based on the ngrams transformer. After the
         transformation (the generation of the ngrams), the data is normalised by dividing each row by the
         sum of the row. Subsequently the data is changed to a coo sparse matrix format with the column
         indices in ascending order.
        """
-        ngrams = self._vec.transform(
-            self._df_matching_data[self._column].astype(str))
+        ngrams = self._vec.transform(self._df_matching_data[self._column].astype(str))
         for i, j in zip(ngrams.indptr[:-1], ngrams.indptr[1:]):
-            ngrams.data[i:j] = ngrams.data[i:j]/np.sum(ngrams.data[i:j])
+            ngrams.data[i:j] = ngrams.data[i:j] / np.sum(ngrams.data[i:j])
         self._n_grams_matching = ngrams.tocsc()

         if self._low_memory:
             self._n_grams_matching = self._n_grams_matching.tocoo()

-    def _search_for_possible_matches(self,
-                                     to_be_matched: pd.DataFrame) -> np.array:
+    def _search_for_possible_matches(self, to_be_matched: pd.DataFrame) -> np.array:
         """Generates ngrams from the data which should be matched, calculate the cosine simularity between
         these data and the matching data. Hereafter a top n of the matches is selected and returned.

@@ -622,8 +669,9 @@ def _search_for_possible_matches(self,
        """
         if self._n_grams_matching is None:
             raise RuntimeError(
-                """First the data needs to be transformed to be able to use the sparse cosine simularity. To""" +
-                """transform the data, run transform_data or run load_and_process_master_data with transform=True""")
+                """First the data needs to be transformed to be able to use the sparse cosine similarity. To """
+                + """transform the data, run transform_data or run load_and_process_master_data with transform=True"""
+            )

         if self._low_memory:
             results = np.zeros((len(to_be_matched), self._top_n))
@@ -631,26 +679,37 @@
             for idx, row_name in enumerate(tqdm(input_data, disable=not self._verbose)):
                 match_ngrams = self._vec.transform([row_name])
                 results[idx, :] = sparse_cosine_top_n(
-                    matrix_a=self._n_grams_matching, matrix_b=match_ngrams, top_n=self._top_n, low_memory=self._low_memory, number_of_rows=self._number_of_rows, verbose=self._verbose)
+                    matrix_a=self._n_grams_matching,
+                    matrix_b=match_ngrams,
+                    top_n=self._top_n,
+                    low_memory=self._low_memory,
+                    number_of_rows=self._number_of_rows,
+                    verbose=self._verbose,
+                )
         else:
             match_ngrams = self._vec.transform(
-                to_be_matched[self._column_matching].tolist()).tocsc()
+                to_be_matched[self._column_matching].tolist()
+            ).tocsc()
             results = sparse_cosine_top_n(
-                matrix_a=self._n_grams_matching, matrix_b=match_ngrams, top_n=self._top_n, low_memory=self._low_memory, number_of_rows=self._number_of_rows, verbose=self._verbose)
+                matrix_a=self._n_grams_matching,
+                matrix_b=match_ngrams,
+                top_n=self._top_n,
+                low_memory=self._low_memory,
+                number_of_rows=self._number_of_rows,
+                verbose=self._verbose,
+            )

         return results

-    def preprocess(self,
-                   df: pd.DataFrame,
-                   column_name: str) -> pd.DataFrame:
-        """Preprocess a dataframe before applying a name matching algorithm. The preprocessing consists of 
+    def preprocess(self, df: pd.DataFrame, column_name: str) -> pd.DataFrame:
+        """Preprocess a dataframe before applying a name matching algorithm. The preprocessing consists of
         removing special characters, spaces, converting all characters to lower case and removing the words
         given in the word lists

         Parameters
         ----------
         df : DataFrame
-            The dataframe or series on which the preprocessing needs to be performed 
+            The dataframe or series on which the preprocessing needs to be performed
         column_name : str
            The name of the column that is used for the preprocessing
@@ -664,12 +723,15 @@ def preprocess(self,
         df.loc[:, column_name] = df[column_name].str.lower()
         if self._preprocess_punctuations:
             df.loc[:, column_name] = df[column_name].str.replace(
-                '[^\w\s]', '', regex=True)
-            df.loc[:, column_name] = df[column_name].str.replace(
-                '  ', ' ').str.strip()
+                r"[^\w\s]", "", regex=True
+            )
+            df.loc[:, column_name] = df[column_name].str.replace("  ", " ").str.strip()
         if self._preprocess_ascii:
-            df.loc[:, column_name] = df[column_name].apply(lambda string: unicodedata.normalize(
-                'NFKD', str(string)).encode('ASCII', 'ignore').decode())
+            df.loc[:, column_name] = df[column_name].apply(
+                lambda string: normalize("NFKD", str(string))
+                .encode("ASCII", "ignore")
+                .decode()
+            )

         return df

@@ -684,14 +746,15 @@ def _preprocess_word_list(self, terms: dict) -> list:
         Returns
         -------
         list
-            A list of preprocessed legal words 
+            A list of preprocessed legal words
        """
         if self._preprocess_punctuations:
-            return [re.sub(r'[^\w\s]', '', s).strip() for s in functools.reduce(
-                operator.iconcat, terms.values(), [])]
+            return [
+                sub(r"[^\w\s]", "", s).strip()
+                for s in reduce(iconcat, terms.values(), [])
+            ]
         else:
-            return [s.strip() for s in functools.reduce(
-                operator.iconcat, terms.values(), [])]
+            return [s.strip() for s in reduce(iconcat, terms.values(), [])]

     def _process_legal_words(self, word_set: set) -> set:
         """Preprocess legal words and add them to the word_set
@@ -718,9 +781,9 @@ def _process_common_words(self, word_set: set, cut_off: float) -> set:
         Parameters
         -------
         word_set: str
-            the current word list which should be extended with additional words 
+            the current word list which should be extended with additional words
         cut_off: float
-            the cut_off percentage of the occurrence of the most occurring word for which words are still included 
+            the cut_off percentage of the occurrence of the most occurring word for which words are still included
            in the no_soring_words set

        Returns
@@ -728,17 +791,21 @@ def _process_common_words(self, word_set: set, cut_off: float) -> set:
        Set
            The current word set with the most common words from the matching_data added
        """
-        word_counts = self._df_matching_data[self._column].str.split(
-            expand=True).stack().value_counts()
+        word_counts = (
+            self._df_matching_data[self._column]
+            .str.split(expand=True)
+            .stack()
+            .value_counts()
+        )
         word_set = word_set.union(
-            set(word_counts[word_counts > np.max(word_counts)*cut_off].index))
+            set(word_counts[word_counts > np.max(word_counts) * cut_off].index)
+        )

         return word_set

-    def _make_no_scoring_words(self,
-                               indicator: str,
-                               word_set: set,
-                               cut_off: float) -> set:
+    def _make_no_scoring_words(
+        self, indicator: str, word_set: set, cut_off: float
+    ) -> set:
         """A method to make a set of words which are not taken into account when scoring matches.

         Parameters
         -------
         indicator: str
            indicator for which types of words should be excluded can be legal for legal suffixes or
            common for the most common words
        word_set: str
-            the current word list which should be extended with additional words 
+            the current word list which should be extended with additional words
        cut_off: float
-            the cut_off percentage of the occurrence of the most occurring word for which words are still included 
+            the cut_off percentage of the occurrence of the most occurring word for which words are still included
            in the no_soring_words set

        Returns
@@ -757,9 +824,9 @@ def _make_no_scoring_words(self,
        Set
            The set of no scoring words
        """
-        if indicator == 'legal':
+        if indicator == "legal":
             word_set = self._process_legal_words(word_set)
-        if indicator == 'common':
+        if indicator == "common":
             word_set = self._process_common_words(word_set, cut_off)
         return word_set
diff --git a/name_matching/run_nm.py b/name_matching/run_nm.py
index a7f1977..7e93779 100644
--- a/name_matching/run_nm.py
+++ b/name_matching/run_nm.py
@@ -1,18 +1,18 @@
+import pandas as pd
 from name_matching.name_matcher import NameMatcher
 from typing import Union, Tuple
-import pandas as pd
-import unicodedata
+from unicodedata import normalize

-def _match_names_check_data(data: Union[pd.Series, pd.DataFrame],
-                            column: str,
-                            group_column: str) -> pd.DataFrame:
+
+def _match_names_check_data(
+    data: Union[pd.Series, pd.DataFrame], column: str, group_column: str
+) -> pd.DataFrame:
     """
-    Checks the input data of the name matching function to see whether the defined columns can 
+    Checks the input data of the name matching function to see whether the defined columns can
     be found and makes a new column which will be used for the name matching
     ----------
     data: Union[pd.DataFrame, pd.Series]
-        The first dataframe or series used for the name matching 
+        The first dataframe or series used for the name matching
     column: str
        The column in which the name that should be matched can be found for data
    group_column_first: str
@@ -26,36 +26,39 @@ def _match_names_check_data(data: Union[pd.Series, pd.DataFrame],
    """
     if isinstance(data, pd.DataFrame):
-        if column == '':
+        if column == "":
             raise ValueError(
-                'For one of the dataframes no column is given to perform the name matching on')
+                "For one of the dataframes no column is given to perform the name matching on"
+            )
         if column not in data.columns:
-            raise ValueError(
-                'Could not find one of the columns in the dataframe')
+            raise ValueError("Could not find one of the columns in the dataframe")
         if (group_column != '') & (group_column not in data.columns):
-            raise ValueError(
-                'Could not find one of the group_columns in the dataframe')
-        data['name_matching_data'] = data[column]
+            raise ValueError("Could not find one of the group_columns in the dataframe")
+        data["name_matching_data"] = data[column]
     else:
-        if group_column != '':
+        if group_column != "":
             raise ValueError(
-                'Grouping is only possible when a dataframe is used for both inputs')
-        data = pd.DataFrame(data, columns=['name_matching_data'])
+                "Grouping is only possible when a dataframe is used for both inputs"
+            )
+        data = pd.DataFrame(data, columns=["name_matching_data"])

     return data

-def _match_names_preprocess_data(column: str,
-                                 data_first: pd.DataFrame,
-                                 data_second: pd.DataFrame,
-                                 case_sensitive: bool,
-                                 punctuation_sensitive: bool,
-                                 special_character_sensitive: bool) -> Tuple[pd.DataFrame, pd.DataFrame]:
+
+def _match_names_preprocess_data(
+    column: str,
+    data_first: pd.DataFrame,
+    data_second: pd.DataFrame,
+    case_sensitive: bool,
+    punctuation_sensitive: bool,
+    special_character_sensitive: bool,
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """
-    Preprocess the data by making the names lower case, removing punctuations and special characters. 
+    Preprocess the data by making the names lower case, removing punctuation and special characters.
     And convert the indexes of the second dataframe to a column.
     ----------
     data: Union[pd.DataFrame, pd.Series]
-        The first dataframe or series used for the name matching 
+        The first dataframe or series used for the name matching
     column: str
        The column in which the name that should be matched can be found for data
    group_column_first: str
@@ -81,32 +84,36 @@ def _match_names_preprocess_data(column: str,
     data_first[column] = data_first[column].str.lower().str.strip()
     data_second[column] = data_second[column].str.lower().str.strip()
     if not punctuation_sensitive:
-        data_first[column] = data_first[column].str.replace('[^\w\s]', '', regex=True)
-        data_second[column] = data_second[column].str.replace(
-            '[^\w\s]', '', regex=True)
+        data_first[column] = data_first[column].str.replace(r"[^\w\s]", "", regex=True)
+        data_second[column] = data_second[column].str.replace(r"[^\w\s]", "", regex=True)
     if not special_character_sensitive:
-        data_first[column] = data_first[column].apply(lambda string: unicodedata.normalize(
-            'NFKD', string).encode('ASCII', 'ignore').decode())
-        data_second[column] = data_second[column].apply(lambda string: unicodedata.normalize(
-            'NFKD', string).encode('ASCII', 'ignore').decode())
+        data_first[column] = data_first[column].apply(
+            lambda string: normalize("NFKD", string).encode("ASCII", "ignore").decode()
+        )
+        data_second[column] = data_second[column].apply(
+            lambda string: normalize("NFKD", string).encode("ASCII", "ignore").decode()
+        )
+
+    data_second = data_second.rename_axis("index").reset_index(drop=False)

-    data_second = data_second.rename_axis('index').reset_index(drop=False)
-    
     return data_first, data_second

-def _match_names_combine_data(data_first: pd.DataFrame,
-                              data_second: pd.DataFrame,
-                              left_cols: list,
-                              right_cols: list) -> pd.DataFrame:
+
+def _match_names_combine_data(
+    data_first: pd.DataFrame,
+    data_second: pd.DataFrame,
+    left_cols: list,
+    right_cols: list,
+) -> pd.DataFrame:
     """
     Perform a merge to match data based on whether the names are equal
     ----------
     data_first: pd.DataFrame
-        The first dataframe or series used for the name matching 
+        The first dataframe or series used for the name matching
     data_second: pd.DataFrame
-        The second dataframe or series used for the name matching 
+        The second dataframe or series used for the name matching
     left_cols: list
-        A list of columns on which the first dataframe should be merged 
+        A list of columns on which the first dataframe should be merged
     right_cols: list
        A list of columns on which the first dataframe should be merged

@@ -117,29 +124,38 @@ def _match_names_combine_data(data_first: pd.DataFrame,
    dataframe is equal to the original index of data_first, the match index is the index in data_second
    for the matched name.
""" - matches = pd.merge(data_first, data_second, how='left', - left_on=left_cols, right_on=right_cols, suffixes=['', '_matched']) - matches['score'] = 100 - matches = matches.dropna(subset=['index']) - matches = matches.rename(columns={'index':'match_index'}) - matches = matches[['match_index', 'score']] + matches = pd.merge( + data_first, + data_second, + how="left", + left_on=left_cols, + right_on=right_cols, + suffixes=["", "_matched"], + ) + matches["score"] = 100 + matches = matches.dropna(subset=["index"]) + matches = matches.rename(columns={"index": "match_index"}) + matches = matches[["match_index", "score"]] return matches -def _match_names_match_single(matcher: NameMatcher, - data_first: pd.DataFrame, - data_second: pd.DataFrame, - name_column: str) -> pd.DataFrame: + +def _match_names_match_single( + matcher: NameMatcher, + data_first: pd.DataFrame, + data_second: pd.DataFrame, + name_column: str, +) -> pd.DataFrame: """ Perform the name matching. First by doing a perfect string match with a merge statement, followed - by the fuzzy matching approach as done in NameMatcher. + by the fuzzy matching approach as done in NameMatcher. ---------- matcher: NameMatcher - The NameMatcher to be used for the name matching part + The NameMatcher to be used for the name matching part data_first: pd.DataFrame - The first dataframe or series used for the name matching + The first dataframe or series used for the name matching data_second: pd.DataFrame - The second dataframe or series used for the name matching + The second dataframe or series used for the name matching name_column: str The column in which the name that should be matched can be found for both dataframes @@ -151,34 +167,46 @@ def _match_names_match_single(matcher: NameMatcher, for the matched name. """ - matches = _match_names_combine_data(data_first, data_second, - [name_column], [name_column]) + matches = _match_names_combine_data( + data_first, data_second, [name_column], [name_column] + ) unmatched = data_first[~data_first.index.isin(matches.index)].copy() if len(unmatched) > 0: matcher.load_and_process_master_data(name_column, data_second, transform=True) - matches = pd.concat([matches,(matcher.match_names( - to_be_matched=unmatched, column_matching=name_column))]) + matches = pd.concat( + [ + matches, + ( + matcher.match_names( + to_be_matched=unmatched, column_matching=name_column + ) + ), + ] + ) return matches else: - print('All data matched with basic string matching') + print("All data matched with basic string matching") return matches -def _match_names_match_group(matcher: NameMatcher, - data_first: pd.DataFrame, - data_second: pd.DataFrame, - name_column: str, - group_column_first: str, - group_column_second: str) -> pd.DataFrame: + +def _match_names_match_group( + matcher: NameMatcher, + data_first: pd.DataFrame, + data_second: pd.DataFrame, + name_column: str, + group_column_first: str, + group_column_second: str, +) -> pd.DataFrame: """ Perform the name matching based on the subgroups as indicated by the group_column strings. First by doing - a perfect string match with a merge statement, followed by the fuzzy matching approach as done in NameMatcher. + a perfect string match with a merge statement, followed by the fuzzy matching approach as done in NameMatcher. 
---------- matcher: NameMatcher - The NameMatcher to be used for the name matching part + The NameMatcher to be used for the name matching part data_first: pd.DataFrame - The first dataframe or series used for the name matching + The first dataframe or series used for the name matching data_second: pd.DataFrame - The second dataframe or series used for the name matching + The second dataframe or series used for the name matching name_column: str The column in which the name that should be matched can be found for both dataframes group_column_first: str @@ -194,36 +222,54 @@ def _match_names_match_group(matcher: NameMatcher, for the matched name. """ - matches = _match_names_combine_data(data_first, data_second, [ - name_column, group_column_first], [name_column, group_column_second]) + matches = _match_names_combine_data( + data_first, + data_second, + [name_column, group_column_first], + [name_column, group_column_second], + ) unmatched = data_first[~data_first.index.isin(matches.index)] if len(unmatched) > 0: matcher.load_and_process_master_data(name_column, data_second, transform=False) for group in data_first[group_column_first].unique(): - data_second_group = data_second[data_second[group_column_second] == group].copy() - matcher.load_and_process_master_data(name_column, - data_second_group, start_processing=False) + data_second_group = data_second[ + data_second[group_column_second] == group + ].copy() + matcher.load_and_process_master_data( + name_column, data_second_group, start_processing=False + ) matcher.transform_data() - matches = pd.concat([matches, matcher.match_names( - to_be_matched=unmatched[unmatched[group_column_first] == group].copy(), column_matching=name_column)]) + matches = pd.concat( + [ + matches, + matcher.match_names( + to_be_matched=unmatched[ + unmatched[group_column_first] == group + ].copy(), + column_matching=name_column, + ), + ] + ) else: - print('All data matched with basic string matching') + print("All data matched with basic string matching") return matches return matches -def match_names(data_first: Union[pd.DataFrame, pd.Series], - data_second: Union[pd.DataFrame, pd.Series], - column_first='', - column_second='', - group_column_first='', - group_column_second='', - case_sensitive=False, - punctuation_sensitive=False, - special_character_sensitive=False, - threshold=95, - **kwargs) -> pd.DataFrame: +def match_names( + data_first: Union[pd.DataFrame, pd.Series], + data_second: Union[pd.DataFrame, pd.Series], + column_first="", + column_second="", + group_column_first="", + group_column_second="", + case_sensitive=False, + punctuation_sensitive=False, + special_character_sensitive=False, + threshold=95, + **kwargs +) -> pd.DataFrame: """Function which performs name matching. First a simple merge on the data is performed to get the instances in which the name matches perfectly. Subsequently the matches are matched using the name matching algorithm as defined in name_matcher. 
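
Note on the reformatted match_names entry point above: the flow it documents is a perfect-match merge first (those rows get score 100), with only the leftover rows sent through NameMatcher for fuzzy matching, and the result filtered by the threshold. Below is a minimal usage sketch, not part of this patch; the import path and the "company_name" column are assumptions for illustration and should be adjusted to this repo's actual layout.

import pandas as pd
from name_matching.run_nm import match_names  # import path assumed; adjust to where match_names lives in this repo

# Two small frames sharing an (illustrative) "company_name" column.
df_a = pd.DataFrame({"company_name": ["Schumm PLC", "Gerlach and Sons"]})
df_b = pd.DataFrame({"company_name": ["Schumm PLC", "Gerlach & Sons", "Torphy-Corkery"]})

# Exact matches are picked up by the merge step; the remainder is fuzzy-matched by NameMatcher.
matches = match_names(
    df_a,
    df_b,
    column_first="company_name",
    column_second="company_name",
    threshold=95,
)
# Per the docstring: original_name, match_name_0, score_0, match_index_0,
# where match_index_0 points back into df_b.
print(matches)
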
@@ -233,23 +279,23 @@ def match_names(data_first: Union[pd.DataFrame, pd.Series], data_first: Union[pd.DataFrame, pd.Series] The first dataframe or series used for the name matching data_second: Union[pd.DataFrame, pd.Series] - The second dataframe or series used for the name matching, for matching the data to + The second dataframe or series used for the name matching, for matching the data to itself data_second should be equal to data first column_first: str - If data_first is a dataframe column_first should be the column in which the name + If data_first is a dataframe column_first should be the column in which the name that should be matched can be found for data_first default='' column_second: str - If data_second is a dataframe column_second should be the column in which the name + If data_second is a dataframe column_second should be the column in which the name that should be matched can be found for data_second default='' group_column_first: str - The name of the column that should be used to generate groups within the data_first + The name of the column that should be used to generate groups within the data_first dataframe. The matchig is then only performed for instances in which the groups are identical default='' group_column_second: str - The name of the column that should be used to generate groups within the data_second + The name of the column that should be used to generate groups within the data_second dataframe. The matchig is then only performed for instances in which the groups are identical default='' @@ -275,36 +321,56 @@ def match_names(data_first: Union[pd.DataFrame, pd.Series], ------- pd.DataFrame A dataframe containing the matched rows were the match score is above the threshold. The - dataframe consists of 4 columns; original_name: the original name from data_first after + dataframe consists of 4 columns; original_name: the original name from data_first after preprocessing, match_name_0: the name it is matched to from data_second after preprocessing, - score_0: the score of the match, match_index_0: the index of the match in data_second. The - match_index_0 can be used to join the data from both dataframes. + score_0: the score of the match, match_index_0: the index of the match in data_second. The + match_index_0 can be used to join the data from both dataframes. 
""" - if 'number_of_matches' in kwargs: + if "number_of_matches" in kwargs: raise ValueError( - 'The number of matches can only be changed by using a custom matching approach') + "The number of matches can only be changed by using a custom matching approach" + ) data_first = _match_names_check_data(data_first, column_first, group_column_first) - data_second = _match_names_check_data(data_second, column_second, group_column_second) + data_second = _match_names_check_data( + data_second, column_second, group_column_second + ) - name_column = 'name_matching_data' + name_column = "name_matching_data" - if ((group_column_first == '') & (group_column_second != '')) | ((group_column_second == '') & (group_column_first != '')): + if ((group_column_first == "") & (group_column_second != "")) | ( + (group_column_second == "") & (group_column_first != "") + ): raise ValueError( - 'For the grouping to work both the grouping column in the first as well as the second dataframe have to be indicated') + "For the grouping to work both the grouping column in the first as well as the second dataframe have to be indicated" + ) if (threshold > 100) | (threshold < 0): - raise ValueError('Please pick a threshold between 0 and 100') + raise ValueError("Please pick a threshold between 0 and 100") - data_first, data_second = _match_names_preprocess_data(name_column, data_first, - data_second, case_sensitive, punctuation_sensitive, special_character_sensitive) + data_first, data_second = _match_names_preprocess_data( + name_column, + data_first, + data_second, + case_sensitive, + punctuation_sensitive, + special_character_sensitive, + ) matcher = NameMatcher(**kwargs) - if group_column_first == '': - matches = _match_names_match_single(matcher, data_first, data_second, name_column) + if group_column_first == "": + matches = _match_names_match_single( + matcher, data_first, data_second, name_column + ) else: - matches = _match_names_match_group(matcher, data_first, data_second, - name_column, group_column_first, group_column_second) + matches = _match_names_match_group( + matcher, + data_first, + data_second, + name_column, + group_column_first, + group_column_second, + ) - return matches[matches['score'] > threshold] + return matches[matches["score"] > threshold] diff --git a/name_matching/sparse_cosine.py b/name_matching/sparse_cosine.py index 3891b60..ee02fd8 100644 --- a/name_matching/sparse_cosine.py +++ b/name_matching/sparse_cosine.py @@ -1,19 +1,20 @@ import numpy as np from tqdm import tqdm -# from numba import jit from scipy.sparse import csc_matrix, coo_matrix from typing import Union -# @jit(nopython=True, fastmath=True) -def _sparse_cosine_low_memory(matrix_row: np.array, - matrix_col: np.array, - matrix_data: np.array, - matrix_len: int, - vector_ind: np.array, - vector_data: np.array) -> np.array: + +def _sparse_cosine_low_memory( + matrix_row: np.array, + matrix_col: np.array, + matrix_data: np.array, + matrix_len: int, + vector_ind: np.array, + vector_data: np.array, +) -> np.array: """ A sparse cosine simularity calculation between a matrix and a vector. The sparse matrix should be sorted - in ascending order based on the matrix_col values. The vector should be sorted based on the indexes in + in ascending order based on the matrix_col values. The vector should be sorted based on the indexes in ascending order. 
Parameters @@ -46,17 +47,20 @@ def _sparse_cosine_low_memory(matrix_row: np.array, if ind == len(vector_ind): break if col == vector_ind[ind]: - res[matrix_row[mat_ind]] = res[matrix_row[mat_ind]] + \ - matrix_data[mat_ind] * vector_data[ind] + res[matrix_row[mat_ind]] = ( + res[matrix_row[mat_ind]] + matrix_data[mat_ind] * vector_data[ind] + ) return res -def _sparse_cosine_top_n_standard(matrix_a: csc_matrix, - matrix_b: csc_matrix, - number_of_rows_at_once: int, - top_n: int, - verbose: bool) -> np.array: +def _sparse_cosine_top_n_standard( + matrix_a: csc_matrix, + matrix_b: csc_matrix, + number_of_rows_at_once: int, + top_n: int, + verbose: bool, +) -> np.array: """ A function for sparse matrix multiplication followed by an argpartition to only take the top_n indexes. @@ -82,40 +86,51 @@ def _sparse_cosine_top_n_standard(matrix_a: csc_matrix, """ - results_arg = np.zeros( - (matrix_b.shape[0], top_n), dtype=np.float32) + results_arg = np.zeros((matrix_b.shape[0], top_n), dtype=np.float32) # Split up the matrice in a certain number of rows - for j in tqdm(range(0, matrix_b.shape[0], number_of_rows_at_once), disable=not verbose): + for j in tqdm( + range(0, matrix_b.shape[0], number_of_rows_at_once), disable=not verbose + ): number_of_rows_at_once_min = min( - [number_of_rows_at_once, matrix_b.shape[0]-j]) - matrix_b_temp = matrix_b[j:j+number_of_rows_at_once_min, :] + [number_of_rows_at_once, matrix_b.shape[0] - j] + ) + matrix_b_temp = matrix_b[j : j + number_of_rows_at_once_min, :] # Calculate the matrix dot product results_full = (matrix_a * (matrix_b_temp.T)).tocsc() # For each of the rows of the original matrix select the argpartition for i in range(number_of_rows_at_once_min): - results_full_temp = results_full.data[results_full.indptr[i]:results_full.indptr[i+1]] + results_full_temp = results_full.data[ + results_full.indptr[i] : results_full.indptr[i + 1] + ] # If there are more results then top_n only select the top_n results if len(results_full_temp) > top_n: - ind = results_full.indices[results_full.indptr[i]:results_full.indptr[i+1]] - results_arg[j + i, :] = ind[np.argpartition( - results_full_temp, -top_n)[-top_n:]] - + ind = results_full.indices[ + results_full.indptr[i] : results_full.indptr[i + 1] + ] + results_arg[j + i, :] = ind[ + np.argpartition(results_full_temp, -top_n)[-top_n:] + ] + # else just select all the results else: - results_arg[j + i, :len(results_full_temp) - ] = results_full.indices[results_full.indptr[i]:results_full.indptr[i+1]] + results_arg[j + i, : len(results_full_temp)] = results_full.indices[ + results_full.indptr[i] : results_full.indptr[i + 1] + ] return results_arg -def sparse_cosine_top_n(matrix_a: Union[csc_matrix, coo_matrix], - matrix_b: csc_matrix, - top_n: int, - low_memory: bool, - number_of_rows: int, - verbose: bool): + +def sparse_cosine_top_n( + matrix_a: Union[csc_matrix, coo_matrix], + matrix_b: csc_matrix, + top_n: int, + low_memory: bool, + number_of_rows: int, + verbose: bool, +): """ Calculates the top_n cosine matches between matrix_a and matrix_b. 
Takes into account the amount of memory that should be used based on the low_memory int @@ -131,7 +146,7 @@ def sparse_cosine_top_n(matrix_a: Union[csc_matrix, coo_matrix], low_memory: bool A bool indicating whether the low memory sparse cosine approach should be used number_of_rows: int - An int inidcating the number of rows which should be + An int inidcating the number of rows which should be processed at once when calculating the cosine simalarity verbose: bool A boolean indicating whether the progress should be printed @@ -144,11 +159,19 @@ def sparse_cosine_top_n(matrix_a: Union[csc_matrix, coo_matrix], """ if low_memory: matrix_b.sort_indices() - res = _sparse_cosine_low_memory(matrix_a.row, matrix_a.col, matrix_a.data, - matrix_a.shape[0], matrix_b.indices, matrix_b.data) + res = _sparse_cosine_low_memory( + matrix_a.row, + matrix_a.col, + matrix_a.data, + matrix_a.shape[0], + matrix_b.indices, + matrix_b.data, + ) top_n_adjusted = -np.min([top_n, len(res)]) return np.argpartition(res, top_n_adjusted, axis=0)[top_n_adjusted:] else: - return _sparse_cosine_top_n_standard(matrix_a, matrix_b, number_of_rows, top_n, verbose) \ No newline at end of file + return _sparse_cosine_top_n_standard( + matrix_a, matrix_b, number_of_rows, top_n, verbose + ) diff --git a/name_matching/test/test_name_matcher.py b/name_matching/test/test_name_matcher.py index a5ab6b3..25c7c44 100644 --- a/name_matching/test/test_name_matcher.py +++ b/name_matching/test/test_name_matcher.py @@ -9,36 +9,94 @@ import operator import re import name_matching.name_matcher as nm -from distances import Indel, DiscountedLevenshtein, CormodeLZ, Tichy, IterativeSubString, BaulieuXIII, Clement, DiceAsymmetricI, KuhnsIII, Overlap, PearsonII, WeightedJaccard, WarrensIV, Bag, RougeL, RatcliffObershelp, NCDbz2, FuzzyWuzzyPartialString, FuzzyWuzzyTokenSort, FuzzyWuzzyTokenSet, Editex, Typo,LIG3, SSK, Levenshtein, DoubleMetaphone, RefinedSoundex, PhoneticDistance +from distances import ( + Indel, + DiscountedLevenshtein, + CormodeLZ, + Tichy, + IterativeSubString, + BaulieuXIII, + Clement, + DiceAsymmetricI, + KuhnsIII, + Overlap, + PearsonII, + WeightedJaccard, + WarrensIV, + Bag, + RougeL, + RatcliffObershelp, + NCDbz2, + FuzzyWuzzyPartialString, + FuzzyWuzzyTokenSort, + FuzzyWuzzyTokenSet, + Editex, + Typo, + LIG3, + SSK, + Levenshtein, + DoubleMetaphone, + RefinedSoundex, + PhoneticDistance, +) @pytest.fixture def name_match(): package_dir = path.dirname(path.dirname(path.dirname(path.abspath(__file__)))) - data = pd.read_csv(path.join(package_dir, 'test','test_names.csv')) + data = pd.read_csv(path.join(package_dir, "test", "test_names.csv")) name_matcher = nm.NameMatcher() name_matcher.load_and_process_master_data( - 'company_name', data, start_processing=False, transform=False) + "company_name", data, start_processing=False, transform=False + ) return name_matcher + @pytest.fixture def original_name(): package_dir = path.dirname(path.dirname(path.dirname(path.abspath(__file__)))) - return pd.read_csv(path.join(package_dir, 'test','test_names.csv')) + return pd.read_csv(path.join(package_dir, "test", "test_names.csv")) @pytest.fixture def adjusted_name(): package_dir = path.dirname(path.dirname(path.dirname(path.abspath(__file__)))) - return pd.read_csv(path.join(package_dir, 'test','adjusted_test_names.csv')) + return pd.read_csv(path.join(package_dir, "test", "adjusted_test_names.csv")) @pytest.fixture def words(): - return ['fun', 'small', 'pool', 'fun', 'small', 'pool', 'sign', - 'small', 'pool', 'sign', 'sign', 'small', 
'pool', 'sign', 'paper', - 'oppose', 'paper', 'oppose', 'brown', 'pig', 'fat', 'oppose', 'paper', - 'oppose', 'brown', 'pig', 'fat', 'snail'] + return [ + "fun", + "small", + "pool", + "fun", + "small", + "pool", + "sign", + "small", + "pool", + "sign", + "sign", + "small", + "pool", + "sign", + "paper", + "oppose", + "paper", + "oppose", + "brown", + "pig", + "fat", + "oppose", + "paper", + "oppose", + "brown", + "pig", + "fat", + "snail", + ] + def number_of_words_in_legal_list(preprocess: bool) -> int: """ @@ -57,128 +115,193 @@ def number_of_words_in_legal_list(preprocess: bool) -> int: """ if preprocess: - set_of_words = set([re.sub(r'[^\w\s]', '', s).strip() for s in functools.reduce( - operator.iconcat, terms_by_country.values(), [])]) - set_of_words.update([re.sub(r'[^\w\s]', '', s).strip() for s in functools.reduce( - operator.iconcat, terms_by_type.values(), [])]) + set_of_words = set( + [ + re.sub(r"[^\w\s]", "", s).strip() + for s in functools.reduce( + operator.iconcat, terms_by_country.values(), [] + ) + ] + ) + set_of_words.update( + [ + re.sub(r"[^\w\s]", "", s).strip() + for s in functools.reduce(operator.iconcat, terms_by_type.values(), []) + ] + ) else: - set_of_words = set([s.strip() for s in functools.reduce( - operator.iconcat, terms_by_country.values(), [])]) - set_of_words.update([s.strip() for s in functools.reduce(operator.iconcat, terms_by_type.values(), [])]) + set_of_words = set( + [ + s.strip() + for s in functools.reduce( + operator.iconcat, terms_by_country.values(), [] + ) + ] + ) + set_of_words.update( + [ + s.strip() + for s in functools.reduce(operator.iconcat, terms_by_type.values(), []) + ] + ) return len(set_of_words) -@pytest.mark.parametrize("method", - ["", - None, - 'no_method'] - ) + +@pytest.mark.parametrize("method", ["", None, "no_method"]) def test_make_distance_metrics_error(name_match, method): with pytest.raises(TypeError): name_match.set_distance_metrics([method]) -@pytest.mark.parametrize("method, result", - [['indel', Indel()], - ['discounted_levenshtein', DiscountedLevenshtein()], - ['tichy', Tichy()], - ['cormodeL_z', CormodeLZ()], - ['iterative_sub_string', IterativeSubString()], - ['baulieu_xiii', BaulieuXIII()], - ['clement', Clement()], - ['dice_asymmetricI', DiceAsymmetricI()], - ['kuhns_iii', KuhnsIII()], - ['overlap', Overlap()], - ['pearson_ii', PearsonII()], - ['weighted_jaccard', WeightedJaccard()], - ['warrens_iv', WarrensIV()], - ['bag', Bag()], - ['rouge_l', RougeL()], - ['ratcliff_obershelp', RatcliffObershelp()], - ['ncd_bz2', NCDbz2()], - ['fuzzy_wuzzy_partial_string', - FuzzyWuzzyPartialString()], - ['fuzzy_wuzzy_token_sort', FuzzyWuzzyTokenSort()], - ['fuzzy_wuzzy_token_set', FuzzyWuzzyTokenSet()], - ['editex', Editex()], - ['typo', Typo()], - ['lig_3', LIG3()], - ['ssk', SSK()], - ['refined_soundex', PhoneticDistance(transforms=RefinedSoundex( - max_length=30), metric=Levenshtein(), encode_alpha=True)], - ['double_metaphone', PhoneticDistance(transforms=DoubleMetaphone(max_length=30), metric=Levenshtein(), encode_alpha=True)]] - ) +@pytest.mark.parametrize( + "method, result", + [ + ["indel", Indel()], + ["discounted_levenshtein", DiscountedLevenshtein()], + ["tichy", Tichy()], + ["cormodeL_z", CormodeLZ()], + ["iterative_sub_string", IterativeSubString()], + ["baulieu_xiii", BaulieuXIII()], + ["clement", Clement()], + ["dice_asymmetricI", DiceAsymmetricI()], + ["kuhns_iii", KuhnsIII()], + ["overlap", Overlap()], + ["pearson_ii", PearsonII()], + ["weighted_jaccard", WeightedJaccard()], + ["warrens_iv", WarrensIV()], 
+ ["bag", Bag()], + ["rouge_l", RougeL()], + ["ratcliff_obershelp", RatcliffObershelp()], + ["ncd_bz2", NCDbz2()], + ["fuzzy_wuzzy_partial_string", FuzzyWuzzyPartialString()], + ["fuzzy_wuzzy_token_sort", FuzzyWuzzyTokenSort()], + ["fuzzy_wuzzy_token_set", FuzzyWuzzyTokenSet()], + ["editex", Editex()], + ["typo", Typo()], + ["lig_3", LIG3()], + ["ssk", SSK()], + [ + "refined_soundex", + PhoneticDistance( + transforms=RefinedSoundex(max_length=30), + metric=Levenshtein(), + encode_alpha=True, + ), + ], + [ + "double_metaphone", + PhoneticDistance( + transforms=DoubleMetaphone(max_length=30), + metric=Levenshtein(), + encode_alpha=True, + ), + ], + ], +) def test_make_distance_metrics(name_match, method, result): name_match.set_distance_metrics([method]) assert type(name_match._distance_metrics.popitem()[1][0]) == type(result) -@pytest.mark.parametrize("kwargs_str, result_1, result_2, result_3, result_4", - [[{"ngrams": (4, 5)}, -1, False, (4, 5), 5000], - [{"low_memory": True}, -1, True, (2, 3), 5000], - [{"legal_suffixes": True}, 0, False, (2, 3), 5000], - [{"legal_suffixes": True, "number_of_rows": 8, - "ngrams": (1, 2, 3)}, 0, False, (1, 2, 3), 8], - ]) +@pytest.mark.parametrize( + "kwargs_str, result_1, result_2, result_3, result_4", + [ + [{"ngrams": (4, 5)}, -1, False, (4, 5), 5000], + [{"low_memory": True}, -1, True, (2, 3), 5000], + [{"legal_suffixes": True}, 0, False, (2, 3), 5000], + [ + {"legal_suffixes": True, "number_of_rows": 8, "ngrams": (1, 2, 3)}, + 0, + False, + (1, 2, 3), + 8, + ], + ], +) def test_initialisation(kwargs_str, result_1, result_2, result_3, result_4): name_match = nm.NameMatcher(**kwargs_str) number_of_words = 1 if result_1 > -1: - number_of_words = number_of_words_in_legal_list(name_match._preprocess_punctuations) + number_of_words = number_of_words_in_legal_list( + name_match._preprocess_punctuations + ) assert len(name_match._word_set) == number_of_words + result_1 assert name_match._low_memory == result_2 assert name_match._vec.ngram_range == result_3 assert name_match._number_of_rows == result_4 -@pytest.mark.parametrize("occ, result_1, result_2, result_3, result_4, result_5", - [[1, '', '', '', '', ''], - [2, 'Schiller', 'Sch-ster, an[', - 'Runolfsson, Tashirian Will', 'Hyats, S|nger', 'Ankunding-Harb-er'], - [3, 'Schiller', 'Sch-ster, Raynor an[ Hermann', - 'Runolfsson, Tashirian Will', 'Hyats, Durgan S|nger', 'Ankunding-Harb-er'], - ]) -def test_preprocess_reduce(name_match, adjusted_name, occ, result_1, result_2, result_3, result_4, result_5): - - name_match._column_matching = 'company_name' - new_names = name_match._preprocess_reduce( - adjusted_name, occurrence_count=occ) - assert new_names.loc[166, 'company_name'] == result_1 - assert new_names.loc[423, 'company_name'] == result_2 - assert new_names.loc[268, 'company_name'] == result_3 - assert new_names.loc[59, 'company_name'] == result_4 - assert new_names.loc[18, 'company_name'] == result_5 - - -@pytest.mark.parametrize("col, start_pro, transform", - [['company_name', False, False], - ['no_name', False, False], - ['company_name', True, False], - ['company_name', True, True], - ['company_name', True, True], - ]) +@pytest.mark.parametrize( + "occ, result_1, result_2, result_3, result_4, result_5", + [ + [1, "", "", "", "", ""], + [ + 2, + "Schiller", + "Sch-ster, an[", + "Runolfsson, Tashirian Will", + "Hyats, S|nger", + "Ankunding-Harb-er", + ], + [ + 3, + "Schiller", + "Sch-ster, Raynor an[ Hermann", + "Runolfsson, Tashirian Will", + "Hyats, Durgan S|nger", + "Ankunding-Harb-er", + ], + ], +) 
+def test_preprocess_reduce( + name_match, adjusted_name, occ, result_1, result_2, result_3, result_4, result_5 +): + + name_match._column_matching = "company_name" + new_names = name_match._preprocess_reduce(adjusted_name, occurrence_count=occ) + assert new_names.loc[166, "company_name"] == result_1 + assert new_names.loc[423, "company_name"] == result_2 + assert new_names.loc[268, "company_name"] == result_3 + assert new_names.loc[59, "company_name"] == result_4 + assert new_names.loc[18, "company_name"] == result_5 + + +@pytest.mark.parametrize( + "col, start_pro, transform", + [ + ["company_name", False, False], + ["no_name", False, False], + ["company_name", True, False], + ["company_name", True, True], + ["company_name", True, True], + ], +) def test_load_and_process_master_data(adjusted_name, col, start_pro, transform): name_matcher = nm.NameMatcher() name_matcher.load_and_process_master_data( column=col, df_matching_data=adjusted_name, start_processing=start_pro, - transform=transform) + transform=transform, + ) assert name_matcher._column == col - pd.testing.assert_frame_equal( - name_matcher._df_matching_data, adjusted_name) + pd.testing.assert_frame_equal(name_matcher._df_matching_data, adjusted_name) assert name_matcher._preprocessed == start_pro if transform & start_pro: assert type(name_matcher._n_grams_matching) == csc_matrix -@pytest.mark.parametrize("trans, common", - [[False, False], - [True, False], - [False, True], - [True, True], - ]) +@pytest.mark.parametrize( + "trans, common", + [ + [False, False], + [True, False], + [False, True], + [True, True], + ], +) def test_process_matching_data(name_match, trans, common): name_match._postprocess_common_words = common name_match._process_matching_data(transform=trans) @@ -194,126 +317,383 @@ def test_process_matching_data(name_match, trans, common): assert len(name_match._word_set) == 0 -@pytest.mark.parametrize("lower_case, punctuations, ascii, result_1, result_2, result_3", - [[False, False, False, 'Schumm PLC', 'Towne, Johnston and Murray', 'Ösinski-Schinner'], - [True, False, False, 'schumm plc', - 'towne, johnston and murray', 'ösinski-schinner'], - [False, True, False, 'Schumm PLC', - 'Towne Johnston and Murray', 'ÖsinskiSchinner'], - [False, False, True, 'Schumm PLC', - 'Towne, Johnston and Murray', 'Osinski-Schinner'], - [False, True, True, 'Schumm PLC', - 'Towne Johnston and Murray', 'OsinskiSchinner'], - [True, False, True, 'schumm plc', - 'towne, johnston and murray', 'osinski-schinner'], - [True, True, False, 'schumm plc', - 'towne johnston and murray', 'ösinskischinner'], - [True, True, True, 'schumm plc', - 'towne johnston and murray', 'osinskischinner'], - ]) -def test_preprocess(name_match, lower_case, punctuations, ascii, result_1, result_2, result_3): +@pytest.mark.parametrize( + "lower_case, punctuations, ascii, result_1, result_2, result_3", + [ + [ + False, + False, + False, + "Schumm PLC", + "Towne, Johnston and Murray", + "Ösinski-Schinner", + ], + [ + True, + False, + False, + "schumm plc", + "towne, johnston and murray", + "ösinski-schinner", + ], + [ + False, + True, + False, + "Schumm PLC", + "Towne Johnston and Murray", + "ÖsinskiSchinner", + ], + [ + False, + False, + True, + "Schumm PLC", + "Towne, Johnston and Murray", + "Osinski-Schinner", + ], + [ + False, + True, + True, + "Schumm PLC", + "Towne Johnston and Murray", + "OsinskiSchinner", + ], + [ + True, + False, + True, + "schumm plc", + "towne, johnston and murray", + "osinski-schinner", + ], + [ + True, + True, + False, + "schumm plc", + 
"towne johnston and murray", + "ösinskischinner", + ], + [ + True, + True, + True, + "schumm plc", + "towne johnston and murray", + "osinskischinner", + ], + ], +) +def test_preprocess( + name_match, lower_case, punctuations, ascii, result_1, result_2, result_3 +): name_match._preprocess_lowercase = lower_case name_match._preprocess_punctuations = punctuations name_match._preprocess_ascii = ascii - new_df = name_match.preprocess( - name_match._df_matching_data, 'company_name') - assert new_df.loc[0, 'company_name'] == result_1 - assert new_df.loc[2, 'company_name'] == result_2 - assert new_df.loc[432, 'company_name'] == result_3 - - -@pytest.mark.parametrize("low_memory, ngrams, result_1, result_2, result_3", - [[1, (5, 6), 0.00689, 0.00892, 0.02242], - [6, (2, 3), 0.01044, 0.01092, 0.035], - [8, (1, 2), 0.02729, 0.02783, 0.02324], - [0, (5, 6), 0.00689, 0.00892, 0.02242], - [0, (2, 3), 0.01044, 0.01092, 0.035], - [0, (1, 2), 0.02729, 0.02783, 0.02324], - ]) + new_df = name_match.preprocess(name_match._df_matching_data, "company_name") + assert new_df.loc[0, "company_name"] == result_1 + assert new_df.loc[2, "company_name"] == result_2 + assert new_df.loc[432, "company_name"] == result_3 + + +@pytest.mark.parametrize( + "low_memory, ngrams, result_1, result_2, result_3", + [ + [1, (5, 6), 0.00689, 0.00892, 0.02242], + [6, (2, 3), 0.01044, 0.01092, 0.035], + [8, (1, 2), 0.02729, 0.02783, 0.02324], + [0, (5, 6), 0.00689, 0.00892, 0.02242], + [0, (2, 3), 0.01044, 0.01092, 0.035], + [0, (1, 2), 0.02729, 0.02783, 0.02324], + ], +) def test_transform_data(name_match, low_memory, ngrams, result_1, result_2, result_3): name_match._low_memory = low_memory name_match._vec = TfidfVectorizer( - lowercase=False, analyzer="char", ngram_range=ngrams) + lowercase=False, analyzer="char", ngram_range=ngrams + ) name_match._process_matching_data(transform=False) name_match.transform_data() - assert name_match._n_grams_matching.data[10] == pytest.approx( - result_1, 0.001) - assert name_match._n_grams_matching.data[181] == pytest.approx( - result_2, 0.001) - assert name_match._n_grams_matching.data[1000] == pytest.approx( - result_3, 0.001) - - -@pytest.mark.parametrize("to_be_matched, possible_matches, metrics, result", - [('De Nederlandsche Bank', ['Nederlandsche Bank', 'De Nederlancsh Bank', 'De Nederlandse Bank', 'Bank de Nederlandsche'], ['weighted_jaccard'], 2), - ('De Nederlandsche Bank', ['Nederlandsche Bank', 'De Nederlancsh Bank', 'De Nederlandse Bank', 'Bank de Nederlandsche'], [ - 'weighted_jaccard', 'discounted_levenshtein'], 5), - ('De Nederlandsche Bank', ['Nederlandsche Bank', 'De Nederlancsh Bank', 'De Nederlandse Bank', 'Bank de Nederlandsche'], [ - 'weighted_jaccard', 'discounted_levenshtein', 'iterative_sub_string'], 7), - ('De Nederlandsche Bank', ['Nederlandsche Bank', 'De Nederlancsh Bank', 'De Nederlandse Bank', 'Bank de Nederlandsche'], [ - 'weighted_jaccard', 'overlap', 'iterative_sub_string'], 6), - ('De Nederlandsche Bank', ['Nederlandsche Bank', 'De Nederlancsh Bank', 'De Nederlandse Bank', 'Bank de Nederlandsche'], [ - 'weighted_jaccard', 'overlap', 'bag'], 11), - ('De Nederlandsche Bank', ['Nederlandsche Bank', 'De Nederlancsh Bank', - 'De Nederlandsche Bank', 'Bank de Nederlandsche'], ['weighted_jaccard'], 2), - ('De Nederlandsche Bank', ['Nederlandsche Bank', 'De Nederlancsh Bank', 'De Nederlandsche Bank', 'Bank de Nederlandsche'], [ - 'weighted_jaccard', 'discounted_levenshtein'], 4), - ('De Nederlandsche Bank', ['Nederlandsche Bank', 'De Nederlancsh Bank', 'De Nederlandsche 
Bank', 'Bank de Nederlandsche'], [ - 'weighted_jaccard', 'discounted_levenshtein', 'iterative_sub_string'], 6), - ('De Nederlandsche Bank', ['Nederlandsche Bank', 'De Nederlancsh Bank', 'De Nederlandsche Bank', 'Bank de Nederlandsche'], [ - 'weighted_jaccard', 'overlap', 'iterative_sub_string'], 6), - ('De Nederlandsche Bank', ['Nederlandsche Bank', 'De Nederlancsh Bank', 'De Nederlandsche Bank', 'Bank de Nederlandsche'], [ - 'weighted_jaccard', 'overlap', 'bag'], 6), - ('Schumm PLC', ['Torphy-Corkery', 'Hansen, Hoppe and Tillman', - 'Gerlach and Sons', 'Bank de Nederlandsche'], ['weighted_jaccard'], 2), - ('Schumm PLC', ['Torphy-Corkery', 'Hansen, Hoppe and Tillman', 'Gerlach and Sons', - 'Bank de Nederlandsche'], ['weighted_jaccard', 'discounted_levenshtein'], 4), - ('Schumm PLC', ['Torphy-Corkery', 'Hansen, Hoppe and Tillman', 'Gerlach and Sons', 'Bank de Nederlandsche'], [ - 'weighted_jaccard', 'discounted_levenshtein', 'iterative_sub_string'], 6), - ('Schumm PLC', ['Torphy-Corkery', 'Hansen, Hoppe and Tillman', 'Gerlach and Sons', - 'Bank de Nederlandsche'], ['weighted_jaccard', 'overlap', 'iterative_sub_string'], 8), - ('Schumm PLC', ['Torphy-Corkery', 'Hansen, Hoppe and Tillman', 'Gerlach and Sons', - 'Bank de Nederlandsche'], ['weighted_jaccard', 'overlap', 'bag'], 8) - ]) + assert name_match._n_grams_matching.data[10] == pytest.approx(result_1, 0.001) + assert name_match._n_grams_matching.data[181] == pytest.approx(result_2, 0.001) + assert name_match._n_grams_matching.data[1000] == pytest.approx(result_3, 0.001) + + +@pytest.mark.parametrize( + "to_be_matched, possible_matches, metrics, result", + [ + ( + "De Nederlandsche Bank", + [ + "Nederlandsche Bank", + "De Nederlancsh Bank", + "De Nederlandse Bank", + "Bank de Nederlandsche", + ], + ["weighted_jaccard"], + 2, + ), + ( + "De Nederlandsche Bank", + [ + "Nederlandsche Bank", + "De Nederlancsh Bank", + "De Nederlandse Bank", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "discounted_levenshtein"], + 5, + ), + ( + "De Nederlandsche Bank", + [ + "Nederlandsche Bank", + "De Nederlancsh Bank", + "De Nederlandse Bank", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "discounted_levenshtein", "iterative_sub_string"], + 7, + ), + ( + "De Nederlandsche Bank", + [ + "Nederlandsche Bank", + "De Nederlancsh Bank", + "De Nederlandse Bank", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "overlap", "iterative_sub_string"], + 6, + ), + ( + "De Nederlandsche Bank", + [ + "Nederlandsche Bank", + "De Nederlancsh Bank", + "De Nederlandse Bank", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "overlap", "bag"], + 11, + ), + ( + "De Nederlandsche Bank", + [ + "Nederlandsche Bank", + "De Nederlancsh Bank", + "De Nederlandsche Bank", + "Bank de Nederlandsche", + ], + ["weighted_jaccard"], + 2, + ), + ( + "De Nederlandsche Bank", + [ + "Nederlandsche Bank", + "De Nederlancsh Bank", + "De Nederlandsche Bank", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "discounted_levenshtein"], + 4, + ), + ( + "De Nederlandsche Bank", + [ + "Nederlandsche Bank", + "De Nederlancsh Bank", + "De Nederlandsche Bank", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "discounted_levenshtein", "iterative_sub_string"], + 6, + ), + ( + "De Nederlandsche Bank", + [ + "Nederlandsche Bank", + "De Nederlancsh Bank", + "De Nederlandsche Bank", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "overlap", "iterative_sub_string"], + 6, + ), + ( + "De Nederlandsche Bank", + [ + "Nederlandsche Bank", + "De Nederlancsh Bank", + "De 
Nederlandsche Bank", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "overlap", "bag"], + 6, + ), + ( + "Schumm PLC", + [ + "Torphy-Corkery", + "Hansen, Hoppe and Tillman", + "Gerlach and Sons", + "Bank de Nederlandsche", + ], + ["weighted_jaccard"], + 2, + ), + ( + "Schumm PLC", + [ + "Torphy-Corkery", + "Hansen, Hoppe and Tillman", + "Gerlach and Sons", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "discounted_levenshtein"], + 4, + ), + ( + "Schumm PLC", + [ + "Torphy-Corkery", + "Hansen, Hoppe and Tillman", + "Gerlach and Sons", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "discounted_levenshtein", "iterative_sub_string"], + 6, + ), + ( + "Schumm PLC", + [ + "Torphy-Corkery", + "Hansen, Hoppe and Tillman", + "Gerlach and Sons", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "overlap", "iterative_sub_string"], + 8, + ), + ( + "Schumm PLC", + [ + "Torphy-Corkery", + "Hansen, Hoppe and Tillman", + "Gerlach and Sons", + "Bank de Nederlandsche", + ], + ["weighted_jaccard", "overlap", "bag"], + 8, + ), + ], +) def test_score_matches(to_be_matched, possible_matches, metrics, result): name_match = nm.NameMatcher() name_match.set_distance_metrics(metrics) - assert np.argmax(name_match._score_matches( - to_be_matched, possible_matches)) == result - - -@pytest.mark.parametrize("number_of_matches, match_score, metrics, result", - [(1, np.array([[0.9, 0.3, 0.5, 0.2, 0.1]]), ['weighted_jaccard'], [0]), - (2, np.array([[0.9, 0.3, 0.5, 0.2, 0.1], [0.6, 0.7, 0.8, 0.4, 0.5]]), [ - 'weighted_jaccard', 'discounted_levenshtein'], [0, 1]), - (3, np.array([[0.9, 0.3, 0.5, 0.2, 0.1], [0.6, 0.7, 0.8, 0.4, 0.5], [1, 0.2, 0.3, 0.2, 0.1]]), [ - 'weighted_jaccard', 'discounted_levenshtein', 'iterative_sub_string'], [2, 1, 1]), - (2, np.array([[0.9, 0.3, 0.5, 0.2, 0.1], [0.6, 0.7, 0.8, 0.4, 0.5], [ - 1, 0.2, 0.3, 0.2, 0.1]]), ['tichy', 'overlap', 'bag'], [2, 1]), - (2, np.array([[0.9, 0.3, 0.5, 0.2, 0.1], [0.6, 0.7, 0.8, 0.4, 0.5]]), [ - 'overlap', 'bag'], [0, 2]), - (1, np.array([[0.9, 0.3, 0.5, 0.2, 0.1], [0.6, 0.7, 0.8, 0.4, 0.5], [ - 1, 0.2, 0.3, 0.2, 0.1]]), ['weighted_jaccard', 'overlap', 'iterative_sub_string'], [1]), - (2, np.array([[0.9, 0.3, 0.5, 0.2, 0.1], [0.6, 0.7, 0.8, 0.4, 0.5], [ - 1, 0.2, 0.3, 0.2, 0.1]]), ['weighted_jaccard', 'overlap', 'bag'], [1, 0]), - (1, np.array([[0.3, 0.3, 0.8, 0.2, 0.2]]), [ - 'weighted_jaccard'], [0]), - (3, np.array([[0.3, 0.3, 0.8, 0.2, 0.2], [0.3, 0.3, 0.8, 0.1, 0.1]]), [ - 'weighted_jaccard', 'discounted_levenshtein'], [0, 1]), - (2, np.array([[0.3, 0.3, 0.2, 0.1, 0.02], [0.1, 0.1, 0.2, 0.3, 0.02]]), [ - 'weighted_jaccard', 'iterative_sub_string'], [0, 0]), - (1, np.array([[0.3, 0.3, 0.2, 0.1, 0.02], [0.3, 0.3, 0.2, 0.3, 0.02]]), [ - 'overlap', 'iterative_sub_string'], [1]), - (1, np.array( - [[-0.5, -0.8, -0.3, -0.7, 0, 2]]), ['bag'], [0]), - (1, np.array( - [[-0.5, -0.8, -0.3, -0.7, 0, 2]]), ['BAG'], [0]), - (3, np.array([[10, 8, 7, 6, 12, 15, 14, 88]]), [ - 'weighted_jaccard'], [0]), - (2, np.array([[1, 0.3], [0.1, 0.4]]), [ - 'weighted_jaccard', 'discounted_levenshtein'], [0, 1]) - ]) + assert ( + np.argmax(name_match._score_matches(to_be_matched, possible_matches)) == result + ) + + +@pytest.mark.parametrize( + "number_of_matches, match_score, metrics, result", + [ + (1, np.array([[0.9, 0.3, 0.5, 0.2, 0.1]]), ["weighted_jaccard"], [0]), + ( + 2, + np.array([[0.9, 0.3, 0.5, 0.2, 0.1], [0.6, 0.7, 0.8, 0.4, 0.5]]), + ["weighted_jaccard", "discounted_levenshtein"], + [0, 1], + ), + ( + 3, + np.array( + [ + [0.9, 0.3, 0.5, 0.2, 0.1], + 
[0.6, 0.7, 0.8, 0.4, 0.5], + [1, 0.2, 0.3, 0.2, 0.1], + ] + ), + ["weighted_jaccard", "discounted_levenshtein", "iterative_sub_string"], + [2, 1, 1], + ), + ( + 2, + np.array( + [ + [0.9, 0.3, 0.5, 0.2, 0.1], + [0.6, 0.7, 0.8, 0.4, 0.5], + [1, 0.2, 0.3, 0.2, 0.1], + ] + ), + ["tichy", "overlap", "bag"], + [2, 1], + ), + ( + 2, + np.array([[0.9, 0.3, 0.5, 0.2, 0.1], [0.6, 0.7, 0.8, 0.4, 0.5]]), + ["overlap", "bag"], + [0, 2], + ), + ( + 1, + np.array( + [ + [0.9, 0.3, 0.5, 0.2, 0.1], + [0.6, 0.7, 0.8, 0.4, 0.5], + [1, 0.2, 0.3, 0.2, 0.1], + ] + ), + ["weighted_jaccard", "overlap", "iterative_sub_string"], + [1], + ), + ( + 2, + np.array( + [ + [0.9, 0.3, 0.5, 0.2, 0.1], + [0.6, 0.7, 0.8, 0.4, 0.5], + [1, 0.2, 0.3, 0.2, 0.1], + ] + ), + ["weighted_jaccard", "overlap", "bag"], + [1, 0], + ), + (1, np.array([[0.3, 0.3, 0.8, 0.2, 0.2]]), ["weighted_jaccard"], [0]), + ( + 3, + np.array([[0.3, 0.3, 0.8, 0.2, 0.2], [0.3, 0.3, 0.8, 0.1, 0.1]]), + ["weighted_jaccard", "discounted_levenshtein"], + [0, 1], + ), + ( + 2, + np.array([[0.3, 0.3, 0.2, 0.1, 0.02], [0.1, 0.1, 0.2, 0.3, 0.02]]), + ["weighted_jaccard", "iterative_sub_string"], + [0, 0], + ), + ( + 1, + np.array([[0.3, 0.3, 0.2, 0.1, 0.02], [0.3, 0.3, 0.2, 0.3, 0.02]]), + ["overlap", "iterative_sub_string"], + [1], + ), + (1, np.array([[-0.5, -0.8, -0.3, -0.7, 0, 2]]), ["bag"], [0]), + (1, np.array([[-0.5, -0.8, -0.3, -0.7, 0, 2]]), ["BAG"], [0]), + (3, np.array([[10, 8, 7, 6, 12, 15, 14, 88]]), ["weighted_jaccard"], [0]), + ( + 2, + np.array([[1, 0.3], [0.1, 0.4]]), + ["weighted_jaccard", "discounted_levenshtein"], + [0, 1], + ), + ], +) def test_rate_matches(number_of_matches, match_score, metrics, result): name_match = nm.NameMatcher() name_match._number_of_matches = number_of_matches @@ -329,37 +709,107 @@ def test_vectorise_data(name_match): assert len(name_match._vec.vocabulary_) > 0 -@pytest.mark.parametrize("match, number_of_matches, word_set, score, result", - [(pd.Series(['Nederandsche', 0, 2, 'De Nederlandsche Bank'], index=['match_name_0', 'score_0', 'match_index_0', 'original_name']), 1, set(['De', 'Bank', 'nl']), 0, 94.553), - (pd.Series(['Nederandsche', 0, 2, 'De Nederlandsche Bank'], index=[ - 'match_name_0', 'score_0', 'match_index_0', 'original_name']), 1, set(['komt', 'niet', 'voor']), 0, 69.713), - (pd.Series(['nederandsche', 0, 2, 'de nederand bank', 0.4, 3, 'De Nederlandsche Bank'], index=[ - 'match_name_0', 'score_0', 'match_index_0', 'match_name_1', 'score_1', 'match_index_1', 'original_name']), 1, set(['De', 'Bank', 'nl']), 1, 0.4), - (pd.Series(['nederandsche', 0, 2, 'de nederand bank', 0.4, 3, 'De Nederlandsche Bank'], index=[ - 'match_name_0', 'score_0', 'match_index_0', 'match_name_1', 'score_1', 'match_index_1', 'original_name']), 1, set(['De', 'Bank', 'nl']), 0, 86.031), - ]) +@pytest.mark.parametrize( + "match, number_of_matches, word_set, score, result", + [ + ( + pd.Series( + ["Nederandsche", 0, 2, "De Nederlandsche Bank"], + index=["match_name_0", "score_0", "match_index_0", "original_name"], + ), + 1, + set(["De", "Bank", "nl"]), + 0, + 94.553, + ), + ( + pd.Series( + ["Nederandsche", 0, 2, "De Nederlandsche Bank"], + index=["match_name_0", "score_0", "match_index_0", "original_name"], + ), + 1, + set(["komt", "niet", "voor"]), + 0, + 69.713, + ), + ( + pd.Series( + [ + "nederandsche", + 0, + 2, + "de nederand bank", + 0.4, + 3, + "De Nederlandsche Bank", + ], + index=[ + "match_name_0", + "score_0", + "match_index_0", + "match_name_1", + "score_1", + "match_index_1", + "original_name", + ], + ), + 1, + 
set(["De", "Bank", "nl"]), + 1, + 0.4, + ), + ( + pd.Series( + [ + "nederandsche", + 0, + 2, + "de nederand bank", + 0.4, + 3, + "De Nederlandsche Bank", + ], + index=[ + "match_name_0", + "score_0", + "match_index_0", + "match_name_1", + "score_1", + "match_index_1", + "original_name", + ], + ), + 1, + set(["De", "Bank", "nl"]), + 0, + 86.031, + ), + ], +) def test_postprocess(name_match, match, number_of_matches, word_set, score, result): name_match._number_of_matches = number_of_matches name_match._word_set = word_set new_match = name_match.postprocess(match) - assert new_match.loc[f'score_{score}'] == pytest.approx(result, 0.0001) - - -@pytest.mark.parametrize("indicator, punctuations, word_set, cut_off, result_1, result_2", - [('legal', False, set(), 0.01, 'plc.', 'bedrijf'), - ('legal', True, set(), 0.01, 'plc', 'bedrijf'), - ('legal', True, set(['bedrijf']), - 0.01, 'bedrijf', 'Group'), - ('common', True, set(), 0.01, 'Group', 'bedrijf'), - ('common', True, set(), 0.3, 'and', 'Group'), - ('common', True, set(['West']), - 0.3, 'West', 'bedrijf'), - ('someting', True, set(['key']), 0.01, 'key', 'val') - ]) -def test_make_no_scoring_words(name_match, indicator, punctuations, word_set, cut_off, result_1, result_2): + assert new_match.loc[f"score_{score}"] == pytest.approx(result, 0.0001) + + +@pytest.mark.parametrize( + "indicator, punctuations, word_set, cut_off, result_1, result_2", + [ + ("legal", False, set(), 0.01, "plc.", "bedrijf"), + ("legal", True, set(), 0.01, "plc", "bedrijf"), + ("legal", True, set(["bedrijf"]), 0.01, "bedrijf", "Group"), + ("common", True, set(), 0.01, "Group", "bedrijf"), + ("common", True, set(), 0.3, "and", "Group"), + ("common", True, set(["West"]), 0.3, "West", "bedrijf"), + ("someting", True, set(["key"]), 0.01, "key", "val"), + ], +) +def test_make_no_scoring_words( + name_match, indicator, punctuations, word_set, cut_off, result_1, result_2 +): name_match._preprocess_punctuations = punctuations - new_word_set = name_match._make_no_scoring_words( - indicator, word_set, cut_off) + new_word_set = name_match._make_no_scoring_words(indicator, word_set, cut_off) print(new_word_set) assert new_word_set.issuperset(set([result_1])) assert not new_word_set.issuperset(set([result_2])) @@ -371,18 +821,23 @@ def test_search_for_possible_matches_error(adjusted_name): name_matcher._search_for_possible_matches(adjusted_name) -@pytest.mark.parametrize("top_n, low_memory, number_of_rows, result_1, result_2", - [(10, True, 55, 469, 144), - (50, True, 112, 499, 6), - (100, True, 112, 499, 1), - (1, True, 112, 44, 144), - (10, False, 500, 469, 144), - (50, False, 1500, 499, 6), - (100, False, 500, 499, 1), - (1, False, 500, 44, 144) - ]) -def test_search_for_possible_matches(name_match, adjusted_name, top_n, low_memory, number_of_rows, result_1, result_2): - name_match._column_matching = 'company_name' +@pytest.mark.parametrize( + "top_n, low_memory, number_of_rows, result_1, result_2", + [ + (10, True, 55, 469, 144), + (50, True, 112, 499, 6), + (100, True, 112, 499, 1), + (1, True, 112, 44, 144), + (10, False, 500, 469, 144), + (50, False, 1500, 499, 6), + (100, False, 500, 499, 1), + (1, False, 500, 44, 144), + ], +) +def test_search_for_possible_matches( + name_match, adjusted_name, top_n, low_memory, number_of_rows, result_1, result_2 +): + name_match._column_matching = "company_name" name_match._low_memory = low_memory name_match._number_of_rows = number_of_rows name_match._top_n = top_n @@ -395,131 +850,207 @@ def test_search_for_possible_matches(name_match, 
adjusted_name, top_n, low_memor assert np.min(possible_match[144, :]) == result_2 -@pytest.mark.parametrize("common_words, num_matches, possible_matches, matching_series, result_0, result_1", - [(True, 3, np.array([29, 343, 126, 238, 445]), pd.Series( - ['Company and Sons'], index=['company_name']), 31.33, 31.77), - (False, 2, np.array([29, 343, 126, ]), pd.Series( - ['Company and Sons'], index=['company_name']), 71.28, 68.6), - (False, 2, np.array([29, 343]), pd.Series( - ['Company and Sons'], index=['company_name']), 71.28, 68.6), - (['Sons', 'and'], 3, np.array([29, 343, 126, 238, 445]), pd.Series( - ['Company and Sons'], index=['company_name']), 31.33, 31.77), - (False, 2, np.array([[29, 343], [0, 0]]), pd.Series( - ['Company and Sons'], index=['company_name']), 71.28, 68.6), - (False, 2, np.array([29, 343, 126, 238, 445]), pd.Series( - ['Company and Sons'], index=['company_name']), 71.28, 68.6) - ]) -def test_fuzzy_matches(name_match, common_words, num_matches, possible_matches, matching_series, result_0, result_1): - name_match._column_matching = 'company_name' +@pytest.mark.parametrize( + "common_words, num_matches, possible_matches, matching_series, result_0, result_1", + [ + ( + True, + 3, + np.array([29, 343, 126, 238, 445]), + pd.Series(["Company and Sons"], index=["company_name"]), + 31.33, + 31.77, + ), + ( + False, + 2, + np.array( + [ + 29, + 343, + 126, + ] + ), + pd.Series(["Company and Sons"], index=["company_name"]), + 71.28, + 68.6, + ), + ( + False, + 2, + np.array([29, 343]), + pd.Series(["Company and Sons"], index=["company_name"]), + 71.28, + 68.6, + ), + ( + ["Sons", "and"], + 3, + np.array([29, 343, 126, 238, 445]), + pd.Series(["Company and Sons"], index=["company_name"]), + 31.33, + 31.77, + ), + ( + False, + 2, + np.array([[29, 343], [0, 0]]), + pd.Series(["Company and Sons"], index=["company_name"]), + 71.28, + 68.6, + ), + ( + False, + 2, + np.array([29, 343, 126, 238, 445]), + pd.Series(["Company and Sons"], index=["company_name"]), + 71.28, + 68.6, + ), + ], +) +def test_fuzzy_matches( + name_match, + common_words, + num_matches, + possible_matches, + matching_series, + result_0, + result_1, +): + name_match._column_matching = "company_name" name_match._number_of_matches = num_matches name_match._postprocess_common_words = common_words if isinstance(common_words, list): name_match._word_set = set(common_words) elif common_words: - name_match._word_set = set(['Sons', 'and']) + name_match._word_set = set(["Sons", "and"]) else: name_match._word_set = set() match = name_match.fuzzy_matches(possible_matches, matching_series) - assert match['score_0'] == pytest.approx(result_0, 0.0001) - assert match['score_1'] == pytest.approx(result_1, 0.0001) - assert match['match_index_0'] in possible_matches - assert match['match_index_1'] in possible_matches + assert match["score_0"] == pytest.approx(result_0, 0.0001) + assert match["score_1"] == pytest.approx(result_1, 0.0001) + assert match["match_index_0"] in possible_matches + assert match["match_index_1"] in possible_matches def test_do_name_matching_split(name_match, adjusted_name): name_match._preprocess_split = True - result = name_match.match_names(adjusted_name.iloc[44, :], 'company_name') - assert np.any(result['match_index'] == 44) + result = name_match.match_names(adjusted_name.iloc[44, :], "company_name") + assert np.any(result["match_index"] == 44) def test_do_name_matching_series(name_match, adjusted_name): - result = name_match.match_names(adjusted_name.iloc[44, :], 'company_name') - assert 
np.any(result['match_index'] == 44) + result = name_match.match_names(adjusted_name.iloc[44, :], "company_name") + assert np.any(result["match_index"] == 44) + def test_do_name_matching_full(name_match, adjusted_name): - result = name_match.match_names(adjusted_name, 'company_name') - assert np.sum(result['match_index'] == result.index) == 491 - -@pytest.mark.parametrize("old_index, new_index, adjust, size_a, size_b, match_result", - [[10, 'new', False, 20, 20, 'new'], - [10, 'new', True, 20, 20, 10], - [10, 526, False, 20, 20, 526], - [10, 526, True, 20, 20, 10], - [4, 201, True, 20, 50, 4], - [8, 201, False, 20, 50, 201], - [8, 44, True, 50, 20, 8], - [4, 44, False, 50, 20, 44], - ]) -def test_do_name_matching_switch_index(original_name, old_index, new_index, adjust, size_a, size_b, match_result): + result = name_match.match_names(adjusted_name, "company_name") + assert np.sum(result["match_index"] == result.index) == 493 + + +@pytest.mark.parametrize( + "old_index, new_index, adjust, size_a, size_b, match_result", + [ + [10, "new", False, 20, 20, "new"], + [10, "new", True, 20, 20, 10], + [10, 526, False, 20, 20, 526], + [10, 526, True, 20, 20, 10], + [4, 201, True, 20, 50, 4], + [8, 201, False, 20, 50, 201], + [8, 44, True, 50, 20, 8], + [4, 44, False, 50, 20, 44], + ], +) +def test_do_name_matching_switch_index( + original_name, old_index, new_index, adjust, size_a, size_b, match_result +): name_match = nm.NameMatcher(row_numbers=adjust, verbose=False) adjusted_name = original_name.copy() - original_name = original_name.rename(index={old_index:new_index}) + original_name = original_name.rename(index={old_index: new_index}) name_match.load_and_process_master_data( - 'company_name', original_name.iloc[:size_a,:], start_processing=False, transform=False) - result = name_match.match_names(adjusted_name.iloc[:size_b,:], 'company_name') - assert result.loc[old_index, 'match_index'] == match_result + "company_name", + original_name.iloc[:size_a, :], + start_processing=False, + transform=False, + ) + result = name_match.match_names(adjusted_name.iloc[:size_b, :], "company_name") + assert result.loc[old_index, "match_index"] == match_result + def test_do_name_matching_error(adjusted_name): name_match = nm.NameMatcher() with pytest.raises(ValueError): - name_match.match_names(adjusted_name, 'company_name') + name_match.match_names(adjusted_name, "company_name") @pytest.mark.parametrize("verbose", [True, False]) def test_do_name_matching_print(capfd, name_match, adjusted_name, verbose): name_match._verbose = verbose - name_match.match_names(adjusted_name.iloc[:5].copy(), 'company_name') + name_match.match_names(adjusted_name.iloc[:5].copy(), "company_name") out, err = capfd.readouterr() if verbose: - assert out.find('preprocessing') > -1 - assert out.find('searching') > -1 - assert out.find('possible') > -1 - assert out.find('fuzzy') > -1 - assert out.find('done') > -1 + assert out.find("preprocessing") > -1 + assert out.find("searching") > -1 + assert out.find("possible") > -1 + assert out.find("fuzzy") > -1 + assert out.find("done") > -1 else: - assert out == '' - - -@pytest.mark.parametrize("word, occurrence_count, result", - [['fun snail pool', 2, 'snail'], - ['fun snail pool', 3, 'fun snail'], - ['fun snail pool', 1, ''], - ['fun small pool', 3, 'fun small pool'], - ['fun snail', 3, 'fun snail'], - ['fun small pool', 5, 'fun small pool']]) + assert out == "" + + +@pytest.mark.parametrize( + "word, occurrence_count, result", + [ + ["fun snail pool", 2, "snail"], + ["fun snail pool", 3, "fun 
snail"], + ["fun snail pool", 1, ""], + ["fun small pool", 3, "fun small pool"], + ["fun snail", 3, "fun snail"], + ["fun small pool", 5, "fun small pool"], + ], +) def test_select_top_words(word, words, occurrence_count, result): word_counts = pd.Series(words).value_counts() name_match = nm.NameMatcher() - new_word = name_match._select_top_words( - word.split(), word_counts, occurrence_count) + new_word = name_match._select_top_words(word.split(), word_counts, occurrence_count) assert new_word == result -@pytest.mark.parametrize("match, num_of_matches, result", - [[{'match_name_1': 'fun', 'match_name_2': 'dog', - 'match_name_0': 'cat'}, 3, ['cat', 'fun', 'dog']], - [{'match_name_1': 'fun', 'match_name_2': 'dog', - 'match_name_0': 'cat'}, 2, ['cat', 'fun']], - [{'match_name_1': 'fun', 'match_name_0': 'cat'}, - 2, ['cat', 'fun']], - [{'match_name_1': 'fun', 'match_name_2': 'dog', 'match_name_0': 'cat'}, 0, []]]) +@pytest.mark.parametrize( + "match, num_of_matches, result", + [ + [ + {"match_name_1": "fun", "match_name_2": "dog", "match_name_0": "cat"}, + 3, + ["cat", "fun", "dog"], + ], + [ + {"match_name_1": "fun", "match_name_2": "dog", "match_name_0": "cat"}, + 2, + ["cat", "fun"], + ], + [{"match_name_1": "fun", "match_name_0": "cat"}, 2, ["cat", "fun"]], + [{"match_name_1": "fun", "match_name_2": "dog", "match_name_0": "cat"}, 0, []], + ], +) def test_get_alternative_names(match, num_of_matches, result): name_match = nm.NameMatcher(number_of_matches=num_of_matches) res = name_match._get_alternative_names(pd.Series(match)) assert res == result -@pytest.mark.parametrize("preprocess_punctuations, output, input, x", - [[True, '_blame_', {'test': ['fun...', 'done'], 'num':['_.blame._']}, 2], - [True, 'done', {'test': ['fun. . . ', - 'done'], 'num':['_.blame._']}, 1], - [True, 'fun', { - 'test': ['fun. . . ', 'done'], 'num':['_.blame._']}, 0], - [False, 'fun. . .', { - 'test': ['fun. . . ', 'done'], 'num':['_.blame._']}, 0], - [False, 'fun. . .', { - 'num': ['_.blame._'], 'test': ['fun. . . ', 'done']}, 1] - ]) +@pytest.mark.parametrize( + "preprocess_punctuations, output, input, x", + [ + [True, "_blame_", {"test": ["fun...", "done"], "num": ["_.blame._"]}, 2], + [True, "done", {"test": ["fun. . . ", "done"], "num": ["_.blame._"]}, 1], + [True, "fun", {"test": ["fun. . . ", "done"], "num": ["_.blame._"]}, 0], + [False, "fun. . .", {"test": ["fun. . . ", "done"], "num": ["_.blame._"]}, 0], + [False, "fun. . .", {"num": ["_.blame._"], "test": ["fun. . . 
", "done"]}, 1], + ], +) def test_preprocess_word_list(preprocess_punctuations, output, input, x): name_match = nm.NameMatcher(punctuations=preprocess_punctuations) res = name_match._preprocess_word_list(input) @@ -527,36 +1058,75 @@ def test_preprocess_word_list(preprocess_punctuations, output, input, x): assert res[x] == output -@pytest.mark.parametrize("num_matches, match_score, match, result, y", - [[3, np.array([[1, 1, 1], [1, 1, 1], [0, 0, 0]]), pd.Series(dtype=float), 100, 0], - [2, np.array([[1, 1], [0.4, 0.4], [0, 0]]), - pd.Series(dtype=float), 40, 1], - [1, np.array([[1, 1], [1, 1], [0, 0]]), - pd.Series(dtype=float), 100, 0] - ]) +@pytest.mark.parametrize( + "num_matches, match_score, match, result, y", + [ + [ + 3, + np.array([[1, 1, 1], [1, 1, 1], [0, 0, 0]]), + pd.Series(dtype=float), + 100, + 0, + ], + [2, np.array([[1, 1], [0.4, 0.4], [0, 0]]), pd.Series(dtype=float), 40, 1], + [1, np.array([[1, 1], [1, 1], [0, 0]]), pd.Series(dtype=float), 100, 0], + ], +) def test_adjust_scores(num_matches, match_score, match, result, y): name_match = nm.NameMatcher(number_of_matches=num_matches) match = name_match._adjust_scores(match_score, match) assert match.iloc[y] == result -@pytest.mark.parametrize("string, stringlist, result_1, result_2, y", - [['know sign first', ['know', 'know sign', 'know sign first'], 'know first', 'know first', 2], - ['know sign first', ['know', 'know sign', - 'know sign first'], 'know first', 'know', 1], - ['know sign first', ['know', 'know sign', - 'know sign first'], 'know first', 'know', 0], - ['know first', ['know', 'know', 'know'], - 'know first', 'know', 1], - ['pool sign small', ['sign small', - 'small pool sign', 'small'], '', '', 0], - ['pool sign small know', ['sign small', - 'small pool sign', 'small'], 'know', '', 0], - ['know pool sign small', ['sign small', - 'small pool sign', 'small'], 'know', '', 0], - ['pool sign small', ['sign small', - 'small pool know sign', 'small'], '', 'know', 1], - ]) +@pytest.mark.parametrize( + "string, stringlist, result_1, result_2, y", + [ + [ + "know sign first", + ["know", "know sign", "know sign first"], + "know first", + "know first", + 2, + ], + [ + "know sign first", + ["know", "know sign", "know sign first"], + "know first", + "know", + 1, + ], + [ + "know sign first", + ["know", "know sign", "know sign first"], + "know first", + "know", + 0, + ], + ["know first", ["know", "know", "know"], "know first", "know", 1], + ["pool sign small", ["sign small", "small pool sign", "small"], "", "", 0], + [ + "pool sign small know", + ["sign small", "small pool sign", "small"], + "know", + "", + 0, + ], + [ + "know pool sign small", + ["sign small", "small pool sign", "small"], + "know", + "", + 0, + ], + [ + "pool sign small", + ["sign small", "small pool know sign", "small"], + "", + "know", + 1, + ], + ], +) def test_process_words(words, string, stringlist, result_1, result_2, y): name_match = nm.NameMatcher() name_match._word_set = set(words) @@ -565,28 +1135,30 @@ def test_process_words(words, string, stringlist, result_1, result_2, y): assert stringlist[y] == result_2 -@pytest.mark.parametrize("word_set, cut_off, result_1, result_2", - [[set(), 0, 635, 'Group'], - [set(), 0, 635, 'and'], - [set(), 0.1, 7, 'Group'], - [set(), 0.1, 7, 'LLC'], - [set(), 0.12, 7, 'LLC'], - [set(), 0.2, 1, 'and'], - [set(['apple']), 1, 1, 'apple'], - [set(['apple']), 0, 636, 'apple'], - [set(['apple']), 0, 636, 'Group'] - ]) +@pytest.mark.parametrize( + "word_set, cut_off, result_1, result_2", + [ + [set(), 0, 635, "Group"], + 
[set(), 0, 635, "and"], + [set(), 0.1, 7, "Group"], + [set(), 0.1, 7, "LLC"], + [set(), 0.12, 7, "LLC"], + [set(), 0.2, 1, "and"], + [set(["apple"]), 1, 1, "apple"], + [set(["apple"]), 0, 636, "apple"], + [set(["apple"]), 0, 636, "Group"], + ], +) def test_process_common_words(name_match, word_set, cut_off, result_1, result_2): words = name_match._process_common_words(word_set, cut_off) assert result_2 in words assert len(words) == result_1 -@pytest.mark.parametrize("common_words, error", [[True, False], - [[], False], - [set(), False], - [dict(), True], - ["", True]]) +@pytest.mark.parametrize( + "common_words, error", + [[True, False], [[], False], [set(), False], [dict(), True], ["", True]], +) def test_common_words_type_error(common_words, error): if error: with pytest.raises(TypeError): @@ -599,33 +1171,44 @@ def test_common_words_type_error(common_words, error): assert name_matcher._word_set == set(common_words) -@pytest.mark.parametrize("common_words, legal_suffixes", [[['Cherry', 'Stream', 'Puzzle', 'Balloon', 'Candle', 'Mirror'], False], - [['Cherry', 'Stream', 'Puzzle', 'Balloon', 'Candle', 'Mirror'], True], - [['Cherry'], False], - [['Cherry'], True], - [['limited', 'gmbh'], False], - [['limited', 'gmbh'], True],]) +@pytest.mark.parametrize( + "common_words, legal_suffixes", + [ + [["Cherry", "Stream", "Puzzle", "Balloon", "Candle", "Mirror"], False], + [["Cherry", "Stream", "Puzzle", "Balloon", "Candle", "Mirror"], True], + [["Cherry"], False], + [["Cherry"], True], + [["limited", "gmbh"], False], + [["limited", "gmbh"], True], + ], +) def test_common_words_addition(original_name, common_words, legal_suffixes): - name_matcher = nm.NameMatcher(common_words=common_words, legal_suffixes=legal_suffixes) - name_matcher.load_and_process_master_data( - 'company_name', original_name, start_processing=False, transform=False) - name_matcher._process_matching_data(transform=False) - for word in common_words: - assert word in name_matcher._word_set - - -@pytest.mark.parametrize("word_set, preprocess, result_1, result_2, result_3", - [[set(), True, 0, 'company', True], - [set(), True, 0, '3ao', True], - [set(), True, 0, 'g.m.b.h.', False], - [set(), False, 0, '& company', True], - [set(), False, 0, '3ao', True], - [set(), False, 0, 'g.m.b.h.', True], - [set(['apple']), True, 1, 'apple', True], - [set(['apple']), False, 1, 'apple', True], - [set(['apple..']), True, 1, 'apple..', True], - [set(['apple..']), False, 1, 'apple..', True] - ]) + name_matcher = nm.NameMatcher( + common_words=common_words, legal_suffixes=legal_suffixes + ) + name_matcher.load_and_process_master_data( + "company_name", original_name, start_processing=False, transform=False + ) + name_matcher._process_matching_data(transform=False) + for word in common_words: + assert word in name_matcher._word_set + + +@pytest.mark.parametrize( + "word_set, preprocess, result_1, result_2, result_3", + [ + [set(), True, 0, "company", True], + [set(), True, 0, "3ao", True], + [set(), True, 0, "g.m.b.h.", False], + [set(), False, 0, "& company", True], + [set(), False, 0, "3ao", True], + [set(), False, 0, "g.m.b.h.", True], + [set(["apple"]), True, 1, "apple", True], + [set(["apple"]), False, 1, "apple", True], + [set(["apple.."]), True, 1, "apple..", True], + [set(["apple.."]), False, 1, "apple..", True], + ], +) def test_process_legal_words(word_set, preprocess, result_1, result_2, result_3): name_match = nm.NameMatcher() name_match._preprocess_punctuations = preprocess diff --git a/name_matching/test/test_sparse_cosine.py 
b/name_matching/test/test_sparse_cosine.py index 0758595..0321e4f 100644 --- a/name_matching/test/test_sparse_cosine.py +++ b/name_matching/test/test_sparse_cosine.py @@ -2,219 +2,286 @@ import pytest from scipy.sparse import csc_matrix -from name_matching.sparse_cosine import _sparse_cosine_top_n_standard, _sparse_cosine_low_memory, sparse_cosine_top_n +from name_matching.sparse_cosine import ( + _sparse_cosine_top_n_standard, + _sparse_cosine_low_memory, + sparse_cosine_top_n, +) + def assert_values_in_array(A1, A2): - assert len(A1) == len(A2) + assert len(A1) == len(A2) A1.sort() A2.sort() np.testing.assert_array_almost_equal(A1, A2, decimal=2) + @pytest.fixture def mat_a(): - return csc_matrix(np.array([[0. , 0. , 0. , 0. , 0. , 0.3, 0.2, 0.1, 0.3, 0.4], - [0. , 0. , 0. , 0. , 0.6, 0.5, 0. , 0. , 0. , 0. ], - [0. , 0. , 0.6, 0.1, 0. , 0.9, 0. , 0. , 0.5, 0. ], - [0. , 0.3, 0. , 0.4, 0. , 0.6, 0. , 0.1, 0. , 0. ], - [0. , 0. , 0. , 0. , 0. , 0. , 0.3, 0. , 0. , 0. ], - [0. , 0. , 0. , 0. , 0. , 0.9, 0. , 0. , 0.2, 0. ], - [0.7, 0. , 0. , 0.2, 0.3, 0.9, 0. , 0.3, 0. , 0.5], - [0.9, 0.9, 0. , 0.3, 0.9, 0.9, 0. , 0. , 0. , 0. ], - [0.9, 0.5, 0. , 0. , 0. , 0.5, 0.4, 0. , 0. , 0.7], - [0.1, 0. , 0. , 0.2, 0. , 0.4, 0. , 0.9, 0. , 0.7]])) + return csc_matrix( + np.array( + [ + [0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.2, 0.1, 0.3, 0.4], + [0.0, 0.0, 0.0, 0.0, 0.6, 0.5, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.6, 0.1, 0.0, 0.9, 0.0, 0.0, 0.5, 0.0], + [0.0, 0.3, 0.0, 0.4, 0.0, 0.6, 0.0, 0.1, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.2, 0.0], + [0.7, 0.0, 0.0, 0.2, 0.3, 0.9, 0.0, 0.3, 0.0, 0.5], + [0.9, 0.9, 0.0, 0.3, 0.9, 0.9, 0.0, 0.0, 0.0, 0.0], + [0.9, 0.5, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.0, 0.7], + [0.1, 0.0, 0.0, 0.2, 0.0, 0.4, 0.0, 0.9, 0.0, 0.7], + ] + ) + ) + + @pytest.fixture def mat_b(): - return csc_matrix(np.array([[0. , 0. , 0.4, 0. , 0.2, 0. , 0. , 0.4, 0. , 0. ], - [0. , 0. , 0. , 0.3, 0. , 0. , 0.4, 0. , 0. , 0. ], - [0. , 0.9, 0.9, 0.9, 0. , 0.1, 0.2, 0.6, 0. , 0. ], - [0. , 0.4, 0. , 0. , 0. , 0. , 0.9, 0. , 0. , 0. ], - [0. , 0. , 0. , 0. , 0. , 0. , 0.1, 0. , 0.4, 0. ], - [0. , 0. , 0. , 0. , 1. , 0.6, 0.6, 0. , 0. , 0. ], - [0. , 0. , 0. , 0.3, 0.6, 0. , 0.9, 0. , 0. , 0. ], - [0. , 0. , 0. , 0.9, 0. , 0. , 0. , 0. , 0. , 0.9], - [0. , 0. , 0.8, 0. , 0. , 0. , 1. , 0. , 0. , 0. ], - [0. , 0. , 0.4, 0. , 0. , 0. , 0.8, 0.3, 0. , 0. 
]])) + return csc_matrix( + np.array( + [ + [0.0, 0.0, 0.4, 0.0, 0.2, 0.0, 0.0, 0.4, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0], + [0.0, 0.9, 0.9, 0.9, 0.0, 0.1, 0.2, 0.6, 0.0, 0.0], + [0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.4, 0.0], + [0.0, 0.0, 0.0, 0.0, 1.0, 0.6, 0.6, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.3, 0.6, 0.0, 0.9, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9], + [0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.8, 0.3, 0.0, 0.0], + ] + ) + ) + + @pytest.fixture def result_a_b(): - return np.array([[9., 3., 0., 7., 6., 1., 2., 0., 0., 0.], - [8., 4., 0., 9., 7., 6., 3., 2., 0., 0.], - [4., 5., 1., 0., 9., 6., 2., 8., 7., 3.], - [4., 0., 8., 7., 3., 0., 0., 0., 0., 0.], - [5., 2., 8., 4., 0., 0., 0., 0., 0., 0.], - [4., 9., 8., 5., 3., 2., 0., 7., 6., 1.], - [8., 4., 0., 1., 9., 7., 6., 3., 2., 0.], - [8., 0., 9., 7., 6., 3., 2., 0., 0., 0.], - [8., 4., 0., 2., 0., 0., 0., 0., 0., 0.], - [9., 6., 3., 8., 4., 0., 2., 0., 0., 0.]]) + return np.array( + [ + [9.0, 3.0, 0.0, 7.0, 6.0, 1.0, 2.0, 0.0, 0.0, 0.0], + [8.0, 4.0, 0.0, 9.0, 7.0, 6.0, 3.0, 2.0, 0.0, 0.0], + [4.0, 5.0, 1.0, 0.0, 9.0, 6.0, 2.0, 8.0, 7.0, 3.0], + [4.0, 0.0, 8.0, 7.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [5.0, 2.0, 8.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [4.0, 9.0, 8.0, 5.0, 3.0, 2.0, 0.0, 7.0, 6.0, 1.0], + [8.0, 4.0, 0.0, 1.0, 9.0, 7.0, 6.0, 3.0, 2.0, 0.0], + [8.0, 0.0, 9.0, 7.0, 6.0, 3.0, 2.0, 0.0, 0.0, 0.0], + [8.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [9.0, 6.0, 3.0, 8.0, 4.0, 0.0, 2.0, 0.0, 0.0, 0.0], + ] + ) + + @pytest.fixture def result_a_b1(): - return np.array([[9.], - [8.], - [7.], - [8.], - [2.], - [7.], - [7.], - [9.], - [2.], - [8.]]) + return np.array( + [[9.0], [8.0], [7.0], [8.0], [2.0], [7.0], [7.0], [9.0], [2.0], [8.0]] + ) + + @pytest.fixture def result_a_b3(): - return np.array([[7., 2., 9.], - [4., 3., 8.], - [3., 7., 9.], - [4., 7., 8.], - [5., 2., 0.], - [6., 1., 7.], - [1., 8., 7.], - [6., 8., 9.], - [4., 8., 2.], - [4., 9., 8.]]) + return np.array( + [ + [7.0, 2.0, 9.0], + [4.0, 3.0, 8.0], + [3.0, 7.0, 9.0], + [4.0, 7.0, 8.0], + [5.0, 2.0, 0.0], + [6.0, 1.0, 7.0], + [1.0, 8.0, 7.0], + [6.0, 8.0, 9.0], + [4.0, 8.0, 2.0], + [4.0, 9.0, 8.0], + ] + ) + + @pytest.fixture def mat_c(): - return csc_matrix(np.array([[0.2, 0.5, 0.2, 0.1, 0.5, 0. ], - [0.2, 0.9, 0.3, 0.4, 0.4, 0.7], - [0. , 0. , 0.4, 0. , 0. , 0. ], - [0. , 0.5, 0. , 0.3, 0.8, 0. ], - [0.7, 0.9, 0. , 0.7, 0.9, 0.2], - [0.2, 0.1, 0.8, 0. , 0. , 0.1]])) + return csc_matrix( + np.array( + [ + [0.2, 0.5, 0.2, 0.1, 0.5, 0.0], + [0.2, 0.9, 0.3, 0.4, 0.4, 0.7], + [0.0, 0.0, 0.4, 0.0, 0.0, 0.0], + [0.0, 0.5, 0.0, 0.3, 0.8, 0.0], + [0.7, 0.9, 0.0, 0.7, 0.9, 0.2], + [0.2, 0.1, 0.8, 0.0, 0.0, 0.1], + ] + ) + ) + + @pytest.fixture def mat_d(): - return csc_matrix(np.array([[0.8, 0. , 0. , 0. , 0.1, 0. ], - [0. , 0. , 0. , 0.4, 0. , 0. ], - [0.3, 0.4, 0. , 0. , 0. , 0.7], - [0. , 0. , 0. , 0. , 0. , 0. ], - [0.1, 0.1, 0.4, 0.4, 0. , 0. ], - [0.8, 0. , 0.5, 0.8, 0.2, 0. 
]])) + return csc_matrix( + np.array( + [ + [0.8, 0.0, 0.0, 0.0, 0.1, 0.0], + [0.0, 0.0, 0.0, 0.4, 0.0, 0.0], + [0.3, 0.4, 0.0, 0.0, 0.0, 0.7], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.1, 0.1, 0.4, 0.4, 0.0, 0.0], + [0.8, 0.0, 0.5, 0.8, 0.2, 0.0], + ] + ) + ) + + @pytest.fixture def result_c_d(): - return np.array([[3., 5., 4., 1., 0., 0.], - [4., 3., 1., 0., 0., 0.], - [3., 5., 4., 1., 0., 0.], - [0., 0., 0., 0., 0., 0.], - [2., 3., 5., 4., 1., 0.], - [3., 2., 5., 4., 1., 0.]]) - + return np.array( + [ + [3.0, 5.0, 4.0, 1.0, 0.0, 0.0], + [4.0, 3.0, 1.0, 0.0, 0.0, 0.0], + [3.0, 5.0, 4.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [2.0, 3.0, 5.0, 4.0, 1.0, 0.0], + [3.0, 2.0, 5.0, 4.0, 1.0, 0.0], + ] + ) + + @pytest.fixture def result_c_d1(): - return np.array([[4], - [4], - [1], - [0], - [4], - [4]]) - + return np.array([[4], [4], [1], [0], [4], [4]]) + + @pytest.fixture def result_c_d4(): - return np.array([[5., 4., 1., 0.], - [4., 3., 1., 0.], - [3., 4., 1., 0.], - [0., 0., 0., 0.], - [0., 4., 1., 5.], - [0., 4., 1., 5.]]) - - -@pytest.mark.parametrize("top_n, num_rows", - [(10, 10), - (10, 8), - (10, 7), - (10, 1)] - ) + return np.array( + [ + [5.0, 4.0, 1.0, 0.0], + [4.0, 3.0, 1.0, 0.0], + [3.0, 4.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 4.0, 1.0, 5.0], + [0.0, 4.0, 1.0, 5.0], + ] + ) + + +@pytest.mark.parametrize("top_n, num_rows", [(10, 10), (10, 8), (10, 7), (10, 1)]) def test_cosine_standard(top_n, num_rows, mat_a, mat_b, result_a_b): - np.testing.assert_array_equal(_sparse_cosine_top_n_standard(mat_a, mat_b, num_rows, top_n, False), result_a_b) - -@pytest.mark.parametrize("top_n, num_rows", - [(1, 10), - (1, 8), - (1, 7), - (1, 1)] - ) + np.testing.assert_array_equal( + _sparse_cosine_top_n_standard(mat_a, mat_b, num_rows, top_n, False), result_a_b + ) + + +@pytest.mark.parametrize("top_n, num_rows", [(1, 10), (1, 8), (1, 7), (1, 1)]) def test_cosine_standard1(top_n, num_rows, mat_a, mat_b, result_a_b1): - np.testing.assert_array_equal(_sparse_cosine_top_n_standard(mat_a, mat_b, num_rows, top_n, False), result_a_b1) - -@pytest.mark.parametrize("top_n, num_rows", - [(3, 10), - (3, 8), - (3, 7), - (3, 1)] - ) + np.testing.assert_array_equal( + _sparse_cosine_top_n_standard(mat_a, mat_b, num_rows, top_n, False), result_a_b1 + ) + + +@pytest.mark.parametrize("top_n, num_rows", [(3, 10), (3, 8), (3, 7), (3, 1)]) def test_cosine_standard3(top_n, num_rows, mat_a, mat_b, result_a_b3): - np.testing.assert_array_equal(_sparse_cosine_top_n_standard(mat_a, mat_b, num_rows, top_n, False), result_a_b3) - -@pytest.mark.parametrize("top_n, num_rows", - [(7, 10), - (6, 8), - (9, 7), - (6, 1)] - ) + results = _sparse_cosine_top_n_standard(mat_a, mat_b, num_rows, top_n, False) + for row_0, row_1 in zip(results, result_a_b3): + np.testing.assert_array_equal(np.sort(row_0), np.sort(row_1)) + + +@pytest.mark.parametrize("top_n, num_rows", [(7, 10), (6, 8), (9, 7), (6, 1)]) def test_cosine_standard_c(top_n, num_rows, mat_c, mat_d, result_c_d): - np.testing.assert_array_equal(_sparse_cosine_top_n_standard(mat_c, mat_d, num_rows, top_n, False)[:,:6], result_c_d) - -@pytest.mark.parametrize("top_n, num_rows", - [(4, 5), - (4, 4), - (4, 3), - (4, 1)] - ) + results = _sparse_cosine_top_n_standard(mat_c, mat_d, num_rows, top_n, False)[:, :6] + for row_0, row_1 in zip(results, result_c_d): + np.testing.assert_array_equal(np.sort(row_0), np.sort(row_1)) + + +@pytest.mark.parametrize("top_n, num_rows", [(4, 5), (4, 4), (4, 3), (4, 1)]) def test_cosine_standard_c4(top_n, num_rows, mat_c, mat_d, 
result_c_d4): - np.testing.assert_array_equal(_sparse_cosine_top_n_standard(mat_c, mat_d, num_rows, top_n, False), result_c_d4) - -@pytest.mark.parametrize("top_n, num_rows", - [(1, 10), - (1, 3), - (1, 2), - (1, 1)] - ) + results = _sparse_cosine_top_n_standard(mat_c, mat_d, num_rows, top_n, False) + for row_0, row_1 in zip(results, result_c_d4): + np.testing.assert_array_equal(np.sort(row_0), np.sort(row_1)) + + +@pytest.mark.parametrize("top_n, num_rows", [(1, 10), (1, 3), (1, 2), (1, 1)]) def test_cosine_standard_c1(top_n, num_rows, mat_c, mat_d, result_c_d1): - np.testing.assert_array_equal(_sparse_cosine_top_n_standard(mat_c, mat_d, num_rows, top_n, False), result_c_d1) + np.testing.assert_array_equal( + _sparse_cosine_top_n_standard(mat_c, mat_d, num_rows, top_n, False), result_c_d1 + ) + -@pytest.mark.parametrize("row", - [[1],[2],[3],[4],[5],[0]] - ) +@pytest.mark.parametrize("row", [[1], [2], [3], [4], [5], [0]]) def test_cosine_top_n_cd_low_memory(row, mat_a, mat_b): mat_a_co = csc_matrix(mat_a).tocoo() - low_memory_result = _sparse_cosine_low_memory(matrix_row = mat_a_co.row, matrix_col = mat_a_co.col, - matrix_data = mat_a_co.data, matrix_len = mat_a_co.shape[0], vector_ind = mat_b[row,:].tocsr().indices, - vector_data = mat_b[row,:].tocsr().data) - ordinary_result = (mat_a * (mat_b).T).todense()[:,row] - np.testing.assert_array_almost_equal(low_memory_result.reshape(-1,1), ordinary_result, decimal=3) - -@pytest.mark.parametrize("top_n, num_rows, row", - [(1, 10, 2), - (2, 3, 3), - (3, 2, 1), - (3, 0, 5), - (3, 3, 0), - (6, 2, 1), - (3, 0, 4), - (5, 0, 2), - (8, 1, 2)] - ) + low_memory_result = _sparse_cosine_low_memory( + matrix_row=mat_a_co.row, + matrix_col=mat_a_co.col, + matrix_data=mat_a_co.data, + matrix_len=mat_a_co.shape[0], + vector_ind=mat_b[row, :].tocsr().indices, + vector_data=mat_b[row, :].tocsr().data, + ) + ordinary_result = (mat_a * (mat_b).T).todense()[:, row] + np.testing.assert_array_almost_equal( + low_memory_result.reshape(-1, 1), ordinary_result, decimal=3 + ) + + +@pytest.mark.parametrize( + "top_n, num_rows, row", + [ + (1, 10, 2), + (2, 3, 3), + (3, 2, 1), + (3, 0, 5), + (3, 3, 0), + (6, 2, 1), + (3, 0, 4), + (5, 0, 2), + (8, 1, 2), + ], +) def test_cosine_top_n_cd(top_n, num_rows, row, mat_c, mat_d): if num_rows == 0: - assert_values_in_array(sparse_cosine_top_n(mat_c.tocoo(), mat_d[row,:].tocsr(), top_n, True, num_rows, False).reshape(1,-1), - _sparse_cosine_top_n_standard(mat_c, mat_d[row,:], num_rows + 1, top_n, False)) + assert_values_in_array( + sparse_cosine_top_n( + mat_c.tocoo(), mat_d[row, :].tocsr(), top_n, True, num_rows, False + ).reshape(1, -1), + _sparse_cosine_top_n_standard( + mat_c, mat_d[row, :], num_rows + 1, top_n, False + ), + ) else: - np.testing.assert_array_equal(sparse_cosine_top_n(mat_c, mat_d, top_n, False, num_rows, False), _sparse_cosine_top_n_standard(mat_c, mat_d, num_rows, top_n, False)) - - -@pytest.mark.parametrize("top_n, num_rows, row", - [(1, 10, 2), - (2, 3, 3), - (6, 2, 1), - (3, 0, 5), - (3, 3, 0), - (6, 2, 1), - (4, 0, 4), - (1, 0, 8), - (2, 0, 6), - (6, 0, 2), - (8, 1, 2)] - ) + np.testing.assert_array_equal( + sparse_cosine_top_n(mat_c, mat_d, top_n, False, num_rows, False), + _sparse_cosine_top_n_standard(mat_c, mat_d, num_rows, top_n, False), + ) + + +@pytest.mark.parametrize( + "top_n, num_rows, row", + [ + (1, 10, 2), + (2, 3, 3), + (6, 2, 1), + (3, 0, 5), + (3, 3, 0), + (6, 2, 1), + (4, 0, 4), + (1, 0, 8), + (2, 0, 6), + (6, 0, 2), + (8, 1, 2), + ], +) def test_cosine_top_n_ab(top_n, num_rows, row, 
mat_a, mat_b): if num_rows == 0: - assert_values_in_array(sparse_cosine_top_n(mat_a.tocoo(), mat_b[row,:].tocsr(), top_n, True, num_rows, False).reshape(1,-1), - _sparse_cosine_top_n_standard(mat_a, mat_b[row,:], num_rows + 1, top_n, False)) + assert_values_in_array( + sparse_cosine_top_n( + mat_a.tocoo(), mat_b[row, :].tocsr(), top_n, True, num_rows, False + ).reshape(1, -1), + _sparse_cosine_top_n_standard( + mat_a, mat_b[row, :], num_rows + 1, top_n, False + ), + ) else: - np.testing.assert_array_equal(sparse_cosine_top_n(mat_a, mat_b, top_n, False, num_rows, False), _sparse_cosine_top_n_standard(mat_a, mat_b, num_rows, top_n, False)) - \ No newline at end of file + np.testing.assert_array_equal( + sparse_cosine_top_n(mat_a, mat_b, top_n, False, num_rows, False), + _sparse_cosine_top_n_standard(mat_a, mat_b, num_rows, top_n, False), + ) diff --git a/setup.py b/setup.py index bd0937b..d73ef25 100644 --- a/setup.py +++ b/setup.py @@ -5,21 +5,17 @@ long_description = (this_directory / "README.md").read_text() setup( - name='name_matching', - version='0.8.10', - description='A package for the matching of company names', - author='Michiel Nijhuis', - author_email='m.nijhuis@dnb.nl', - project_urls = { - 'Documentation': 'https://name-matching.readthedocs.io/en/latest/index.html', - 'Source Code': 'https://github.com/DeNederlandscheBank/name_matching'}, - packages=['name_matching','distances'], - install_requires = [ - 'cleanco', - 'scikit-learn', - 'pandas', - 'numpy', - 'tqdm'], + name="name_matching", + version="0.8.11", + description="A package for the matching of company names", + author="Michiel Nijhuis", + author_email="m.nijhuis@dnb.nl", + project_urls={ + "Documentation": "https://name-matching.readthedocs.io/en/latest/index.html", + "Source Code": "https://github.com/DeNederlandscheBank/name_matching", + }, + packages=["name_matching", "distances"], + install_requires=["cleanco", "scikit-learn", "pandas", "numpy", "tqdm"], long_description=long_description, - long_description_content_type='text/markdown', + long_description_content_type="text/markdown", )
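
For context, the equivalence that the reformatted test_cosine_top_n_cd asserts can be reproduced standalone roughly as follows. This is a minimal illustrative sketch, not part of the patch: it reuses the mat_c/mat_d fixture values and the (top_n=6, num_rows=2) parametrisation from the tests above, and the argument order is copied verbatim from the test calls (the keyword names of sparse_cosine_top_n are not assumed here).

    # Sketch only: reproduce the assertion from test_cosine_top_n_cd outside pytest.
    import numpy as np
    from scipy.sparse import csc_matrix
    from name_matching.sparse_cosine import (
        _sparse_cosine_top_n_standard,
        sparse_cosine_top_n,
    )

    # Fixture matrices copied from the test file above.
    mat_c = csc_matrix(np.array([
        [0.2, 0.5, 0.2, 0.1, 0.5, 0.0],
        [0.2, 0.9, 0.3, 0.4, 0.4, 0.7],
        [0.0, 0.0, 0.4, 0.0, 0.0, 0.0],
        [0.0, 0.5, 0.0, 0.3, 0.8, 0.0],
        [0.7, 0.9, 0.0, 0.7, 0.9, 0.2],
        [0.2, 0.1, 0.8, 0.0, 0.0, 0.1],
    ]))
    mat_d = csc_matrix(np.array([
        [0.8, 0.0, 0.0, 0.0, 0.1, 0.0],
        [0.0, 0.0, 0.0, 0.4, 0.0, 0.0],
        [0.3, 0.4, 0.0, 0.0, 0.0, 0.7],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.1, 0.1, 0.4, 0.4, 0.0, 0.0],
        [0.8, 0.0, 0.5, 0.8, 0.2, 0.0],
    ]))

    top_n, num_rows = 6, 2  # one parametrisation used by the tests
    # The optimised routine and the reference implementation are expected to
    # return the same per-row top-n index matrix (argument order as in the tests).
    fast = sparse_cosine_top_n(mat_c, mat_d, top_n, False, num_rows, False)
    reference = _sparse_cosine_top_n_standard(mat_c, mat_d, num_rows, top_n, False)
    np.testing.assert_array_equal(fast, reference)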