Skip to content

Commit

Permalink
Fix `n_jobs` check when `None` and allow passing a pre-fitted `combiner` to selectors
Browse files Browse the repository at this point in the history
  • Loading branch information
itlubber committed Sep 12, 2024
1 parent 0374b7b commit c0002a1
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
17 changes: 11 additions & 6 deletions scorecardpipeline/feature_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,13 +275,13 @@ def _IV(x, y, regularization=1.0, n_jobs=None):

class InformationValueSelector(SelectorMixin):

def __init__(self, threshold=0.02, target="target", regularization=1.0, methods=None, n_jobs=None, **kwargs):
def __init__(self, threshold=0.02, target="target", regularization=1.0, methods=None, n_jobs=None, combiner=None, **kwargs):
super().__init__()
self.dropped = None
self.select_columns = None
self.scores_ = None
self.n_features_in_ = None
self.combiner = None
self.combiner = combiner
self.threshold = threshold
self.target = target
self.regularization = regularization
Expand All @@ -298,7 +298,9 @@ def fit(self, x: pd.DataFrame, y=None):

self.n_features_in_ = x.shape[1]

if self.methods:
if self.combiner:
xt = self.combiner.transform(x)
elif self.methods:
temp = x.copy()
temp[self.target] = y
self.combiner = Combiner(target=self.target, method=self.methods, n_jobs=self.n_jobs, **self.kwargs)
Expand Down Expand Up @@ -350,7 +352,7 @@ class LiftSelector(SelectorMixin):
:param select_columns : array-like
:param dropped : DataFrame
"""
def __init__(self, target="target", threshold=3.0, n_jobs=None, methods=None, **kwargs):
def __init__(self, target="target", threshold=3.0, n_jobs=None, methods=None, combiner=None, **kwargs):
"""
:param target: target
:param threshold: float or str (default=3.0). Feature which has a lift score greater than `threshold` will be kept.
Expand All @@ -362,6 +364,7 @@ def __init__(self, target="target", threshold=3.0, n_jobs=None, methods=None, **
self.n_jobs = n_jobs
self.target = target
self.methods = methods
self.combiner = combiner
self.kwargs = kwargs

def fit(self, x: pd.DataFrame, y=None, **fit_params):
Expand All @@ -372,8 +375,10 @@ def fit(self, x: pd.DataFrame, y=None, **fit_params):
x = x.drop(columns=self.target)

self.n_features_in_ = x.shape[1]

if self.methods:

if self.combiner:
xt = self.combiner.transform(x)
elif self.methods:
temp = x.copy()
temp[self.target] = y
self.combiner = Combiner(target=self.target, method=self.methods, n_jobs=self.n_jobs, **self.kwargs)
Expand Down
2 changes: 1 addition & 1 deletion scorecardpipeline/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ def fit(self, x: pd.DataFrame, y=None):

if self.method in ["cart", "mdlp", "uniform"]:
feature_optbinning_bins = partial(self.optbinning_bins, data=x, target=self.target, min_n_bins=self.min_n_bins, max_n_bins=self.max_n_bins, max_n_prebins=self.max_n_prebins, min_prebin_size=self.min_prebin_size, min_bin_size=self.min_bin_size, max_bin_size=self.max_bin_size, gamma=self.gamma, monotonic_trend=self.monotonic_trend, **self.kwargs)
if self.n_jobs > 1:
if self.n_jobs is not None:
rules = Parallel(n_jobs=self.n_jobs)(delayed(feature_optbinning_bins)(feature) for feature in x.columns.drop(self.target))
[self.combiner.update(r) for r in rules]
# with ProcessPoolExecutor(max_workers=self.n_jobs) as executor:
Expand Down

0 comments on commit c0002a1

Please sign in to comment.