Merge pull request #42 from oxfordinternetinstitute/frontier
Misc quality of life improvements
ChrisMRuss authored Sep 13, 2024
2 parents 6eda37b + 2a53e77 commit c2ece8f
Showing 7 changed files with 124 additions and 12 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -4,7 +4,7 @@

FAIR = "oxonfair"

version = "0.2.1.6"
version = "0.2.1.7"

PYTHON_REQUIRES = ">=3.8"

67 changes: 63 additions & 4 deletions src/oxonfair/learners/fair.py
Expand Up @@ -133,8 +133,8 @@ def predictor(x):
        # to functions expecting a vector
        self._internal_groups = self.groups_to_numpy(groups, self.validation_data)
        self._internal_conditioning_factor = self.cond_fact_to_numpy(conditioning_factor, self.validation_data)
-        if self._internal_groups.shape[0] != validation_labels.shape[0]:
-            logger.error('The size of the groups does not match the dataset size')
+        assert self._internal_groups.shape[0] == validation_labels.shape[0], 'The size of the groups does not match the dataset size'
+        assert np.unique(validation_labels).shape[0] == 2, 'More than two target labels used. OxonFair only works with binary predictors'

        self.inferred_groups = inferred_groups
        if inferred_groups:
@@ -400,10 +400,67 @@ def frontier_thresholds(self):
assert self.frontier, "Call fit before frontier_thresholds"
return self.frontier[1]

def frontier_scores(self):
def frontier_scores(self, data=None):
"Returns the scores (i.e. objective and constraint) corresponding to the found frontier"
assert self.frontier, "Call fit before frontier_scores"
return self.frontier[0]
if data is None:
return self.frontier[0]

objective1 = self.objective1
objective2 = self.objective2

if isinstance(data, dict):
labels = np.asarray(data['target'])
proba = call_or_get_proba(self.predictor, data['data'])

else:
assert not is_not_autogluon(self.predictor), 'Data must be a dict unless using autogluon predictors'
labels = np.asarray(data[self.predictor.label])
proba = call_or_get_proba(self.predictor, data)
labels = (labels == self.predictor.positive_class) * 1
if self.add_noise:
proba += np.random.normal(0, self.add_noise, proba.shape)

groups = self.groups_to_numpy(None, data)
if groups is None:
groups = np.ones_like(labels)

if self.inferred_groups is False:
if self.groups is False:
val_thresholds = np.ones((groups.shape[0], 1))
else:
val_thresholds = self.group_encoder.transform(groups.reshape(-1, 1)).toarray()
else:
if isinstance(data, dict):
val_thresholds = call_or_get_proba(self.inferred_groups, data['data'])
else:
val_thresholds = call_or_get_proba(self.inferred_groups, data)

if self.use_fast is not True:
factor = self._internal_conditioning_factor
if _needs_groups(objective1):
objective1 = fix_groups_and_conditioning(objective1,
self.groups_to_numpy(groups, data), factor, self.y_true)
if _needs_groups(objective2):
objective2 = fix_groups_and_conditioning(objective2,
self.groups_to_numpy(groups, data), factor, self.y_true)

front1 = fair_frontier.compute_metric(objective1, labels, proba,
val_thresholds, self.frontier[1])
front2 = fair_frontier.compute_metric(objective2, labels, proba,
val_thresholds, self.frontier[1])

else:
front1 = efficient_compute.compute_metric(objective1, labels, proba,
groups,
self.infered_to_hard(val_thresholds),
self.frontier[1])
front2 = efficient_compute.compute_metric(objective2, labels, proba,
groups,
self.infered_to_hard(val_thresholds),
self.frontier[1])

return (front1, front2)

def set_threshold(self, threshold):
"""Set the thresholds.
@@ -467,6 +524,7 @@ def plot_frontier(self, data=None, groups=None, *, objective1=False, objective2=
            proba = call_or_get_proba(self.predictor, data['data'])

        else:
+            assert not is_not_autogluon(self.predictor), 'Data must be a dict unless using autogluon predictors'
            labels = np.asarray(data[self.predictor.label])
            proba = call_or_get_proba(self.predictor, data)
            labels = (labels == self.predictor.positive_class) * 1
@@ -1090,6 +1148,7 @@ def DeepDataDict(target, score, groups, groups_inferred=None, *,
    assert groups.ndim == 1
    assert score.shape[0] == target.shape[0]
    assert target.shape[0] == groups.shape[0]
+    assert score.shape[1] > 1
    if groups_inferred is not None:
        assert score.shape[1] == 1
        assert groups_inferred.ndim == 2
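The data argument added to frontier_scores above lets a fitted frontier be re-evaluated (objective and constraint) on a dataset other than the validation set used at fit time, which previously was the only option. A minimal usage sketch, assuming a scikit-learn-style classifier and the {'target', 'data', 'groups'} dict format exercised by the test suite; the synthetic data and every variable name below are illustrative, not part of this commit:

import numpy as np
from sklearn.linear_model import LogisticRegression
from oxonfair import FairPredictor
from oxonfair.utils import group_metrics as gm

# Synthetic binary task with one protected attribute (illustrative only).
rng = np.random.default_rng(0)
X = rng.normal(size=(600, 4))
y = (X[:, 0] + rng.normal(size=600) > 0).astype(int)
sex = (X[:, 1] > 0).astype(int)

clf = LogisticRegression().fit(X[:200], y[:200])
val = {'target': y[200:400], 'data': X[200:400], 'groups': sex[200:400]}
test = {'target': y[400:], 'data': X[400:], 'groups': sex[400:]}

fpred = FairPredictor(clf, val)
fpred.fit(gm.accuracy, gm.recall.diff, 0.9)
print(fpred.frontier_scores())      # frontier on validation data (previous behaviour)
print(fpred.frontier_scores(test))  # new: the same thresholds re-scored on held-out data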
34 changes: 33 additions & 1 deletion src/oxonfair/utils/group_metric_classes.py
@@ -334,7 +334,7 @@ def clone(self, new_name, cond_weights=False):
        my_type = self.__class__
        if cond_weights is False:
            out = my_type(
-                self.func, new_name, self.greater_is_better, 
+                self.func, new_name, self.greater_is_better,
                cond_weights=self.cond_weights, total_metric=self.total_metric
            )
        else:
@@ -385,6 +385,38 @@ def __call__(self, *args: np.ndarray) -> np.ndarray:
        )


+class MaxGroupMetrics(BaseGroupMetric):
+    """Group Metric consisting of the maximum of two existing metrics
+    Parameters
+    ----------
+    metric1: a BaseGroupMetric
+    metric2: a BaseGroupMetric
+    name: a string
+    Returns
+    -------
+    a BaseGroupMetric that gives scores of the form:
+    np.maximum(metric1_response, metric2_response)"""
+
+    def __init__(
+        self,
+        metric1: BaseGroupMetric,
+        metric2: BaseGroupMetric,
+        name: str,  # pylint: disable=super-init-not-called
+    ) -> None:
+        self.metric1: BaseGroupMetric = metric1
+        self.metric2: BaseGroupMetric = metric2
+        self.name = name
+        self.cond_weights = None
+        if metric1.greater_is_better != metric2.greater_is_better:
+            logger.error(
+                "metric1 and metric2 must satisfy: metric1.greater_is_better == metric2.greater_is_better"
+            )
+        self.greater_is_better = metric1.greater_is_better
+
+    def __call__(self, *args: np.ndarray) -> np.ndarray:
+        return np.maximum(self.metric1(*args), self.metric2(*args))
+
+
class Utility(GroupMetric):
    """A group metric for encoding utility functions.
    See Fairness on the Ground: https://arxiv.org/pdf/2103.06172.pdf
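MaxGroupMetrics above mirrors the existing AddGroupMetrics but combines two metrics with an elementwise maximum, i.e. a worst-case (L_inf) rather than summed combination. A short construction sketch; MaxGroupMetrics is re-exported from group_metrics in this commit, and the variable name here is illustrative:

from oxonfair.utils import group_metrics as gm

# Worst of the TPR and TNR gaps; both parts must agree on
# greater_is_better, otherwise __init__ logs an error.
worst_rate_gap = gm.MaxGroupMetrics(
    gm.true_pos_rate.max_diff, gm.true_neg_rate.max_diff, "Worst rate gap"
)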
6 changes: 5 additions & 1 deletion src/oxonfair/utils/group_metrics.py
@@ -5,7 +5,7 @@
from .scipy_metrics_cont_wrapper import roc_auc, average_precision, ScorerRequiresContPred  # noqa: F401
from .group_metric_classes import (  # pylint: disable=unused-import # noqa
    GroupMetric,
-    AddGroupMetrics,
+    AddGroupMetrics, MaxGroupMetrics,
    Utility)  # noqa: F401
# N.B. BaseGroupMetric and Utility are needed for type declarations

@@ -166,6 +166,10 @@ def ge1(x):
equalized_odds = AddGroupMetrics(
    true_pos_rate.diff, true_neg_rate.diff, "Equalized Odds"
)
+equalized_odds_max = MaxGroupMetrics(
+    true_pos_rate.max_diff, true_neg_rate.max_diff, "Equalized Odds (L_inf)"
+)
+
cond_use_accuracy = AddGroupMetrics(
    pos_pred_val.diff, neg_pred_val.diff, "Conditional Use Accuracy"
)
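The new equalized_odds_max bounds the larger of the true-positive-rate and true-negative-rate gaps, whereas the existing equalized_odds bounds their sum. A hedged sketch of using it as a fit constraint, continuing the fpred example from the fair.py section above; the 0.02 tolerance is illustrative, not from this diff:

from oxonfair.utils import group_metrics as gm

# Maximise accuracy subject to max(TPR gap, TNR gap) <= 0.02.
fpred.fit(gm.accuracy, gm.equalized_odds_max, 0.02)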
3 changes: 3 additions & 0 deletions src/oxonfair/utils/performance.py
@@ -74,6 +74,7 @@ def evaluate_fairness(target, prediction, groups, factor=None, *,
    a pandas dataset containing rows indexed by fairness measure name
    """
    target = target.squeeze()
+    assert np.unique(target).shape[0] == 2, 'More than two target labels used. OxonFair only works with binary predictors'
    threshold = find_threshold(threshold, prediction)
    if groups is None:
        groups = np.ones_like(target)
@@ -115,6 +116,8 @@ def evaluate_per_group(target, prediction, groups, factor=None, *,
    a pandas dataset containing rows indexed by fairness measure name
    """
    target = target.squeeze()
+    assert np.unique(target).shape[0] == 2, 'More than two target labels used. OxonFair only works with binary predictors'
+
    threshold = find_threshold(threshold, prediction)

    if metrics is None:
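With the two assertions above, evaluate_fairness and evaluate_per_group now fail fast on non-binary targets instead of silently computing misleading numbers. A small sketch of the guard firing; the arrays are illustrative:

import numpy as np
from oxonfair.utils import performance

y = np.array([0, 1, 2, 1])               # three classes: rejected up front
scores = np.array([0.2, 0.8, 0.5, 0.6])
groups = np.array([0, 0, 1, 1])
try:
    performance.evaluate_fairness(y, scores, groups)
except AssertionError as err:
    print(err)  # More than two target labels used. OxonFair only works with binary predictors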
2 changes: 1 addition & 1 deletion tests/test_check_style.py
@@ -54,7 +54,7 @@ def test_check_style_examples():


def test_md_links():
-    missing_links = lc.check_links('./', ext='.md', recurse=True,)
+    missing_links = lc.check_links('./', ext='.md', recurse=True, use_async=False)
    for link in missing_links:
        warnings.warn(link)
    assert missing_links == []
22 changes: 18 additions & 4 deletions tests/unittests/test_scipy.py
Expand Up @@ -222,9 +222,7 @@ def test_recall_diff(use_fast=True):
def test_disp_impact(use_fast=True):
    """Enforce the 4/5 rule that the worst ratio between the proportion
    of positive decisions is greater than 0.9"""
-    fpredictor = fair.FairPredictor(
-        predictor, test_dict, "sex_ Female", use_fast=use_fast
-    )
+    fpredictor = fair.FairPredictor(predictor, test_dict, "sex_ Female", use_fast=use_fast)
    fpredictor.fit(gm.accuracy, gm.disparate_impact, 0.9)

    measures = fpredictor.evaluate_fairness(metrics=gm.clarify_metrics, verbose=False)
@@ -280,6 +278,7 @@ def test_recall_diff_slow():
def test_recall_diff_hybrid():
    test_recall_diff('hybrid')

+
""" too slow and disabled
def test_many_recall_diff_hybrid(many=200):
    count = 0
@@ -306,7 +305,8 @@ def test_many_recall_diff_slow(many=200):
def test_min_recall_slow():
    "test slow pathway"
    test_min_recall(False)
-"""
+# """
+

def test_min_recall_hybrid():
    test_min_recall('hybrid')
@@ -407,3 +407,17 @@

def test_total_metrics_slow():
    test_total_metrics(fast=False)
+
+
+def test_frontier(use_fast=True):
+    fpredictor = fair.FairPredictor(predictor, test_dict, "sex_ Female", use_fast=use_fast)
+    fpredictor.fit(gm.accuracy, gm.recall.diff, 0.9)
+    assert (fpredictor.frontier_scores() == fpredictor.frontier_scores(test_dict)).all()
+
+
+def test_frontier_slow():
+    test_frontier(False)
+
+
+def test_frontier_hybrid():
+    test_frontier('hybrid')
