From 575dc1331d8478250f1bb1cfa720eb178ef567fb Mon Sep 17 00:00:00 2001
From: Chris Russell <chris.russell@oii.ox.ac.uk>
Date: Mon, 28 Oct 2024 11:07:06 +0000
Subject: [PATCH 1/8] whitespace

---
 tests/unittests/test_scipy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unittests/test_scipy.py b/tests/unittests/test_scipy.py
index 18c9182..58cc52f 100644
--- a/tests/unittests/test_scipy.py
+++ b/tests/unittests/test_scipy.py
@@ -460,4 +460,4 @@ def test_selection_rate_diff_levelling_up_slow():
 
 
 def test_selection_rate_diff_levelling_up_hybrid():
-    test_selection_rate_diff_levelling_up(use_fast='hybrid')
\ No newline at end of file
+    test_selection_rate_diff_levelling_up(use_fast='hybrid')

From 841827251390edc8b6ff14cf931352b4857c3148 Mon Sep 17 00:00:00 2001
From: Chris Russell <chris.russell@oii.ox.ac.uk>
Date: Sat, 23 Nov 2024 10:59:36 +0000
Subject: [PATCH 2/8] better testing of links in md file

---
 tests/test_check_style.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_check_style.py b/tests/test_check_style.py
index af71be6..2923546 100755
--- a/tests/test_check_style.py
+++ b/tests/test_check_style.py
@@ -55,9 +55,15 @@ def test_check_style_examples():
 
 def test_md_links():
     missing_links = lc.check_links('./', ext='.md', recurse=True, use_async=False)
+    missing_links_eg = lc.check_links('./examples/', ext='.md', recurse=True, use_async=False)
+
     for link in missing_links:
         warnings.warn(link)
+
+    for link in missing_links_eg:
+        warnings.warn(link)
     assert missing_links == []
+    assert missing_links_eg == []
 
 
 def test_run_notebooks_without_errors():

From e611a40ed0bce16ddd238a094360c09ee67e0edf Mon Sep 17 00:00:00 2001
From: Chris Russell <chris.russell@oii.ox.ac.uk>
Date: Sat, 23 Nov 2024 11:18:35 +0000
Subject: [PATCH 3/8] speed up slow pathway

---
 src/oxonfair/learners/fair.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/oxonfair/learners/fair.py b/src/oxonfair/learners/fair.py
index 16ad4a9..342435e 100644
--- a/src/oxonfair/learners/fair.py
+++ b/src/oxonfair/learners/fair.py
@@ -8,7 +8,7 @@
 from sklearn.preprocessing import OneHotEncoder
 from ..utils import group_metrics
 from .. utils.scipy_metrics_cont_wrapper import ScorerRequiresContPred
-from ..utils.group_metric_classes import BaseGroupMetric
+from ..utils.group_metric_classes import BaseGroupMetric, Overall
 
 from ..utils import performance as perf
 from . import efficient_compute, fair_frontier
@@ -1093,6 +1093,9 @@ def fix_groups(metric, groups):
 
     groups = groups_to_masks(groups)
 
+    if isinstance(metric, Overall):  # Performance hack: an Overall metric does not depend on group membership, so assign every entry to a single group (1).
+        groups = np.ones(groups.shape[0])
+
     def new_metric(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
         return metric(y_true, y_pred, groups)
     return new_metric
@@ -1146,6 +1149,10 @@ def fix_groups_and_conditioning(metric, groups, conditioning_factor, y_true):
     weights = metric.cond_weights(conditioning_factor, groups, y_true)
     groups = groups_to_masks(groups)
 
+    if isinstance(metric, Overall):  # Performance hack: an Overall metric does not depend on group membership, so assign every entry to a single group (1).
+        groups = np.ones(groups.shape[0])
+
+
     def new_metric(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
         return metric(y_true, y_pred, groups, weights)
     return new_metric

From 1798dcd7cae3b67b8a55a3fd951bb337db91d4d1 Mon Sep 17 00:00:00 2001
From: Chris Russell <chris.russell@oii.ox.ac.uk>
Date: Sat, 23 Nov 2024 11:54:41 +0000
Subject: [PATCH 4/8] reorder tests

---
 tests/test_check_style.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_check_style.py b/tests/test_check_style.py
index 2923546..8313428 100755
--- a/tests/test_check_style.py
+++ b/tests/test_check_style.py
@@ -62,8 +62,9 @@ def test_md_links():
 
     for link in missing_links_eg:
         warnings.warn(link)
-    assert missing_links == []
+
     assert missing_links_eg == []
+    assert missing_links == []
 
 
 def test_run_notebooks_without_errors():

From 64d50c91e5a1e1818803ce2ab8db069373eb7bfa Mon Sep 17 00:00:00 2001
From: Chris Russell <chris.russell@oii.ox.ac.uk>
Date: Sat, 23 Nov 2024 12:05:52 +0000
Subject: [PATCH 5/8] hard code ssrn block in url check

---
 tests/test_check_style.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_check_style.py b/tests/test_check_style.py
index 8313428..4317f03 100755
--- a/tests/test_check_style.py
+++ b/tests/test_check_style.py
@@ -55,7 +55,7 @@ def test_check_style_examples():
 
 def test_md_links():
     missing_links = lc.check_links('./', ext='.md', recurse=True, use_async=False)
-    missing_links_eg = lc.check_links('./examples/', ext='.md', recurse=True, use_async=False)
+    missing_links_eg = lc.check_links('./examples/', ext='.md', recurse=True)
 
     for link in missing_links:
         warnings.warn(link)
@@ -64,7 +64,8 @@ def test_md_links():
         warnings.warn(link)
 
     assert missing_links_eg == []
-    assert missing_links == []
+    assert missing_links == [('README.md', 'https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4331652', 429),] or missing_links == []
+    # SSRN appears to treat the link checker as a crawler and blocks exactly this one paper (the 429 entry above), so that single failure is tolerated.
 
 
 def test_run_notebooks_without_errors():

From 01a4f94f00f31f9b8c59ad54f7d0928b7f106f4d Mon Sep 17 00:00:00 2001
From: Chris Russell <chris.russell@oii.ox.ac.uk>
Date: Sat, 23 Nov 2024 12:07:03 +0000
Subject: [PATCH 6/8] increment version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 34bc19a..29c11a9 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
 
 FAIR = "oxonfair"
 
-version = "0.2.1.8"
+version = "0.2.1.9"
 
 PYTHON_REQUIRES = ">=3.8"
 

From ae117c490900c0b44ea25715847b5a43da8540df Mon Sep 17 00:00:00 2001
From: Chris Russell <chris.russell@oii.ox.ac.uk>
Date: Tue, 26 Nov 2024 14:24:45 +0000
Subject: [PATCH 7/8] align test with change of interface

---
 tests/unittests/test_ag.py    | 17 +++++++++--------
 tests/unittests/test_scipy.py |  4 +++-
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/tests/unittests/test_ag.py b/tests/unittests/test_ag.py
index 767ecb8..dd647c9 100644
--- a/tests/unittests/test_ag.py
+++ b/tests/unittests/test_ag.py
@@ -91,19 +91,20 @@ def test_recall_diff(use_fast=True):
 
     fpredictor = fair.FairPredictor(predictor, test_data, "sex", use_fast=use_fast)
 
-    fpredictor.fit(gm.accuracy, gm.recall.diff, 0.025)
+    limit =0.01
+    fpredictor.fit(gm.accuracy, gm.recall.diff, limit)
 
     # Evaluate the change in fairness (recall difference corresponds to EO)
     measures = fpredictor.evaluate_fairness(verbose=False)
 
-    assert measures["updated"]["recall.diff"] < 0.025
+    assert measures["updated"]["recall.diff"] < limit
     measures = fpredictor.evaluate()
     acc = measures["updated"]["Accuracy"]
-    fpredictor.fit(gm.accuracy, gm.recall.diff, 0.025, greater_is_better_const=True)
+    fpredictor.fit(gm.accuracy, gm.recall.diff, limit, greater_is_better_const=True)
     measures = fpredictor.evaluate_fairness(verbose=False)
-    assert measures["original"]["recall.diff"] > 0.025
+    assert measures["original"]["recall.diff"] > limit
 
-    fpredictor.fit(gm.accuracy, gm.recall.diff, 0.01, greater_is_better_obj=False)
+    fpredictor.fit(gm.accuracy, gm.recall.diff, limit/2, greater_is_better_obj=False)
     assert acc >= fpredictor.evaluate()["updated"]["Accuracy"]
 
 
@@ -117,11 +118,11 @@ def test_subset(use_fast=True):
 
     # Check that metrics computed over a subset of the data is consistent with metrics over all data
     for group in (" White", " Black", " Amer-Indian-Eskimo"):
-        assert all(full_group_metrics.loc[group] == partial_group_metrics.loc[group])
+        assert all(full_group_metrics.loc[('original', group)] == partial_group_metrics.loc[('original', group)])
 
     assert all(
-        full_group_metrics.loc["Maximum difference"]
-        >= partial_group_metrics.loc["Maximum difference"]
+        full_group_metrics.loc[('original', "Maximum difference")]
+        >= partial_group_metrics.loc[('original',"Maximum difference")]
     )
 
 
diff --git a/tests/unittests/test_scipy.py b/tests/unittests/test_scipy.py
index 58cc52f..3d9e2ed 100644
--- a/tests/unittests/test_scipy.py
+++ b/tests/unittests/test_scipy.py
@@ -102,9 +102,11 @@ def test_conflict_groups():
 def test_fit_creates_updated(use_fast=True):
     """eval should return 'updated' iff fit has been called"""
     fpredictor = FairPredictor(predictor, val_dict, use_fast=use_fast)
-    assert isinstance(fpredictor.evaluate(), pd.Series)
+    assert not isinstance(fpredictor.evaluate(), pd.Series)
+    assert 'original' in fpredictor.evaluate().columns
     fpredictor.fit(gm.accuracy, gm.recall, 0)  # constraint is intentionally slack
     assert not isinstance(fpredictor.evaluate(), pd.Series)
+    assert 'original' in fpredictor.evaluate().columns
     assert 'updated' in fpredictor.evaluate().columns
 
 

From 8d87613e0e88d7696f5802eac13d8d14ed0e14f3 Mon Sep 17 00:00:00 2001
From: Chris Russell <chris.russell@oii.ox.ac.uk>
Date: Tue, 26 Nov 2024 14:25:41 +0000
Subject: [PATCH 8/8] adjusting interface of evaluate fairness

---
 src/oxonfair/learners/fair.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/oxonfair/learners/fair.py b/src/oxonfair/learners/fair.py
index 342435e..605ef34 100644
--- a/src/oxonfair/learners/fair.py
+++ b/src/oxonfair/learners/fair.py
@@ -720,6 +720,9 @@ def evaluate_fairness(self, data=None, groups=None, factor=None, *,
 
             collect = pd.concat([collect, new_pd], axis='columns')
             collect.columns = ['original', 'updated']
+        else:
+            collect = pd.concat([collect,], axis='columns')
+            collect.columns = ['original']
 
         return collect
 
@@ -822,7 +825,9 @@ def evaluate_groups(self, data=None, groups=None, metrics=None, fact=None, *,
                                           verbose=verbose)
 
         out = updated
-        if return_original:
+        if self.frontier is None:
+            out = pd.concat([updated, ], keys=['original', ])
+        elif return_original:
             out = pd.concat([original, updated], keys=['original', 'updated'])
         return out