change plots with z_alpha as x-label

NNPDF · Jan 24, 2025 · e3de138 · e3de138
1 parent 55ebf8b
commit e3de138
Show file tree

Hide file tree

Showing 2 changed files with 95 additions and 45 deletions.
diff --git a/validphys2/src/validphys/closuretest/multiclosure_nsigma.py b/validphys2/src/validphys/closuretest/multiclosure_nsigma.py
@@ -61,7 +61,7 @@
 """
 Quantile range for computing the true positive rate and true negative rate.
 """
-ALPHA_RANGE = np.linspace(0, 1.0, 100)
+Z_ALPHA_RANGE = norm.ppf(1 - np.linspace(0, 0.5, 100))
 
 
 @dataclasses.dataclass
@@ -169,14 +169,13 @@ def def_of_nsigma_alpha(
     df = multiclosurefits_nsigma.nsigma_table
     nsigma_values = df[df.index == weighted_dataset].values.flatten()
     set1_alpha = {}
-    for alpha in ALPHA_RANGE:
-        z_alpha = norm.ppf(1 - alpha)
+    for z_alpha in Z_ALPHA_RANGE:
         if complement:
             fit_idxs = np.where(nsigma_values < z_alpha)[0]
         else:
             fit_idxs = np.where(nsigma_values > z_alpha)[0]
         # save it as set to allow for easy intersection with other sets
-        set1_alpha[alpha] = set(df.columns[fit_idxs])
+        set1_alpha[z_alpha] = set(df.columns[fit_idxs])
 
     return NsigmaAlpha(
         alpha_dict=set1_alpha, is_weighted=multiclosurefits_nsigma.is_weighted
@@ -326,15 +325,13 @@ def def_set_2(
 
     set2_alpha = {}
 
-    for alpha in ALPHA_RANGE:
-        z_alpha = norm.ppf(1 - alpha)
-
+    for z_alpha in Z_ALPHA_RANGE:
         if complement:
             columns_bools = np.any((df_weight - df_ref).values < z_alpha, axis=0)
         else:
             columns_bools = np.any((df_weight - df_ref).values > z_alpha, axis=0)
 
-        set2_alpha[alpha] = set(df_weight.columns[columns_bools])
+        set2_alpha[z_alpha] = set(df_weight.columns[columns_bools])
 
     return set2_alpha
 
@@ -386,16 +383,16 @@ def probability_inconsistent(
 
     tagged_rates_cons = []
     tagged_rates = []
-    for alpha in set_2_alpha.keys():
-        set_2_inters_1 = set_2_alpha[alpha].intersection(set_1_alpha[alpha])
-        set_3 = set_3_alpha[alpha]
+    for z_alpha in set_2_alpha.keys():
+        set_2_inters_1 = set_2_alpha[z_alpha].intersection(set_1_alpha[z_alpha])
+        set_3 = set_3_alpha[z_alpha]
 
         set_tagged_fits = set_2_inters_1.union(set_3)
 
         tagged_rates_cons.append(len(set_tagged_fits) / n_fits)
 
         set_tagged_fits = set_tagged_fits.union(
-            comp_set_1_alpha[alpha].intersection(set_2_alpha[alpha])
+            comp_set_1_alpha[z_alpha].intersection(set_2_alpha[z_alpha])
         )
         tagged_rates.append(len(set_tagged_fits) / n_fits)
 

diff --git a/validphys2/src/validphys/closuretest/multiclosure_nsigma_output.py b/validphys2/src/validphys/closuretest/multiclosure_nsigma_output.py
@@ -9,19 +9,51 @@
 import sys
 
 sys.path.insert(0, "./")
-from multiclosure_nsigma import ALPHA_RANGE
+from multiclosure_nsigma import Z_ALPHA_RANGE
+
+
+@figure
+def plot_all_alpha_sets(set_1_alpha, set_2_alpha, set_3_alpha, n_fits):
+    """
+    TODO
+    """
+    fig, ax = plt.subplots()
+    set_1, set_2, set_3 = [], [], []
+    for z_alpha in set_1_alpha.keys():
+        set_1.append(len(set_1_alpha[z_alpha]) / n_fits)
+        set_2.append(len(set_2_alpha[z_alpha]) / n_fits)
+        set_3.append(len(set_3_alpha[z_alpha]) / n_fits)
+
+    ax.plot(set_1_alpha.keys(), set_1, label=r"$P_{\rm flag}, S_1$")
+    ax.plot(set_2_alpha.keys(), set_2, label=r"$P_{\rm flag}, S_2$")
+    ax.plot(set_3_alpha.keys(), set_3, label=r"$P_{\rm flag}, S_3$")
+    ax.set_title(
+        r"$S_1 = \{i | n_{\sigma}^i > Z_{\alpha} \}$"
+        + "\n"
+        + r"$S_2 = \{j \neq i | n_{\sigma}^{{\rm weighted}, j} - n_{\sigma}^{j} > Z_{\alpha} \}$"
+        + "\n"
+        + r"$S_3 = \{i | n_{\sigma}^{{\rm weighted}, i} > Z_{\alpha} \}$"
+    )
+
+    ax.set_xlabel(r"$Z_{\alpha}$")
+    ax.set_ylabel(r"$P_{\rm flag}$")
+    ax.legend()
+
+    return fig
 
 
 @figure
 def plot_probability_inconsistent(
     probability_inconsistent, set_1_alpha, weighted_dataset, n_fits
 ):
     """
-    The set of inconsistent fits I_alpha can be defined in different ways, two possible cases are:
+    The set of inconsistent fits I_alpha:
+
+    1. I_alpha = 1_alpha
 
-    1. I_alpha_1 = (1_alpha intersect 2_alpha) union (3_alpha)
+    2. I_alpha_1 = (1_alpha intersect 2_alpha) union (3_alpha)
 
-    2. I_alpha_2 = I_alpha_1 union (~1_alpha intersect 2_alpha)
+    3. I_alpha_2 = I_alpha_1 union (~1_alpha intersect 2_alpha)
 
     The probability of a dataset being inconsistent is defined as:
             P(inconsistent) = |I_alpha| / N
@@ -33,15 +65,22 @@ def plot_probability_inconsistent(
     fig, ax = plt.subplots()
 
     rates_set1 = []
-    for alpha in ALPHA_RANGE:
-        rates_set1.append(len(set_1_alpha[alpha]) / n_fits)
+    for z_alpha in Z_ALPHA_RANGE:
+        rates_set1.append(len(set_1_alpha[z_alpha]) / n_fits)
 
-    ax.plot(ALPHA_RANGE, rates_cons, label="P(inconsistent) (conservative)")
-    ax.plot(ALPHA_RANGE, rates, label="P(inconsistent)")
-    ax.plot(ALPHA_RANGE, rates_set1, label="Reference fit (set 1)")
-    ax.set_xlabel("alpha")
+    ax.plot(Z_ALPHA_RANGE, rates_set1, label=r"$P_{\rm flag}, C_1$")
+    ax.plot(Z_ALPHA_RANGE, rates_cons, label=r"$P_{\rm flag}, C_2$")
+    ax.plot(Z_ALPHA_RANGE, rates, label=r"$P_{\rm flag}, C_3$")
+
+    ax.set_xlabel(r"$Z_{\alpha}$")
+    ax.set_ylabel(r"$P_{\rm flag}$")
     ax.set_title(
-        f"Probability of classifying {weighted_dataset} dataset as inconsistent"
+        f"Probability of flagging {weighted_dataset} dataset as inconsistent \n"
+        + r"$C_1 = S_1$"
+        + "\n"
+        r"$C_2 = (S_1 \cap S_2) \cup S_3$"
+        + "\n"
+        + r"$C_3 = ((S_1 \cap S_2) \cup S_3) \cap (S_1^c \cap S_2)$"
     )
     ax.legend()
     return fig
@@ -58,14 +97,28 @@ def plot_probability_consistent(
     fig, ax = plt.subplots()
 
     rates_comp_set1 = []
-    for alpha in ALPHA_RANGE:
-        rates_comp_set1.append(len(comp_set_1_alpha[alpha]) / n_fits)
-
-    ax.plot(ALPHA_RANGE, 1 - np.array(rates_cons), label="P(consistent) (conservative)")
-    ax.plot(ALPHA_RANGE, 1 - np.array(rates), label="P(consistent)")
-    ax.plot(ALPHA_RANGE, rates_comp_set1, label="Reference fit (set 1)")
-    ax.set_xlabel("alpha")
-    ax.set_title(f"Probability of classifying {weighted_dataset} dataset as consistent")
+    for z_alpha in Z_ALPHA_RANGE:
+        rates_comp_set1.append(len(comp_set_1_alpha[z_alpha]) / n_fits)
+
+    ax.plot(Z_ALPHA_RANGE, rates_comp_set1, label=r"$1 - P_{\rm flag}, C_1$")
+    ax.plot(Z_ALPHA_RANGE, 1 - np.array(rates), label=r"$1 - P_{\rm flag}, C_2$")
+    ax.plot(Z_ALPHA_RANGE, 1 - np.array(rates_cons), label=r"$1 - P_{\rm flag}, C_3$")
+
+    ax.set_xlabel(r"$Z_{\alpha}$")
+    ax.set_ylabel(r"$1 - P_{\rm flag}$")
+    ax.set_title(
+        f"Probability of flagging {weighted_dataset} dataset as consistent"
+        + r"$((1_{\alpha} \cap 2_{\alpha}) \cup 3_{\alpha})^c$"
+    )
+
+    ax.set_title(
+        f"Probability of flagging {weighted_dataset} dataset as consistent \n"
+        + r"$C_1 = S_1$"
+        + "\n"
+        r"$C_2 = (S_1 \cap S_2) \cup S_3$"
+        + "\n"
+        + r"$C_3 = ((S_1 \cap S_2) \cup S_3) \cap (S_1^c \cap S_2)$"
+    )
     ax.legend()
     return fig
 
@@ -77,12 +130,12 @@ def plot_set1_alpha(set_1_alpha, n_fits):
     """
     fig, ax = plt.subplots()
     set1_vals = []
-    for alpha in set_1_alpha.keys():
-        set1_vals.append(len(set_1_alpha[alpha]) / n_fits)
+    for z_alpha in set_1_alpha.keys():
+        set1_vals.append(len(set_1_alpha[z_alpha]) / n_fits)
 
     ax.plot(set_1_alpha.keys(), set1_vals, label="Set 1 alpha")
     ax.set_title(r"$1_{\alpha}$")
-    ax.set_xlabel(r"$\alpha$")
+    ax.set_xlabel(r"$Z_{\alpha}$")
     ax.legend()
     return fig
 
@@ -94,12 +147,12 @@ def plot_set3_alpha(set_3_alpha, n_fits):
     """
     fig, ax = plt.subplots()
     set3_vals = []
-    for alpha in set_3_alpha.keys():
-        set3_vals.append(len(set_3_alpha[alpha]) / n_fits)
+    for z_alpha in set_3_alpha.keys():
+        set3_vals.append(len(set_3_alpha[z_alpha]) / n_fits)
 
     ax.plot(set_3_alpha.keys(), set3_vals, label="Set 3 alpha")
     ax.set_title(r"$3_{\alpha}$")
-    ax.set_xlabel(r"$\alpha$")
+    ax.set_xlabel(r"$Z_{\alpha}$")
     ax.legend()
     return fig
 
@@ -111,12 +164,12 @@ def plot_set2_alpha(set_2_alpha, n_fits):
     """
     fig, ax = plt.subplots()
     set2_vals = []
-    for alpha in set_2_alpha.keys():
-        set2_vals.append(len(set_2_alpha[alpha]) / n_fits)
+    for z_alpha in set_2_alpha.keys():
+        set2_vals.append(len(set_2_alpha[z_alpha]) / n_fits)
 
     ax.plot(set_2_alpha.keys(), set2_vals, label="Set 2 alpha")
     ax.set_title(r"$2_{\alpha}$")
-    ax.set_xlabel("alpha")
+    ax.set_xlabel(r"$Z_{\alpha}$")
     ax.legend()
     return fig
 
@@ -129,9 +182,9 @@ def plot_set1_vs_set3_alpha(set_1_alpha, set_3_alpha, weighted_dataset, n_fits):
     fig, ax = plt.subplots()
     set1_vals = []
     set3_vals = []
-    for alpha in set_1_alpha.keys():
-        set1_vals.append(len(set_1_alpha[alpha]) / n_fits)
-        set3_vals.append(len(set_3_alpha[alpha]) / n_fits)
+    for z_alpha in set_1_alpha.keys():
+        set1_vals.append(len(set_1_alpha[z_alpha]) / n_fits)
+        set3_vals.append(len(set_3_alpha[z_alpha]) / n_fits)
 
     ax.plot(set_1_alpha.keys(), set1_vals, label="TPR, reference fit (set 1)")
     ax.plot(set_3_alpha.keys(), set3_vals, label="TPR, weighted fit (set 3)")
@@ -148,8 +201,8 @@ def plot_set2(set_2_alpha, n_fits):
     """
     fig, ax = plt.subplots()
     set2_vals = []
-    for alpha in set_2_alpha.keys():
-        set2_vals.append(len(set_2_alpha[alpha]) / n_fits)
+    for z_alpha in set_2_alpha.keys():
+        set2_vals.append(len(set_2_alpha[z_alpha]) / n_fits)
 
     ax.plot(set_2_alpha.keys(), set2_vals, label="Set 2 alpha")