diff --git a/nonadditivity/classification/classify.py b/nonadditivity/classification/classify.py index e94162f..9d3276b 100644 --- a/nonadditivity/classification/classify.py +++ b/nonadditivity/classification/classify.py @@ -252,6 +252,37 @@ def _classify_circles( return classification +def _is_surprisingly_nonadditive(circle: Circle) -> bool: + """Check whether a circle's nonadditivity is surprising. + + Use keyword "circle" for this to work. + + A circle is considered surprising if none of the following is true: + Distance between R groups ≤ 2 atoms + Tanimoto similarity of the transformation < 0.4 + Number of exchanged heavy atoms > 10 + Linker exchange transformations + Transformations with unassigned or inverted stereocenters + + Args: + circle (Circle): circle to check. + + Returns: + bool: True if surprising, False if mundane. + """ + if circle.get_property(Circle.Properties.DISTANCE_BETWEEN_R_GROUPS) <= 2: + return False + if circle.get_property(Circle.Properties.MIN_TANIMOTO) < 0.4: + return False + if circle.get_property(Circle.Properties.NUM_HEAVY_ATOMS_DIFF) > 10: + return False + if circle.get_property(Circle.Properties.MAX_NUM_MMPDB_CUTS) > 1: + return False + return ( + circle.get_property(Circle.Properties.HAS_INVERSION_IN_TRANSFORMATION) == "None" + ) + + def _update_na_dataframe( na_dataframe: pd.DataFrame, circles: list[Circle], @@ -294,6 +325,9 @@ def _update_na_dataframe( circles=circles, property_key=props.DISTANCE_BETWEEN_R_GROUPS, ) + na_dataframe["classification"] = [ + "surprising" if _is_surprisingly_nonadditive(c) else "mundane" for c in circles + ] def classify_circles( diff --git a/nonadditivity/workflow/canonicalize_output.py b/nonadditivity/workflow/canonicalize_output.py index 0732ed1..c8e3f2f 100644 --- a/nonadditivity/workflow/canonicalize_output.py +++ b/nonadditivity/workflow/canonicalize_output.py @@ -91,11 +91,15 @@ def _get_props() -> list[str]: Returns: list[str]: list of descripytion keys """ - return 
["num_atoms_between_r_groups", "ortho_classification"] + [ - val - for val in Circle.classification_keys.values() - if "ortho" not in val and "between" not in val - ] + return ( + ["num_atoms_between_r_groups", "ortho_classification"] + + [ + val + for val in Circle.classification_keys.values() + if "ortho" not in val and "between" not in val + ] + + ["classification"] + ) def _add_to_dict( @@ -215,7 +219,6 @@ def canonicalize_na_dataframe( columns=[ "has_transformation_at_ortho", "has_ortho_substituent_introduced", - "bonds_atoms_between_r_groups", ], ) except KeyError: diff --git a/tests/_test_files/test_naa_classify.csv b/tests/_test_files/test_naa_classify.csv index 60b5f36..892acab 100644 --- a/tests/_test_files/test_naa_classify.csv +++ b/tests/_test_files/test_naa_classify.csv @@ -1,3 +1,3 @@ -Compound1,Compound2,Compound3,Compound4,SMILES1,SMILES2,SMILES3,SMILES4,Prop_Cpd1,Prop_Cpd2,Prop_Cpd3,Prop_Cpd4,Transformation1,Transformation2,Property,Series,Nonadditivity,Theo_Quantile,Circle_ID,hbond_donor_diff,hbond_acceptor_diff,formal_charge_diff,tpsa_diff,num_rot_bonds_diff,sp3_carbon_diff,log_p_diff,chi0_diff,chi1_diff,chi2_diff,num_heavy_atoms_diff,tertiary_amide_formed,has_stereoinversion_in_transformation,max_num_mmpdb_cuts,max_num_heavy_atom_in_transformation,compound_stereocenter_classification,min_transformation_tanimoto,substituents_in_same_ring_system,ortho_classification,num_atoms_between_r_groups -ID1,ID2,ID4,ID3,CC=Cc1ccc(C)nc1,CC(F)=Cc1ccc(C)nc1,CC(F)=C(Cl)c1ccc(C)nc1,CC=C(Cl)c1ccc(C)nc1,4.5,4.2,5.9,3.7,[*:1][H]>>[*:1]F,[*:1][H]>>[*:1]Cl,TEST_PCHEMBL_VALUE,,2.5,0.5449521356173604,ID1_ID2_ID4_ID3_TEST_PCHEMBL_VALUE,0,0,0,0.0,0,0,0.8637000000000006,1.7404869760061565,0.7833624025851336,0.30279739252301363,2,False,None,1,1,None,0.4996873045653534,False,None,2 
-ID1,ID4,ID6,ID5,CC=Cc1ccc(C)nc1,CC(F)=C(Cl)c1ccc(C)nc1,CC(F)=C(Cl)c1cnc(C)c(O)c1,CC=Cc1cnc(C)c(O)c1,4.5,5.9,4.0,3.4,[*:1]C=CC>>[*:1]C(Cl)=C(C)F,[*:1][H]>>[*:1]O,TEST_PCHEMBL_VALUE,,-0.8000000000000003,-0.5449521356173604,ID1_ID4_ID6_ID5_TEST_PCHEMBL_VALUE,1,1,0,20.230000000000004,0,0,1.158100000000001,2.610730464009235,1.1940460051080928,0.44307085693965886,3,False,None,1,5,None,0.40878552971576226,True,None,3 +Compound1,Compound2,Compound3,Compound4,SMILES1,SMILES2,SMILES3,SMILES4,Prop_Cpd1,Prop_Cpd2,Prop_Cpd3,Prop_Cpd4,Transformation1,Transformation2,Property,Series,Nonadditivity,Theo_Quantile,Circle_ID,hbond_donor_diff,hbond_acceptor_diff,formal_charge_diff,tpsa_diff,num_rot_bonds_diff,sp3_carbon_diff,log_p_diff,chi0_diff,chi1_diff,chi2_diff,num_heavy_atoms_diff,tertiary_amide_formed,has_stereoinversion_in_transformation,max_num_mmpdb_cuts,max_num_heavy_atom_in_transformation,compound_stereocenter_classification,min_transformation_tanimoto,substituents_in_same_ring_system,ortho_classification,num_atoms_between_r_groups,classification +ID1,ID2,ID4,ID3,CC=Cc1ccc(C)nc1,CC(F)=Cc1ccc(C)nc1,CC(F)=C(Cl)c1ccc(C)nc1,CC=C(Cl)c1ccc(C)nc1,4.5,4.2,5.9,3.7,[*:1][H]>>[*:1]F,[*:1][H]>>[*:1]Cl,TEST_PCHEMBL_VALUE,,2.5,0.5449521356173604,ID1_ID2_ID4_ID3_TEST_PCHEMBL_VALUE,0,0,0,0.0,0,0,0.8637000000000006,1.7404869760061565,0.7833624025851336,0.30279739252301363,2,False,None,1,1,None,0.4996873045653534,False,None,2,mundane +ID1,ID4,ID6,ID5,CC=Cc1ccc(C)nc1,CC(F)=C(Cl)c1ccc(C)nc1,CC(F)=C(Cl)c1cnc(C)c(O)c1,CC=Cc1cnc(C)c(O)c1,4.5,5.9,4.0,3.4,[*:1]C=CC>>[*:1]C(Cl)=C(C)F,[*:1][H]>>[*:1]O,TEST_PCHEMBL_VALUE,,-0.8000000000000003,-0.5449521356173604,ID1_ID4_ID6_ID5_TEST_PCHEMBL_VALUE,1,1,0,20.230000000000004,0,0,1.158100000000001,2.610730464009235,1.1940460051080928,0.44307085693965886,3,False,None,1,5,None,0.40878552971576226,True,None,3,surprising diff --git a/tests/nonadditivity/classification/test_classify.py b/tests/nonadditivity/classification/test_classify.py index 24912d0..6e81bfa 
100644 --- a/tests/nonadditivity/classification/test_classify.py +++ b/tests/nonadditivity/classification/test_classify.py @@ -12,6 +12,7 @@ _create_compound_dict, _get_na_compounds, _get_transformations_for_na_dataframe, + _is_surprisingly_nonadditive, _update_na_dataframe, _update_per_compound_dataframe, classify, @@ -37,7 +38,7 @@ (Props.HAS_INVERSION_IN_TRANSFORMATION, "None"), (Props.MAX_NUM_MMPDB_CUTS, 1), (Props.MAX_HEAVY_ATOM_IN_TRANSFORMATION, 4), - (Props.COMPOUND_STEREO_CLASSIFICATION, "Unassigned"), + (Props.COMPOUND_STEREO_CLASSIFICATION, "Assigned"), (Props.MIN_TANIMOTO, 0.6626506024096386), (Props.SUBSTITUENT_ON_SAME_RING_SYSYTEM, False), ] @@ -279,6 +280,33 @@ def test_classify_circle_special( assert result == [solution] +@pytest.mark.parametrize( + "fixture_name, solution", + [ + ("circle", True), + ("circle_2", True), + ("ortho_none_circle", False), + ("ortho_exchanged_circle", True), + ("ortho_both_circle", False), + ], +) +def test_is_surprisingly_nonadditive( + fixture_name: str, + solution: str, + request: pytest.FixtureRequest, +) -> None: + """Test nonadditivity.classification.classify:_is_surprisingly_nonadditive. 
+ + Args: + fixture_name (str): fixture name + solution (str): expected output + request (pytest.FixtureRequest): pytest magic + """ + circle: Circle = request.getfixturevalue(fixture_name) + circle.classify() + assert _is_surprisingly_nonadditive(circle) == solution + + @pytest.mark.parametrize( "prop, solution", [ diff --git a/tests/nonadditivity/workflow/test_canonicalize_output.py b/tests/nonadditivity/workflow/test_canonicalize_output.py index 25e426c..c5641bf 100644 --- a/tests/nonadditivity/workflow/test_canonicalize_output.py +++ b/tests/nonadditivity/workflow/test_canonicalize_output.py @@ -84,7 +84,12 @@ def test_get_props() -> None: "num_atoms_between_r_groups", ): solution.remove(rem) - solution = ["num_atoms_between_r_groups", "ortho_classification", *solution] + solution = [ + "num_atoms_between_r_groups", + "ortho_classification", + *solution, + "classification", + ] assert _get_props() == solution