Skip to content

Commit

Permalink
add 'classification' column
Browse files Browse the repository at this point in the history
  • Loading branch information
Niels Maeder committed Jul 14, 2024
1 parent e5af8ad commit 4821a90
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 11 deletions.
34 changes: 34 additions & 0 deletions nonadditivity/classification/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,37 @@ def _classify_circles(
return classification


def _is_surprisingly_nonadditive(circle: Circle) -> bool:
    """Decide whether the nonadditivity of a circle counts as surprising.

    Use the keyword "circle" when calling this function (note kept from the
    original documentation).

    A circle is mundane as soon as one of the following holds; it is
    surprising only if none of them is true:

        * distance between R groups <= 2 atoms
        * Tanimoto similarity of the transformation < 0.4
        * number of exchanged heavy atoms > 10
        * linker exchange transformation (more than one mmpdb cut)
        * transformation with unassigned or inverted stereocenters
          (inversion property differs from the string "None")

    Args:
        circle (Circle): circle to check.

    Returns:
        bool: True if the circle is surprising, False if it is mundane.
    """
    keys = Circle.Properties
    # The `or` chain short-circuits, so properties are read in a fixed order
    # and later ones are skipped once a mundane criterion has matched.
    matches_mundane_criterion = (
        circle.get_property(keys.DISTANCE_BETWEEN_R_GROUPS) <= 2
        or circle.get_property(keys.MIN_TANIMOTO) < 0.4
        or circle.get_property(keys.NUM_HEAVY_ATOMS_DIFF) > 10
        or circle.get_property(keys.MAX_NUM_MMPDB_CUTS) > 1
    )
    if matches_mundane_criterion:
        return False
    # The inversion property is compared against the literal string "None",
    # not the None singleton.
    inversion_state = circle.get_property(keys.HAS_INVERSION_IN_TRANSFORMATION)
    return inversion_state == "None"


def _update_na_dataframe(
na_dataframe: pd.DataFrame,
circles: list[Circle],
Expand Down Expand Up @@ -294,6 +325,9 @@ def _update_na_dataframe(
circles=circles,
property_key=props.DISTANCE_BETWEEN_R_GROUPS,
)
na_dataframe["classification"] = [
"surprising" if _is_surprisingly_nonadditive(c) else "mundane" for c in circles
]


def classify_circles(
Expand Down
15 changes: 9 additions & 6 deletions nonadditivity/workflow/canonicalize_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,15 @@ def _get_props() -> list[str]:
Returns:
list[str]: list of description keys
"""
return ["num_atoms_between_r_groups", "ortho_classification"] + [
val
for val in Circle.classification_keys.values()
if "ortho" not in val and "between" not in val
]
return (
["num_atoms_between_r_groups", "ortho_classification"]
+ [
val
for val in Circle.classification_keys.values()
if "ortho" not in val and "between" not in val
]
+ ["classification"]
)


def _add_to_dict(
Expand Down Expand Up @@ -215,7 +219,6 @@ def canonicalize_na_dataframe(
columns=[
"has_transformation_at_ortho",
"has_ortho_substituent_introduced",
"bonds_atoms_between_r_groups",
],
)
except KeyError:
Expand Down
6 changes: 3 additions & 3 deletions tests/_test_files/test_naa_classify.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Compound1,Compound2,Compound3,Compound4,SMILES1,SMILES2,SMILES3,SMILES4,Prop_Cpd1,Prop_Cpd2,Prop_Cpd3,Prop_Cpd4,Transformation1,Transformation2,Property,Series,Nonadditivity,Theo_Quantile,Circle_ID,hbond_donor_diff,hbond_acceptor_diff,formal_charge_diff,tpsa_diff,num_rot_bonds_diff,sp3_carbon_diff,log_p_diff,chi0_diff,chi1_diff,chi2_diff,num_heavy_atoms_diff,tertiary_amide_formed,has_stereoinversion_in_transformation,max_num_mmpdb_cuts,max_num_heavy_atom_in_transformation,compound_stereocenter_classification,min_transformation_tanimoto,substituents_in_same_ring_system,ortho_classification,num_atoms_between_r_groups
ID1,ID2,ID4,ID3,CC=Cc1ccc(C)nc1,CC(F)=Cc1ccc(C)nc1,CC(F)=C(Cl)c1ccc(C)nc1,CC=C(Cl)c1ccc(C)nc1,4.5,4.2,5.9,3.7,[*:1][H]>>[*:1]F,[*:1][H]>>[*:1]Cl,TEST_PCHEMBL_VALUE,,2.5,0.5449521356173604,ID1_ID2_ID4_ID3_TEST_PCHEMBL_VALUE,0,0,0,0.0,0,0,0.8637000000000006,1.7404869760061565,0.7833624025851336,0.30279739252301363,2,False,None,1,1,None,0.4996873045653534,False,None,2
ID1,ID4,ID6,ID5,CC=Cc1ccc(C)nc1,CC(F)=C(Cl)c1ccc(C)nc1,CC(F)=C(Cl)c1cnc(C)c(O)c1,CC=Cc1cnc(C)c(O)c1,4.5,5.9,4.0,3.4,[*:1]C=CC>>[*:1]C(Cl)=C(C)F,[*:1][H]>>[*:1]O,TEST_PCHEMBL_VALUE,,-0.8000000000000003,-0.5449521356173604,ID1_ID4_ID6_ID5_TEST_PCHEMBL_VALUE,1,1,0,20.230000000000004,0,0,1.158100000000001,2.610730464009235,1.1940460051080928,0.44307085693965886,3,False,None,1,5,None,0.40878552971576226,True,None,3
Compound1,Compound2,Compound3,Compound4,SMILES1,SMILES2,SMILES3,SMILES4,Prop_Cpd1,Prop_Cpd2,Prop_Cpd3,Prop_Cpd4,Transformation1,Transformation2,Property,Series,Nonadditivity,Theo_Quantile,Circle_ID,hbond_donor_diff,hbond_acceptor_diff,formal_charge_diff,tpsa_diff,num_rot_bonds_diff,sp3_carbon_diff,log_p_diff,chi0_diff,chi1_diff,chi2_diff,num_heavy_atoms_diff,tertiary_amide_formed,has_stereoinversion_in_transformation,max_num_mmpdb_cuts,max_num_heavy_atom_in_transformation,compound_stereocenter_classification,min_transformation_tanimoto,substituents_in_same_ring_system,ortho_classification,num_atoms_between_r_groups,classification
ID1,ID2,ID4,ID3,CC=Cc1ccc(C)nc1,CC(F)=Cc1ccc(C)nc1,CC(F)=C(Cl)c1ccc(C)nc1,CC=C(Cl)c1ccc(C)nc1,4.5,4.2,5.9,3.7,[*:1][H]>>[*:1]F,[*:1][H]>>[*:1]Cl,TEST_PCHEMBL_VALUE,,2.5,0.5449521356173604,ID1_ID2_ID4_ID3_TEST_PCHEMBL_VALUE,0,0,0,0.0,0,0,0.8637000000000006,1.7404869760061565,0.7833624025851336,0.30279739252301363,2,False,None,1,1,None,0.4996873045653534,False,None,2,mundane
ID1,ID4,ID6,ID5,CC=Cc1ccc(C)nc1,CC(F)=C(Cl)c1ccc(C)nc1,CC(F)=C(Cl)c1cnc(C)c(O)c1,CC=Cc1cnc(C)c(O)c1,4.5,5.9,4.0,3.4,[*:1]C=CC>>[*:1]C(Cl)=C(C)F,[*:1][H]>>[*:1]O,TEST_PCHEMBL_VALUE,,-0.8000000000000003,-0.5449521356173604,ID1_ID4_ID6_ID5_TEST_PCHEMBL_VALUE,1,1,0,20.230000000000004,0,0,1.158100000000001,2.610730464009235,1.1940460051080928,0.44307085693965886,3,False,None,1,5,None,0.40878552971576226,True,None,3,surprising
30 changes: 29 additions & 1 deletion tests/nonadditivity/classification/test_classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
_create_compound_dict,
_get_na_compounds,
_get_transformations_for_na_dataframe,
_is_surprisingly_nonadditive,
_update_na_dataframe,
_update_per_compound_dataframe,
classify,
Expand All @@ -37,7 +38,7 @@
(Props.HAS_INVERSION_IN_TRANSFORMATION, "None"),
(Props.MAX_NUM_MMPDB_CUTS, 1),
(Props.MAX_HEAVY_ATOM_IN_TRANSFORMATION, 4),
(Props.COMPOUND_STEREO_CLASSIFICATION, "Unassigned"),
(Props.COMPOUND_STEREO_CLASSIFICATION, "Assigned"),
(Props.MIN_TANIMOTO, 0.6626506024096386),
(Props.SUBSTITUENT_ON_SAME_RING_SYSYTEM, False),
]
Expand Down Expand Up @@ -279,6 +280,33 @@ def test_classify_circle_special(
assert result == [solution]


@pytest.mark.parametrize(
    "fixture_name, solution",
    [
        ("circle", True),
        ("circle_2", True),
        ("ortho_none_circle", False),
        ("ortho_exchanged_circle", True),
        ("ortho_both_circle", False),
    ],
)
def test_is_surprisingly_nonadditive(
    fixture_name: str,
    solution: bool,
    request: pytest.FixtureRequest,
) -> None:
    """Test nonadditivity.classification.classify:_is_surprisingly_nonadditive.

    Args:
        fixture_name (str): name of the circle fixture to load
        solution (bool): expected return value (True means surprising)
        request (pytest.FixtureRequest): pytest request object, used to
            resolve the fixture by name at runtime
    """
    circle: Circle = request.getfixturevalue(fixture_name)
    # classify() runs before the check; presumably it populates the
    # properties the check reads -- confirm against Circle.classify.
    circle.classify()
    assert _is_surprisingly_nonadditive(circle) == solution


@pytest.mark.parametrize(
"prop, solution",
[
Expand Down
7 changes: 6 additions & 1 deletion tests/nonadditivity/workflow/test_canonicalize_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ def test_get_props() -> None:
"num_atoms_between_r_groups",
):
solution.remove(rem)
solution = ["num_atoms_between_r_groups", "ortho_classification", *solution]
solution = [
"num_atoms_between_r_groups",
"ortho_classification",
*solution,
"classification",
]
assert _get_props() == solution


Expand Down

0 comments on commit 4821a90

Please sign in to comment.