diff --git a/archives/prospr_core.tar.gz b/archives/prospr_core.tar.gz index ec4589e..03aa6be 100644 Binary files a/archives/prospr_core.tar.gz and b/archives/prospr_core.tar.gz differ diff --git a/archives/prospr_core.zip b/archives/prospr_core.zip index d0aee49..5be7b07 100644 Binary files a/archives/prospr_core.zip and b/archives/prospr_core.zip differ diff --git a/archives/prospr_data.tar.gz b/archives/prospr_data.tar.gz index eddf068..fdbd076 100644 Binary files a/archives/prospr_data.tar.gz and b/archives/prospr_data.tar.gz differ diff --git a/archives/prospr_data.zip b/archives/prospr_data.zip index c251fb6..508ea43 100644 Binary files a/archives/prospr_data.zip and b/archives/prospr_data.zip differ diff --git a/manage.sh b/manage.sh index 54b9c1b..ee274ac 100755 --- a/manage.sh +++ b/manage.sh @@ -75,6 +75,18 @@ case "$1" in echo "~ Running core tests.." ./"$COREDIR/tests/run_tests.sh" "$2" ;; + # Test visualizations without building the Python interfaces. + "test_visualize") + echo "~ Running visualize tests.." + echo "~ Uninstalling old prospr.." + pip uninstall -qy prospr + echo "~ Installing new prospr.." + pip install -q . + python tests/visualize/test_visualization.py + echo "~ Uninstalling old prospr.." + pip uninstall -qy prospr + echo "~ Done running tests!" + ;; # Test core without building the Python interfaces. "debug_core") echo "~ Running core tests.." diff --git a/prospr/core/core_module.cpp b/prospr/core/core_module.cpp index 9c55def..51bde46 100644 --- a/prospr/core/core_module.cpp +++ b/prospr/core/core_module.cpp @@ -39,7 +39,7 @@ PYBIND11_MODULE(prospr_core, m) { .def(py::init, bool &>(), "Protein constructor", py::arg("sequence"), py::arg("dim")=2, - py::arg("model")="", py::arg("bond_values")=bond_values, + py::arg("model")="HP", py::arg("bond_values")=bond_values, py::arg("bond_symmetry")=true) .def_property_readonly("solutions_checked", &Protein::get_solutions_checked) @@ -52,6 +52,7 @@ PYBIND11_MODULE(prospr_core, m) { .def_property_readonly("last_pos", &Protein::get_last_pos) .def_property_readonly("score", &Protein::get_score) .def_property_readonly("sequence", &Protein::get_sequence) + .def_property_readonly("max_weights", &Protein::get_max_weights) .def("get_amino", &Protein::get_amino, "Get amino index and next direction from amino at given position", diff --git a/prospr/helpers.py b/prospr/helpers.py index 1766943..a0a458f 100644 --- a/prospr/helpers.py +++ b/prospr/helpers.py @@ -19,9 +19,10 @@ def get_scoring_aminos(protein): amino_acid = protein.get_amino(cur_pos) idx = amino_acid.index next_dir = amino_acid.next_move + max_weights = protein.max_weights # Store origin if it may score points. - if protein.is_hydro(idx): + if max_weights[idx] < 0: score_pos[tuple(cur_pos)] = np.array([0, next_dir], dtype=np.int64) while next_dir != 0: @@ -36,7 +37,7 @@ def get_scoring_aminos(protein): next_dir = fold # Save amino if it may score points. - if protein.is_hydro(idx): + if max_weights[idx] < 0: score_pos[tuple(cur_pos)] = np.array( [prev_dir, next_dir], dtype=np.int64 ) @@ -52,7 +53,7 @@ def get_scoring_pairs(protein): # Get dictionary with the amino's that can possibly score points. score_aminos = get_scoring_aminos(protein) - # Sort positions from bottom-left to upper-rigth. + # Sort positions from bottom-left to upper-right. moves = np.array([m for m in range(1, protein.dim + 1)]) pairs = np.empty((1, 2, protein.dim), dtype=np.int64) diff --git a/prospr/visualize.py b/prospr/visualize.py index ea7e09d..ff573ea 100644 --- a/prospr/visualize.py +++ b/prospr/visualize.py @@ -17,18 +17,13 @@ import pandas as pd -def _plot_protein_2d(protein, ax): +def _plot_aminos_2d_basic(protein, df, ax): """ - - :param protein: - :param ax: + Plot amino acids in basic style in a 2D figure. + :param Protein protein: Protein object to plot the hash of. + :param DataFrame df: DataFrame with all ordered positions. + :param Axes ax: Axis to plot on. """ - # Setup dataframe containing the data and set types for the coordinates. - df = pd.DataFrame( - get_ordered_positions(protein), columns=["x", "y", "Type"] - ) - df = df.astype({"x": "int32", "y": "int32"}) - ax.plot(df["x"], df["y"], color="black", alpha=0.65, zorder=1) sns.scatterplot( x="x", @@ -55,44 +50,72 @@ def _plot_protein_2d(protein, ax): color="indianred", alpha=0.9, zorder=1, - lw=1.5, + lw=2, ) - # Set axis labels. - ax.set_title(f"2D conformation with {protein.score} energy") - ax.set_xlabel("x-axis", fontsize=13) - ax.set_ylabel("y-axis", fontsize=13) - ax.xaxis.set_major_locator(MaxNLocator(integer=True)) - ax.yaxis.set_major_locator(MaxNLocator(integer=True)) - # Remove title from legend and add item for bonds. - handles, labels = ax.get_legend_handles_labels() - score_patch = Line2D( - [], - [], - color="indianred", - linestyle=":", - alpha=0.9, - label="Contact", - lw=1.5, +def _plot_aminos_2d_paper(protein, df, ax): + """ + Plot amino acids in paper style in a 2D figure. + :param Protein protein: Protein object to plot the hash of. + :param DataFrame df: DataFrame with all ordered positions. + :param Axes ax: Axis to plot on. + """ + # Split dataframe on amino acid type. + df_H = df.loc[df["Type"] == "H"] + df_P = df.loc[df["Type"] == "P"] + + ax.plot(df["x"], df["y"], color="black", alpha=0.65, zorder=1) + sns.scatterplot( + x="x", + y="y", + data=df_H, + marker="o", + edgecolor="royalblue", + s=80, + zorder=2, + ax=ax, + label="H", + ) + sns.scatterplot( + x="x", + y="y", + data=df_P, + marker="o", + facecolor="white", + edgecolor="orange", + linewidth=2, + s=80, + zorder=2, + ax=ax, + label="P", ) - handles.append(score_patch) - labels.append(score_patch.get_label()) - ax.legend(handles=handles, labels=labels) + # Plot dotted lines between the aminos that increase the stability. + pairs = get_scoring_pairs(protein) -def _plot_protein_3d(protein, ax): - """ + for pos1, pos2 in pairs: + ax.plot( + [pos1[0], pos2[0]], + [pos1[1], pos2[1]], + linestyle=":", + color="indianred", + alpha=0.9, + zorder=1, + lw=2, + ) + + # Remove axis, and position legend in the upper right with created space. + ax.axis("off") - :param protein: - :param ax: - """ - # Setup dataframe containing the data and set types for the coordinates. - df = pd.DataFrame( - get_ordered_positions(protein), columns=["x", "y", "z", "Type"] - ) - df = df.astype({"x": "int32", "y": "int32", "z": "int32"}) +def _plot_aminos_3d_basic(protein, df, ax): + """ + Plot amino acids in basic style in a 3D figure. + :param Protein protein: Protein object to plot the hash of. + :param DataFrame df: DataFrame with all ordered positions. + :param Axes ax: Axis to plot on. + """ # Split dataframe on amino acid type. df_H = df.loc[df["Type"] == "H"] df_P = df.loc[df["Type"] == "P"] @@ -132,17 +155,129 @@ def _plot_protein_3d(protein, ax): color="indianred", alpha=0.9, zorder=1, - lw=1.5, + lw=2, ) - # Set axis labels and tics. - ax.set_title(f"3D conformation with {protein.score} energy") - ax.set_xlabel("x-axis", fontsize=13) - ax.set_ylabel("y-axis", fontsize=13) - ax.set_zlabel("z-axis", fontsize=13) - ax.xaxis.set_major_locator(MaxNLocator(integer=True)) - ax.yaxis.set_major_locator(MaxNLocator(integer=True)) - ax.zaxis.set_major_locator(MaxNLocator(integer=True)) + +def _plot_aminos_3d_paper(protein, df, ax): + """ + Plot amino acids in paper style in a 3D figure. + :param Protein protein: Protein object to plot the hash of. + :param DataFrame df: DataFrame with all ordered positions. + :param Axes ax: Axis to plot on. + """ + # Split dataframe on amino acid type. + df_H = df.loc[df["Type"] == "H"] + df_P = df.loc[df["Type"] == "P"] + + ax.plot(df["x"], df["y"], df["z"], color="black", alpha=0.65, zorder=1) + + sns.scatterplot( + df_H["x"], + df_H["y"], + df_H["z"], + data=df_H, + marker="o", + edgecolor="royalblue", + s=60, + zorder=2, + ax=ax, + label="H", + ) + sns.scatterplot( + df_P["x"], + df_P["y"], + df_P["z"], + data=df_P, + marker="o", + facecolor="white", + edgecolor="orange", + linewidth=2, + s=60, + zorder=2, + ax=ax, + label="P", + ) + + # Plot dotted lines between the aminos that increase the stability. + pairs = get_scoring_pairs(protein) + + for pos1, pos2 in pairs: + ax.plot( + [pos1[0], pos2[0]], + [pos1[1], pos2[1]], + [pos1[2], pos2[2]], + linestyle=":", + color="indianred", + alpha=0.9, + zorder=1, + lw=2, + ) + + # Remove axis, and position legend in the upper right with created space. + ax.axis("off") + + +def plot_protein(protein, style="basic", ax=None, show=True): + """ + Plot conformation of a protein. + :param Protein protein: Protein object to plot the hash of. + :param [str] style: What style to plot the proteins in. + :param Axes ax: Axis to plot Protein on. + """ + # Catch unplottable dimensions. + if protein.dim != 2 and protein.dim != 3: + raise RuntimeError( + f"Cannot plot the structure of a protein with " + f"dimension '{protein.dim}'" + ) + + # Create axis to plot onto if not given. + if ax is None: + if style == "paper": + fig = plt.figure(figsize=(4, 2.5)) + else: + fig = plt.figure(figsize=(5, 6)) + sns.set_style("whitegrid") + + if protein.dim == 2: + ax = fig.gca() + else: + ax = fig.gca(projection="3d") + + # Fetch data in right dimension. + if protein.dim == 2: + df = pd.DataFrame( + get_ordered_positions(protein), columns=["x", "y", "Type"] + ) + df = df.astype({"x": "int32", "y": "int32"}) + else: + df = pd.DataFrame( + get_ordered_positions(protein), columns=["x", "y", "z", "Type"] + ) + df = df.astype({"x": "int32", "y": "int32", "z": "int32"}) + + # Plot the selected style. + if style == "paper": + if protein.dim == 2: + _plot_aminos_2d_paper(protein, df, ax) + else: + _plot_aminos_3d_paper(protein, df, ax) + elif style == "basic": + ax.set_xlabel("x-axis", fontsize=13) + ax.set_ylabel("y-axis", fontsize=13) + ax.xaxis.set_major_locator(MaxNLocator(integer=True)) + ax.yaxis.set_major_locator(MaxNLocator(integer=True)) + + # Plot dimension specific. + if protein.dim == 2: + ax.set_title(f"2D conformation with {protein.score} energy") + _plot_aminos_2d_basic(protein, df, ax) + else: + ax.set_title(f"3D conformation with {protein.score} energy") + ax.set_zlabel("z-axis", fontsize=13) + ax.zaxis.set_major_locator(MaxNLocator(integer=True)) + _plot_aminos_3d_basic(protein, df, ax) # Remove title from legend and add item for bonds. handles, labels = ax.get_legend_handles_labels() @@ -152,33 +287,25 @@ def _plot_protein_3d(protein, ax): color="indianred", linestyle=":", alpha=0.9, - label="Contact", - lw=1.5, + label="Bond", + lw=2, ) handles.append(score_patch) labels.append(score_patch.get_label()) - ax.legend(handles=handles, labels=labels) - -def plot_protein(protein): - """ - Plot conformation of a protein. - :param Protein protein: Protein object to plot the hash of. - """ - fig = plt.figure(figsize=(6, 5)) - sns.set_style("whitegrid") - - # Plot data according to used dimension. - if protein.dim == 2: - ax = fig.gca() - _plot_protein_2d(protein, ax) - elif protein.dim == 3: - ax = fig.gca(projection="3d") - _plot_protein_3d(protein, ax) - else: - raise RuntimeError( - f"Cannot plot the structure of a protein with " - f"dimension '{protein.dim}'" + # Style legend according to plotting style. + if style == "paper": + box = ax.get_position() + ax.set_position([box.x0, box.y0, box.width * 0.7, box.height]) + ax.legend( + handles=handles, + labels=labels, + loc="upper left", + bbox_to_anchor=(1, 1), ) + else: + ax.legend(handles=handles, labels=labels) - plt.show() + # Show plot if specified. + if show: + plt.show() diff --git a/setup.py b/setup.py index d0c7bb3..92a102b 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ from setuptools import setup from pybind11.setup_helpers import Pybind11Extension, build_ext -__version__ = "0.2a5" +__version__ = "0.2a6" # Define core module extension. ext_modules = [ diff --git a/tests/visualize/test_visualization.py b/tests/visualize/test_visualization.py new file mode 100644 index 0000000..e69dc67 --- /dev/null +++ b/tests/visualize/test_visualization.py @@ -0,0 +1,44 @@ +from prospr import Protein, plot_protein +import matplotlib.pyplot as plt + +if __name__ == "__main__": + p = Protein("HPPHPPHPPH", model="HP") + p.set_hash([-1, -2, 1, -2, 1, 2, 1, 2, -1]) + plot_protein(p, style="paper", show=False) + + p = Protein("PHPHHPPPHHPHPHH", model="HP") + p.set_hash([-1, -1, 2, 2, -1, 2, 2, 1, -2, 1, -2, 1, -2, -1]) + plot_protein(p, style="paper", show=False) + + p = Protein("HHPHPHPPPHPHHHPPPPPPHPPPH", model="HP") + p.set_hash( + [ + -1, + -2, + 1, + 1, + 2, + 1, + 2, + 2, + -1, + -2, + -1, + 2, + -1, + 2, + 2, + 2, + -1, + -2, + -2, + -2, + -1, + -2, + 1, + 1, + ] + ) + plot_protein(p, style="paper", show=False) + + plt.show()