diff --git a/test/test_data/true_multimer/description.csv b/test/test_data/true_multimer/description.csv deleted file mode 100755 index 889124c3..00000000 --- a/test/test_data/true_multimer/description.csv +++ /dev/null @@ -1,2 +0,0 @@ -3L4Q_A.fasta, 3L4Q.cif, A -3L4Q_C.fasta, 3L4Q.cif, C diff --git a/test/test_data/true_multimer/fastas/3L4Q.fa b/test/test_data/true_multimer/fastas/3L4Q.fa new file mode 100755 index 00000000..75379e76 --- /dev/null +++ b/test/test_data/true_multimer/fastas/3L4Q.fa @@ -0,0 +1,4 @@ +>3L4Q_A +SDEALKMTMASVPASRYLTDMTLEEMSRDWSMLIPKQKVAGPLCIRMDQAIMDKNIILKANFSVIFDRLETLILLRAFTEEGAIVGEISPLPSLPGHTAEDVKNAVGVLIGGLEWNDNTVRVSETLQRFAWRSSNENGRPPLTPKQKREMAGTIRSEV +>3L4Q_C +YQQDQIVKEDSVEAVGAQLKVYHQQYQDKSREYDQLYEEYTRTSQELQMKRTAIEAFNETIKIFEEQGQTQEKSSKEYLERFRREGNEKEMQRILLNSERLKSRIAEIHESRTKLEQELRAQASDNREIDKRMNSLKPDLMQLRKIRDQYLVWLTQKGARQKKINEWLGI \ No newline at end of file diff --git a/test/test_data/true_multimer/fastas/RANdom_name1_.7-1_0.fasta b/test/test_data/true_multimer/fastas/RANdom_name1_.7-1_0.fasta new file mode 100755 index 00000000..116a9e89 --- /dev/null +++ b/test/test_data/true_multimer/fastas/RANdom_name1_.7-1_0.fasta @@ -0,0 +1,7 @@ +>RANdom_name1_.7-1_0_B +ISASDLALLTRQLATLVAAALPLEEALDAVAKQSEKPKLSALMAAVRAKVVEGHSLAEAM +GNFPGSFERLYCAMVAAGEASGHLDAVLNRLADYTEQRQQMRSRIQQ +>RANdom_name1_.7-1_0_C +ISASDLALLTRQLATLVAAALPLEEALDAVAKQSEKPKLSALMAAVRAKVVEGHSLAEAM +GNFPGSFERLYCAMVAAGEASGHLDAVLNRLADYTEQRQQMRSRIQQ + diff --git a/test/test_data/true_multimer/features/3L4Q_A/pdb_hits.sto b/test/test_data/true_multimer/features/3L4Q_A/pdb_hits.sto deleted file mode 100644 index fcb9d9c7..00000000 --- a/test/test_data/true_multimer/features/3L4Q_A/pdb_hits.sto +++ /dev/null @@ -1,11 +0,0 @@ -# STOCKHOLM 1.0 -#=GF ID query -#=GF AU hmmsearch (HMMER 3.3.2) - -#=GS 3l4q_A/7-164 DE [subseq from] mol:protein length:164 - -3l4q_A/7-164 SDEALKMTMASVPASRYLTDMTLEEMSRDWSMLIPKQKVAGPLCIRMDQAIMDKNIILKANFSVIFDRLETLILLRAFTEEGAIVGEISPLPSLPGHTAEDVKNAVGVLIGGLEWNDNTVRVSETLQRFAWRSSNENGRPPLTPKQKREMAGTIRSEV -#=GR 3l4q_A/7-164 PP 79**********************************************************************************************************************************************************97 -#=GC PP_cons 79**********************************************************************************************************************************************************97 -#=GC RF xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -// diff --git a/test/test_data/true_multimer/features/3L4Q_A_feature_metadata_2023-10-04.json b/test/test_data/true_multimer/features/3L4Q_A_feature_metadata_2023-10-04.json deleted file mode 100644 index 44f32f8e..00000000 --- a/test/test_data/true_multimer/features/3L4Q_A_feature_metadata_2023-10-04.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "databases": { - "uniref90": { - "version": "2022-12-12 19:57:13", - "hash": "NA" - }, - "mgnify": { - "version": "2022-05", - "hash": "NA" - }, - "bfd": { - "version": "AF2", - "hash": "799f308b20627088129847709f1abed6" - }, - "small_bfd": { - "version": null, - "hash": "NA" - }, - "uniref30": { - "version": "2021-03", - "hash": "bdd865b81d9706697e9132cc8d7f0703" - }, - "uniprot": { - "version": "2022-12-13 15:30:36", - "hash": "NA" - }, - "pdb70": { - "version": "2020-04-01 17:21:43", - "hash": "1e4a67eb58df3885b1a161ca4ce8cc81" - }, - "pdb_seqres": { - "version": "2023-10-04 16:36:34", - "hash": "71b08c9739a4d1673806fee5317009f4" - }, - "ColabFold": { - "version": "2023-10-04 16:36:34", - "hash": "NA" - } - }, - "software": { - "AlphaPulldown": { - "version": "0.30.8" - }, - "AlphaFold": { - "version": "2.3.2" - }, - "jackhmmer": { - "version": "3.3.2" - }, - "hhblits": { - "version": "3.3.0" - }, - "hhsearch": { - "version": "3.3.0" - }, - "hmmsearch": { - "version": "3.3.2" - }, - "hmmbuild": { - "version": "3.3.2" - }, - "kalign": { - "version": "2.04" - } - }, - "date": "2023-10-04 16:36:34", - "other": { - "logtostderr": "False", - "alsologtostderr": "False", - "log_dir": "", - "v": "0", - "verbosity": "0", - "logger_levels": "{}", - "stderrthreshold": "fatal", - "showprefixforinfo": "True", - "run_with_pdb": "False", - "pdb_post_mortem": "False", - "pdb": "False", - "run_with_profiling": "False", - "only_check_args": "False", - "op_conversion_fallback_to_while_loop": "True", - "delta_threshold": "0.5", - "tt_check_filter": "False", - "tt_single_core_summaries": "False", - "runtime_oom_exit": "True", - "hbm_oom_exit": "True", - "xml_output_file": "", - "data_dir": "/scratch/AlphaFold_DBs/2.3.2", - "output_dir": "/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/features", - "jackhmmer_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/jackhmmer", - "hhblits_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/hhblits", - "hhsearch_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/hhsearch", - "hmmsearch_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/hmmsearch", - "hmmbuild_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/hmmbuild", - "kalign_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/kalign", - "uniref90_database_path": "/scratch/AlphaFold_DBs/2.3.2/uniref90/uniref90.fasta", - "mgnify_database_path": "/scratch/AlphaFold_DBs/2.3.2/mgnify/mgy_clusters_2022_05.fa", - "bfd_database_path": "/scratch/AlphaFold_DBs/2.3.2/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt", - "small_bfd_database_path": "/scratch/AlphaFold_DBs/2.3.2/small_bfd/bfd-first_non_consensus_sequences.fasta", - "uniref30_database_path": "/scratch/AlphaFold_DBs/2.3.2/uniref30/UniRef30_2021_03", - "uniprot_database_path": "/scratch/AlphaFold_DBs/2.3.2/uniprot/uniprot.fasta", - "pdb70_database_path": "/scratch/AlphaFold_DBs/2.3.2/pdb70/pdb70", - "pdb_seqres_database_path": "custom_template_db/3L4Q_A/pdb_seqres/pdb_seqres.txt", - "template_mmcif_dir": "custom_template_db/3L4Q_A/pdb_mmcif/mmcif_files", - "max_template_date": "3021-01-01", - "obsolete_pdbs_path": "custom_template_db/3L4Q_A/pdb_mmcif/obsolete.dat", - "db_preset": "full_dbs", - "model_preset": "monomer", - "benchmark": "False", - "num_multimer_predictions_per_model": "5", - "use_precomputed_msas": "True", - "models_to_relax": "ModelsToRelax.BEST", - "save_msa_files": "True", - "skip_existing": "True", - "use_mmseqs2": "False", - "description_file": "/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/description.csv", - "path_to_fasta": "/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/fastas", - "path_to_mmt": "/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/templates", - "threshold_clashes": "1000.0", - "hb_allowance": "0.4", - "plddt_threshold": "0.0", - "?": "False", - "use_small_bfd": "False", - "fasta_path_1": "/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/fastas/3L4Q_A.fasta", - "multimeric_templates_1": "['/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/templates/3L4Q.cif']", - "multimeric_chains_1": "['A']" - } -} \ No newline at end of file diff --git a/test/test_data/true_multimer/features/3L4Q_C.pkl b/test/test_data/true_multimer/features/3L4Q_C.pkl index 35df7642..29f6bb35 100644 Binary files a/test/test_data/true_multimer/features/3L4Q_C.pkl and b/test/test_data/true_multimer/features/3L4Q_C.pkl differ diff --git a/test/test_data/true_multimer/features/3L4Q_C/pdb_hits.sto b/test/test_data/true_multimer/features/3L4Q_C/pdb_hits.sto deleted file mode 100644 index 47dc112b..00000000 --- a/test/test_data/true_multimer/features/3L4Q_C/pdb_hits.sto +++ /dev/null @@ -1,11 +0,0 @@ -# STOCKHOLM 1.0 -#=GF ID query -#=GF AU hmmsearch (HMMER 3.3.2) - -#=GS 3l4q_C/1-170 DE [subseq from] mol:protein length:170 - -3l4q_C/1-170 YQQDQIVKEDSVEAVGAQLKVYHQQYQDKSREYDQLYEEYTRTSQELQMKRTAIEAFNETIKIFEEQGQTQEKSSKEYLERFRREGNEKEMQRILLNSERLKSRIAEIHESRTKLEQELRAQASDNREIDKRMNSLKPDLMQLRKIRDQYLVWLTQKGARQKKINEWLGI -#=GR 3l4q_C/1-170 PP 9***********************************************************************************************************************************************************************98 -#=GC PP_cons 9***********************************************************************************************************************************************************************98 -#=GC RF xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -// diff --git a/test/test_data/true_multimer/features/3L4Q_C_feature_metadata_2023-10-04.json b/test/test_data/true_multimer/features/3L4Q_C_feature_metadata_2023-10-04.json deleted file mode 100644 index b3387f4f..00000000 --- a/test/test_data/true_multimer/features/3L4Q_C_feature_metadata_2023-10-04.json +++ /dev/null @@ -1,131 +0,0 @@ -{ - "databases": { - "uniref90": { - "version": "2022-12-12 19:57:13", - "hash": "NA" - }, - "mgnify": { - "version": "2022-05", - "hash": "NA" - }, - "bfd": { - "version": "AF2", - "hash": "799f308b20627088129847709f1abed6" - }, - "small_bfd": { - "version": null, - "hash": "NA" - }, - "uniref30": { - "version": "2021-03", - "hash": "bdd865b81d9706697e9132cc8d7f0703" - }, - "uniprot": { - "version": "2022-12-13 15:30:36", - "hash": "NA" - }, - "pdb70": { - "version": "2020-04-01 17:21:43", - "hash": "1e4a67eb58df3885b1a161ca4ce8cc81" - }, - "pdb_seqres": { - "version": "2023-10-04 16:36:38", - "hash": "e7a53894943d14845e667938f7fdc3f6" - }, - "ColabFold": { - "version": "2023-10-04 16:36:38", - "hash": "NA" - } - }, - "software": { - "AlphaPulldown": { - "version": "0.30.8" - }, - "AlphaFold": { - "version": "2.3.2" - }, - "jackhmmer": { - "version": "3.3.2" - }, - "hhblits": { - "version": "3.3.0" - }, - "hhsearch": { - "version": "3.3.0" - }, - "hmmsearch": { - "version": "3.3.2" - }, - "hmmbuild": { - "version": "3.3.2" - }, - "kalign": { - "version": "2.04" - } - }, - "date": "2023-10-04 16:36:38", - "other": { - "logtostderr": "False", - "alsologtostderr": "False", - "log_dir": "", - "v": "0", - "verbosity": "0", - "logger_levels": "{}", - "stderrthreshold": "fatal", - "showprefixforinfo": "True", - "run_with_pdb": "False", - "pdb_post_mortem": "False", - "pdb": "False", - "run_with_profiling": "False", - "only_check_args": "False", - "op_conversion_fallback_to_while_loop": "True", - "delta_threshold": "0.5", - "tt_check_filter": "False", - "tt_single_core_summaries": "False", - "runtime_oom_exit": "True", - "hbm_oom_exit": "True", - "xml_output_file": "", - "data_dir": "/scratch/AlphaFold_DBs/2.3.2", - "output_dir": "/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/features", - "jackhmmer_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/jackhmmer", - "hhblits_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/hhblits", - "hhsearch_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/hhsearch", - "hmmsearch_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/hmmsearch", - "hmmbuild_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/hmmbuild", - "kalign_binary_path": "/home/dmolodenskiy/.conda/envs/AlphaPulldown/bin/kalign", - "uniref90_database_path": "/scratch/AlphaFold_DBs/2.3.2/uniref90/uniref90.fasta", - "mgnify_database_path": "/scratch/AlphaFold_DBs/2.3.2/mgnify/mgy_clusters_2022_05.fa", - "bfd_database_path": "/scratch/AlphaFold_DBs/2.3.2/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt", - "small_bfd_database_path": "/scratch/AlphaFold_DBs/2.3.2/small_bfd/bfd-first_non_consensus_sequences.fasta", - "uniref30_database_path": "/scratch/AlphaFold_DBs/2.3.2/uniref30/UniRef30_2021_03", - "uniprot_database_path": "/scratch/AlphaFold_DBs/2.3.2/uniprot/uniprot.fasta", - "pdb70_database_path": "/scratch/AlphaFold_DBs/2.3.2/pdb70/pdb70", - "pdb_seqres_database_path": "custom_template_db/3L4Q_C/pdb_seqres/pdb_seqres.txt", - "template_mmcif_dir": "custom_template_db/3L4Q_C/pdb_mmcif/mmcif_files", - "max_template_date": "3021-01-01", - "obsolete_pdbs_path": "custom_template_db/3L4Q_C/pdb_mmcif/obsolete.dat", - "db_preset": "full_dbs", - "model_preset": "monomer", - "benchmark": "False", - "num_multimer_predictions_per_model": "5", - "use_precomputed_msas": "True", - "models_to_relax": "ModelsToRelax.BEST", - "save_msa_files": "True", - "skip_existing": "True", - "use_mmseqs2": "False", - "description_file": "/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/description.csv", - "path_to_fasta": "/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/fastas", - "path_to_mmt": "/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/templates", - "threshold_clashes": "1000.0", - "hb_allowance": "0.4", - "plddt_threshold": "0.0", - "?": "False", - "use_small_bfd": "False", - "fasta_path_1": "/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/fastas/3L4Q_A.fasta", - "multimeric_templates_1": "['/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/templates/3L4Q.cif']", - "multimeric_chains_1": "['A']", - "fasta_path_2": "/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/fastas/3L4Q_C.fasta", - "multimeric_templates_2": "['/g/kosinski/dima/PycharmProjects/AlphaPulldown/test/test_data/true_multimer/templates/3L4Q.cif']", - "multimeric_chains_2": "['C']" - } -} \ No newline at end of file diff --git a/test/test_data/true_multimer/features/GAPPY_PDB_B.pkl b/test/test_data/true_multimer/features/GAPPY_PDB_B.pkl new file mode 100644 index 00000000..563a9435 Binary files /dev/null and b/test/test_data/true_multimer/features/GAPPY_PDB_B.pkl differ diff --git a/test/test_data/true_multimer/features/RANdom_name1_.7-1_0_B.pkl b/test/test_data/true_multimer/features/RANdom_name1_.7-1_0_B.pkl index 9e0d770d..4a8b2539 100644 Binary files a/test/test_data/true_multimer/features/RANdom_name1_.7-1_0_B.pkl and b/test/test_data/true_multimer/features/RANdom_name1_.7-1_0_B.pkl differ diff --git a/test/test_data/true_multimer/features/RANdom_name1_.7-1_0_C.pkl b/test/test_data/true_multimer/features/RANdom_name1_.7-1_0_C.pkl new file mode 100644 index 00000000..ce6194b1 Binary files /dev/null and b/test/test_data/true_multimer/features/RANdom_name1_.7-1_0_C.pkl differ diff --git a/test/test_features_with_templates.py b/test/test_features_with_templates.py index 769ebf1c..faa0e296 100644 --- a/test/test_features_with_templates.py +++ b/test/test_features_with_templates.py @@ -5,26 +5,25 @@ import pickle import numpy as np from alphapulldown.remove_clashes_low_plddt import extract_seqs +import tempfile +import shutil class TestCreateIndividualFeaturesWithTemplates(absltest.TestCase): def setUp(self): super().setUp() - self.TEST_DATA_DIR = Path(__file__).parent / "test_data" / "true_multimer" - # Create necessary directories if they don't exist + self.temp_dir = tempfile.TemporaryDirectory() # Create a temporary directory + self.TEST_DATA_DIR = Path(self.temp_dir.name) # Use the temporary directory as the test data directory + # Copy test data files to the temporary directory + original_test_data_dir = Path(__file__).parent / "test_data" / "true_multimer" + shutil.copytree(original_test_data_dir, self.TEST_DATA_DIR, dirs_exist_ok=True) + # Create necessary directories (self.TEST_DATA_DIR / 'features').mkdir(parents=True, exist_ok=True) (self.TEST_DATA_DIR / 'templates').mkdir(parents=True, exist_ok=True) def tearDown(self): - # Clean up any files or directories created during testing - sto_files = list((self.TEST_DATA_DIR / 'features').glob('*/pdb_hits.sto')) - for sto_file in sto_files: - if sto_file.exists(): - sto_file.unlink() - desc_file = self.TEST_DATA_DIR / 'description.csv' - if desc_file.exists(): - desc_file.unlink() + self.temp_dir.cleanup() # Clean up the temporary directory def run_features_generation(self, file_name, chain_id, file_extension): # Ensure directories exist