Skip to content

Commit

Permalink
Merge pull request #190 from KosinskiLab/TrueMultimer
Browse files Browse the repository at this point in the history
True multimer
  • Loading branch information
DimaMolod authored Nov 15, 2023
2 parents 4e0ec78 + 0ed62cd commit fbc42bd
Show file tree
Hide file tree
Showing 54 changed files with 712,455 additions and 672 deletions.
12 changes: 5 additions & 7 deletions Developing.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,15 @@
1. Test your package during development using tests in ```test/```, e.g.:
```
pip install pytest
pytest
pytest test
python test/test_predict_structure.py
sbatch test/test_predict_structure.sh
python -m unittest test/test_predict_structure.<name of the test>
pytest -s test/
pytest -s test/test_predictions_slurm.py
pytest -s test/test_features_with_templates.py::TestCreateIndividualFeaturesWithTemplates::test_1a_run_features_generation
```
1. Before pushing to the remote or submitting pull request
```
pip install .
pytest test
pytest -s test/
```
to install the package and test
to install the package and test. Pytest for predictions only works if Slurm is available. Check the created log files in your current directory.
3 changes: 1 addition & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
include ./alphafold/run_alphafold.py
include stereo_chemical_props.txt
include stereo_chemical_props.txt
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ conda create -n AlphaPulldown -c omnia -c bioconda -c conda-forge python==3.10 o
**Secondly**, activate the AlphaPulldown environment and install AlphaPulldown
```bash
source activate AlphaPulldown
python3 -m pip install alphapulldown==0.40.4

python3 -m pip install alphapulldown==1.0.0
pip install jax==0.3.25 jaxlib==0.3.25+cuda11.cudnn805 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
```

Expand Down
2 changes: 1 addition & 1 deletion alphapulldown/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.00.0"
__version__ = "1.0.0"
73 changes: 15 additions & 58 deletions alphapulldown/create_custom_template_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@

import os
import shutil
import sys
import random
import string
from pathlib import Path
from absl import logging, flags, app
from alphapulldown.remove_clashes_low_plddt import MmcifChainFiltered
from colabfold.batch import validate_and_fix_mmcif, convert_pdb_to_mmcif
from colabfold.batch import validate_and_fix_mmcif
from alphafold.common.protein import _from_bio_structure, to_mmcif
from Bio import SeqIO, PDB

FLAGS = flags.FLAGS

Expand Down Expand Up @@ -47,10 +47,11 @@ def parse_code(template):
for line in f:
if line.startswith("_entry.id"):
code = line.split()[1]
if len(code) != 4:
logging.error(f'Error for template {template}!\n'
f'Code must have 4 characters but is {code}\n')
sys.exit(1)

# Generate a random 4-character code if needed
if len(code) != 4:
code = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(4))

return code.lower()


Expand Down Expand Up @@ -90,40 +91,6 @@ def create_tree(pdb_mmcif_dir, mmcif_dir, seqres_dir, templates_dir):
create_dir_and_remove_files(seqres_dir, ['pdb_seqres.txt'])


def extract_seqs(template, chain_id):
    """
    Extract the sequences of one chain from a PDB/CIF file using Bio.SeqIO.

    The file is parsed twice: once with the "<ext>-atom" format and once
    with the "<ext>-seqres" format, where <ext> is the lower-cased file
    suffix without the dot ("pdb" or "cif").

    o template - path to the input file (str or pathlib.Path);
      the suffix must be .pdb or .cif (case-insensitive)
    o chain_id - chain ID, compared against record.annotations['chain']
    Returns:
    o sequence_atom - sequence from ATOM records (str), or None if no
      record matched chain_id
    o sequence_seqres - sequence from SEQRES records (str), or None if no
      record matched chain_id
    Raises:
    o ValueError - if the file suffix is neither .pdb nor .cif
    """
    # Accept plain strings as well as Path objects.
    template = Path(template)
    file_type = template.suffix.lower()

    if file_type not in ('.pdb', '.cif'):
        raise ValueError(f"Unknown file type for {template}!")

    extension = file_type[1:]
    # Insertion order matters: the atom format is parsed before seqres.
    sequences = {'atom': None, 'seqres': None}
    for source in sequences:
        for record in SeqIO.parse(template, f"{extension}-{source}"):
            # If several records carry the same chain ID the last one wins.
            if record.annotations['chain'] == chain_id:
                sequences[source] = str(record.seq)

    sequence_atom = sequences['atom']
    sequence_seqres = sequences['seqres']
    # Missing sequences are reported but not fatal; the caller decides.
    if sequence_atom is None:
        logging.error(f"No atom sequence found for chain {chain_id}")
    if sequence_seqres is None:
        logging.warning(f"No SEQRES sequence found for chain {chain_id}")
    return sequence_atom, sequence_seqres


def create_db(out_path, templates, chains, threshold_clashes, hb_allowance, plddt_threshold):
"""
Main function that creates a custom template database for AlphaFold2
Expand All @@ -146,30 +113,20 @@ def create_db(out_path, templates, chains, threshold_clashes, hb_allowance, pldd
# Process each template/chain pair
for template, chain_id in zip(templates, chains):
code = parse_code(template)
logging.info(f"Template code: {code}")
assert len(code) == 4
# Copy the template to out_path to avoid conflicts with the same file names
shutil.copyfile(template, templates_dir / Path(template).name)
template = templates_dir / Path(template).name
logging.info(f"Processing template: {template} Chain {chain_id} Code: {code}")
logging.info("Parsing SEQRES...")
atom_seq, seqres_seq = None, None
if template.suffix == '.pdb':
atom_seq, seqres_seq = extract_seqs(template, chain_id)
logging.info(f"Converting to mmCIF: {template}")
template = Path(template)
convert_pdb_to_mmcif(template)
template = template.parent.joinpath(f"{template.stem}.cif")
new_template = templates_dir / Path(code + Path(template).suffix)
shutil.copyfile(template, new_template)
template = new_template
logging.info(f"Processing template: {template} Chain {chain_id}")
# Convert to (our) mmcif object
mmcif_obj = MmcifChainFiltered(template, code, chain_id)
# Parse SEQRES
# full sequence is either SEQRES or parsed from (original) ATOMs
if mmcif_obj.sequence_seqres:
seqres = mmcif_obj.sequence_seqres
else:
seqres = mmcif_obj.sequence_atom
# if we converted from pdb, seqres is parsed from Bio.SeqIO
if seqres_seq or atom_seq:
seqres = seqres_seq
if seqres is None:
seqres = atom_seq
sqrres_path = save_seqres(code, chain_id, seqres, seqres_dir)
logging.info(f"SEQRES saved to {sqrres_path}!")
# Remove clashes and low pLDDT regions for each template
Expand Down
51 changes: 7 additions & 44 deletions alphapulldown/create_individual_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,59 +4,27 @@
# This script just creates MSA and structural features for each sequence and stores them in a pickle
# #

import os
import pickle
import sys
from alphapulldown.objects import MonomericObject
import importlib
from absl import app
from absl import flags
from absl import logging

from alphafold.data.pipeline import DataPipeline
from alphafold.data.tools import hmmsearch
from alphafold.data import templates
import numpy as np
import os
from absl import logging, app
import numpy as np
from alphapulldown.utils import *
from alphapulldown.utils import save_meta_data, create_uniprot_runner, parse_fasta, get_flags_from_af
import contextlib
from datetime import datetime
import alphafold
from pathlib import Path
from colabfold.utils import DEFAULT_API_SERVER
import os
import sys
import pickle

@contextlib.contextmanager
def output_meta_file(file_path):
    """Open file_path for writing and yield the file's name.

    The file is created (or truncated) on entry and closed on exit,
    whether or not the managed block raised.
    """
    handle = open(file_path, "w")
    try:
        yield handle.name
    finally:
        handle.close()


def load_module(file_name, module_name):
    """Import the Python source file file_name as a module named module_name.

    The module is registered in sys.modules under module_name before it is
    executed, and the executed module object is returned.

    Args:
        file_name: path to the source file to load.
        module_name: name to give the module and to register it under.

    Returns:
        The loaded module object.

    Raises:
        ImportError: if no import spec/loader can be built for file_name.
        FileNotFoundError: if file_name does not exist (raised by the loader
            while executing the module).
    """
    # "import importlib" alone does not guarantee the util submodule is
    # loaded, so import it explicitly here.
    import importlib.util

    spec = importlib.util.spec_from_file_location(module_name, file_name)
    if spec is None or spec.loader is None:
        raise ImportError(
            f"Cannot create an import spec for {file_name!r} as {module_name!r}"
        )
    module = importlib.util.module_from_spec(spec)

    previous = sys.modules.get(module_name)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind; put back whatever
        # was registered under this name before, then re-raise unchanged.
        if previous is None:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous
        raise
    return module


PATH_TO_RUN_ALPHAFOLD = os.path.join(
os.path.dirname(alphafold.__file__), "run_alphafold.py"
)

try:
run_af = load_module(PATH_TO_RUN_ALPHAFOLD, "run_alphafold")
except FileNotFoundError:
PATH_TO_RUN_ALPHAFOLD = os.path.join(
os.path.dirname(os.path.dirname(alphafold.__file__)), "run_alphafold.py"
)

run_af = load_module(PATH_TO_RUN_ALPHAFOLD, "run_alphafold")


flags = run_af.flags
flags = get_flags_from_af()
flags.DEFINE_bool("save_msa_files", False, "save msa output or not")
flags.DEFINE_bool(
"skip_existing", False, "skip existing monomer feature pickles or not"
Expand Down Expand Up @@ -221,11 +189,7 @@ def create_and_save_monomer_objects(m, pipeline, flags_dict,use_mmseqs2=False):
else:
logging.info("running mmseq now")
m.make_mmseq_features(DEFAULT_API_SERVER=DEFAULT_API_SERVER,
pdb70_database_path=pdb70_database_path,
template_mmcif_dir=template_mmcif_dir,
max_template_date=FLAGS.max_template_date,
output_dir=FLAGS.output_dir,
obsolete_pdbs_path=FLAGS.obsolete_pdbs_path
pipeline=pipeline,output_dir=FLAGS.output_dir
)
pickle.dump(m, open(f"{FLAGS.output_dir}/{m.description}.pkl", "wb"))
del m
Expand Down Expand Up @@ -264,8 +228,7 @@ def main(argv):
)
sys.exit()
else:

pipeline=None
pipeline = create_pipeline()
uniprot_runner=None
flags_dict=FLAGS.flag_values_dict()

Expand Down
Loading

0 comments on commit fbc42bd

Please sign in to comment.