-
Notifications
You must be signed in to change notification settings - Fork 85
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added data workflow for fastfold
- Loading branch information
Showing
14 changed files
with
432 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .workflow_run import batch_run |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from .task_factory import TaskFactory | ||
from .hhblits import HHBlitsFactory | ||
from .hhsearch import HHSearchFactory | ||
from .jackhmmer import JackHmmerFactory | ||
from .hhfilter import HHfilterFactory |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
from ray import workflow | ||
from typing import List | ||
from fastfold.workflow.factory import TaskFactory | ||
from ray.workflow.common import Workflow | ||
import fastfold.data.tools.hhblits as ffHHBlits | ||
|
||
class HHBlitsFactory(TaskFactory):
    """Factory that builds workflow steps running an HHBlits query.

    Requires ``binary_path``, ``databases`` and ``n_cpu`` in the factory
    configuration.
    """

    keywords = ['binary_path', 'databases', 'n_cpu']

    def gen_task(self, fasta_path: str, output_path: str, after: List[Workflow]=None) -> Workflow:
        """Create a workflow step that queries HHBlits and stores the a3m output.

        Args:
            fasta_path: path handed to the runner's ``query`` method.
            output_path: file that receives the ``"a3m"`` entry of the result.
            after: workflows forwarded to the step as an argument
                (presumably to order this step after them; confirm against
                the ray workflow semantics).

        Returns:
            The workflow object produced by ``hhblits_step.step(...)``.

        Raises:
            KeyError: via ``isReady`` when a required config key is missing.
        """
        self.isReady()

        # Build the runner up front so the step closure only captures it.
        settings = self.config
        hhblits_runner = ffHHBlits.HHBlits(
            binary_path=settings['binary_path'],
            databases=settings['databases'],
            n_cpu=settings['n_cpu'],
        )

        @workflow.step
        def hhblits_step(fasta_path: str, output_path: str, after: List[Workflow]) -> None:
            query_result = hhblits_runner.query(fasta_path)
            with open(output_path, "w") as out_file:
                out_file.write(query_result["a3m"])

        task = hhblits_step.step(fasta_path, output_path, after)
        return task
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import subprocess | ||
import logging | ||
from ray import workflow | ||
from typing import List | ||
from fastfold.workflow.factory import TaskFactory | ||
from ray.workflow.common import Workflow | ||
|
||
class HHfilterFactory(TaskFactory):
    """Factory that builds workflow steps invoking the ``hhfilter`` binary.

    Requires ``binary_path`` in the factory configuration. The optional
    ``id`` and ``cov`` entries, when present, are forwarded as the ``-id``
    and ``-cov`` command-line options.
    """

    keywords = ['binary_path']

    def gen_task(self, fasta_path: str, output_path: str, after: List[Workflow]=None) -> Workflow:
        """Create a workflow step that runs hhfilter on ``fasta_path``.

        Args:
            fasta_path: input file passed to hhfilter with ``-i``.
            output_path: output file passed to hhfilter with ``-o``.
            after: workflows forwarded to the step as an argument
                (presumably to order this step after them; confirm against
                the ray workflow semantics).

        Returns:
            The workflow object produced by ``hhfilter_step.step(...)``.

        Raises:
            KeyError: via ``isReady`` when ``binary_path`` is not configured.
        """
        self.isReady()

        # generate step function
        @workflow.step
        def hhfilter_step(fasta_path: str, output_path: str, after: List[Workflow]) -> None:
            cmd = [
                self.config.get('binary_path'),
            ]
            if 'id' in self.config:
                cmd += ['-id', str(self.config.get('id'))]
            if 'cov' in self.config:
                cmd += ['-cov', str(self.config.get('cov'))]
            cmd += ['-i', fasta_path, '-o', output_path]

            logging.info(f"HHfilter start: {' '.join(cmd)}")

            # check=True makes a non-zero exit status raise
            # subprocess.CalledProcessError instead of letting the step
            # finish "successfully" without a usable output file.
            subprocess.run(cmd, check=True)

        return hhfilter_step.step(fasta_path, output_path, after)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from fastfold.workflow.factory import TaskFactory | ||
from ray import workflow | ||
from ray.workflow.common import Workflow | ||
import fastfold.data.tools.hhsearch as ffHHSearch | ||
from typing import List | ||
|
||
class HHSearchFactory(TaskFactory):
    """Factory that builds workflow steps running an HHSearch query.

    Requires ``binary_path``, ``databases`` and ``n_cpu`` in the factory
    configuration.
    """

    keywords = ['binary_path', 'databases', 'n_cpu']

    def gen_task(self, a3m_path: str, output_path: str, after: List[Workflow]=None,
                 atab_path: str = None) -> Workflow:
        """Create a workflow step that runs HHSearch on the a3m file.

        Args:
            a3m_path: file whose text content is passed to the runner's
                ``query`` method.
            output_path: file that receives the HHSearch result.
            after: workflows forwarded to the step as an argument
                (presumably to order this step after them; confirm against
                the ray workflow semantics).
            atab_path: optional file for the atab output. When given, the
                runner is queried with ``gen_atab=True`` and the second
                returned value is written here. Defaults to ``None`` (no
                atab output), which matches the previous behaviour.

        Returns:
            The workflow object produced by ``hhsearch_step.step(...)``.

        Raises:
            KeyError: via ``isReady`` when a required config key is missing.
        """
        self.isReady()

        # setup runner
        runner = ffHHSearch.HHSearch(
            binary_path=self.config['binary_path'],
            databases=self.config['databases'],
            n_cpu=self.config['n_cpu']
        )

        # generate step function
        @workflow.step
        def hhsearch_step(a3m_path: str, output_path: str, after: List[Workflow], atab_path: str = None) -> None:
            with open(a3m_path, "r") as f:
                a3m = f.read()

            atab = None
            if atab_path:
                hhsearch_result, atab = runner.query(a3m, gen_atab=True)
            else:
                hhsearch_result = runner.query(a3m)

            with open(output_path, "w") as f:
                f.write(hhsearch_result)
            if atab_path:
                with open(atab_path, "w") as f:
                    f.write(atab)

        # atab_path was previously never forwarded, which left the atab
        # branch of the step unreachable.
        return hhsearch_step.step(a3m_path, output_path, after, atab_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
from fastfold.workflow.factory import TaskFactory | ||
from ray import workflow | ||
from ray.workflow.common import Workflow | ||
import fastfold.data.tools.jackhmmer as ffJackHmmer | ||
from fastfold.data import parsers | ||
from typing import List | ||
|
||
class JackHmmerFactory(TaskFactory):
    """Factory that builds workflow steps running a Jackhmmer query.

    Requires ``binary_path``, ``database_path``, ``n_cpu`` and
    ``uniref_max_hits`` in the factory configuration.
    """

    keywords = ['binary_path', 'database_path', 'n_cpu', 'uniref_max_hits']

    def gen_task(self, fasta_path: str, output_path: str, after: List[Workflow]=None) -> Workflow:
        """Create a workflow step that queries Jackhmmer and writes an a3m file.

        The step takes the first element of the runner's query result,
        converts its ``'sto'`` entry to a3m with
        ``parsers.convert_stockholm_to_a3m`` (limited to
        ``uniref_max_hits`` sequences) and writes the text to
        ``output_path``.

        Args:
            fasta_path: path handed to the runner's ``query`` method.
            output_path: file that receives the converted a3m text.
            after: workflows forwarded to the step as an argument
                (presumably to order this step after them; confirm against
                the ray workflow semantics).

        Returns:
            The workflow object produced by ``jackhmmer_step.step(...)``.

        Raises:
            KeyError: via ``isReady`` when a required config key is missing.
        """
        self.isReady()

        settings = self.config
        jackhmmer_runner = ffJackHmmer.Jackhmmer(
            binary_path=settings['binary_path'],
            database_path=settings['database_path'],
            n_cpu=settings['n_cpu'],
        )

        @workflow.step
        def jackhmmer_step(fasta_path: str, output_path: str, after: List[Workflow]) -> None:
            first_hit = jackhmmer_runner.query(fasta_path)[0]
            stockholm_text = first_hit['sto']
            # the hit limit is read from the factory config inside the step
            hit_limit = self.config['uniref_max_hits']
            a3m_text = parsers.convert_stockholm_to_a3m(
                stockholm_text,
                max_sequences=hit_limit,
            )
            with open(output_path, "w") as out_file:
                out_file.write(a3m_text)

        task = jackhmmer_step.step(fasta_path, output_path, after)
        return task
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from ast import keyword | ||
import json | ||
from ray.workflow.common import Workflow | ||
from os import path | ||
from typing import List | ||
|
||
class TaskFactory:
    """Base class for factories that turn a tool configuration into tasks.

    Subclasses list the configuration keys they need in ``keywords`` and
    implement ``gen_task``. The configuration lives in ``self.config`` and
    comes from an explicit dict, a JSON config file, or later calls to
    ``configure``.
    """

    # configuration keys that must be present before a task can be generated
    keywords = []

    # sentinel telling "value not passed" apart from an explicit ``None``
    _UNSET = object()

    def __init__(self, config: dict = None, config_path: str = None) -> None:
        """Initialise the factory configuration.

        Args:
            config: configuration dict to use as-is (not copied).
            config_path: JSON config file to load when ``config`` is not
                given. Falls back to ``loadConfig``'s default path.

        When the class requires no keywords, no config file is read and the
        configuration starts out empty (or as ``config`` when given), so
        ``configure`` and ``isReady`` are always usable.
        """
        if config is not None:
            self.config = config
        elif not self.__class__.keywords:
            # nothing is required from a config file: skip file loading
            self.config = {}
        elif config_path is not None:
            self.loadConfig(config_path)
        else:
            self.loadConfig()

    def configure(self, keyword, value=_UNSET, *, purge: bool = False) -> None:
        """Update the factory configuration.

        Two call forms are supported (the class previously defined
        ``configure`` twice, so the dict form was silently overwritten):

        * ``configure(key, value)`` sets a single entry.
        * ``configure(mapping, purge=False)`` merges ``mapping`` into the
          current configuration, or replaces the configuration with
          ``mapping`` when ``purge`` is true.

        Raises:
            TypeError: when ``value`` is missing for the single-key form, or
                supplied together with a mapping.
        """
        if isinstance(keyword, dict):
            if value is not self._UNSET:
                raise TypeError("configure() takes no 'value' when given a dict")
            if purge:
                self.config = keyword
            else:
                self.config.update(keyword)
            return

        if value is self._UNSET:
            raise TypeError("configure() missing 1 required positional argument: 'value'")
        self.config[keyword] = value

    def gen_task(self, after: List["Workflow"] = None, *args, **kwargs) -> "Workflow":
        """Build a workflow task; must be implemented by subclasses."""
        raise NotImplementedError

    def isReady(self):
        """Raise ``KeyError`` if any key in ``keywords`` is missing from the config."""
        for key in self.__class__.keywords:
            if key not in self.config:
                raise KeyError(f"{self.__class__.__name__} not ready: \"{key}\" not specified")

    def loadConfig(self, config_path='./config.json'):
        """Load this factory's section from a JSON config file.

        The file must contain a top-level ``"tools"`` object with an entry
        named after the class with its ``Factory`` suffix removed (for
        example ``HHBlitsFactory`` reads ``tools["HHBlits"]``).

        Raises:
            KeyError: when ``"tools"`` or the factory's section is missing.
        """
        with open(config_path) as configFile:
            globalConfig = json.load(configFile)
        if 'tools' not in globalConfig:
            raise KeyError("\"tools\" not found in global config file")

        # Strip the suffix only when it is really there; a blind [:-7]
        # slice would mangle class names that do not end in "Factory".
        suffix = 'Factory'
        factoryName = self.__class__.__name__
        if factoryName.endswith(suffix):
            factoryName = factoryName[:-len(suffix)]

        if factoryName not in globalConfig['tools']:
            raise KeyError(f"\"{factoryName}\" not found in the \"tools\" section in config")
        self.config = globalConfig['tools'][factoryName]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .fastfold_data_workflow import FastFoldDataWorkFlow |
Oops, something went wrong.