Skip to content

Commit

Permalink
Update tests and add more files to clean list
Browse files Browse the repository at this point in the history
  • Loading branch information
johnlees committed Aug 7, 2024
1 parent ffb23de commit 060aea9
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 10 deletions.
1 change: 1 addition & 0 deletions PopPUNK/assign.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ def assign_query_hdf5(dbFuncs,
core_only = (fit_type == 'core_refined'),
accessory_only = (fit_type == 'accessory_refined'),
use_gpu = gpu_graph)
sys.stderr.write(f"Loading previous cluster assignments from {old_cluster_file}\n")

n_vertices = len(get_vertex_list(genomeNetwork, use_gpu = gpu_graph))
if n_vertices != len(rNames):
Expand Down
19 changes: 12 additions & 7 deletions PopPUNK/lineages.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import argparse
import subprocess
import pickle
import shutil
import pandas as pd
from collections import defaultdict

Expand Down Expand Up @@ -142,16 +143,18 @@ def main():
create_db(args)
elif args.query_db is not None:
query_db(args)


def create_db(args):

# Check if output files exist
if not args.overwrite:
if os.path.exists(args.output + '.csv'):
sys.stderr.write('Output file ' + args.output + '.csv exists; use --overwrite to replace it\n')
sys.exit(1)
if os.path.exists(args.db_scheme):
sys.stderr.write('Output file ' + args.db_scheme + ' exists; use --overwrite to replace it\n')
sys.exit(1)

sys.stderr.write("Identifying strains in existing database\n")
# Read in strain information
Expand Down Expand Up @@ -197,7 +200,8 @@ def create_db(args):
if num_isolates >= args.min_count:
lineage_dbs[strain] = strain_db_name
if os.path.isdir(strain_db_name) and args.overwrite:
os.rmdir(strain_db_name)
                sys.stderr.write(f"--overwrite means {strain_db_name} will be deleted now\n")
shutil.rmtree(strain_db_name)
if not os.path.isdir(strain_db_name):
try:
os.makedirs(strain_db_name)
Expand All @@ -209,7 +213,8 @@ def create_db(args):
dest_db = os.path.join(strain_db_name,os.path.basename(strain_db_name) + '.h5')
rel_path = os.path.relpath(src_db, os.path.dirname(dest_db))
if os.path.exists(dest_db) and args.overwrite:
os.remove(dest_db)
                sys.stderr.write(f"--overwrite means {dest_db} will be deleted now\n")
                os.remove(dest_db)
elif not os.path.exists(dest_db):
os.symlink(rel_path,dest_db)
# Extract sparse distances
Expand Down Expand Up @@ -304,7 +309,7 @@ def create_db(args):


def query_db(args):

# Read querying scheme
with open(args.db_scheme, 'rb') as pickle_file:
ref_db, rlist, model_dir, clustering_file, args.clustering_col_name, distances, \
Expand Down Expand Up @@ -434,10 +439,10 @@ def query_db(args):
args.gpu_graph,
save_partial_query_graph = False)
overall_lineage[strain] = createOverallLineage(rank_list, lineageClustering)

# Print combined strain and lineage clustering
print_overall_clustering(overall_lineage,args.output + '.csv',qNames)


def print_overall_clustering(overall_lineage,output,include_list):

Expand All @@ -455,7 +460,7 @@ def print_overall_clustering(overall_lineage,output,include_list):
isolate_info[isolate].append(str(overall_lineage[strain][rank][isolate]))
else:
isolate_info[isolate] = [str(strain),str(overall_lineage[strain][rank][isolate])]

# Print output
with open(output,'w') as out:
out.write('id,Cluster,')
Expand Down
14 changes: 12 additions & 2 deletions test/clean_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@ def deleteDir(dirname):
shutil.rmtree(dirname)

sys.stderr.write("Cleaning up tests\n")
dirty_files = ['example_db.info.csv']
dirty_files = [
"example_db.info.csv",
"example_external_clusters.csv",
"batch12_external_clusters.csv",
"example_lineage_scheme.pkl",
"lineage_creation_output.csv",
"lineage_querying_output.csv"
]
with open("references.txt", 'r') as ref_file:
for line in ref_file:
dirty_files.append(line.rstrip().split("\t")[1])
Expand All @@ -29,6 +36,7 @@ def deleteDir(dirname):
"example_query",
"example_single_query",
"example_query_update",
"example_query_update_2",
"example_lineage_query",
"example_viz",
"example_viz_subset",
Expand All @@ -46,8 +54,10 @@ def deleteDir(dirname):
"batch3",
"batch12",
"batch123",
"batch123_viz",
"strain_1_lineage_db",
"strain_2_lineage_db"
"strain_2_lineage_db",
"lineage_querying_output"
]
for outDir in outputDirs:
deleteDir(outDir)
Expand Down
5 changes: 5 additions & 0 deletions test/even_more_queries.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
12754_5_55 12754_5#55.contigs_velvet.fa
19183_4_61 19183_4#61.contigs_velvet.fa
12673_8_34 12673_8#34.contigs_velvet.fa
19183_4_70 19183_4#70.contigs_velvet.fa
12754_4_89 12754_4#89.contigs_velvet.fa
2 changes: 1 addition & 1 deletion test/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query more_queries.txt --db example_db --model-dir example_refine --output example_query --run-qc --max-zero-dist 0.3 --overwrite", shell=True, check=True)
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query more_queries.txt --db example_db --model-dir example_refine --output example_query --run-qc --max-zero-dist 1 --max-merge 3 --overwrite", shell=True, check=True)
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query some_queries.txt --db example_db --model-dir example_dbscan --output example_query_update --update-db --graph-weights --overwrite", shell=True, check=True) # uses graph weights
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query even_more_queries.txt --db example_query_update --model-dir example_dbscan --output example_query_update_2 --update-db --graph-weights --overwrite", shell=True, check=True) # uses graph weights
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query even_more_queries.txt --db example_query_update --model-dir example_dbscan --previous-clustering example_query_update --output example_query_update_2 --update-db --graph-weights --overwrite", shell=True, check=True) # uses graph weights
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query single_query.txt --db example_db --model-dir example_refine --output example_single_query --update-db --overwrite", shell=True, check=True)
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query inref_query.txt --db example_db --model-dir example_refine --output example_single_query --write-references", shell=True, check=True) # matched name, but should be renamed in the output
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query some_queries.txt --db example_db --model-dir example_refine --model-dir example_lineages --output example_lineage_query --overwrite", shell=True, check=True)
Expand Down

0 comments on commit 060aea9

Please sign in to comment.