Skip to content

Commit

Permalink
Update tests and add more files to clean list
Browse files Browse the repository at this point in the history
  • Loading branch information
johnlees committed Aug 7, 2024
1 parent ffb23de commit 060aea9
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 10 deletions.
1 change: 1 addition & 0 deletions PopPUNK/assign.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ def assign_query_hdf5(dbFuncs,
core_only = (fit_type == 'core_refined'),
accessory_only = (fit_type == 'accessory_refined'),
use_gpu = gpu_graph)
sys.stderr.write(f"Loading previous cluster assignments from {old_cluster_file}\n")

n_vertices = len(get_vertex_list(genomeNetwork, use_gpu = gpu_graph))
if n_vertices != len(rNames):
Expand Down
19 changes: 12 additions & 7 deletions PopPUNK/lineages.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import argparse
import subprocess
import pickle
import shutil
import pandas as pd
from collections import defaultdict

Expand Down Expand Up @@ -142,16 +143,18 @@ def main():
create_db(args)
elif args.query_db is not None:
query_db(args)


def create_db(args):

# Check if output files exist
if not args.overwrite:
if os.path.exists(args.output + '.csv'):
sys.stderr.write('Output file ' + args.output + '.csv exists; use --overwrite to replace it\n')
sys.exit(1)
if os.path.exists(args.db_scheme):
sys.stderr.write('Output file ' + args.db_scheme + ' exists; use --overwrite to replace it\n')
sys.exit(1)

sys.stderr.write("Identifying strains in existing database\n")
# Read in strain information
Expand Down Expand Up @@ -197,7 +200,8 @@ def create_db(args):
if num_isolates >= args.min_count:
lineage_dbs[strain] = strain_db_name
if os.path.isdir(strain_db_name) and args.overwrite:
os.rmdir(strain_db_name)
                sys.stderr.write(f"--overwrite means {strain_db_name} will be deleted now\n")
shutil.rmtree(strain_db_name)
if not os.path.isdir(strain_db_name):
try:
os.makedirs(strain_db_name)
Expand All @@ -209,7 +213,8 @@ def create_db(args):
dest_db = os.path.join(strain_db_name,os.path.basename(strain_db_name) + '.h5')
rel_path = os.path.relpath(src_db, os.path.dirname(dest_db))
if os.path.exists(dest_db) and args.overwrite:
os.remove(dest_db)
                sys.stderr.write(f"--overwrite means {dest_db} will be deleted now\n")
                os.remove(dest_db)
elif not os.path.exists(dest_db):
os.symlink(rel_path,dest_db)
# Extract sparse distances
Expand Down Expand Up @@ -304,7 +309,7 @@ def create_db(args):


def query_db(args):

# Read querying scheme
with open(args.db_scheme, 'rb') as pickle_file:
ref_db, rlist, model_dir, clustering_file, args.clustering_col_name, distances, \
Expand Down Expand Up @@ -434,10 +439,10 @@ def query_db(args):
args.gpu_graph,
save_partial_query_graph = False)
overall_lineage[strain] = createOverallLineage(rank_list, lineageClustering)

# Print combined strain and lineage clustering
print_overall_clustering(overall_lineage,args.output + '.csv',qNames)


def print_overall_clustering(overall_lineage,output,include_list):

Expand All @@ -455,7 +460,7 @@ def print_overall_clustering(overall_lineage,output,include_list):
isolate_info[isolate].append(str(overall_lineage[strain][rank][isolate]))
else:
isolate_info[isolate] = [str(strain),str(overall_lineage[strain][rank][isolate])]

# Print output
with open(output,'w') as out:
out.write('id,Cluster,')
Expand Down
14 changes: 12 additions & 2 deletions test/clean_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@ def deleteDir(dirname):
shutil.rmtree(dirname)

sys.stderr.write("Cleaning up tests\n")
dirty_files = ['example_db.info.csv']
dirty_files = [
"example_db.info.csv",
"example_external_clusters.csv",
"batch12_external_clusters.csv",
"example_lineage_scheme.pkl",
"lineage_creation_output.csv",
"lineage_querying_output.csv"
]
with open("references.txt", 'r') as ref_file:
for line in ref_file:
dirty_files.append(line.rstrip().split("\t")[1])
Expand All @@ -29,6 +36,7 @@ def deleteDir(dirname):
"example_query",
"example_single_query",
"example_query_update",
"example_query_update_2",
"example_lineage_query",
"example_viz",
"example_viz_subset",
Expand All @@ -46,8 +54,10 @@ def deleteDir(dirname):
"batch3",
"batch12",
"batch123",
"batch123_viz",
"strain_1_lineage_db",
"strain_2_lineage_db"
"strain_2_lineage_db",
"lineage_querying_output"
]
for outDir in outputDirs:
deleteDir(outDir)
Expand Down
5 changes: 5 additions & 0 deletions test/even_more_queries.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
12754_5_55 12754_5#55.contigs_velvet.fa
19183_4_61 19183_4#61.contigs_velvet.fa
12673_8_34 12673_8#34.contigs_velvet.fa
19183_4_70 19183_4#70.contigs_velvet.fa
12754_4_89 12754_4#89.contigs_velvet.fa
2 changes: 1 addition & 1 deletion test/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query more_queries.txt --db example_db --model-dir example_refine --output example_query --run-qc --max-zero-dist 0.3 --overwrite", shell=True, check=True)
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query more_queries.txt --db example_db --model-dir example_refine --output example_query --run-qc --max-zero-dist 1 --max-merge 3 --overwrite", shell=True, check=True)
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query some_queries.txt --db example_db --model-dir example_dbscan --output example_query_update --update-db --graph-weights --overwrite", shell=True, check=True) # uses graph weights
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query even_more_queries.txt --db example_query_update --model-dir example_dbscan --output example_query_update_2 --update-db --graph-weights --overwrite", shell=True, check=True) # uses graph weights
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query even_more_queries.txt --db example_query_update --model-dir example_dbscan --previous-clustering example_query_update --output example_query_update_2 --update-db --graph-weights --overwrite", shell=True, check=True) # uses graph weights
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query single_query.txt --db example_db --model-dir example_refine --output example_single_query --update-db --overwrite", shell=True, check=True)
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query inref_query.txt --db example_db --model-dir example_refine --output example_single_query --write-references", shell=True, check=True) # matched name, but should be renamed in the output
subprocess.run(python_cmd + " ../poppunk_assign-runner.py --query some_queries.txt --db example_db --model-dir example_refine --model-dir example_lineages --output example_lineage_query --overwrite", shell=True, check=True)
Expand Down

0 comments on commit 060aea9

Please sign in to comment.