From 2de93f8287284eda53053177573fc64a957a4559 Mon Sep 17 00:00:00 2001 From: "katarzyna.otylia.sikora@gmail.com" Date: Tue, 21 Jan 2025 11:26:39 +0100 Subject: [PATCH 1/5] fix spikein renaming --- snakePipes/shared/rules/createIndices.snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakePipes/shared/rules/createIndices.snakefile b/snakePipes/shared/rules/createIndices.snakefile index 45035c2ee..f383fe5c8 100755 --- a/snakePipes/shared/rules/createIndices.snakefile +++ b/snakePipes/shared/rules/createIndices.snakefile @@ -54,7 +54,7 @@ else: params: spikeinExt = spikeinExt shell: """ - sed '/\s+/$/{spikeinExt} /' {input} > {output} + sed 's/\s\+/{params.spikeinExt} /' {input} > {output} """ rule createGenomeFasta: From 85c01cbc66ceb0cab43ce4f3575695537c211fca Mon Sep 17 00:00:00 2001 From: "katarzyna.otylia.sikora@gmail.com" Date: Tue, 21 Jan 2025 13:37:30 +0100 Subject: [PATCH 2/5] predict chip dict working --- pyproject.toml | 3 +- snakePipes/common_functions.py | 30 +++++++++++++------ snakePipes/snakePipes.py | 4 +-- .../workflows/ChIPseq/internals.snakefile | 2 +- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 69d09d8f4..cc844fbe7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,8 @@ dependencies = [ "snakemake >= 8", "pandas", "thefuzz", - "pyyaml >= 5.1", +# "pyyaml >= 5.1", + "ruamel.yaml", "snakemake-executor-plugin-cluster-generic >= 1.0.9", "graphviz" ] diff --git a/snakePipes/common_functions.py b/snakePipes/common_functions.py index a9f008867..67a527982 100644 --- a/snakePipes/common_functions.py +++ b/snakePipes/common_functions.py @@ -5,7 +5,8 @@ import subprocess import os import re -import yaml +#import yaml +from ruamel.yaml import YAML import glob import sys import shutil @@ -88,8 +89,10 @@ def namesOKinR(sampleNames): def load_configfile(configFiles, verbose, info='Config'): + yaml=YAML(typ='safe') with open(configFiles, "r") as f: - config = yaml.load(f, Loader=yaml.FullLoader) + #config = yaml.load(f, Loader=yaml.FullLoader) + config = yaml.load(f) config = sanity_dict_clean(config) @@ -102,9 +105,15 @@ def load_configfile(configFiles, verbose, info='Config'): return config -def write_configfile(configFile, config): +def write_configfile(configFile, config, trafo): + yaml=YAML(typ='safe') + yaml.default_flow_style = False with open(configFile, 'w') as f: - yaml.dump(config, f, default_flow_style=False) + #yaml.dump(config, f, default_flow_style=False) + if trafo: + yaml.dump(config, f, transform=trafo) + else: + yaml.dump(config, f) # returns all key-value pairs that are different from dict1 to dict2 @@ -632,7 +641,7 @@ def commonYAMLandLogs(baseDir, workflowDir, defaults, args, callingScript): # save to configs.yaml in outdir config = defaults config.update(vars(args)) # This allows modifications of args after handling a user config file to still make it to the YAML given to snakemake! - write_configfile(os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), config) + write_configfile(os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), config, trafo=None) # merge cluster config files: 1) global one, 2) workflow specific one, 3) user provided one cfg = load_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), False, "defaults") @@ -719,7 +728,7 @@ def print_DAG(args, snakemake_cmd, callingScript, defaults): config['verbose'] = False write_configfile( os.path.join(args.outdir, - '{}.config.yaml'.format(workflowName)), config) + '{}.config.yaml'.format(workflowName)), config, trafo=None) DAGproc = subprocess.Popen( snakemake_cmd + " --rulegraph -q ", @@ -734,7 +743,7 @@ def print_DAG(args, snakemake_cmd, callingScript, defaults): config['verbose'] = oldVerbose write_configfile( os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), - config) + config, trafo=None) def logAndExport(args, workflowName): @@ -794,6 +803,9 @@ def runAndCleanup(args, cmd, logfile_name): if args.emailAddress: sendEmail(args, 0) +def tr(s): + return s.replace('null', 'None') + def predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None): """ @@ -856,14 +868,14 @@ def predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None): print("No control sample found!") chip_dict_pred["chip_dict"][i] = {} - chip_dict_pred["chip_dict"][i]['Control'] = tmp if tmp != "" else None + chip_dict_pred["chip_dict"][i]['Control'] = tmp if tmp != "" else None if re.match(".*(H3K4me1|H3K36me3|H3K9me3|H3K27me3).*", i, re.IGNORECASE): chip_dict_pred["chip_dict"][i]['Broad'] = True else: chip_dict_pred["chip_dict"][i]['Broad'] = False outfile = os.path.join(wdir, "chip_seq_sample_config.PREDICTED.yaml") - write_configfile(outfile, chip_dict_pred) + write_configfile(outfile, chip_dict_pred,trafo=tr) print("---------------------------------------------------------------------------------------") print("ChIPseq sample configuration is written to file ", outfile) print("Please check and modify this file - this is just a guess! Then run the workflow with it.") diff --git a/snakePipes/snakePipes.py b/snakePipes/snakePipes.py index 7240ad91d..82ae42c65 100755 --- a/snakePipes/snakePipes.py +++ b/snakePipes/snakePipes.py @@ -414,7 +414,7 @@ def updateConfig(args): else: sys.exit("Config file not found\n") updatedDict = cof.merge_dicts(currentDict, d) - cof.write_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), updatedDict) + cof.write_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), updatedDict, trafo=None) #update conda-prefix in snakemakeProfile if args.condaEnvDir: @@ -422,7 +422,7 @@ def updateConfig(args): f = open(profilePath / 'config.yaml') pf = yaml.load(f, Loader=yaml.FullLoader) pf['conda-prefix'] = args.condaEnvDir - cof.write_configfile(os.path.join(profilePath, "config.yaml"), pf) + cof.write_configfile(os.path.join(profilePath, "config.yaml"), pf, trafo=None) f.close() cof.load_configfile( diff --git a/snakePipes/workflows/ChIPseq/internals.snakefile b/snakePipes/workflows/ChIPseq/internals.snakefile index 8c8eae6f5..b5b842ec6 100755 --- a/snakePipes/workflows/ChIPseq/internals.snakefile +++ b/snakePipes/workflows/ChIPseq/internals.snakefile @@ -104,7 +104,7 @@ with open(samples_config, "r") as f: exit(1) del chip_dict_tmp -cf.write_configfile(os.path.join("chip_samples.yaml"), chip_dict) +cf.write_configfile(os.path.join("chip_samples.yaml"), chip_dict, trafo=None) # create unique sets of control samples, ChIP samples with and without control control_samples = set() From 89c9110a272d7501237ab2222100ebba2082f95e Mon Sep 17 00:00:00 2001 From: "katarzyna.otylia.sikora@gmail.com" Date: Tue, 21 Jan 2025 14:06:54 +0100 Subject: [PATCH 3/5] fix org yaml --- snakePipes/shared/organisms/GRCh38_gencode40.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/snakePipes/shared/organisms/GRCh38_gencode40.yaml b/snakePipes/shared/organisms/GRCh38_gencode40.yaml index 6af67cd33..11d300f75 100755 --- a/snakePipes/shared/organisms/GRCh38_gencode40.yaml +++ b/snakePipes/shared/organisms/GRCh38_gencode40.yaml @@ -91,6 +91,5 @@ ignoreForNormalization: chrX chrY chrM GL000008.2 GL000009.2 GL000194.1 GL000195 KI270748.1 KI270749.1 KI270750.1 KI270751.1 KI270752.1 KI270753.1 KI270754.1 KI270755.1 KI270756.1 KI270757.1 known_splicesites: /data/repository/organisms/GRCh38_gencode_40/gencode/release-40/HISAT2/genome.ss -rmsk_file: '' star_index: /data/repository/organisms/GRCh38_gencode_40/Indices/STAR_2.7.10 rmsk_file: /data/repository/organisms/GRCh38_gencode_40/repeatMasker/genome.fa.tbl From 9f46179b40940aafc2c11c2239ba33d1527d4e98 Mon Sep 17 00:00:00 2001 From: "katarzyna.otylia.sikora@gmail.com" Date: Tue, 21 Jan 2025 14:27:42 +0100 Subject: [PATCH 4/5] ruamel --- snakePipes/snakePipes.py | 20 +++++++++++++------ .../workflows/ChIPseq/internals.snakefile | 5 +++-- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/snakePipes/snakePipes.py b/snakePipes/snakePipes.py index 82ae42c65..0ed83a855 100755 --- a/snakePipes/snakePipes.py +++ b/snakePipes/snakePipes.py @@ -4,7 +4,7 @@ import subprocess import snakePipes import os -import yaml +from ruamel.yaml import YAML import glob import hashlib import shutil @@ -222,7 +222,9 @@ def envInfo(): baseDir = os.path.dirname(snakePipes.__file__) f = open(os.path.join(baseDir, "shared/defaults.yaml")) - cf = yaml.load(f, Loader=yaml.FullLoader) + #cf = yaml.load(f, Loader=yaml.FullLoader) + yaml=YAML(typ='safe') + cf = yaml.load(f) f.close() # Properly resolve the snakemake profile path @@ -230,7 +232,8 @@ def envInfo(): # Find out condaEnvDir from snakemake profile f = open(profilePath / 'config.yaml') - _p = yaml.load(f, Loader=yaml.FullLoader) + #_p = yaml.load(f, Loader=yaml.FullLoader) + _p = yaml.load(f) f.close() if 'conda-prefix' in _p: condaEnvDir = _p['conda-prefix'].replace("$USER", os.environ.get("USER")) @@ -278,14 +281,17 @@ def createCondaEnvs(args): baseDir = os.path.dirname(snakePipes.__file__) f = open(os.path.join(baseDir, "shared/defaults.yaml")) - cf = yaml.load(f, Loader=yaml.FullLoader) + #cf = yaml.load(f, Loader=yaml.FullLoader) + yaml=YAML(typ='safe') + cf = yaml.load(f) f.close() # Properly resolve the snakemake profile path profilePath = cof.resolveSnakemakeProfile(cf['snakemakeProfile'], baseDir) # Find out condaEnvDir from snakemake profile f = open(profilePath / 'config.yaml') - _p = yaml.load(f, Loader=yaml.FullLoader) + #_p = yaml.load(f, Loader=yaml.FullLoader) + _p = yaml.load(f) f.close() if 'conda-prefix' in _p: # For now $USER can be set in this path, resolve this explicitely. @@ -420,7 +426,9 @@ def updateConfig(args): if args.condaEnvDir: profilePath = cof.resolveSnakemakeProfile(d['snakemakeProfile'], baseDir) f = open(profilePath / 'config.yaml') - pf = yaml.load(f, Loader=yaml.FullLoader) + #pf = yaml.load(f, Loader=yaml.FullLoader) + yaml=YAML(typ='safe') + pf = yaml.load(f) pf['conda-prefix'] = args.condaEnvDir cof.write_configfile(os.path.join(profilePath, "config.yaml"), pf, trafo=None) f.close() diff --git a/snakePipes/workflows/ChIPseq/internals.snakefile b/snakePipes/workflows/ChIPseq/internals.snakefile index b5b842ec6..91e326402 100755 --- a/snakePipes/workflows/ChIPseq/internals.snakefile +++ b/snakePipes/workflows/ChIPseq/internals.snakefile @@ -2,7 +2,7 @@ import glob import os import subprocess import re -import yaml +from ruamel.yaml import YAML import sys import pandas as pd import warnings @@ -96,7 +96,8 @@ else: chip_dict = {} with open(samples_config, "r") as f: - chip_dict_tmp = yaml.load(f, Loader=yaml.FullLoader) + yaml=YAML(typ='safe') + chip_dict_tmp = yaml.load(f) if "chip_dict" in chip_dict_tmp and chip_dict_tmp["chip_dict"] : chip_dict = chip_dict_tmp["chip_dict"] else: From d102c2593130358598556041fbf7867b88886a56 Mon Sep 17 00:00:00 2001 From: "katarzyna.otylia.sikora@gmail.com" Date: Tue, 21 Jan 2025 14:45:56 +0100 Subject: [PATCH 5/5] conda recipe --- conda-recipe/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 533b373c5..bc8c62668 100755 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -24,7 +24,7 @@ requirements: - snakemake-executor-plugin-cluster-generic >=1.0.9 - pandas - thefuzz - - pyyaml >=5.1 + - ruamel.yaml test: commands: