diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 533b373c5..bc8c62668 100755 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -24,7 +24,7 @@ requirements: - snakemake-executor-plugin-cluster-generic >=1.0.9 - pandas - thefuzz - - pyyaml >=5.1 + - ruamel.yaml test: commands: diff --git a/pyproject.toml b/pyproject.toml index 69d09d8f4..cc844fbe7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,8 @@ dependencies = [ "snakemake >= 8", "pandas", "thefuzz", - "pyyaml >= 5.1", +# "pyyaml >= 5.1", + "ruamel.yaml", "snakemake-executor-plugin-cluster-generic >= 1.0.9", "graphviz" ] diff --git a/snakePipes/common_functions.py b/snakePipes/common_functions.py index a9f008867..67a527982 100644 --- a/snakePipes/common_functions.py +++ b/snakePipes/common_functions.py @@ -5,7 +5,8 @@ import subprocess import os import re -import yaml +#import yaml +from ruamel.yaml import YAML import glob import sys import shutil @@ -88,8 +89,10 @@ def namesOKinR(sampleNames): def load_configfile(configFiles, verbose, info='Config'): + yaml=YAML(typ='safe') with open(configFiles, "r") as f: - config = yaml.load(f, Loader=yaml.FullLoader) + #config = yaml.load(f, Loader=yaml.FullLoader) + config = yaml.load(f) config = sanity_dict_clean(config) @@ -102,9 +105,15 @@ def load_configfile(configFiles, verbose, info='Config'): return config -def write_configfile(configFile, config): +def write_configfile(configFile, config, trafo): + yaml=YAML(typ='safe') + yaml.default_flow_style = False with open(configFile, 'w') as f: - yaml.dump(config, f, default_flow_style=False) + #yaml.dump(config, f, default_flow_style=False) + if trafo: + yaml.dump(config, f, transform=trafo) + else: + yaml.dump(config, f) # returns all key-value pairs that are different from dict1 to dict2 @@ -632,7 +641,7 @@ def commonYAMLandLogs(baseDir, workflowDir, defaults, args, callingScript): # save to configs.yaml in outdir config = defaults config.update(vars(args)) # This allows modifications of args after handling a user config file to still make it to the YAML given to snakemake! - write_configfile(os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), config) + write_configfile(os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), config, trafo=None) # merge cluster config files: 1) global one, 2) workflow specific one, 3) user provided one cfg = load_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), False, "defaults") @@ -719,7 +728,7 @@ def print_DAG(args, snakemake_cmd, callingScript, defaults): config['verbose'] = False write_configfile( os.path.join(args.outdir, - '{}.config.yaml'.format(workflowName)), config) + '{}.config.yaml'.format(workflowName)), config, trafo=None) DAGproc = subprocess.Popen( snakemake_cmd + " --rulegraph -q ", @@ -734,7 +743,7 @@ def print_DAG(args, snakemake_cmd, callingScript, defaults): config['verbose'] = oldVerbose write_configfile( os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), - config) + config, trafo=None) def logAndExport(args, workflowName): @@ -794,6 +803,9 @@ def runAndCleanup(args, cmd, logfile_name): if args.emailAddress: sendEmail(args, 0) +def tr(s): + return s.replace('null', 'None') + def predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None): """ @@ -856,14 +868,14 @@ def predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None): print("No control sample found!") chip_dict_pred["chip_dict"][i] = {} - chip_dict_pred["chip_dict"][i]['Control'] = tmp if tmp != "" else None + chip_dict_pred["chip_dict"][i]['Control'] = tmp if tmp != "" else None if re.match(".*(H3K4me1|H3K36me3|H3K9me3|H3K27me3).*", i, re.IGNORECASE): chip_dict_pred["chip_dict"][i]['Broad'] = True else: chip_dict_pred["chip_dict"][i]['Broad'] = False outfile = os.path.join(wdir, "chip_seq_sample_config.PREDICTED.yaml") - write_configfile(outfile, chip_dict_pred) + write_configfile(outfile, chip_dict_pred,trafo=tr) print("---------------------------------------------------------------------------------------") print("ChIPseq sample configuration is written to file ", outfile) print("Please check and modify this file - this is just a guess! Then run the workflow with it.") diff --git a/snakePipes/shared/organisms/GRCh38_gencode40.yaml b/snakePipes/shared/organisms/GRCh38_gencode40.yaml index 6af67cd33..11d300f75 100755 --- a/snakePipes/shared/organisms/GRCh38_gencode40.yaml +++ b/snakePipes/shared/organisms/GRCh38_gencode40.yaml @@ -91,6 +91,5 @@ ignoreForNormalization: chrX chrY chrM GL000008.2 GL000009.2 GL000194.1 GL000195 KI270748.1 KI270749.1 KI270750.1 KI270751.1 KI270752.1 KI270753.1 KI270754.1 KI270755.1 KI270756.1 KI270757.1 known_splicesites: /data/repository/organisms/GRCh38_gencode_40/gencode/release-40/HISAT2/genome.ss -rmsk_file: '' star_index: /data/repository/organisms/GRCh38_gencode_40/Indices/STAR_2.7.10 rmsk_file: /data/repository/organisms/GRCh38_gencode_40/repeatMasker/genome.fa.tbl diff --git a/snakePipes/shared/rules/createIndices.snakefile b/snakePipes/shared/rules/createIndices.snakefile index 45035c2ee..f383fe5c8 100755 --- a/snakePipes/shared/rules/createIndices.snakefile +++ b/snakePipes/shared/rules/createIndices.snakefile @@ -54,7 +54,7 @@ else: params: spikeinExt = spikeinExt shell: """ - sed '/\s+/$/{spikeinExt} /' {input} > {output} + sed 's/\s\+/{params.spikeinExt} /' {input} > {output} """ rule createGenomeFasta: diff --git a/snakePipes/snakePipes.py b/snakePipes/snakePipes.py index 7240ad91d..0ed83a855 100755 --- a/snakePipes/snakePipes.py +++ b/snakePipes/snakePipes.py @@ -4,7 +4,7 @@ import subprocess import snakePipes import os -import yaml +from ruamel.yaml import YAML import glob import hashlib import shutil @@ -222,7 +222,9 @@ def envInfo(): baseDir = os.path.dirname(snakePipes.__file__) f = open(os.path.join(baseDir, "shared/defaults.yaml")) - cf = yaml.load(f, Loader=yaml.FullLoader) + #cf = yaml.load(f, Loader=yaml.FullLoader) + yaml=YAML(typ='safe') + cf = yaml.load(f) f.close() # Properly resolve the snakemake profile path @@ -230,7 +232,8 @@ def envInfo(): # Find out condaEnvDir from snakemake profile f = open(profilePath / 'config.yaml') - _p = yaml.load(f, Loader=yaml.FullLoader) + #_p = yaml.load(f, Loader=yaml.FullLoader) + _p = yaml.load(f) f.close() if 'conda-prefix' in _p: condaEnvDir = _p['conda-prefix'].replace("$USER", os.environ.get("USER")) @@ -278,14 +281,17 @@ def createCondaEnvs(args): baseDir = os.path.dirname(snakePipes.__file__) f = open(os.path.join(baseDir, "shared/defaults.yaml")) - cf = yaml.load(f, Loader=yaml.FullLoader) + #cf = yaml.load(f, Loader=yaml.FullLoader) + yaml=YAML(typ='safe') + cf = yaml.load(f) f.close() # Properly resolve the snakemake profile path profilePath = cof.resolveSnakemakeProfile(cf['snakemakeProfile'], baseDir) # Find out condaEnvDir from snakemake profile f = open(profilePath / 'config.yaml') - _p = yaml.load(f, Loader=yaml.FullLoader) + #_p = yaml.load(f, Loader=yaml.FullLoader) + _p = yaml.load(f) f.close() if 'conda-prefix' in _p: # For now $USER can be set in this path, resolve this explicitely. @@ -414,15 +420,17 @@ def updateConfig(args): else: sys.exit("Config file not found\n") updatedDict = cof.merge_dicts(currentDict, d) - cof.write_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), updatedDict) + cof.write_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), updatedDict, trafo=None) #update conda-prefix in snakemakeProfile if args.condaEnvDir: profilePath = cof.resolveSnakemakeProfile(d['snakemakeProfile'], baseDir) f = open(profilePath / 'config.yaml') - pf = yaml.load(f, Loader=yaml.FullLoader) + #pf = yaml.load(f, Loader=yaml.FullLoader) + yaml=YAML(typ='safe') + pf = yaml.load(f) pf['conda-prefix'] = args.condaEnvDir - cof.write_configfile(os.path.join(profilePath, "config.yaml"), pf) + cof.write_configfile(os.path.join(profilePath, "config.yaml"), pf, trafo=None) f.close() cof.load_configfile( diff --git a/snakePipes/workflows/ChIPseq/internals.snakefile b/snakePipes/workflows/ChIPseq/internals.snakefile index 8c8eae6f5..91e326402 100755 --- a/snakePipes/workflows/ChIPseq/internals.snakefile +++ b/snakePipes/workflows/ChIPseq/internals.snakefile @@ -2,7 +2,7 @@ import glob import os import subprocess import re -import yaml +from ruamel.yaml import YAML import sys import pandas as pd import warnings @@ -96,7 +96,8 @@ else: chip_dict = {} with open(samples_config, "r") as f: - chip_dict_tmp = yaml.load(f, Loader=yaml.FullLoader) + yaml=YAML(typ='safe') + chip_dict_tmp = yaml.load(f) if "chip_dict" in chip_dict_tmp and chip_dict_tmp["chip_dict"] : chip_dict = chip_dict_tmp["chip_dict"] else: @@ -104,7 +105,7 @@ with open(samples_config, "r") as f: exit(1) del chip_dict_tmp -cf.write_configfile(os.path.join("chip_samples.yaml"), chip_dict) +cf.write_configfile(os.path.join("chip_samples.yaml"), chip_dict, trafo=None) # create unique sets of control samples, ChIP samples with and without control control_samples = set()