diff --git a/Snakefile b/Snakefile index d91b4ce..9f9d5ae 100644 --- a/Snakefile +++ b/Snakefile @@ -7,6 +7,17 @@ import gzip configfile: "conf/config.yaml" +##################### +# utility functions # +##################### + +def get_mem_mb(mem_mb=10000, factor=1.5): + # return function that tries a higher memory limit for retries, scaling exponentially with a factor + def gm(wildcards, attempt): + return factor ** (attempt - 1) * mem_mb + return gm + + ################################### # samplesheet for the plink files # ################################### @@ -46,7 +57,7 @@ wildcard_constraints: # local rules # ############### -localrules: link_all, install_sh, install_seak, setup_all, run_deepripe_vep_all, run_ensembl_vep_all, evep_missense_proc_all, splice_ai_filter_and_overlap_with_genotypes_all, splice_ai_vcf_to_tsv_all, filter_variants_all, export_plof_burden_all, export_missense_burden_all, assoc_baseline_scoretest_all, mac_report_all, assoc_spliceai_linw_all, assoc_deepripe_single_localcollapsing_all, pLOF_nsnp_cummac_all +localrules: link_all, link_genotypes, install_sh, install_seak, setup_all, run_deepripe_vep_all, run_ensembl_vep_all, evep_missense_proc_all, splice_ai_filter_and_overlap_with_genotypes_all, splice_ai_vcf_to_tsv_all, filter_variants_all, export_plof_burden_all, export_missense_burden_all, assoc_baseline_scoretest_all, mac_report_all, assoc_spliceai_linw_all, assoc_deepripe_single_localcollapsing_all, pLOF_nsnp_cummac_all, complete_cases_ancestry, gather_complete_cases ############### @@ -156,4 +167,5 @@ include: "snake/deepripe.smk" include: "snake/evep.smk" include: "snake/splice_ai.smk" include: "snake/association.smk" +include: "snake/conditional_tests.smk" include: "snake/results_processing.smk" diff --git a/env/snakemake.yml b/env/snakemake.yml index 19e804c..4663315 100644 --- a/env/snakemake.yml +++ b/env/snakemake.yml @@ -7,3 +7,4 @@ dependencies: - python=3.8 - snakemake-minimal - pandas + - mamba diff --git a/run_cluster.sh b/run_cluster.sh new file mode 100755 index 0000000..af8e851 --- /dev/null +++ b/run_cluster.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +#SBATCH --job-name=controljob_%j +#SBATCH --output=snakemake_%j.log +#SBATCH --partition=vcpu +#SBATCH --time=48:00:00 +#SBATCH -c 1 +#SBATCH --mem 2000 + +# Initialize conda: +eval "$(conda shell.bash hook)" + +snakemake_env="install/snakemake" + +if [ ! -d $snakemake_env ]; then + ./install.sh +fi + +#conda activate $snakemake_env + +conda activate snakemake + +snakemake --snakefile Snakefile \ + --configfile conf/config.yaml \ + --profile ./slurm \ + --directory "${PWD}" \ + "${@}" + + diff --git a/script/python/assoc_baseline_scoretest_conditional_analysis.py b/script/python/assoc_baseline_scoretest_conditional_analysis.py new file mode 100644 index 0000000..a60244b --- /dev/null +++ b/script/python/assoc_baseline_scoretest_conditional_analysis.py @@ -0,0 +1,179 @@ + + + +import logging +logging.basicConfig(filename=snakemake.log[0], level=logging.INFO) + + +import numpy as np +import pandas as pd +import gzip + +from seak.scoretest import ScoretestNoK +from seak.data_loaders import intersect_ids, CovariatesLoaderCSV, VariantLoaderSnpReader +from pysnptools.snpreader import Bed +from util.association import BurdenLoaderHDF5 +from util import Timer + + +''' +This script is the baseline that we later compare to. 
+It loops over genes and performs score-tests, using pre-computed indicator variables (0/1).
+'''
+
+import sys  # needed for the early exits below
+
+
+# covariates loader
+covariatesloader = CovariatesLoaderCSV(snakemake.params.phenotype,
+                                       snakemake.input.covariates_tsv,
+                                       snakemake.params.covariate_column_names,
+                                       sep='\t',
+                                       path_to_phenotypes=snakemake.input.phenotypes_tsv)
+
+# set up burden loaders
+bloader_lof = BurdenLoaderHDF5(snakemake.input.h5_lof, snakemake.input.iid_lof, snakemake.input.gid_lof)
+bloader_missense = BurdenLoaderHDF5(snakemake.input.h5_missense, snakemake.input.iid_missense, snakemake.input.gid_missense)
+
+# make sure individuals are in the same order
+bloader_lof.update_individuals(covariatesloader.get_iids())
+bloader_missense.update_individuals(covariatesloader.get_iids())
+
+# gene names to iterate over
+genes = np.union1d(bloader_lof.get_vids(), bloader_missense.get_vids())
+if isinstance(genes, str):
+    genes = [genes]
+
+# keep only the genes that are significant:
+# (1) load the results table
+results = pd.read_csv(snakemake.input.results_tsv, sep='\t', na_values='.')
+
+# the columns to consider
+sigcols = snakemake.params.columns
+if isinstance(sigcols, str):
+    sigcols = [sigcols]
+
+pvcols_score = sigcols
+results = results[['gene', 'nCarrier_pLOF', 'nCarrier_missense'] + pvcols_score]
+results['pheno'] = snakemake.params.phenotype
+results = results.loc[results.gene.isin(genes)]
+results['gene_name'] = results['gene'].str.split('_', expand=True)[1]
+genes = results.gene.tolist()
+
+# (2) load the list of variants to condition on
+_conditional_list = pd.read_csv(snakemake.input.conditional_list, sep='\t', header=0, usecols=['gene_name', 'pheno'])
+_conditional_list = _conditional_list[(_conditional_list.pheno == snakemake.wildcards['pheno']) | (_conditional_list.pheno == snakemake.params.phenotype)]
+_conditional_list = _conditional_list.drop_duplicates()
+
+if len(_conditional_list) == 0:
+    logging.info('No genes pass significance threshold for phenotype {}.'.format(snakemake.params.phenotype))
+    with gzip.open(snakemake.output.results_tsv, 'wt') as outfile:
+        outfile.write('# no significant hits for phenotype {}.\n'.format(snakemake.params.phenotype))
+    sys.exit(0)
+else:
+    results = results.merge(_conditional_list, how='right', on=['gene_name', 'pheno'])
+
+# (3) keep only genes below the significance threshold
+sig_genes = [results.gene[results[k] < snakemake.params.significance_cutoff].values for k in pvcols_score]
+sig_genes = np.unique(np.concatenate(sig_genes))
+
+if len(sig_genes) == 0:
+    logging.info('No genes pass significance threshold for phenotype {}.'.format(snakemake.params.phenotype))
+    with gzip.open(snakemake.output.results_tsv, 'wt') as outfile:
+        outfile.write('# no hits below specified threshold ({}) for phenotype {}.\n'.format(snakemake.params.significance_cutoff, snakemake.params.phenotype))
+    sys.exit(0)
+
+results = results.loc[results.gene.isin(sig_genes)].copy()
+genes = results.gene.tolist()
+
+logging.info('About to evaluate variants in {} genes'.format(len(results.gene.unique())))
+
+# conditional analysis:
+# these genotypes will be used for the conditional analysis
+geno_cond = VariantLoaderSnpReader(Bed(snakemake.input.conditional_geno, count_A1=True, num_threads=1))
+geno_cond.update_individuals(covariatesloader.get_iids())
+
+# this file contains the mapping of associations to SNPs to condition on
+conditional_list = pd.read_csv(snakemake.input.conditional_list, sep='\t', header=0)
+conditional_list = conditional_list[(conditional_list.pheno == snakemake.params['phenotype']) | (conditional_list.pheno == snakemake.wildcards['pheno'])].drop_duplicates()
+conditional_list = conditional_list.loc[conditional_list.gene_name.isin(results.gene_name)]
+
+geno_cond.update_variants(intersect_ids(conditional_list.snp_rsid, geno_cond.get_vids()))
+logging.info('considering {} variants as covariates for conditional tests.'.format(len(geno_cond.get_vids())))
+
+# set up the null model
+Y, X = covariatesloader.get_one_hot_covariates_and_phenotype('NoK')
+null_model = ScoretestNoK(Y, X)
+
+logging.info('Phenotype: {}, Sample size: {}'.format(snakemake.params.phenotype, len(Y)))
+
+
+def test_gene(gene):
+
+    # conditional analysis:
+    # set up the function to load the variants to condition on
+    def get_conditional(gene):
+
+        cond_snps_vid = conditional_list.loc[conditional_list.gene_name == gene.split('_')[1], 'snp_rsid'].unique().tolist()
+
+        temp_genotypes, temp_vids = geno_cond.genotypes_by_id(cond_snps_vid, return_pos=False)
+        temp_genotypes -= np.nanmean(temp_genotypes, axis=0)
+
+        # mean-impute missing genotypes (after centering, the column mean is 0)
+        cond_snps = np.ma.masked_invalid(temp_genotypes).filled(0.)
+
+        return cond_snps, temp_vids
+
+    pval_dict = {}
+    pval_dict['gene'] = gene
+
+    def call(GV, G2, name):
+        # calls the score-test and appends results to pval_dict
+        if GV is None:
+            pval_dict['pv_' + name] = np.nan
+            pval_dict['nCarrier_' + name] = 0
+        else:
+            pv = null_model.pv_alt_model(GV, G2)
+            if pv < 0.:
+                logging.warning('negative value encountered in p-value computation for gene {}, p-value: {}, using saddle instead.'.format(gene, pv))
+                pv = null_model.pv_alt_model(GV, G2, method='saddle')
+            pval_dict['pv_' + name] = pv
+            pval_dict['nCarrier_' + name] = int(GV.sum())
+
+    # load the protein LOF burdens
+    try:
+        Glof = bloader_lof.genotypes_by_id(gene).astype(float)
+    except KeyError:
+        Glof = None
+
+    # load the missense burdens
+    try:
+        Gmiss = bloader_missense.genotypes_by_id(gene).astype(float)
+    except KeyError:
+        Gmiss = None
+
+    G2, cond_vid = get_conditional(gene)
+
+    call(Glof, G2, 'pLOF')
+    call(Gmiss, G2, 'missense')
+
+    if (Glof is None) or (Gmiss is None):
+        call(None, None, 'mrg')
+    else:
+        G = np.maximum(Glof, Gmiss)
+        call(G, G2, 'mrg')
+
+    pval_dict['cond_snps'] = ','.join(cond_vid)
+
+    return pval_dict
+
+
+timer = Timer()
+results = pd.DataFrame.from_dict([test_gene(gene) for gene in genes]).set_index('gene')
+
+t = timer.check()
+logging.info('{} genes tested in {:.2f} minutes.'.format(len(results), t / 60.))
+
+results.to_csv(snakemake.output.results_tsv, sep='\t', index=True, na_rep='.')
diff --git a/script/python/assoc_sclrt_kernels_deepripe_multiple.py b/script/python/assoc_sclrt_kernels_deepripe_multiple.py
index c645c2d..7ba927e 100644
--- a/script/python/assoc_sclrt_kernels_deepripe_multiple.py
+++ b/script/python/assoc_sclrt_kernels_deepripe_multiple.py
@@ -225,9 +225,8 @@ def get_rbp(interval):
         if temp_genotypes is None:
             raise GotNone
 
-        nanmean = np.nanmean(temp_genotypes, axis=0)
-
-        ncarrier = np.nansum(np.nansum(np.where((nanmean / 2 > 0.5), abs(temp_genotypes - 2), temp_genotypes), axis=1) >= 1)  # calculated differently here because alleles can be flipped!
+        nanmean = np.nanmean(temp_genotypes, axis=0)
+        ncarrier = np.nansum(np.where((nanmean / 2 > 0.5), abs(temp_genotypes - 2), temp_genotypes) > 0, axis=0)  # calculated differently here because alleles can be flipped!
 
         G1 = temp_genotypes - nanmean
         G1 = np.ma.masked_invalid(G1).filled(0.)
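Reviewer note on the `ncarrier` change above: the new expression counts carriers per variant, flipping dosages to `abs(g - 2)` when the ALT allele is actually the major allele (mean dosage > 1), so that carriers of the *minor* allele are counted. A minimal sketch of what the expression computes, on toy data (mine, not part of the patch):

```python
import numpy as np

# 4 individuals x 2 variants; in column 1 the ALT allele is the major allele
# (mean dosage > 1), so carriers are counted on the flipped dosages abs(g - 2)
g = np.array([[0., 2.],
              [1., 2.],
              [0., 1.],
              [np.nan, 2.]])

nanmean = np.nanmean(g, axis=0)                           # [0.33, 1.75]
flipped = np.where(nanmean / 2 > 0.5, np.abs(g - 2), g)   # column 1 becomes [0, 0, 1, 0]
ncarrier = np.nansum(flipped > 0, axis=0)                 # NaN > 0 evaluates to False
print(ncarrier)                                           # [1 1]: one minor-allele carrier per variant
```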
diff --git a/script/python/assoc_sclrt_kernels_deepripe_multiple_conditional_analysis.py b/script/python/assoc_sclrt_kernels_deepripe_multiple_conditional_analysis.py new file mode 100644 index 0000000..f082c1f --- /dev/null +++ b/script/python/assoc_sclrt_kernels_deepripe_multiple_conditional_analysis.py @@ -0,0 +1,441 @@ +import os +# os.environ["OMP_NUM_THREADS"] = "16" + +import logging + +logging.basicConfig(filename=snakemake.log[0], level=logging.INFO) + +import pandas as pd +import numpy as np +import h5py +import gzip +import pickle +import sys + +from numpy.linalg import cholesky, LinAlgError +from sklearn.metrics.pairwise import rbf_kernel, cosine_similarity + +# seak imports +from seak.data_loaders import intersect_ids, Hdf5Loader, VariantLoaderSnpReader, CovariatesLoaderCSV +from seak.scoretest import ScoretestNoK +from seak.lrt import LRTnoK + +from pysnptools.snpreader import Bed + +from util import Timer + + +class GotNone(Exception): + pass + + +# set up the covariatesloader + +covariatesloader = CovariatesLoaderCSV(snakemake.params.phenotype, + snakemake.input.covariates_tsv, + snakemake.params.covariate_column_names, + sep='\t', + path_to_phenotypes=snakemake.input.phenotypes_tsv) + +# initialize the null models +Y, X = covariatesloader.get_one_hot_covariates_and_phenotype('noK') + +null_model_score = ScoretestNoK(Y, X) + +# conditional analysis: +# we need to fit gene-specific null models for the LRT! +# null_model_lrt = LRTnoK(X, Y) +null_model_lrt = None + + +# set up function to filter variants: +def maf_filter(mac_report): + # load the MAC report, keep only observed variants with MAF below threshold + mac_report = pd.read_csv(mac_report, sep='\t', usecols=['SNP', 'MAF', 'Minor', 'alt_greater_ref']) + + if snakemake.params.filter_highconfidence: + # note: We don't filter out the variants for which alt/ref are "flipped" for the RBPs + vids = mac_report.SNP[(mac_report.MAF < snakemake.params.max_maf) & (mac_report.Minor > 0) & (mac_report.hiconf_reg.astype(bool))] + else: + vids = mac_report.SNP[(mac_report.MAF < snakemake.params.max_maf) & (mac_report.Minor > 0)] + + return mac_report.set_index('SNP').loc[vids] + + +def sid_filter(vids): + + if 'sid_include' in snakemake.config: + print('limiting to variants present in {}'.format(snakemake.config['sid_include'])) + + infilepath = snakemake.config['sid_include'] + + if infilepath.endswith('gz'): + with gzip.open(infilepath,'rt') as infile: + sid = np.array([l.rstrip() for l in infile]) + else: + with open(infilepath, 'r') as infile: + sid = np.array([l.rstrip() for l in infile]) + else: + return vids + + return intersect_ids(vids, sid) + + +def vep_filter(h5_rbp, bed_rbp): + # returns variant ids for variants that pass filtering threshold and mask for the hdf5loader + + with h5py.File(h5_rbp, 'r') as f: + + rbp_of_interest = snakemake.params.rbp_of_interest + try: + # in newer version of h5py the strings are not decoded -> decode them + labels = [l.decode() for l in f['labels'][:]] + except AttributeError: + # this used to work in older versions of h5py: + labels = list(f['labels'][:]) + + if isinstance(rbp_of_interest, str): + rbp_of_interest = [rbp_of_interest] + + assert all([rbp in labels for rbp in rbp_of_interest]), 'Error: not all of {} are in labels: {}'.format(rbp_of_interest, labels) + + keep = list(i for i, l in enumerate(labels) if l in rbp_of_interest) + diffscores = f['diffscore'][:, keep] + + rbp_mask = np.array([i in rbp_of_interest for i in labels]) + + if np.ndim(diffscores) == 1: + diffscores = 
diffscores[:, np.newaxis] + + vid_mask = np.max(np.abs(diffscores), axis=1) >= snakemake.params.min_impact + + vid_df = pd.read_csv(bed_rbp, sep='\t', header=None, usecols=[3], names=['vid'], dtype={'vid': str}) + vids = vid_df.vid.str.split(pat='_[ACGT]+>[ACGT]+$', n=1, expand=True)[0].values[vid_mask] + + return vids, rbp_mask, np.array(labels)[rbp_mask].tolist() + + +def get_plof_id_func(vep_tsv): + # returns a function that takes an array of variant ids as input and returns a boolean array which indicates whether a variant is + # annotated as a protein LOF variant for any gene + + plof_ids = pd.read_csv(vep_tsv, sep='\t', usecols=['Uploaded_variation'], index_col='Uploaded_variation').index.drop_duplicates() + + def is_plof(vids): + return np.array([vid in plof_ids for vid in vids]) + + return is_plof + + +# start: kernel parameters / functions +def get_cholesky(S): + try: + chol = cholesky(S) + flag = 1 + except LinAlgError: + try: + np.fill_diagonal(S, S.diagonal() + 1e-6) # sometimes this saves it... + S /= np.max(S) + chol = cholesky(S) + flag = 0 + except LinAlgError: + chol = np.eye(len(S)) + flag = -1 + return chol, flag + + +def get_simil_from_vep(V): + # cosine similarity + return cosine_similarity(V) + + +def get_simil_from_pos(pos): + # 0.5 at 50bp distance + gamma = -1*np.log(0.5)/(50**2) + return rbf_kernel(pos[:, np.newaxis], gamma=gamma) + + +def get_weights_from_vep(V): + # max norm + return np.sqrt(np.max(np.abs(V), axis=1)) + + +def get_regions(): + # load the results, keep those below a certain p-value + present in conditional analysis list + results = pd.read_csv(snakemake.input.results_tsv, sep='\t', comment='#') + + kern = snakemake.params.kernels + if isinstance(kern, str): + kern = [kern] + + pvcols_score = ['pv_score_' + k for k in kern] + pvcols_lrt = ['pv_lrt_' + k for k in kern] + statcols = ['lrtstat_' + k for k in kern] + results = results[['gene', 'n_snp', 'cumMAC', 'nCarrier'] + statcols + pvcols_score + pvcols_lrt] + + results.rename(columns={'gene':'name'}, inplace=True) + + # set up the regions to loop over for the chromosome + regions = pd.read_csv(snakemake.input.regions_bed, sep='\t', header=None, usecols=[0 ,1 ,2 ,3, 5], dtype={0 :str, 1: np.int32, 2 :np.int32, 3 :str, 5:str}) + regions.columns = ['chrom', 'start', 'end', 'name', 'strand'] + regions['strand'] = regions.strand.map({'+': 'plus', '-': 'minus'}) + + # conditional analysis, keep only genes that are provided in conditional_list + regions['gene_name'] = regions['name'].str.split('_', expand=True)[1] + + _conditional_list = pd.read_csv(snakemake.input.conditional_list, sep='\t', header=0, usecols=['gene_name','pheno']) + _conditional_list = _conditional_list[(_conditional_list.pheno == snakemake.wildcards['pheno']) | (_conditional_list.pheno == snakemake.params.phenotype) ] + _conditional_list = _conditional_list.drop_duplicates() + + if len(_conditional_list) == 0: + logging.info('No genes pass significance threshold for phenotype {}, exiting.'.format(snakemake.params.phenotype)) + with gzip.open(snakemake.output.results_tsv, 'wt') as outfile: + outfile.write('# no significant hits for phenotype {}. 
\n'.format(snakemake.params.phenotype)) + sys.exit(0) + else: + regions = regions.merge(_conditional_list, how='right') + + regions = regions.merge(results, how='left', on=['name'], validate='one_to_one') + + sig_genes = [results.name[results[k] < snakemake.params.significance_cutoff].values for k in pvcols_score + pvcols_lrt] + sig_genes = np.unique(np.concatenate(sig_genes)) + + if len(sig_genes) == 0: + return None + + regions = regions.loc[regions.name.isin(sig_genes)] + + return regions + + +# end: kernel parameters / functions + +# genotype path, vep-path: +assert len(snakemake.params.ids) == len(snakemake.input.bed), 'Error: length of chromosome IDs does not match length of genotype files' +geno_vep = zip(snakemake.params.ids, snakemake.input.bed, snakemake.input.mac_report, snakemake.input.ensembl_vep_tsv) + +# get the top hits +regions_all = get_regions() +if regions_all is None: + logging.info('No genes pass significance threshold, exiting.') + with gzip.open(snakemake.output.results_tsv, 'wt') as outfile: + outfile.write('# no hits below specified threshold ({}) for phenotype {}. \n'.format(snakemake.params.significance_cutoff, snakemake.params.phenotype)) + sys.exit(0) + + +# dict with paths to input files +vep_h5 = {chrom: {'plus': p, 'minus': m} for chrom, (p, m) in zip(snakemake.params.ids, zip(snakemake.input.h5_rbp_plus, snakemake.input.h5_rbp_minus))} +vep_bed = {chrom: {'plus': p, 'minus': m} for chrom, (p, m) in zip(snakemake.params.ids, zip(snakemake.input.bed_rbp_plus, snakemake.input.bed_rbp_minus))} + +# storing all results here +results = [] + +i_gene = 0 +i_chrom = 0 + +# conditional analysis: +# these genotypes will be used for the conditional analysis +geno_cond = VariantLoaderSnpReader(Bed(snakemake.input.conditional_geno, count_A1=True, num_threads=1)) +geno_cond.update_individuals(covariatesloader.get_iids()) + +# this file contains the mapping of associations to SNPs to condition on +conditional_list = pd.read_csv(snakemake.input.conditional_list, sep='\t', header=0) +conditional_list = conditional_list[(conditional_list.pheno == snakemake.params['phenotype']) | (conditional_list.pheno == snakemake.wildcards['pheno']) ].drop_duplicates() + +geno_cond.update_variants(intersect_ids(conditional_list.snp_rsid, geno_cond.get_vids())) +logging.info('considering {} variants as covariates for conditional tests.'.format(len(geno_cond.get_vids()))) + +# enter the chromosome loop: +timer = Timer() +for i, (chromosome, bed, mac_report, vep_tsv) in enumerate(geno_vep): + + if chromosome.replace('chr','') not in regions_all.chrom.unique(): + continue + + if snakemake.params.debug: + # process only two chromosomes if debugging... + if i_chrom > 2: + break + + timer.reset() + + # get variants that pass MAF threshold: + mac_report = maf_filter(mac_report) + filter_vids = mac_report.index.values + filter_vids = sid_filter(filter_vids) + + # function to identify protein LOF variants + is_plof = get_plof_id_func(vep_tsv) + + # set up local collapsing + # collapser = LocalCollapsing(distance_threshold=51.) 
+ + for strand in ['plus', 'minus']: + + # set up the regions to loop over for the chromosome + chromosome_id = chromosome.replace('chr', '') + + regions = regions_all[(regions_all.chrom == chromosome_id) & (regions_all.strand == strand)] + + if len(regions) == 0: + continue + + # get variants that pass variant effect prediction threshold: + vep_vids, vep_mask, labels = vep_filter(vep_h5[chromosome][strand], vep_bed[chromosome][strand]) + + # combine + filter_vids_chromosome = intersect_ids(vep_vids, filter_vids) + + # initialize the vep loader + veploader = Hdf5Loader(vep_bed[chromosome][strand], vep_h5[chromosome][strand], 'diffscore', from_janggu=True) + veploader.update_variants(filter_vids_chromosome) + veploader.set_mask(vep_mask) + + # set up the variant loader (rbp variants) for the chromosome + strand + plinkloader = VariantLoaderSnpReader(Bed(bed, count_A1=True, num_threads=4)) + plinkloader.update_variants(veploader.get_vids()) + plinkloader.update_individuals(covariatesloader.get_iids()) + + + # set up the genotype + vep loading function + def get_rbp(interval): + + temp_genotypes, temp_vids, pos = plinkloader.genotypes_by_region(interval, return_pos=True) + + if temp_genotypes is None: + raise GotNone + + nanmean = np.nanmean(temp_genotypes, axis=0) + ncarrier = np.nansum(np.where((nanmean / 2 > 0.5), abs(temp_genotypes - 2), temp_genotypes) > 0, axis=0)# calculated differently here because alleles can be flipped! + + temp_genotypes -= np.nanmean(temp_genotypes, axis=0) + G1 = np.ma.masked_invalid(temp_genotypes).filled(0.) + + # deepripe variant effect predictions (single RBP) + V1 = veploader.anno_by_id(temp_vids) + + weights = get_weights_from_vep(V1) + + S = get_simil_from_pos(pos) + S *= get_simil_from_vep(V1) # Schur product of two positive definite matrices is positive definite + + cummac = mac_report.loc[temp_vids].Minor + + V1 = pd.DataFrame(V1, columns=labels)[sorted(labels)] + + return G1, temp_vids, weights, S, ncarrier, cummac, pos, V1 + + + # conditional analysis + # set up the function to load the variants to condition on + + def get_conditional(interval): + + cond_snps_vid = conditional_list.loc[ conditional_list.gene_name == interval['gene_name'], 'snp_rsid' ].unique().tolist() + + temp_genotypes, temp_vids = geno_cond.genotypes_by_id(cond_snps_vid, return_pos=False) + temp_genotypes -= np.nanmean(temp_genotypes, axis=0) + + cond_snps = np.ma.masked_invalid(temp_genotypes).filled(0.) + + return cond_snps, temp_vids + + + # set up the test-function for a single gene + def test_gene(interval, seed): + + interval = interval.to_dict() + + pval_dict = {} + pval_dict['gene'] = interval['name'] + + out_dir = os.path.join(snakemake.params.out_dir_stats, interval['name']) + os.makedirs(out_dir, exist_ok=True) + + # conditional analysis: + # get the snps to condition on, and include them in the null model for the LRT + cond_snps, cond_snps_vid = get_conditional(interval) + null_model_lrt = LRTnoK(np.concatenate([X, cond_snps], axis=1), Y) + + # conditional analysis: + # the score-test takes a second argument (G2) that allows conditioning on a second set of variants... 
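+            # (reviewer note: conditioning works by passing the centered conditioning genotypes as
+            # extra fixed-effect covariates, so the reported p-values are for signal that remains
+            # after the known top SNPs of the gene have been accounted for)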
+            def pv_score(GV, G2=cond_snps):
+                # wraps the score-test; conditioning SNPs enter through G2
+                pv = null_model_score.pv_alt_model(GV, G2)
+                if pv < 0.:
+                    pv = null_model_score.pv_alt_model(GV, G2, method='saddle')
+                return pv
+
+            def call_test(GV, name):
+                pval_dict['pv_score_' + name] = pv_score(GV)
+                altmodel = null_model_lrt.altmodel(GV)
+                res = null_model_lrt.pv_sim_chi2(250000, simzero=False, seed=seed)
+                pval_dict['pv_lrt_' + name] = res['pv']
+                pval_dict['lrtstat_' + name] = altmodel['stat']
+                if 'h2' in altmodel:
+                    pval_dict['h2_' + name] = altmodel['h2']
+
+                if res['pv'] != 1.:
+                    for stat in ['scale', 'dof', 'mixture', 'imax']:
+                        pval_dict[stat + '_' + name] = res[stat]
+                    if len(res['res']) > 0:
+                        pd.DataFrame({interval['name']: res['res']}).to_pickle(out_dir + '/{}.pkl.gz'.format(name))
+
+            # load rbp variants
+            G, vids, weights, S, ncarrier, cummac, pos, V = get_rbp(interval)
+            keep = ~is_plof(vids)
+
+            # cholesky
+            if G.shape[1] > 1:
+                L, flag1 = get_cholesky(S)
+            else:
+                L, flag1 = np.eye(G.shape[1]), -1
+
+            # do a score test (cholesky, and weighted cholesky)
+            GWL = G.dot(np.diag(weights, k=0)).dot(L)
+            call_test(GWL, 'linwcholesky')
+
+            # sanity checks
+            assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids))
+            assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expected: {}, got: {}'.format(interval['cumMAC'], cummac.sum())
+
+            if np.any(keep):
+
+                if keep.sum() == 1:
+                    # only a single SNP is not LOF
+                    GWL = G[:, keep].dot(np.diag(weights[keep], k=0))  # actually just the linear weighted kernel
+                else:
+                    L, flag2 = get_cholesky(S[np.ix_(keep, keep)])
+                    GWL = G[:, keep].dot(np.diag(weights[keep], k=0)).dot(L)
+
+                call_test(GWL, 'linwcholesky_notLOF')
+
+            # conditional analysis: keep names of SNPs that we condition on
+            pval_dict['cond_snps'] = ','.join(cond_snps_vid)
+
+            return pval_dict
+
+        logging.info('loaders for chromosome {}, strand "{}" initialized in {:.1f} seconds.'.format(chromosome, strand, timer.check()))
+
+        # run tests for all genes on the chromosome / strand
+        for _, region in regions.iterrows():
+
+            if snakemake.params.debug:
+                if i_gene > 5:
+                    continue
+
+            try:
+                results.append(test_gene(region, i_gene))
+            except GotNone:
+                continue
+
+            i_gene += 1
+            logging.info('tested {} genes...'.format(i_gene))
+
+    i_chrom += 1
+    timer.reset()
+
+# export the results in a single table
+results = pd.DataFrame(results)
+results['pheno'] = snakemake.params.phenotype
+results.to_csv(snakemake.output.results_tsv, sep='\t', index=False)
diff --git a/script/python/assoc_sclrt_kernels_deepripe_multiple_eval_top_hits.py b/script/python/assoc_sclrt_kernels_deepripe_multiple_eval_top_hits.py
index 765a4b3..0d739be 100644
--- a/script/python/assoc_sclrt_kernels_deepripe_multiple_eval_top_hits.py
+++ b/script/python/assoc_sclrt_kernels_deepripe_multiple_eval_top_hits.py
@@ -261,6 +261,9 @@ def get_rbp(interval):
         if temp_genotypes is None:
             raise GotNone
 
+        nanmean = np.nanmean(temp_genotypes, axis=0)
+        ncarrier = np.nansum(np.nansum(np.where((nanmean / 2 > 0.5), abs(temp_genotypes - 2), temp_genotypes), axis=1) >= 1)  # calculated differently here because alleles can be flipped!
+ ncarrier = np.nansum(temp_genotypes, axis=0) ncarrier = np.minimum(ncarrier, temp_genotypes.shape[0] - ncarrier).astype(int) @@ -358,8 +361,8 @@ def call_lrt(GV, name, vids=None): call_lrt(G.dot(np.diag(weights, k=0)), 'linw') # performs weighting and joint estimation of effect-sizes # sanity checks - assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids)) - assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expeced: {}, got: {}'.format(interval['cumMAC'], cummac.sum()) + # assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids)) + # assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expeced: {}, got: {}'.format(interval['cumMAC'], cummac.sum()) if np.any(keep): diff --git a/script/python/assoc_sclrt_kernels_deepripe_multiple_retest_top_hits.py b/script/python/assoc_sclrt_kernels_deepripe_multiple_retest_top_hits.py index ce58987..0cafec5 100644 --- a/script/python/assoc_sclrt_kernels_deepripe_multiple_retest_top_hits.py +++ b/script/python/assoc_sclrt_kernels_deepripe_multiple_retest_top_hits.py @@ -285,8 +285,8 @@ def get_rbp(interval): if temp_genotypes is None: raise GotNone - ncarrier = np.nansum(temp_genotypes, axis=0) - ncarrier = np.minimum(ncarrier, temp_genotypes.shape[0] - ncarrier).astype(int) + nanmean = np.nanmean(temp_genotypes, axis=0) + ncarrier = np.nansum(np.where((nanmean / 2 > 0.5), abs(temp_genotypes - 2), temp_genotypes) > 0, axis=0)# calculated differently here because alleles can be flipped! temp_genotypes -= np.nanmean(temp_genotypes, axis=0) G1 = np.ma.masked_invalid(temp_genotypes).filled(0.) @@ -353,8 +353,8 @@ def call_test(GV, name): call_test(GWL, 'linwcholesky') # sanity checks - assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids)) - assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expeced: {}, got: {}'.format(interval['cumMAC'], cummac.sum()) + # assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids)) + # assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! 
expeced: {}, got: {}'.format(interval['cumMAC'], cummac.sum()) if np.any(keep): diff --git a/script/python/assoc_sclrt_kernels_missense.py b/script/python/assoc_sclrt_kernels_missense.py index f99b506..1280d64 100644 --- a/script/python/assoc_sclrt_kernels_missense.py +++ b/script/python/assoc_sclrt_kernels_missense.py @@ -255,9 +255,9 @@ def call_lrt(GV, name): if G2 is not None: # merged (single variable), missense non-weighted - G1_burden_mrg = np.maximum(G1_burden_nonweighted, G2) - call_score(G1_burden_mrg, 'linb_mrgLOF') - call_lrt(G1_burden_mrg, 'linb_mrgLOF') + # G1_burden_mrg = np.maximum(G1_burden_nonweighted, G2) + # call_score(G1_burden_mrg, 'linb_mrgLOF') + # call_lrt(G1_burden_mrg, 'linb_mrgLOF') # merged (single variable) G1_burden_mrg = np.maximum(G2, G1_burden) @@ -265,9 +265,9 @@ def call_lrt(GV, name): call_lrt(G1_burden_mrg, 'linwb_mrgLOF') # concatenated ( >= 2 variables, non-weighted) - G1_nonweighted = np.concatenate([G1_nonweighted, G2], axis=1) - call_score(G1_nonweighted, 'lincollapsed_cLOF') - call_lrt(G1_nonweighted, 'lincollapsed_cLOF') + # G1_nonweighted = np.concatenate([G1_nonweighted, G2], axis=1) + # call_score(G1_nonweighted, 'lincollapsed_cLOF') + # call_lrt(G1_nonweighted, 'lincollapsed_cLOF') # concatenated ( >= 2 variables) G1 = np.concatenate([G1, G2], axis=1) @@ -309,7 +309,8 @@ def call_lrt(GV, name): stats = pd.DataFrame.from_dict(stats).set_index('gene') # these are the kernels -kern = ['linb', 'linwb', 'linb_mrgLOF', 'linwb_mrgLOF', 'lincollapsed', 'linwcollapsed', 'lincollapsed_cLOF', 'linwcollapsed_cLOF'] +# kern = ['linb', 'linwb', 'linb_mrgLOF', 'linwb_mrgLOF', 'lincollapsed', 'linwcollapsed', 'lincollapsed_cLOF', 'linwcollapsed_cLOF'] +kern = ['linb', 'linwb', 'linwb_mrgLOF', 'lincollapsed', 'linwcollapsed', 'linwcollapsed_cLOF'] # concatenate and save gene-specific simulations # ...complicated nested dict comprehension that ensures we drop empty values etc. diff --git a/script/python/assoc_sclrt_kernels_missense_conditional_analysis.py b/script/python/assoc_sclrt_kernels_missense_conditional_analysis.py new file mode 100644 index 0000000..3441a62 --- /dev/null +++ b/script/python/assoc_sclrt_kernels_missense_conditional_analysis.py @@ -0,0 +1,388 @@ + +import os +import sys +# os.environ["OMP_NUM_THREADS"] = "16" + +import logging +logging.basicConfig(filename=snakemake.log[0], level=logging.INFO) + +import pandas as pd +import numpy as np +import pickle +import gzip + +# seak imports +from seak.data_loaders import intersect_ids, EnsemblVEPLoader, VariantLoaderSnpReader, CovariatesLoaderCSV +from seak.kernels import LocalCollapsing +from seak.scoretest import ScoretestNoK +from seak.lrt import LRTnoK, pv_chi2mixture, fit_chi2mixture + +from pysnptools.snpreader import Bed + +from util.association import BurdenLoaderHDF5 +from util import Timer + +import functools + + +class GotNone(Exception): + pass + +# set up the covariatesloader + +covariatesloader = CovariatesLoaderCSV(snakemake.params.phenotype, + snakemake.input.covariates_tsv, + snakemake.params.covariate_column_names, + sep='\t', + path_to_phenotypes=snakemake.input.phenotypes_tsv) + +# initialize the null models +Y, X = covariatesloader.get_one_hot_covariates_and_phenotype('noK') + +null_model_score = ScoretestNoK(Y, X) + +# conditional analysis: +# we need to fit gene-specific null models for the LRT! 
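+# (reviewer note: the SNPs we condition on differ from gene to gene, so the LRT null model is
+# re-fit per gene inside test_gene(), with the conditioning genotypes appended to the covariates X)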
+# null_model_lrt = LRTnoK(X, Y) +null_model_lrt = None + + +# set up function to filter variants: +def maf_filter(mac_report): + + # load the MAC report, keep only observed variants with MAF below threshold + mac_report = pd.read_csv(mac_report, sep='\t', usecols=['SNP', 'MAF', 'Minor', 'alt_greater_ref']) + + if snakemake.params.filter_highconfidence: + vids = mac_report.SNP[(mac_report.MAF < snakemake.params.max_maf) & (mac_report.Minor > 0) & ~(mac_report.alt_greater_ref.astype(bool)) & (mac_report.hiconf_reg.astype(bool))] + else: + vids = mac_report.SNP[(mac_report.MAF < snakemake.params.max_maf) & (mac_report.Minor > 0) & ~(mac_report.alt_greater_ref)] + + # this has already been done in filter_variants.py + # load the variant annotation, keep only variants in high-confidece regions + # anno = pd.read_csv(anno_tsv, sep='\t', usecols=['Name', 'hiconf_reg']) + # vids_highconf = anno.Name[anno.hiconf_reg.astype(bool).values] + # vids = np.intersect1d(vids, vids_highconf) + + return mac_report.set_index('SNP').loc[vids] + + +def sid_filter(vids): + + if 'sid_include' in snakemake.config: + print('limiting to variants present in {}'.format(snakemake.config['sid_include'])) + + infilepath = snakemake.config['sid_include'] + + if infilepath.endswith('gz'): + with gzip.open(infilepath,'rt') as infile: + sid = np.array([l.rstrip() for l in infile]) + else: + with open(infilepath, 'r') as infile: + sid = np.array([l.rstrip() for l in infile]) + else: + return vids + + return intersect_ids(vids, sid) + + + +def get_regions(): + # load the results, keep those below a certain p-value + present in conditional analysis list + results = pd.read_csv(snakemake.input.results_tsv, sep='\t') + + kern = snakemake.params.kernels + if isinstance(kern, str): + kern = [kern] + + pvcols_score = ['pv_score_' + k for k in kern] + pvcols_lrt = ['pv_lrt_' + k for k in kern] + statcols = ['lrtstat_' + k for k in kern] + results = results[['gene', 'n_snp', 'cumMAC', 'nCarrier'] + statcols + pvcols_score + pvcols_lrt] + + results.rename(columns={'gene':'name'}, inplace=True) + + # set up the regions to loop over for the chromosome + regions = pd.read_csv(snakemake.input.regions_bed, sep='\t', header=None, usecols=[0 ,1 ,2 ,3, 5], dtype={0 :str, 1: np.int32, 2 :np.int32, 3 :str, 5:str}) + regions.columns = ['chrom', 'start', 'end', 'name', 'strand'] + regions['strand'] = regions.strand.map({'+': 'plus', '-': 'minus'}) + + # conditional analysis, keep only genes that are provided in conditional_list + regions['gene_name'] = regions['name'].str.split('_', expand=True)[1] + + _conditional_list = pd.read_csv(snakemake.input.conditional_list, sep='\t', header=0, usecols=['gene_name','pheno']) + _conditional_list = _conditional_list[(_conditional_list.pheno == snakemake.wildcards['pheno']) | (_conditional_list.pheno == snakemake.params.phenotype) ] + _conditional_list = _conditional_list.drop_duplicates() + + if len(_conditional_list) == 0: + logging.info('No genes pass significance threshold for phenotype {}, exiting.'.format(snakemake.params.phenotype)) + with gzip.open(snakemake.output.results_tsv, 'wt') as outfile: + outfile.write('# no significant hits for phenotype {}. 
\n'.format(snakemake.params.phenotype)) + sys.exit(0) + else: + regions = regions.merge(_conditional_list, how='right') + + regions = regions.merge(results, how='left', on=['name'], validate='one_to_one') + + sig_genes = [results.name[results[k] < snakemake.params.significance_cutoff].values for k in pvcols_score + pvcols_lrt] + sig_genes = np.unique(np.concatenate(sig_genes)) + + if len(sig_genes) == 0: + return None + + regions = regions.loc[regions.name.isin(sig_genes)] + + return regions + +# genotype path, vep-path: +assert len(snakemake.params.ids) == len(snakemake.input.bed), 'Error: length of chromosome IDs does not match length of genotype files' +geno_vep = zip(snakemake.params.ids, snakemake.input.bed, snakemake.input.vep_tsv, snakemake.input.mac_report, snakemake.input.h5_lof, snakemake.input.iid_lof, snakemake.input.gid_lof) + +# get the top hits +regions_all = get_regions() +if regions_all is None: + logging.info('No genes pass significance threshold, exiting.') + with gzip.open(snakemake.output.results_tsv, 'wt') as outfile: + outfile.write('# no hits below specified threshold ({}) for phenotype {}. \n'.format(snakemake.params.significance_cutoff, snakemake.params.phenotype)) + sys.exit(0) + +logging.info('About to evaluate variants in {} genes'.format(len(regions_all.name.unique()))) + +# storing all results here +results = [] + +i_gene = 0 +i_chrom = 0 + +# conditional analysis: +# these genotypes will be used for the conditional analysis +geno_cond = VariantLoaderSnpReader(Bed(snakemake.input.conditional_geno, count_A1=True, num_threads=1)) +geno_cond.update_individuals(covariatesloader.get_iids()) + +# this file contains the mapping of associations to SNPs to condition on +conditional_list = pd.read_csv(snakemake.input.conditional_list, sep='\t', header=0) +conditional_list = conditional_list[(conditional_list.pheno == snakemake.params['phenotype']) | (conditional_list.pheno == snakemake.wildcards['pheno']) ].drop_duplicates() + +geno_cond.update_variants(intersect_ids(conditional_list.snp_rsid, geno_cond.get_vids())) +logging.info('considering {} variants as covariates for conditional tests.'.format(len(geno_cond.get_vids()))) + + +# enter the chromosome loop: +timer = Timer() +for i, (chromosome, bed, vep_tsv, mac_report, h5_lof, iid_lof, gid_lof) in enumerate(geno_vep): + + if chromosome.replace('chr','') not in regions_all.chrom.unique(): + continue + + if snakemake.params.debug: + # process only two chromosomes if debugging... 
+ if i_chrom > 2: + break + + timer.reset() + + # set up the ensembl vep loader for the chromosome + ensemblvepdf = pd.read_csv(vep_tsv, + sep='\t', + usecols=['Uploaded_variation', 'Location', 'Gene', 'pos_standardized', 'impact', 'ref', 'alt', 'cosine_similarity'], + index_col='Uploaded_variation') + + # get set of variants for the chromosome: + mac_report = maf_filter(mac_report) + filter_vids = mac_report.index.values + filter_vids = sid_filter(filter_vids) + + # filter by MAF + keep = intersect_ids(filter_vids, ensemblvepdf.index.values) + ensemblvepdf = ensemblvepdf.loc[keep] + ensemblvepdf.reset_index(inplace=True) + + # filter by impact: + ensemblvepdf = ensemblvepdf[ensemblvepdf.groupby(['Gene' ,'pos_standardized'])['impact'].transform(np.max) >= snakemake.params.min_impact ] + + # initialize the loader + eveploader = EnsemblVEPLoader(ensemblvepdf['Uploaded_variation'], ensemblvepdf['Location'], ensemblvepdf['Gene'], data = ensemblvepdf[['pos_standardized' ,'impact', 'ref', 'alt', 'cosine_similarity']].values) + + # set up the regions to loop over for the chromosome + regions = regions_all.copy() + + # discard all genes for which we don't have annotations + regions['gene'] = regions.name.str.split('_', expand=True)[0] + regions.set_index('gene', inplace=True) + genes = intersect_ids(np.unique(regions.index.values), np.unique(eveploader.pos_df.gene)) + regions = regions.loc[genes].reset_index() + regions = regions.sort_values(['chrom', 'start', 'end']) + + # set up the variant loader (missense variants) for the chromosome + plinkloader = VariantLoaderSnpReader(Bed(bed, count_A1=True, num_threads=4)) + plinkloader.update_variants(eveploader.get_vids()) + plinkloader.update_individuals(covariatesloader.get_iids()) + + # set up the protein LOF burden loader + bloader_lof = BurdenLoaderHDF5(h5_lof, iid_lof, gid_lof) + bloader_lof.update_individuals(covariatesloader.get_iids()) + + # set up local collapsing + collapser = LocalCollapsing(distance_threshold=1.) + + # set up the missense genotype + vep loading function + def get_missense(interval): + + try: + V1 = eveploader.anno_by_interval(interval, gene=interval['name'].split('_')[0]) + except KeyError: + raise GotNone + + if V1.index.empty: + raise GotNone + + vids = V1.index.get_level_values('vid') + V1 = V1.droplevel(['gene']) + + temp_genotypes, temp_vids = plinkloader.genotypes_by_id(vids, return_pos=False) + + ncarrier = np.nansum(temp_genotypes > 0, axis=0) + + temp_genotypes -= np.nanmean(temp_genotypes, axis=0) + G1 = np.ma.masked_invalid(temp_genotypes).filled(0.) 
+
+        cummac = mac_report.loc[vids].Minor
+
+        # polyphen / sift impact
+        weights = V1[1].values.astype(np.float64)
+
+        # "standardized" positions -> codon start positions
+        pos = V1[0].values.astype(np.int32)
+
+        ref = V1[2].values.astype(str)
+        alt = V1[3].values.astype(str)
+        cosine_similarity = V1[4].values.astype(np.float64)
+
+        return G1, vids, weights, ncarrier, cummac, pos, ref, alt, cosine_similarity
+
+    # set up the protein-LOF loading function
+    def get_plof(interval):
+
+        try:
+            G2 = bloader_lof.genotypes_by_id(interval['name']).astype(np.float64)
+        except KeyError:
+            G2 = None
+
+        return G2
+
+    # conditional analysis:
+    # set up the function to load the variants to condition on
+    def get_conditional(interval):
+
+        cond_snps_vid = conditional_list.loc[conditional_list.gene_name == interval['gene_name'], 'snp_rsid'].unique().tolist()
+
+        temp_genotypes, temp_vids = geno_cond.genotypes_by_id(cond_snps_vid, return_pos=False)
+        temp_genotypes -= np.nanmean(temp_genotypes, axis=0)
+
+        cond_snps = np.ma.masked_invalid(temp_genotypes).filled(0.)
+
+        return cond_snps, temp_vids
+
+    # set up the test-function for a single gene
+    def test_gene(interval, seed):
+
+        pval_dict = {}
+        pval_dict['gene'] = interval['name']
+
+        out_dir = os.path.join(snakemake.params.out_dir_stats, interval['name'])
+        os.makedirs(out_dir, exist_ok=True)
+
+        # conditional analysis:
+        # get the SNPs to condition on, and include them in the null model for the LRT
+        cond_snps, cond_snps_vid = get_conditional(interval)
+        null_model_lrt = LRTnoK(np.concatenate([X, cond_snps], axis=1), Y)
+
+        # conditional analysis:
+        # the score-test takes a second argument (G2) that allows conditioning on a second set of variants
+        def pv_score(GV, G2=cond_snps):
+            # wraps the score-test
+            pv = null_model_score.pv_alt_model(GV, G2)
+            if pv < 0.:
+                pv = null_model_score.pv_alt_model(GV, G2, method='saddle')
+            return pv
+
+        def call_test(GV, name):
+            pval_dict['pv_score_' + name] = pv_score(GV)
+            altmodel = null_model_lrt.altmodel(GV)
+            res = null_model_lrt.pv_sim_chi2(250000, simzero=False, seed=seed)
+            pval_dict['pv_lrt_' + name] = res['pv']
+            pval_dict['lrtstat_' + name] = altmodel['stat']
+            if 'h2' in altmodel:
+                pval_dict['h2_' + name] = altmodel['h2']
+            if res['pv'] != 1.:
+                for stat in ['scale', 'dof', 'mixture', 'imax']:
+                    pval_dict[stat + '_' + name] = res[stat]
+                if len(res['res']) > 0:
+                    pd.DataFrame({interval['name']: res['res']}).to_pickle(out_dir + '/{}.pkl.gz'.format(name))
+
+        # load missense variants
+        G1, vids, weights, ncarrier, cummac, pos, ref, alt, cosine_similarity = get_missense(interval)
+
+        # sanity checks
+        assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids))
+        assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expected: {}, got: {}'.format(interval['cumMAC'], cummac.sum())
+
+        # perform test using gene-specific distribution, gbvc
+        G1_burden = np.max(np.where(G1 > 0.5, np.sqrt(weights), 0.), axis=1, keepdims=True)
+        call_test(G1_burden, 'linwb')
+
+        # perform local collapsing with weights
+        if G1.shape[1] > 1:
+            G1, clusters = collapser.collapse(G1, pos, np.sqrt(weights))
+        else:
+            G1 = G1.dot(np.diag(np.sqrt(weights), k=0))
+
+        # perform test using gene-specific distribution, kernel-based
+        call_test(G1, 'linwcollapsed')
+
+        # load plof burden
+        G2 = get_plof(interval)
+
+        if G2 is not None:
+
+            # merged (single variable)
+            G1_burden_mrg = np.maximum(G2, G1_burden)
+            call_test(G1_burden_mrg, 'linwb_mrgLOF')
+
+            # concatenated
+            call_test(np.concatenate([G1, G2], axis=1), 'linwcollapsed_cLOF')
+
+        # conditional analysis: keep names of SNPs that we condition on
+        pval_dict['cond_snps'] = ','.join(cond_snps_vid)
+
+        return pval_dict
+
+    logging.info('loaders for chromosome {} initialized in {:.1f} seconds.'.format(chromosome, timer.check()))
+
+    # run tests for all genes on the chromosome
+    for _, region in regions.iterrows():
+
+        if snakemake.params.debug:
+            if i_gene > 5:
+                continue
+
+        try:
+            results.append(test_gene(region, i_gene))  # i_gene is used as the random seed to make things reproducible
+        except GotNone:
+            continue
+
+        i_gene += 1
+        logging.info('tested {} genes...'.format(i_gene))
+
+    i_chrom += 1
+    timer.reset()
+
+# export the results in a single table
+results = pd.DataFrame(results)
+results['pheno'] = snakemake.params.phenotype
+results.to_csv(snakemake.output.results_tsv, sep='\t', index=False)
diff --git a/script/python/assoc_sclrt_kernels_missense_eval_top_hits.py b/script/python/assoc_sclrt_kernels_missense_eval_top_hits.py
index 7ca7530..7d92623 100644
--- a/script/python/assoc_sclrt_kernels_missense_eval_top_hits.py
+++ b/script/python/assoc_sclrt_kernels_missense_eval_top_hits.py
@@ -193,7 +193,7 @@ def get_missense(interval):
 
         temp_genotypes, temp_vids = plinkloader.genotypes_by_id(vids, return_pos=False)
 
-        ncarrier = np.nansum(np.nansum(temp_genotypes, axis=1) >= 1)
+        ncarrier = np.nansum(temp_genotypes > 0, axis=0)
 
         temp_genotypes -= np.nanmean(temp_genotypes, axis=0)
         G1 = np.ma.masked_invalid(temp_genotypes).filled(0.)
@@ -287,8 +287,8 @@ def call_lrt(GV, name, vids=None):
         call_lrt(G1.dot(np.diag(np.sqrt(weights), k=0)), 'variant_pvals')  # coeficients after weight adjustment (estimated *jointly*)
 
         # sanity checks
-        assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids))
-        assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expeced: {}, got: {}'.format(interval['cumMAC'], cummac.sum())
+        # assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids))
+        # assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expeced: {}, got: {}'.format(interval['cumMAC'], cummac.sum())
 
         # do a score burden test (max weighted), this is different than the baseline!
G1_burden = np.max(np.where(G1 > 0.5, np.sqrt(weights), 0.), axis=1, keepdims=True) diff --git a/script/python/assoc_sclrt_kernels_missense_retest_top_hits.py b/script/python/assoc_sclrt_kernels_missense_retest_top_hits.py index 8e96fc6..ffd2218 100644 --- a/script/python/assoc_sclrt_kernels_missense_retest_top_hits.py +++ b/script/python/assoc_sclrt_kernels_missense_retest_top_hits.py @@ -219,7 +219,7 @@ def get_missense(interval): temp_genotypes, temp_vids = plinkloader.genotypes_by_id(vids, return_pos=False) - ncarrier = np.nansum(np.nansum(temp_genotypes, axis=1) >= 1) + ncarrier = np.nansum(temp_genotypes > 0, axis=0) temp_genotypes -= np.nanmean(temp_genotypes, axis=0) G1 = np.ma.masked_invalid(temp_genotypes).filled(0.) @@ -285,8 +285,8 @@ def call_test(GV, name): G1, vids, weights, ncarrier, cummac, pos, ref, alt, cosine_similarity = get_missense(interval) # sanity checks - assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids)) - assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expeced: {}, got: {}'.format(interval['cumMAC'], cummac.sum()) + # assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids)) + # assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expeced: {}, got: {}'.format(interval['cumMAC'], cummac.sum()) # perform test using gene-specific distribution, gbvc G1_burden = np.max(np.where(G1 > 0.5, np.sqrt(weights), 0.), axis=1, keepdims=True) diff --git a/script/python/assoc_sclrt_kernels_spliceai_conditional_analysis.py b/script/python/assoc_sclrt_kernels_spliceai_conditional_analysis.py new file mode 100644 index 0000000..8ccb2bd --- /dev/null +++ b/script/python/assoc_sclrt_kernels_spliceai_conditional_analysis.py @@ -0,0 +1,424 @@ +import os +# os.environ["OMP_NUM_THREADS"] = "16" + +import logging + +logging.basicConfig(filename=snakemake.log[0], level=logging.INFO) + +import pandas as pd +import numpy as np + +# seak imports +from seak.data_loaders import intersect_ids, EnsemblVEPLoader, VariantLoaderSnpReader, CovariatesLoaderCSV +from seak.scoretest import ScoretestNoK +from seak.lrt import LRTnoK +import gzip + +from pysnptools.snpreader import Bed +import pickle +import sys + +from util.association import BurdenLoaderHDF5 +from util import Timer + + +class GotNone(Exception): + pass + + +# set up the covariatesloader + +covariatesloader = CovariatesLoaderCSV(snakemake.params.phenotype, + snakemake.input.covariates_tsv, + snakemake.params.covariate_column_names, + sep='\t', + path_to_phenotypes=snakemake.input.phenotypes_tsv) + +# initialize the null models +Y, X = covariatesloader.get_one_hot_covariates_and_phenotype('noK') + +null_model_score = ScoretestNoK(Y, X) + +# conditional analysis: +# we need to fit gene-specific null models for the LRT! 
+# null_model_lrt = LRTnoK(X, Y) +null_model_lrt = None + + +# set up function to filter variants: +def maf_filter(mac_report): + # load the MAC report, keep only observed variants with MAF below threshold + mac_report = pd.read_csv(mac_report, sep='\t', usecols=['SNP', 'MAF', 'Minor', 'alt_greater_ref']) + + if snakemake.params.filter_highconfidence: + vids = mac_report.SNP[(mac_report.MAF < snakemake.params.max_maf) & (mac_report.Minor > 0) & ~(mac_report.alt_greater_ref.astype(bool)) & (mac_report.hiconf_reg.astype(bool))] + else: + vids = mac_report.SNP[(mac_report.MAF < snakemake.params.max_maf) & (mac_report.Minor > 0) & ~(mac_report.alt_greater_ref.astype(bool))] + + # this has already been done in filter_variants.py + # load the variant annotation, keep only variants in high-confidece regions + # anno = pd.read_csv(anno_tsv, sep='\t', usecols=['Name', 'hiconf_reg']) + # vids_highconf = anno.Name[anno.hiconf_reg.astype(bool).values] + # vids = np.intersect1d(vids, vids_highconf) + + return mac_report.set_index('SNP').loc[vids] + +def sid_filter(vids): + + if 'sid_include' in snakemake.config: + print('limiting to variants present in {}'.format(snakemake.config['sid_include'])) + + infilepath = snakemake.config['sid_include'] + + if infilepath.endswith('gz'): + with gzip.open(infilepath,'rt') as infile: + sid = np.array([l.rstrip() for l in infile]) + else: + with open(infilepath, 'r') as infile: + sid = np.array([l.rstrip() for l in infile]) + else: + return vids + + return intersect_ids(vids, sid) + + +def get_regions(): + # load the results, keep those below a certain p-value + present in conditional analysis list + results = pd.read_csv(snakemake.input.results_tsv, sep='\t') + + kern = snakemake.params.kernels + if isinstance(kern, str): + kern = [kern] + + pvcols_score = ['pv_score_' + k for k in kern] + pvcols_lrt = ['pv_lrt_' + k for k in kern] + statcols = ['lrtstat_' + k for k in kern] + results = results[['gene', 'n_snp', 'cumMAC', 'nCarrier'] + statcols + pvcols_score + pvcols_lrt] + + results.rename(columns={'gene':'name'}, inplace=True) + + # set up the regions to loop over for the chromosome + regions = pd.read_csv(snakemake.input.regions_bed, sep='\t', header=None, usecols=[0 ,1 ,2 ,3, 5], dtype={0 :str, 1: np.int32, 2 :np.int32, 3 :str, 5:str}) + regions.columns = ['chrom', 'start', 'end', 'name', 'strand'] + regions['strand'] = regions.strand.map({'+': 'plus', '-': 'minus'}) + + # conditional analysis, keep only genes that are provided in conditional_list + regions['gene_name'] = regions['name'].str.split('_', expand=True)[1] + + _conditional_list = pd.read_csv(snakemake.input.conditional_list, sep='\t', header=0, usecols=['gene_name','pheno']) + _conditional_list = _conditional_list[(_conditional_list.pheno == snakemake.wildcards['pheno']) | (_conditional_list.pheno == snakemake.params.phenotype) ] + _conditional_list = _conditional_list.drop_duplicates() + + if len(_conditional_list) == 0: + logging.info('No genes pass significance threshold for phenotype {}, exiting.'.format(snakemake.params.phenotype)) + with gzip.open(snakemake.output.results_tsv, 'wt') as outfile: + outfile.write('# no significant hits for phenotype {}. 
\n'.format(snakemake.params.phenotype)) + sys.exit(0) + else: + regions = regions.merge(_conditional_list, how='right') + + regions = regions.merge(results, how='left', on=['name'], validate='one_to_one') + + sig_genes = [results.name[results[k] < snakemake.params.significance_cutoff].values for k in pvcols_score + pvcols_lrt] + sig_genes = np.unique(np.concatenate(sig_genes)) + + if len(sig_genes) == 0: + return None + + regions = regions.loc[regions.name.isin(sig_genes)] + + return regions + +# genotype path, vep-path: +assert len(snakemake.params.ids) == len (snakemake.input.bed), 'Error: length of chromosome IDs does not match length of genotype files' +geno_vep = zip(snakemake.params.ids, snakemake.input.bed, snakemake.input.vep_tsv, snakemake.input.ensembl_vep_tsv, snakemake.input.mac_report, snakemake.input.h5_lof, snakemake.input.iid_lof, snakemake.input.gid_lof) + +# get the top hits +regions_all = get_regions() +if regions_all is None: + logging.info('No genes pass significance threshold, exiting.') + import gzip + with gzip.open(snakemake.output.results_tsv, 'wt') as outfile: + outfile.write('# no hits below specified threshold ({}) for phenotype {}. \n'.format(snakemake.params.significance_cutoff, snakemake.params.phenotype)) + sys.exit(0) + +logging.info('About to evaluate variants in {} genes'.format(len(regions_all.name.unique()))) + +# storing all results here +results = [] + +i_gene = 0 +i_chrom = 0 + +# conditional analysis: +# these genotypes will be used for the conditional analysis +geno_cond = VariantLoaderSnpReader(Bed(snakemake.input.conditional_geno, count_A1=True, num_threads=1)) +geno_cond.update_individuals(covariatesloader.get_iids()) + +# this file contains the mapping of associations to SNPs to condition on +conditional_list = pd.read_csv(snakemake.input.conditional_list, sep='\t', header=0) +conditional_list = conditional_list[(conditional_list.pheno == snakemake.params['phenotype']) | (conditional_list.pheno == snakemake.wildcards['pheno']) ].drop_duplicates() + +geno_cond.update_variants(intersect_ids(conditional_list.snp_rsid, geno_cond.get_vids())) +logging.info('considering {} variants as covariates for conditional tests.'.format(len(geno_cond.get_vids()))) + +# enter the chromosome loop: +timer = Timer() +for i, (chromosome, bed, vep_tsv, ensembl_vep_tsv, mac_report, h5_lof, iid_lof, gid_lof) in enumerate(geno_vep): + + if chromosome.replace('chr','') not in regions_all.chrom.unique(): + continue + + if snakemake.params.debug: + # process only two chromosomes if debugging... 
+ if i_chrom > 2: + break + + timer.reset() + + # set up the ensembl vep loader for the chromosome + spliceaidf = pd.read_csv(vep_tsv, + sep='\t', + usecols=['name', 'chrom', 'end', 'gene', 'max_effect', 'DS_AG', 'DS_AL', 'DS_DG', 'DS_DL', 'DP_AG', 'DP_AL', 'DP_DG', 'DP_DL'], + index_col='name') + + # get set of variants for the chromosome: + mac_report = maf_filter(mac_report) + filter_vids = mac_report.index.values + filter_vids = sid_filter(filter_vids) + + # filter by MAF + keep = intersect_ids(filter_vids, spliceaidf.index.values) + spliceaidf = spliceaidf.loc[keep] + spliceaidf.reset_index(inplace=True) + + # filter by impact: + spliceaidf = spliceaidf[spliceaidf.max_effect >= snakemake.params.min_impact] + + # set up the regions to loop over for the chromosome + regions = regions_all.copy() + + # discard all genes for which we don't have annotations + gene_ids = regions.name.str.split('_', expand=True) # table with two columns, ensembl-id and gene-name + regions['gene'] = gene_ids[1] # this is the gene name + regions['ensembl_id'] = gene_ids[0] + regions.set_index('gene', inplace=True) + genes = intersect_ids(np.unique(regions.index.values), np.unique(spliceaidf.gene)) # intersection of gene names + regions = regions.loc[genes].reset_index() # subsetting + regions = regions.sort_values(['chrom', 'start', 'end']) + + # check if the variants are protein LOF variants, load the protein LOF variants: + ensemblvepdf = pd.read_csv(ensembl_vep_tsv, sep='\t', usecols=['Uploaded_variation', 'Gene']) + + # this column will contain the gene names: + genes = intersect_ids(np.unique(ensemblvepdf.Gene.values), regions.ensembl_id) # intersection of ensembl gene ids + ensemblvepdf = ensemblvepdf.set_index('Gene').loc[genes].reset_index() + ensemblvepdf['gene'] = gene_ids.set_index(0).loc[ensemblvepdf.Gene.values].values + + # set up the merge + ensemblvepdf.drop(columns=['Gene'], inplace=True) # get rid of the ensembl ids, will use gene names instead + ensemblvepdf.rename(columns={'Uploaded_variation': 'name'}, inplace=True) + ensemblvepdf['is_plof'] = 1. + + ensemblvepdf = ensemblvepdf[~ensemblvepdf.duplicated()] # if multiple ensembl gene ids map to the same gene names, this prevents a crash. 
+
+    # we add a column to the dataframe indicating whether the variant is already annotated as protein loss of function by the ensembl variant effect predictor
+    spliceaidf = pd.merge(spliceaidf, ensemblvepdf, on=['name', 'gene'], how='left', validate='one_to_one')
+    spliceaidf['is_plof'] = spliceaidf['is_plof'].fillna(0.).astype(bool)
+
+    # initialize the loader
+    # Note: we use "end" here because start + 1 = end, and we need 1-based coordinates (this would break if we had indels)
+    eveploader = EnsemblVEPLoader(spliceaidf['name'], spliceaidf['chrom'].astype('str') + ':' + spliceaidf['end'].astype('str'), spliceaidf['gene'], data=spliceaidf[['max_effect', 'is_plof', 'DS_AG', 'DS_AL', 'DS_DG', 'DS_DL', 'DP_AG', 'DP_AL', 'DP_DG', 'DP_DL']].values)
+
+    # set up the variant loader (splice variants) for the chromosome
+    plinkloader = VariantLoaderSnpReader(Bed(bed, count_A1=True, num_threads=4))
+    plinkloader.update_variants(eveploader.get_vids())
+    plinkloader.update_individuals(covariatesloader.get_iids())
+
+    # set up the protein LOF burden loader
+    bloader_lof = BurdenLoaderHDF5(h5_lof, iid_lof, gid_lof)
+    bloader_lof.update_individuals(covariatesloader.get_iids())
+
+    # set up the splice genotype + vep loading function
+    def get_splice(interval):
+
+        try:
+            V1 = eveploader.anno_by_interval(interval, gene=interval['name'].split('_')[1])
+        except KeyError:
+            raise GotNone
+
+        if V1.index.empty:
+            raise GotNone
+
+        vids = V1.index.get_level_values('vid')
+        V1 = V1.droplevel(['gene'])
+
+        temp_genotypes, temp_vids = plinkloader.genotypes_by_id(vids, return_pos=False)
+
+        ncarrier = np.nansum(temp_genotypes > 0, axis=0)
+
+        # mean-impute missing genotypes (after centering, masked entries filled with 0. equal the column mean)
+        temp_genotypes -= np.nanmean(temp_genotypes, axis=0)
+        G1 = np.ma.masked_invalid(temp_genotypes).filled(0.)
+
+        cummac = mac_report.loc[vids].Minor
+
+        # spliceAI max score
+        weights = V1[0].values.astype(np.float64)
+
+        is_plof = V1[1].values.astype(bool)
+
+        splice_preds_all = V1.iloc[:, 2:]
+        splice_preds_all.columns = ['DS_AG', 'DS_AL', 'DS_DG', 'DS_DL', 'DP_AG', 'DP_AL', 'DP_DG', 'DP_DL']
+
+        # "standardized" positions -> codon start positions
+        # pos = V1[0].values.astype(np.int32)
+
+        return G1, vids, weights, ncarrier, cummac, is_plof, splice_preds_all
+
+    # set up the protein-LOF loading function
+    def get_plof(interval):
+
+        try:
+            G2 = bloader_lof.genotypes_by_id(interval['name']).astype(np.float64)
+        except KeyError:
+            G2 = None
+
+        return G2
+
+    # conditional analysis
+    # set up the function to load the variants to condition on
+    def get_conditional(interval):
+
+        cond_snps_vid = conditional_list.loc[conditional_list.gene_name == interval['gene_name'], 'snp_rsid'].unique().tolist()
+
+        temp_genotypes, temp_vids = geno_cond.genotypes_by_id(cond_snps_vid, return_pos=False)
+        temp_genotypes -= np.nanmean(temp_genotypes, axis=0)
+
+        cond_snps = np.ma.masked_invalid(temp_genotypes).filled(0.)
+
+        return cond_snps, temp_vids
+
+    # set up the test-function for a single gene
+    def test_gene(interval, seed):
+
+        pval_dict = {}
+        pval_dict['gene'] = interval['name']
+
+        out_dir = os.path.join(snakemake.params.out_dir_stats, interval['name'])
+        os.makedirs(out_dir, exist_ok=True)
+
+        # conditional analysis:
+        # get the snps to condition on, and include them in the null model for the LRT
+        cond_snps, cond_snps_vid = get_conditional(interval)
+        null_model_lrt = LRTnoK(np.concatenate([X, cond_snps], axis=1), Y)
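+
+        # why this conditions the test (sketch): the conditioning SNPs become
+        # part of the null-model covariates, so their signal is absorbed before
+        # the gene-based alternative is evaluated; only association beyond the
+        # known hits remains. Assumes Y is the phenotype array loaded earlier.
+        assert cond_snps.shape[0] == Y.shape[0], 'conditioning genotypes and phenotype must cover the same individuals'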
+
+        # conditional analysis:
+        # the score-test takes a second argument (G2) that allows conditioning on a second set of variants...
+        def pv_score(GV, G2=cond_snps):
+            # wraps the score-test
+            pv = null_model_score.pv_alt_model(GV, G2)
+            if pv < 0.:
+                pv = null_model_score.pv_alt_model(GV, G2, method='saddle')
+            return pv
+
+        def call_test(GV, name):
+            pval_dict['pv_score_' + name] = pv_score(GV)
+
+            altmodel = null_model_lrt.altmodel(GV)
+
+            res = null_model_lrt.pv_sim_chi2(250000, simzero=False, seed=seed)
+            pval_dict['pv_lrt_' + name] = res['pv']
+            pval_dict['lrtstat_' + name] = altmodel['stat']
+            if 'h2' in altmodel:
+                pval_dict['h2_' + name] = altmodel['h2']
+            if res['pv'] != 1.:
+                for stat in ['scale', 'dof', 'mixture', 'imax']:
+                    pval_dict[stat + '_' + name] = res[stat]
+                if len(res['res']) > 0:
+                    pd.DataFrame({interval['name']: res['res']}).to_pickle(out_dir + '/{}.pkl.gz'.format(name))
+
+        # load splice variants
+        G1, vids, weights, ncarrier, cummac, is_plof, splice_preds_all = get_splice(interval)
+        # keep indicates which variants are NOT "protein LOF" variants, i.e. variants not already identified by the ensembl VEP
+        keep = ~is_plof
+
+        # sanity checks
+        assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids))
+        assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expected: {}, got: {}'.format(interval['cumMAC'], cummac.sum())
+
+        # do a score burden test (max weighted), this is different from the baseline!
+        # for every individual, the burden is sqrt(weight) of the highest-weighted variant they carry
+        G1_burden = np.max(np.where(G1 > 0.5, np.sqrt(weights), 0.), axis=1, keepdims=True)
+
+        try:
+            call_test(G1_burden, 'linwb')
+        except AssertionError as err:
+            out_dump_np = '{}_{}_{{}}_covar.npy'.format(interval['name'], snakemake.params.phenotype)
+            np.save(out_dump_np.format('G1burden'), G1_burden)
+            np.save(out_dump_np.format('G1'), G1)
+            np.save(out_dump_np.format('Covar'), np.concatenate([X, cond_snps], axis=1))
+            np.save(out_dump_np.format('Y'), Y)
+            logging.error('AssertionError encountered when testing gene {}, conditioning on {}. dumping Y, covariates and G1 to {}'.format(interval['name'], ','.join(cond_snps_vid), out_dump_np.format('*')))
+            raise err
+
+        # linear weighted kernel
+        G1 = G1.dot(np.diag(np.sqrt(weights), k=0))
+
+        # do a score test (linear weighted)
+        call_test(G1, 'linw')
+
+        # load plof burden
+        G2 = get_plof(interval)
+
+        if G2 is not None:
+
+            if np.any(keep):
+
+                # merged (single variable)
+                G1_burden_mrg = np.maximum(G2, G1_burden)
+                call_test(G1_burden_mrg, 'linwb_mrgLOF')
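+
+                # the merged burden is the element-wise maximum of the 0/1 pLOF
+                # burden and the max-weighted spliceAI burden, e.g. rows [1.] and
+                # [0.7] merge to [1.] (values illustrative only); a carrier on
+                # either kernel therefore counts
+                assert np.all(G1_burden_mrg >= G1_burden), 'the merged burden dominates both inputs by construction'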
+
+                # concatenated ( >= 2 variables)
+                # we separate out the ones that are already part of the protein LOF variants!
+                G1 = np.concatenate([G1[:, keep], G2], axis=1)
+                call_test(G1, 'linw_cLOF')
+            else:
+                logging.info('All Splice-AI variants for gene {} were already identified by the Ensembl variant effect predictor'.format(interval['name']))
+
+        # conditional analysis: keep names of SNPs that we condition on
+        pval_dict['cond_snps'] = ','.join(cond_snps_vid)
+
+        return pval_dict
+
+    logging.info('loaders for chromosome {} initialized in {:.1f} seconds.'.format(chromosome, timer.check()))
+
+    # run tests for all genes on the chromosome
+    for _, region in regions.iterrows():
+
+        if snakemake.params.debug:
+            if i_gene > 2:
+                continue
+
+        try:
+            results.append(test_gene(region, i_gene))  # i_gene is used as the random seed to make things reproducible
+        except GotNone:
+            continue
+
+        i_gene += 1
+        logging.info('tested {} genes...'.format(i_gene))
+
+    i_chrom += 1
+    timer.reset()
+
+# export the results in a single table
+results = pd.DataFrame(results)
+results['pheno'] = snakemake.params.phenotype
+results.to_csv(snakemake.output.results_tsv, sep='\t', index=False)
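+
+# closing log line (added sketch): one row per tested gene, which makes failed
+# or truncated runs easy to spot in the logs
+logging.info('wrote {} rows for phenotype {} to {}'.format(len(results), snakemake.params.phenotype, snakemake.output.results_tsv))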
diff --git a/script/python/assoc_sclrt_kernels_spliceai_eval_top_hits.py b/script/python/assoc_sclrt_kernels_spliceai_eval_top_hits.py
index a0f8c90..cb30d6d 100644
--- a/script/python/assoc_sclrt_kernels_spliceai_eval_top_hits.py
+++ b/script/python/assoc_sclrt_kernels_spliceai_eval_top_hits.py
@@ -208,10 +208,11 @@ def get_splice(interval):
 
     temp_genotypes, temp_vids = plinkloader.genotypes_by_id(vids, return_pos=False)
 
+    ncarrier = np.nansum(temp_genotypes > 0, axis=0)
+
     temp_genotypes -= np.nanmean(temp_genotypes, axis=0)
     G1 = np.ma.masked_invalid(temp_genotypes).filled(0.)
-    ncarrier = np.sum(G1 > 0.5, axis=0)
 
     cummac = mac_report.loc[vids].Minor
 
     # spliceAI max score
@@ -305,8 +306,8 @@ def call_lrt(GV, name, vids=None):
     call_lrt(G1.dot(np.diag(np.sqrt(weights), k=0)), 'variant_pvals')  # single variant coefficients estimated *jointly* after weighting
 
     # sanity checks
-    assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids))
-    assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expeced: {}, got: {}'.format(interval['cumMAC'], cummac.sum())
+    # assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids))
+    # assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expected: {}, got: {}'.format(interval['cumMAC'], cummac.sum())
 
     # do a score burden test (max weighted), this is different than the baseline!
     G1_burden = np.max(np.where(G1 > 0.5, np.sqrt(weights), 0.), axis=1, keepdims=True)
diff --git a/script/python/assoc_sclrt_kernels_spliceai_retest_top_hits.py b/script/python/assoc_sclrt_kernels_spliceai_retest_top_hits.py
index d138006..7042016 100644
--- a/script/python/assoc_sclrt_kernels_spliceai_retest_top_hits.py
+++ b/script/python/assoc_sclrt_kernels_spliceai_retest_top_hits.py
@@ -236,7 +236,7 @@ def get_splice(interval):
 
     temp_genotypes, temp_vids = plinkloader.genotypes_by_id(vids, return_pos=False)
 
-    ncarrier = np.nansum(np.nansum(temp_genotypes, axis=1) >= 1)
+    ncarrier = np.nansum(temp_genotypes > 0, axis=0)
 
     temp_genotypes -= np.nanmean(temp_genotypes, axis=0)
     G1 = np.ma.masked_invalid(temp_genotypes).filled(0.)
@@ -307,8 +307,8 @@ def call_test(GV, name):
     keep = ~is_plof
 
     # sanity checks
-    assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids))
-    assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expeced: {}, got: {}'.format(interval['cumMAC'], cummac.sum())
+    # assert len(vids) == interval['n_snp'], 'Error: number of variants does not match! expected: {} got: {}'.format(interval['n_snp'], len(vids))
+    # assert cummac.sum() == interval['cumMAC'], 'Error: cumMAC does not match! expected: {}, got: {}'.format(interval['cumMAC'], cummac.sum())
 
     # do a score burden test (max weighted), this is different than the baseline!
diff --git a/script/python/gather_complete_cases.py b/script/python/gather_complete_cases.py
index 6a8a86d..f43a793 100644
--- a/script/python/gather_complete_cases.py
+++ b/script/python/gather_complete_cases.py
@@ -1,4 +1,5 @@
 import pandas as pd
+import numpy as np
 from util.snake import clean_str
 
 covar = pd.read_csv(snakemake.input.covar_tsv, sep='\t')
@@ -7,13 +8,15 @@
 pheno = pd.read_csv(snakemake.input.pheno_tsv, sep='\t')
 
-not_nan = {}
-for p in pheno.columns[1:]:
-    not_nan[clean_str(p)] = pheno[[pheno.columns[0],p]].dropna()[pheno.columns[0]].values
+phenos = [ clean_str(x) for x in pheno.columns.tolist()[1:] ]
 
-for k, v in not_nan.items():
+for i, k in enumerate(phenos):
+
+    iid = pheno.iloc[:,[0,i+1]].dropna().iloc[:,0].values
+    iid = np.intersect1d(iid, covar.iloc[:,0].values)
+
     with open('data/covariates/complete_cases/{}.txt'.format(k), 'x') as outfile:
-        for value in v:
+        for value in iid:
             outfile.write('{}\n'.format(value))
 
 with open('data/covariates/complete_cases/covariates.txt', 'x') as outfile:
diff --git a/script/python/mac_report.py b/script/python/mac_report.py
index 284c06c..d21651b 100644
--- a/script/python/mac_report.py
+++ b/script/python/mac_report.py
@@ -27,6 +27,7 @@ def get_args():
     p.add_argument('--hwe', required=False, default=0.00001, type=float)
     p.add_argument('--log', required=False, default='log.txt')
     p.add_argument('--threads', default=4, type=int)
+    p.add_argument('--mem_mb', default=4000, type=int)
     args = p.parse_args()
     return(args)
@@ -73,18 +74,18 @@ def main():
     if args.hwe > 0.:
         tmpfilename_vid = '{}.snplist'.format(args.out_prefix)
         if args.iid is not None:
-            plink_command = [args.plink2_path, '--keep', tmpfilename_iid, '--bfile', args.bed, '--threads', str(args.threads), '--hwe', str(args.hwe), '--write-snplist', '--out', args.out_prefix]
+            plink_command = [args.plink2_path, '--keep', tmpfilename_iid, '--bfile', args.bed, '--memory', str(args.mem_mb-2000), '--threads', str(args.threads), '--hwe', str(args.hwe), '--write-snplist', '--out', args.out_prefix]
         else:
-            plink_command = [args.plink2_path, '--bfile', args.bed, '--threads', str(args.threads), '--hwe', str(args.hwe), '--write-snplist', '--out', args.out_prefix]
+            plink_command = [args.plink2_path, '--bfile', args.bed, '--memory', str(args.mem_mb-2000), '--threads', str(args.threads), '--hwe', str(args.hwe), '--write-snplist', '--out', args.out_prefix]
 
     logging.info('plink command: {}'.format(' '.join(plink_command)))
     _ = subprocess.run(plink_command, check=True)
 
     # plink command to create count reports
     plink_command = [args.plink_path, '--bfile', args.bed]
     if args.iid is not None:
-        plink_command += ['--keep', tmpfilename_iid, '--freq', 'counts', 'gz', '--threads', str(args.threads), '--out', args.out_prefix]
+        plink_command += ['--keep', tmpfilename_iid, '--freq', 'counts', 'gz', '--memory', str(args.mem_mb-2000), '--threads', str(args.threads), '--out', args.out_prefix]
     else:
-        plink_command += ['--freq', 'counts', 'gz', '--threads', str(args.threads), '--out', args.out_prefix ]
+        plink_command += ['--freq', 'counts', 'gz', '--threads', str(args.threads), '--memory', str(args.mem_mb-2000), '--out', args.out_prefix ]
 
     if args.hwe > 0.:
         plink_command += ['--extract', tmpfilename_vid]
str(args.threads), '--out', args.out_prefix ] + plink_command += ['--freq', 'counts', 'gz', '--threads', str(args.threads),'--memory', str(args.mem_mb-2000), '--out', args.out_prefix ] if args.hwe > 0.: plink_command += ['--extract', tmpfilename_vid] diff --git a/slurm/config.yaml b/slurm/config.yaml new file mode 100644 index 0000000..f7eca57 --- /dev/null +++ b/slurm/config.yaml @@ -0,0 +1,24 @@ +cluster: + mkdir -p logs/{rule} && + sbatch + --partition={resources.partition} + --cpus-per-task={threads} + --mem={resources.mem_mb} + --time={resources.time} + --job-name=smk-{rule}-{wildcards} + --output=logs/{rule}/{rule}-{wildcards}-%j.out +default-resources: + - partition=vcpu,hpcpu + - mem_mb=4000 + - time="01:00:00" +restart-times: 1 +max-jobs-per-second: 10 +max-status-checks-per-second: 1 +local-cores: 1 +latency-wait: 5 +jobs: 50 +keep-going: True +rerun-incomplete: True +printshellcmds: True +scheduler: greedy +use-conda: True diff --git a/snake/association.smk b/snake/association.smk index 706314f..550f3dc 100644 --- a/snake/association.smk +++ b/snake/association.smk @@ -3,6 +3,7 @@ # the {id} wildcard refers to the different chromosomes # the {pheno} wildcard refers to the different phenotypes + rule assoc_baseline_scoretest: # runs "baseline" association tests: # score tests for indicator variables @@ -25,6 +26,11 @@ rule assoc_baseline_scoretest: filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence] output: results_tsv = 'work/association/baseline_scoretest/{filter_highconfidence}/{pheno}/results_{id}.tsv.gz' + threads: + 1 + resources: + mem_mb=4000, + time='2:00:00' log: 'logs/association/baseline_scoretest/{filter_highconfidence}/{pheno}/{id}.log' conda: @@ -48,6 +54,9 @@ rule pLOF_nsnp_cummac: tsv='work/association/baseline_scoretest/{filter_highconfidence}/{pheno}/pLOF_nSNP_cumMAC.tsv.gz' conda: '../env/seak.yml' + resources: + mem_mb=4000, + time='0:30:00' script: '../script/python/pLOF_nSNP_cumMAC.py' @@ -82,6 +91,11 @@ rule assoc_missense_localcollapsing: filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence], sclrt_nominal_significance_cutoff = 0.1, debug=False + resources: + mem_mb=get_mem_mb(12000,1.5), + time="48:00:00" + threads: + 1 log: 'logs/association/sclrt_kernels_missense/{filter_highconfidence}_{pheno}.log' conda: @@ -111,7 +125,7 @@ rule assoc_missense_localcollapsing_eval_top_hits: output: out_ok = touch('work/association/sclrt_kernels_missense/{filter_highconfidence}/{pheno}/top_hits/all.ok') params: - kernels = ['lincollapsed','linwcollapsed','lincollapsed_cLOF','linwcollapsed_cLOF','linb','linwb','linb_mrgLOF','linwb_mrgLOF'], # genes with p < 1e-7 in one of these kernels will be analysed in detail + kernels = ['linwcollapsed','linwcollapsed_cLOF','linwb','linwb_mrgLOF'], # genes with p < 1e-7 in one of these kernels will be analysed in detail phenotype = lambda wc: phenotypes[ wc.pheno ], covariate_column_names = config['covariate_column_names'], max_maf = config['maf_cutoff'], @@ -119,6 +133,11 @@ rule assoc_missense_localcollapsing_eval_top_hits: out_dir_stats = lambda wc: 'work/association/sclrt_kernels_missense/{filter_highconfidence}/{pheno}/top_hits/'.format(filter_highconfidence=wc.filter_highconfidence, pheno=wc.pheno), ids = plinkfiles.getIds(), filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence] + resources: + mem_mb=get_mem_mb(10000,1.5), + time="02:30:00" + threads: + 1 log: 
diff --git a/snake/association.smk b/snake/association.smk
index 706314f..550f3dc 100644
--- a/snake/association.smk
+++ b/snake/association.smk
@@ -3,6 +3,7 @@
 # the {id} wildcard refers to the different chromosomes
 # the {pheno} wildcard refers to the different phenotypes
 
+
 rule assoc_baseline_scoretest:
     # runs "baseline" association tests:
     # score tests for indicator variables
@@ -25,6 +26,11 @@ rule assoc_baseline_scoretest:
         filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence]
     output:
         results_tsv = 'work/association/baseline_scoretest/{filter_highconfidence}/{pheno}/results_{id}.tsv.gz'
+    threads:
+        1
+    resources:
+        mem_mb=4000,
+        time='2:00:00'
     log:
         'logs/association/baseline_scoretest/{filter_highconfidence}/{pheno}/{id}.log'
     conda:
@@ -48,6 +54,9 @@ rule pLOF_nsnp_cummac:
         tsv='work/association/baseline_scoretest/{filter_highconfidence}/{pheno}/pLOF_nSNP_cumMAC.tsv.gz'
     conda:
         '../env/seak.yml'
+    resources:
+        mem_mb=4000,
+        time='0:30:00'
     script:
         '../script/python/pLOF_nSNP_cumMAC.py'
@@ -82,6 +91,11 @@ rule assoc_missense_localcollapsing:
         filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence],
         sclrt_nominal_significance_cutoff = 0.1,
         debug=False
+    resources:
+        mem_mb=get_mem_mb(12000,1.5),
+        time="48:00:00"
+    threads:
+        1
     log:
         'logs/association/sclrt_kernels_missense/{filter_highconfidence}_{pheno}.log'
     conda:
@@ -111,7 +125,7 @@ rule assoc_missense_localcollapsing_eval_top_hits:
     output:
         out_ok = touch('work/association/sclrt_kernels_missense/{filter_highconfidence}/{pheno}/top_hits/all.ok')
     params:
-        kernels = ['lincollapsed','linwcollapsed','lincollapsed_cLOF','linwcollapsed_cLOF','linb','linwb','linb_mrgLOF','linwb_mrgLOF'], # genes with p < 1e-7 in one of these kernels will be analysed in detail
+        kernels = ['linwcollapsed','linwcollapsed_cLOF','linwb','linwb_mrgLOF'], # genes with p < 1e-7 in one of these kernels will be analysed in detail
         phenotype = lambda wc: phenotypes[ wc.pheno ],
         covariate_column_names = config['covariate_column_names'],
         max_maf = config['maf_cutoff'],
@@ -119,6 +133,11 @@ rule assoc_missense_localcollapsing_eval_top_hits:
         out_dir_stats = lambda wc: 'work/association/sclrt_kernels_missense/{filter_highconfidence}/{pheno}/top_hits/'.format(filter_highconfidence=wc.filter_highconfidence, pheno=wc.pheno),
         ids = plinkfiles.getIds(),
         filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence]
+    resources:
+        mem_mb=get_mem_mb(10000,1.5),
+        time="02:30:00"
+    threads:
+        1
     log:
         'logs/association/sclrt_kernels_missense_eval_top_hits/{filter_highconfidence}_{pheno}.log'
     conda:
@@ -162,6 +181,11 @@ rule assoc_missense_localcollapsing_retest_top_hits:
         filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence],
         debug=False,
         random=False
+    resources:
+        mem_mb=get_mem_mb(15000,1.5),
+        time="02:30:00"
+    threads:
+        1
     log:
         'logs/association/sclrt_kernels_missense_retest_top_hits/{filter_highconfidence}_{pheno}.log'
     conda:
@@ -205,6 +229,11 @@ rule assoc_missense_localcollapsing_retest_random:
         filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence],
         debug=False,
         random=True
+    resources:
+        mem_mb=get_mem_mb(10000,1.5),
+        time="2:30:00"
+    threads:
+        1
     log:
         'logs/association/sclrt_kernels_missense_retest_random/{filter_highconfidence}_{pheno}.log'
     conda:
@@ -248,6 +277,11 @@ rule assoc_spliceai_linw:
         sclrt_nominal_significance_cutoff = 0.1
     log:
         'logs/association/sclrt_kernels_spliceai/{filter_highconfidence}_{pheno}.log'
+    resources:
+        mem_mb=get_mem_mb(12000,1.5),
+        time="24:00:00"
+    threads:
+        1
     conda:
         '../env/seak.yml'
     script:
@@ -287,6 +321,11 @@ rule assoc_spliceai_linw_eval_top_hits:
        debug = False
     log:
         'logs/association/sclrt_kernels_spliceai_eval_top_hits/{filter_highconfidence}_{pheno}.log'
+    resources:
+        mem_mb=get_mem_mb(12000,1.5),
+        time="2:30:00"
+    threads:
+        1
     conda:
         '../env/seak.yml'
     script:
@@ -330,6 +369,11 @@ rule assoc_spliceai_linw_retest_top_hits:
         random=False
     log:
         'logs/association/sclrt_kernels_spliceai_retest_top_hits/{filter_highconfidence}_{pheno}.log'
+    resources:
+        mem_mb=get_mem_mb(10000,1.5),
+        time="1:30:00"
+    threads:
+        1
     conda:
         '../env/seak.yml'
     script:
@@ -372,6 +416,11 @@ rule assoc_spliceai_linw_retest_random:
         random = True
     log:
         'logs/association/sclrt_kernels_spliceai_retest_top_hits/{filter_highconfidence}_{pheno}.log'
+    resources:
+        mem_mb=get_mem_mb(10000,1.5),
+        time="1:30:00"
+    threads:
+        1
     conda:
         '../env/seak.yml'
     script:
@@ -460,6 +509,11 @@ rule assoc_deepripe_multiple_cholesky:
         sclrt_nominal_significance_cutoff = 0.1
     log:
         'logs/association/sclrt_kernels_deepripe_multiple/{filter_highconfidence}_{pheno}.log'
+    resources:
+        mem_mb=get_mem_mb(10000,1.5),
+        time="8:00:00"
+    threads:
+        1
     conda:
         '../env/seak.yml'
     script:
@@ -499,6 +553,11 @@ rule assoc_deepripe_multiple_cholesky_eval_top_hits:
         filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence],
         rbp_of_interest = rules.assoc_deepripe_multiple_cholesky.params.rbp_of_interest,
         debug = False
+    resources:
+        mem_mb=get_mem_mb(10000,1.5),
+        time="01:00:00"
+    threads:
+        1
     log:
         'logs/association/sclrt_kernels_deepripe_multiple_eval_top_hits/{filter_highconfidence}_{pheno}.log'
     conda:
@@ -544,6 +603,11 @@ rule assoc_deepripe_multiple_cholesky_retest_top_hits:
         rbp_of_interest = rules.assoc_deepripe_multiple_cholesky.params.rbp_of_interest,
         debug = False,
         random = False
+    resources:
+        mem_mb=get_mem_mb(10000,1.5),
+        time="01:00:00"
+    threads:
+        1
     log:
         'logs/association/sclrt_kernels_deepripe_multiple_retest_top_hits/{filter_highconfidence}_{pheno}.log'
     conda:
@@ -602,3 +666,29 @@ rule assoc_deepripe_multiple_cholesky_retest_random_all:
     input:
         expand(rules.assoc_deepripe_multiple_cholesky_retest_random.output, pheno=phenotypes.keys(), filter_highconfidence=['all'])
+
+
+#######
+# all #
+#######
+
+
+rule all_assoc:
+    input:
+        rules.assoc_baseline_scoretest_all.input,
+        rules.assoc_missense_localcollapsing_all.input,
+        rules.assoc_spliceai_linw_all.input,
+        rules.assoc_deepripe_multiple_cholesky_all.input
+
+
+rule all_eval_top_hits:
+    input:
+        rules.assoc_missense_localcollapsing_eval_top_hits_all.input,
+        rules.assoc_spliceai_linw_eval_top_hits_all.input,
+        rules.assoc_deepripe_multiple_cholesky_eval_top_hits_all.input
+
+
+rule all_retest_top_hits:
+    input:
+        rules.assoc_missense_localcollapsing_retest_top_hits_all.input,
+        rules.assoc_spliceai_linw_retest_top_hits_all.input,
+        rules.assoc_deepripe_multiple_cholesky_retest_top_hits_all.input
diff --git a/snake/conditional_tests.smk b/snake/conditional_tests.smk
new file mode 100644
index 0000000..d0cf52c
--- /dev/null
+++ b/snake/conditional_tests.smk
@@ -0,0 +1,197 @@
+
+# TODO: add conditional_list to config.yaml
+
+
+rule assoc_baseline_scoretest_conditional_analysis:
+    # performs conditional analysis for the baseline
+    input:
+        h5_missense = rules.export_missense_burden.output.h5,
+        iid_missense = rules.export_missense_burden.output.iid_txt,
+        gid_missense = rules.export_missense_burden.output.gene_txt,
+        h5_lof = rules.export_plof_burden.output.h5,
+        iid_lof = rules.export_plof_burden.output.iid_txt,
+        gid_lof = rules.export_plof_burden.output.gene_txt,
+        covariates_tsv = config['covariates'],
+        phenotypes_tsv = config['phenotypes'],
+        results_tsv = rules.assoc_baseline_scoretest.output['results_tsv'],
+        conditional_list = config['conditional_list'],
+        conditional_geno = config['conditional_geno'] + '.bed'
+    output:
+        results_tsv = 'work/association/baseline_scoretest/{filter_highconfidence}/{pheno}/conditional_analysis/conditionalres_{id}.tsv.gz'
+    params:
+        # phenotypes is a dictionary that maps file-prefixes to column names, see ../Snakefile
+        phenotype = lambda wc: phenotypes[ wc.pheno ],
+        covariate_column_names = config['covariate_column_names'],
+        filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence],
+        columns = ['pv_pLOF','pv_missense','pv_mrg'],
+        significance_cutoff = 1e-7
+    threads:
+        1
+    resources:
+        mem_mb=4000,
+        time='0:10:00'
+    log:
+        'logs/association/baseline_scoretest_conditional_analysis/{filter_highconfidence}/{pheno}/{id}.log'
+    conda:
+        '../env/seak.yml'
+    script:
+        '../script/python/assoc_baseline_scoretest_conditional_analysis.py'
+
+
+rule assoc_baseline_scoretest_conditional_analysis_all:
+    input:
+        expand(rules.assoc_baseline_scoretest_conditional_analysis.output, filter_highconfidence=['all'], pheno=phenotypes.keys(), id=plinkfiles.getIds())
+
+
+rule assoc_missense_localcollapsing_conditional_analysis:
+    # calculates gene-specific LRT p-values for the most significant genes for missense variants
+    # conditions on a list of common variants
+    input:
+        h5_lof = expand(rules.export_plof_burden.output.h5, id = plinkfiles.getIds(), allow_missing=True),
+        iid_lof = expand(rules.export_plof_burden.output.iid_txt, id = plinkfiles.getIds(), allow_missing=True),
+        gid_lof = expand(rules.export_plof_burden.output.gene_txt, id = plinkfiles.getIds(), allow_missing=True),
+        covariates_tsv = config['covariates'],
+        phenotypes_tsv = config['phenotypes'],
+        bed = expand(rules.link_genotypes.output.bed, id = plinkfiles.getIds()),
+        vep_tsv = expand(rules.evep_missense_proc.output.tsv_filtered, id = plinkfiles.getIds()),
+        mac_report = expand(rules.filter_variants.output.vid_tsv, id = plinkfiles.getIds(), allow_missing=True),
+        regions_bed = rules.get_protein_coding_genes.output.pc_genes_bed,
+        results_tsv = rules.assoc_missense_localcollapsing.output.results_tsv,
+        seak_install = rules.install_seak.output,
+        conditional_list = config['conditional_list'],
+        conditional_geno = config['conditional_geno'] + '.bed'
+    output:
+        results_tsv = 'work/association/sclrt_kernels_missense/{filter_highconfidence}/{pheno}/conditional_analysis/lrt_conditional.tsv.gz'
+    params:
+        phenotype = lambda wc: phenotypes[ wc.pheno ],
+        covariate_column_names = config['covariate_column_names'],
+        kernels = ['linwcollapsed','linwcollapsed_cLOF','linwb','linwb_mrgLOF'], # kernels to consider
+        max_maf = config['maf_cutoff'],
+        min_impact = config['min_impact'],
+        ids = plinkfiles.getIds(),
+        filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence],
+        out_dir_stats = lambda wc, output: '/'.join(output['results_tsv'].split('/')[:-1]),
+        significance_cutoff = 1e-7,
+        debug = False
+    resources:
+        mem_mb=12000,
+        time="02:30:00"
+    log:
+        'logs/association/sclrt_kernels_missense_conditional_analysis/{filter_highconfidence}_{pheno}.log'
+    conda:
+        '../env/seak.yml'
+    script:
+        '../script/python/assoc_sclrt_kernels_missense_conditional_analysis.py'
+
+
+rule all_assoc_missense_localcollapsing_conditional_analysis:
+    # runs rule above for all phenotypes
+    input:
+        expand(rules.assoc_missense_localcollapsing_conditional_analysis.output, pheno=phenotypes.keys(), filter_highconfidence=['all'])
+
+
+rule assoc_spliceai_linw_conditional_analysis:
+    # calculates gene-specific LRT p-values for the most significant genes for splice variants
+    # conditions on a list of common variants
+    input:
+        h5_lof = expand(rules.export_plof_burden.output.h5, id = plinkfiles.getIds(), allow_missing=True),
+        iid_lof = expand(rules.export_plof_burden.output.iid_txt, id = plinkfiles.getIds(), allow_missing=True),
+        gid_lof = expand(rules.export_plof_burden.output.gene_txt, id = plinkfiles.getIds(), allow_missing=True),
+        ensembl_vep_tsv = expand(rules.process_ensembl_vep_output_highimpact.output.tsv, id = plinkfiles.getIds(), allow_missing=True),
+        covariates_tsv = config['covariates'],
+        phenotypes_tsv = config['phenotypes'],
+        bed = expand(rules.link_genotypes.output.bed, id = plinkfiles.getIds()),
+        vep_tsv = expand(rules.splice_ai_filter_and_overlap_with_genotypes.output.tsv, id = plinkfiles.getIds()),
+        mac_report = expand(rules.filter_variants.output.vid_tsv, id = plinkfiles.getIds(), allow_missing=True),
+        regions_bed = rules.get_protein_coding_genes.output.pc_genes_bed,
+        results_tsv = rules.assoc_spliceai_linw.output.results_tsv,
+        seak_install = rules.install_seak.output,
+        conditional_list = config['conditional_list'],
+        conditional_geno = config['conditional_geno'] + '.bed'
+    output:
+        results_tsv = 'work/association/sclrt_kernels_spliceai/{filter_highconfidence}/{pheno}/conditional_analysis/lrt_conditional.tsv.gz'
+    params:
+        kernels = ['linwb','linw','linwb_mrgLOF','linw_cLOF'],
+        phenotype = lambda wc: phenotypes[wc.pheno],
+        covariate_column_names = config['covariate_column_names'],
+        max_maf = config['maf_cutoff'],
+        min_impact = config['splice_ai_min_impact'],
+        out_dir_stats = lambda wc, output: '/'.join(output['results_tsv'].split('/')[:-1]),
+        ids = plinkfiles.getIds(),
+        filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence],
+        debug = False,
+        significance_cutoff = 1e-7
+    resources:
+        mem_mb=10000,
+        time="02:30:00"
+    log:
+        'logs/association/sclrt_kernels_spliceai_conditional_analysis/{filter_highconfidence}_{pheno}.log'
+    conda:
+        '../env/seak.yml'
+    script:
+        '../script/python/assoc_sclrt_kernels_spliceai_conditional_analysis.py'
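+
+# Expected layout of the conditional_list file (these are the columns the
+# conditional-analysis scripts read: gene_name, pheno, snp_rsid; the values
+# shown are made up):
+#
+#   gene_name   pheno     snp_rsid
+#   GENE1       pheno_a   rs0000001
+#   GENE2       pheno_b   rs0000002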
+
+
+rule all_assoc_spliceai_linw_conditional_analysis:
+    # runs rule above for all phenotypes
+    input:
+        expand(rules.assoc_spliceai_linw_conditional_analysis.output, pheno=phenotypes.keys(), filter_highconfidence=['all'])
+
+
+rule assoc_deepripe_multiple_cholesky_conditional_analysis:
+    # re-tests the most significant genes for DeepRiPe variants, conditioning on a list of common variants
+    input:
+        bed = expand(rules.link_genotypes.output.bed, id = plinkfiles.getIds()),
+        h5_rbp_plus = expand(rules.run_deepripe_vep.output.h5, id = plinkfiles.getIds(), strand=['plus']),
+        h5_rbp_minus = expand(rules.run_deepripe_vep.output.h5, id = plinkfiles.getIds(), strand=['minus']),
+        bed_rbp_plus = expand(rules.run_deepripe_vep.output.bed, id = plinkfiles.getIds(), strand=['plus']),
+        bed_rbp_minus = expand(rules.run_deepripe_vep.output.bed, id = plinkfiles.getIds(), strand=['minus']),
+        ensembl_vep_tsv = expand(rules.process_ensembl_vep_output_highimpact.output.tsv, id = plinkfiles.getIds(), allow_missing=True),
+        mac_report = expand(rules.filter_variants.output.vid_tsv, id = plinkfiles.getIds(), allow_missing=True),
+        regions_bed = rules.get_protein_coding_genes.output.pc_genes_bed,
+        seak_install = rules.install_seak.output,
+        covariates_tsv = config['covariates'],
+        phenotypes_tsv = config['phenotypes'],
+        results_tsv = rules.assoc_deepripe_multiple_cholesky.output.results_tsv,
+        conditional_list = config['conditional_list'],
+        conditional_geno = config['conditional_geno'] + '.bed'
+    output:
+        results_tsv = 'work/association/sclrt_kernels_deepripe_multiple/{filter_highconfidence}/{pheno}/conditional_analysis/lrt_conditional.tsv.gz'
+    params:
+        kernels = ['linwcholesky', 'linwcholesky_notLOF'],
+        phenotype = lambda wc: phenotypes[wc.pheno],
+        covariate_column_names = config['covariate_column_names'],
+        max_maf = config['maf_cutoff'],
+        min_impact = config['deepripe_min_impact'],
+        out_dir_stats = lambda wc, output: '/'.join(output['results_tsv'].split('/')[:-1]),
+        ids = plinkfiles.getIds(),
+        filter_highconfidence = lambda wc: {'all': False, 'highconf_only': True}[wc.filter_highconfidence],
+        rbp_of_interest = rules.assoc_deepripe_multiple_cholesky.params.rbp_of_interest,
+        debug = False,
+        random = False,
+        significance_cutoff = 1e-7
+    resources:
+        mem_mb=10000,
+        time="02:30:00"
+    log:
+        'logs/association/sclrt_kernels_deepripe_multiple_conditional_analysis/{filter_highconfidence}_{pheno}.log'
+    conda:
+        '../env/seak.yml'
+    script:
+        '../script/python/assoc_sclrt_kernels_deepripe_multiple_conditional_analysis.py'
+
+
+rule assoc_deepripe_multiple_cholesky_conditional_analysis_all:
+    # runs rule above for all phenotypes - this will effectively run the entire pipeline for DeepRiPe variants
+    input:
+        expand(rules.assoc_deepripe_multiple_cholesky_conditional_analysis.output, pheno=phenotypes.keys(), filter_highconfidence=['all'])
+
+
+rule conditional_analysis_all:
+    input:
+        rules.assoc_deepripe_multiple_cholesky_conditional_analysis_all.input,
+        rules.all_assoc_spliceai_linw_conditional_analysis.input,
+        rules.all_assoc_missense_localcollapsing_conditional_analysis.input,
+        rules.assoc_baseline_scoretest_conditional_analysis_all.input
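+
+# Typical invocation (sketch; run_cluster.sh forwards extra arguments to
+# snakemake): ./run_cluster.sh conditional_analysis_all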
diff --git a/snake/evep.smk b/snake/evep.smk
index 372760a..fc66442 100644
--- a/snake/evep.smk
+++ b/snake/evep.smk
@@ -189,6 +189,9 @@ rule export_plof_burden:
         iid_txt = 'work/gene_scores/indicator01/pLOF/{filter_highconfidence}/{id}_iid.txt'
     log:
         'logs/evep/export_plof_burden_{filter_highconfidence}_{id}.log'
+    resources:
+        mem_mb=8000,
+        time='1:00:00'
     conda:
         '../env/seak.yml'
     script:
@@ -228,6 +231,11 @@ rule export_missense_burden:
         iid_txt = 'work/gene_scores/indicator01/missense/{filter_highconfidence}/{id}_iid.txt'
     log:
         'logs/evep/export_missense_burden_{filter_highconfidence}_{id}.log'
+    resources:
+        mem_mb=12000,
+        time='1:00:00'
+    threads:
+        1
     conda:
         '../env/seak.yml'
     script:
diff --git a/snake/results_processing.smk b/snake/results_processing.smk
index ed2d0b7..cf15586 100644
--- a/snake/results_processing.smk
+++ b/snake/results_processing.smk
@@ -1,6 +1,4 @@
-
-
 rule export_results_baseline:
     # export results and calculate genomic inflation
     input:
diff --git a/snake/setup.smk b/snake/setup.smk
index 615159a..6908cab 100644
--- a/snake/setup.smk
+++ b/snake/setup.smk
@@ -81,6 +81,41 @@ rule gather_complete_cases:
     script:
         '../script/python/gather_complete_cases.py'
 
+
+rule complete_cases_ancestry:
+    # create ancestry keep-files for plink
+    input:
+        iid_txt = 'data/covariates/complete_cases/covariates.txt',
+        ancestry_tsv = config['ancestry_scoring_file'] if 'ancestry_scoring_file' in config else 'ancestry_scoring_file_is_undefined'
+    output:
+        ancestry_keep = expand('data/ancestry_keep_files/{ancestries}.keep', ancestries=['AFR','AMR','EAS','EUR','SAS'])
+    run:
+        import pandas as pd
+
+        with open(input['iid_txt'], 'r') as infile:
+            iids = [ l.rstrip() for l in infile ]
+
+        ancestry = pd.read_csv(input['ancestry_tsv'], sep='\t', dtype={0: str})
+        if 'IID' in ancestry.columns:
+            ancestry.rename(columns={'IID': 'iid'}, inplace=True)
+        ancestry.set_index('iid', inplace=True)
+
+        print('{} individuals in ancestry file'.format(len(ancestry)))
+        print('{} individuals in iid-file. Subsetting to those individuals.'.format(len(iids)))
+
+        # assumes all iids are present in the ancestry file (.loc raises a KeyError otherwise)
+        ancestry = ancestry.loc[iids]
+
+        for a in ancestry.columns:
+            outfile = 'data/ancestry_keep_files/{}.keep'.format(a)
+
+            # plink keep-files expect two columns: FID and IID
+            with open(outfile, 'w') as out:
+                for idx in ancestry[ancestry[a] > 0.5].index.values:
+                    out.write('{}\t{}\n'.format(idx, idx))
+
+            print('wrote {} iids to {}'.format((ancestry[a] > 0.5).sum(), outfile))
+
 rule mac_report:
     # filter by HWE and
@@ -117,10 +152,52 @@ rule mac_report_all:
     input:
         expand(rules.mac_report.output, id = plinkfiles.getIds())
 
+
+rule mac_report_ancestry:
+    # filter by HWE and
+    # create minor allele count report for specific ancestry and chromosome
+    input:
+        iid_txt = 'data/ancestry_keep_files/{ancestry}.keep',
+        plink = 'bin/plink',
+        bed = rules.link_genotypes.output.bed
+    output:
+        frq_tsv = temp('work/mac_report/all/{ancestry}/{id}.frq.counts.gz'),
+        tsv = 'work/mac_report/all/{ancestry}/{id}.tsv.gz',
+        plink_log = 'work/mac_report/all/{ancestry}/{id}.log'
+    params:
+        in_prefix = lambda wc, input: input.bed[:-4],
+        out_prefix = 'work/mac_report/all/{ancestry}/{id}'
+    log:
+        'logs/mac_report_ancestry/{ancestry}/{id}.log'
+    conda:
+        '../env/genomics.yml'
+    threads:
+        1
+    resources:
+        mem_mb=8000,
+        time="1:00:00"
+    shell:
+        'python script/python/mac_report.py '
+        '--iid {input.iid_txt} '
+        '--bed {params.in_prefix} '
+        '-o {params.out_prefix} '
+        '--plink_path {input.plink} '
+        '--log {log} '
+        '--threads {threads} '
+        '--mem_mb {resources.mem_mb} '
+        '&> {log} '
+
+
+rule mac_report_ancestry_all:
+    # create minor allele count reports for all ancestries and all chromosomes
+    input:
+        expand(rules.mac_report_ancestry.output, id = plinkfiles.getIds(), ancestry=['AFR','AMR','EAS','EUR','SAS'])
+
+
 rule filter_variants:
     # apply missingness filters and
     # high-confidence region filter for phenotype-complete cases
-    # returns the filtered variant ids, their MAF (calculated on the covariate-complete cases) and their MAC (calculated on the phenotype-complete cases)
+    # returns the filtered variant ids, their MAF (calculated on the covariate-complete cases) and their MAC (calculated on the *phenotype*-complete cases)
     # the MAF filter is applied while performing the association tests, NOT here.
     input:
         mac_tsv = rules.mac_report.output.tsv,