diff --git a/.gitignore b/.gitignore index 26bbb14..adc504b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ GRCh37.primary_assembly.genome.fa.gz GRCh37.primary_assembly.genome.fa.gz.fxi +pangolin/__pycache__ +tests/__pycache__ diff --git a/README.md b/README.md index 912ddb9..c23b351 100755 --- a/README.md +++ b/README.md @@ -7,19 +7,17 @@ Pangolin can be run on Google Colab, which provides free acess to GPUs and other See below for information on usage and local installation. ### Installation -* Prerequisites: Python 3.6 or higher and conda, which can both be installed using Miniconda: https://docs.conda.io/en/latest/miniconda.html -* Install PyTorch: https://pytorch.org/get-started/locally/ - * If a supported GPU is available, installation with GPU support is recommended (choose an option under "Compute Platform") -* Install other dependencies: - ``` - conda install -c conda-forge pyvcf - pip install gffutils biopython pandas pyfastx - ``` +* Prerequisites: Python 3.8 or higher +* Poetry: See https://python-poetry.org/docs/#installation * Install Pangolin: ``` - git clone https://github.com/tkzeng/Pangolin.git + git clone https://github.com/invitae/Pangolin.git cd Pangolin - pip install . + poetry install + ``` +* Activate env + ``` + poetry shell ``` ### Usage (command-line) @@ -52,13 +50,13 @@ See below for information on usage and local installation. ``` See full options below: ``` - usage: pangolin [-h] [-c COLUMN_IDS] [-m {False,True}] [-s SCORE_CUTOFF] [-d DISTANCE] variant_file reference_file annotation_file output_file + usage: pangolin [-h] [-c COLUMN_IDS] [-m {False,True}] [-s SCORE_CUTOFF] [-d DISTANCE] [-b BATCH_SIZE] [-v] variant_file reference_file annotation_file output_file positional arguments: variant_file VCF or CSV file with a header (see COLUMN_IDS option). reference_file FASTA file containing a reference genome sequence. annotation_file gffutils database file. Can be generated using create_db.py. - output_file Prefix for output file. Will be a VCF/CSV if variant_file is VCF/CSV. + output_file Name of output file. optional arguments: -h, --help show this help message and exit @@ -70,12 +68,44 @@ See below for information on usage and local installation. Output all sites with absolute predicted change in score >= cutoff, instead of only the maximum loss/gain sites. -d DISTANCE, --distance DISTANCE Number of bases on either side of the variant for which splice scores should be calculated. (Default: 50) + -b BATCH_SIZE, --batch_size BATCH_SIZE + Number of variants to batch together (Default: 0). Use this to improve GPU optimization + -v, --verbose Enable additional debugging output + --enable_gtf_cache Enable caching of GTF database into memory ``` ### Usage (custom) See `scripts/custom_usage.py` +### Batching Support + +Invitae added batching support in April 2023 to get better GPU optimization. Variants are read in batches and then distributed into collections by tensor sizes and then run through the GPU in larger batches. +After batches are run, data is put back together in the original order and written to disk. You can control the batching via the `-b` parameter documented above. + +![Batching](docs/Pangolin_Batching_Indexing.png) + +### GTF DB Caching + +If you are running a larger batch of variants, you can gain additional performance by caching the gtf database into memory. +You can enable this behavior with `--enable_gtf_cache`. 
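+For example, a hypothetical invocation that combines batching with the GTF cache (the `-b` value and all file names below are placeholders, not files shipped with this repo):
+
+```
+pangolin -b 128 --enable_gtf_cache variants.vcf GRCh37.primary_assembly.genome.fa gencode.db scored.vcf
+```
+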
With this enabled, it'll dump the SQLite database into memory using +interval trees for the gene information for quick lookups without hitting the disk. + +## Testing + +There are unit tests available that run some small scale sets of predictions using data on chromosome 19, see details in +the tests about how the data was generated. + +``` +poetry run pytest +``` + +Testing with coverage + +``` +poetry run coverage run --source=pangolin -m pytest && poetry run coverage report -m +``` + ### Citation If you use Pangolin, please cite: diff --git a/docs/Pangolin_Batching_Indexing.png b/docs/Pangolin_Batching_Indexing.png new file mode 100644 index 0000000..adb0c34 Binary files /dev/null and b/docs/Pangolin_Batching_Indexing.png differ diff --git a/docs/Pangolin_Batching_Overview.png b/docs/Pangolin_Batching_Overview.png new file mode 100644 index 0000000..368be76 Binary files /dev/null and b/docs/Pangolin_Batching_Overview.png differ diff --git a/pangolin/.fuse_hidden0000252700000002 b/pangolin/.fuse_hidden0000252700000002 deleted file mode 100644 index 6c2d773..0000000 --- a/pangolin/.fuse_hidden0000252700000002 +++ /dev/null @@ -1,257 +0,0 @@ -import argparse -from pkg_resources import resource_filename -from pangolin.model import * -import vcf -import gffutils -import pandas as pd -import pyfastx -# import time -# startTime = time.time() - -IN_MAP = np.asarray([[0, 0, 0, 0], - [1, 0, 0, 0], - [0, 1, 0, 0], - [0, 0, 1, 0], - [0, 0, 0, 1]]) - - -def one_hot_encode(seq, strand): - seq = seq.upper().replace('A', '1').replace('C', '2') - seq = seq.replace('G', '3').replace('T', '4').replace('N', '0') - if strand == '+': - seq = np.asarray(list(map(int, list(seq)))) - elif strand == '-': - seq = np.asarray(list(map(int, list(seq[::-1])))) - seq = (5 - seq) % 5 # Reverse complement - return IN_MAP[seq.astype('int8')] - - -def compute_score(ref_seq, alt_seq, strand, d, models): - ref_seq = one_hot_encode(ref_seq, strand).T - ref_seq = torch.from_numpy(np.expand_dims(ref_seq, axis=0)).float() - alt_seq = one_hot_encode(alt_seq, strand).T - alt_seq = torch.from_numpy(np.expand_dims(alt_seq, axis=0)).float() - - if torch.cuda.is_available(): - ref_seq = ref_seq.to(torch.device("cuda")) - alt_seq = alt_seq.to(torch.device("cuda")) - - pangolin = [] - for j in range(4): - score = [] - for model in models[3*j:3*j+3]: - with torch.no_grad(): - ref = model(ref_seq)[0][[1,4,7,10][j],:].cpu().numpy() - alt = model(alt_seq)[0][[1,4,7,10][j],:].cpu().numpy() - if strand == '-': - ref = ref[::-1] - alt = alt[::-1] - l = 2*d+1 - ndiff = np.abs(len(ref)-len(alt)) - if len(ref)>len(alt): - alt = np.concatenate([alt[0:l//2+1],np.zeros(ndiff),alt[l//2+1:]]) - elif len(ref) pos or gene[4] < pos: - continue - gene_id = gene["gene_id"][0] - exons = [] - for exon in gtf.children(gene, featuretype="exon"): - exons.extend([exon[3], exon[4]]) - if gene[6] == '+': - genes_pos[gene_id] = exons - elif gene[6] == '-': - genes_neg[gene_id] = exons - - return (genes_pos, genes_neg) - - -def process_variant(lnum, chr, pos, ref, alt, gtf, models, args): - d = args.distance - cutoff = args.score_cutoff - - if len(set("ACGT").intersection(set(ref))) == 0 or len(set("ACGT").intersection(set(alt))) == 0 \ - or (len(ref) != 1 and len(alt) != 1 and len(ref) != len(alt)): - print("[Line %s]" % lnum, "WARNING, skipping variant: Variant format not supported.") - return -1 - elif len(ref) > 2*d: - print("[Line %s]" % lnum, "WARNING, skipping variant: Deletion too large") - return -1 - - fasta = pyfastx.Fasta(args.reference_file) - # try to 
make vcf chromosomes compatible with reference chromosomes - if chr not in fasta.keys() and "chr"+chr in fasta.keys(): - chr = "chr"+chr - elif chr not in fasta.keys() and chr[3:] in fasta.keys(): - chr = chr[3:] - - try: - seq = fasta[chr][pos-5001-d:pos+len(ref)+4999+d].seq - except Exception as e: - print(e) - print("[Line %s]" % lnum, "WARNING, skipping variant: Could not get sequence, possibly because the variant is too close to chromosome ends. " - "See error message above.") - return -1 - - if seq[5000+d:5000+d+len(ref)] != ref: - print("[Line %s]" % lnum, "WARNING, skipping variant: Mismatch between FASTA (ref base: %s) and variant file (ref base: %s)." - % (seq[5000+d:5000+d+len(ref)], ref)) - return -1 - - ref_seq = seq - alt_seq = seq[:5000+d] + alt + seq[5000+d+len(ref):] - - # get genes that intersect variant - genes_pos, genes_neg = get_genes(chr, pos, gtf) - if len(genes_pos)+len(genes_neg)==0: - print("[Line %s]" % lnum, "WARNING, skipping variant: Variant not contained in a gene body. Do GTF/FASTA chromosome names match?") - return -1 - - # get splice scores - loss_pos, gain_pos = None, None - if len(genes_pos) > 0: - loss_pos, gain_pos = compute_score(ref_seq, alt_seq, '+', d, models) - loss_neg, gain_neg = None, None - if len(genes_neg) > 0: - loss_neg, gain_neg = compute_score(ref_seq, alt_seq, '-', d, models) - - scores = "" - for (genes, loss, gain) in \ - ((genes_pos,loss_pos,gain_pos),(genes_neg,loss_neg,gain_neg)): - for gene, positions in genes.items(): - warnings = "Warnings:" - - if args.mask == "True" and len(positions) != 0: - positions = np.array(positions) - positions = positions - (pos - d) - - positions_filt = positions[(positions>=0) & (positions=cutoff)[0] - for p, s in zip(np.concatenate([g-d,l-d]), np.concatenate([gain[g],loss[l]])): - scores += "%s:%s|" % (p, round(s,2)) - - else: - scores = scores+gene+'|' - l, g = np.argmin(loss), np.argmax(gain), - scores += "%s:%s|%s:%s|" % (g-d, round(gain[g],2), l-d, round(loss[l],2)) - - scores += warnings - - return scores.strip('|') - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("variant_file", help="VCF or CSV file with a header (see COLUMN_IDS option).") - parser.add_argument("reference_file", help="FASTA file containing a reference genome sequence.") - parser.add_argument("annotation_file", help="gffutils database file. Can be generated using create_db.py.") - parser.add_argument("output_file", help="Prefix for output file. Will be a VCF/CSV if variant_file is VCF/CSV.") - parser.add_argument("-c", "--column_ids", default="CHROM,POS,REF,ALT", help="(If variant_file is a CSV) Column IDs for: chromosome, variant position, reference bases, and alternative bases. " - "Separate IDs by commas. (Default: CHROM,POS,REF,ALT)") - parser.add_argument("-m", "--mask", default="True", choices=["False","True"], help="If True, splice gains (increases in score) at annotated splice sites and splice losses (decreases in score) at unannotated splice sites will be set to 0. (Default: True)") - parser.add_argument("-s", "--score_cutoff", type=float, help="Output all sites with absolute predicted change in score >= cutoff, instead of only the maximum loss/gain sites.") - parser.add_argument("-d", "--distance", type=int, default=50, help="Number of bases on either side of the variant for which splice scores should be calculated. 
(Default: 50)") - #parser.add_argument("--score_exons", default="False", choices=["False","True"], help="Output changes in score for both splice sites of annotated exons, as long as one splice site is within the considered range (specified by -d). Output will be: gene|site1_pos:score|site2_pos:score|...") - args = parser.parse_args() - - variants = args.variant_file - gtf = args.annotation_file - try: - gtf = gffutils.FeatureDB(gtf) - except: - print("ERROR, annotation_file could not be opened. Is it a gffutils database file?") - exit() - - if torch.cuda.is_available(): - print("Using GPU") - else: - print("Using CPU") - - models = [] - for i in [0,2,4,6]: - for j in range(1,4): - model = Pangolin(L, W, AR) - if torch.cuda.is_available(): - model.cuda() - weights = torch.load(resource_filename(__name__,"models/final.%s.%s.3.v2" % (j, i))) - else: - weights = torch.load(resource_filename(__name__,"models/final.%s.%s.3.v2" % (j, i)), map_location=torch.device('cpu')) - model.load_state_dict(weights) - model.eval() - models.append(model) - - if variants.endswith(".vcf"): - lnum = 0 - # count the number of header lines - for line in open(variants, 'r'): - lnum += 1 - if line[0] != '#': - break - - variants = vcf.Reader(filename=variants) - variants.infos["Pangolin"] = vcf.parser._Info( - "Pangolin",'.',"String","Pangolin splice scores. " - "Format: gene|pos:score_change|pos:score_change|...",'.','.') - fout = vcf.Writer(open(args.output_file+".vcf", 'w'), variants) - - for i, variant in enumerate(variants): - scores = process_variant(lnum+i, str(variant.CHROM), int(variant.POS), variant.REF, str(variant.ALT[0]), gtf, models, args) - if scores != -1: - variant.INFO["Pangolin"] = scores - fout.write_record(variant) - fout.flush() - - fout.close() - - elif variants.endswith(".csv"): - col_ids = args.column_ids.split(',') - variants = pd.read_csv(variants, header=0) - fout = open(args.output_file+".csv", 'w') - fout.write(','.join(variants.columns)+',Pangolin\n') - fout.flush() - - for lnum, variant in variants.iterrows(): - chr, pos, ref, alt = variant[col_ids] - ref, alt = ref.upper(), alt.upper() - scores = process_variant(lnum+1, str(chr), int(pos), ref, alt, gtf, models, args) - if scores == -1: - fout.write(','.join(variant.to_csv(header=False, index=False).split('\n'))+'\n') - else: - fout.write(','.join(variant.to_csv(header=False, index=False).split('\n'))+scores+'\n') - fout.flush() - - fout.close() - - else: - print("ERROR, variant_file needs to be a CSV or VCF.") - - # executionTime = (time.time() - startTime) - # print('Execution time in seconds: ' + str(executionTime)) - -if __name__ == '__main__': - main() diff --git a/pangolin/batch.py b/pangolin/batch.py new file mode 100644 index 0000000..e2f4d60 --- /dev/null +++ b/pangolin/batch.py @@ -0,0 +1,304 @@ +import logging +import time + +import numpy as np +import pyfastx +import torch +from typing import List, Dict, Tuple + +from pangolin.data_models import ( + Variant, + PreppedVariant, + BatchLookupIndex, + AppConfig, + SequenceType, +) +from pangolin.genes import GeneAnnotator +from pangolin.utils import combine_scores, prepare_variant + +logger = logging.getLogger(__name__) + + +class PredictionBatch: + def __init__(self, models: List, app_config: AppConfig): + self.app_config = app_config + self.models = models + self.gene_annotator = GeneAnnotator( + self.app_config.annotation_file, + use_cache=self.app_config.enable_gtf_cache, + ) + self.fasta = pyfastx.Fasta(self.app_config.reference_file) + + self.batches = {} + 
self.variants: List[Variant] = [] + self.prepared_records: List[PreppedVariant] = [] + + self.batch_count = 0 + self.total_records = 0 + + self.prep_total_time = None + self.batch_start_time = None + + # Flag to know when the batch was run + self.did_run_predictions = False + + logger.debug(f"Batch init with batch size: {self.app_config.batch_size}") + + def batch_variant(self, prepped_variant: PreppedVariant) -> List[BatchLookupIndex]: + # Skip batching this variant if it wasn't encoded for validation reasons + if not prepped_variant.encodings: + return [] + + encoded_ref_pos = ( + prepped_variant.encodings.encoded_ref_pos + if prepped_variant.encodings + else "" + ) + encoded_alt_pos = ( + prepped_variant.encodings.encoded_alt_pos + if prepped_variant.encodings + else "" + ) + encoded_ref_neg = ( + prepped_variant.encodings.encoded_ref_neg + if prepped_variant.encodings + else "" + ) + encoded_alt_neg = ( + prepped_variant.encodings.encoded_alt_neg + if prepped_variant.encodings + else "" + ) + + batch_lookup_indexes = [] + + for var_type, encoded_seq in zip( + ( + SequenceType.POS_REF, + SequenceType.POS_ALT, + SequenceType.NEG_REF, + SequenceType.NEG_ALT, + ), + (encoded_ref_pos, encoded_alt_pos, encoded_ref_neg, encoded_alt_neg), + ): + if len(encoded_seq) == 0: + # Add BatchLookupIndex with zeros so when the batch collects the outputs + # it knows that there is no prediction for this record + batch_lookup_indexes.append(BatchLookupIndex(var_type, 0, 0)) + continue + + # Iterate over the encoded sequence and drop into the correct batch by size and + # create an index to use to pull out the result after batch is processed + # for row in encoded_seq: + # Extract the size of the sequence that was encoded to build a batch from + tensor_size = encoded_seq.shape[2] + + # Create batch for this size + if tensor_size not in self.batches: + self.batches[tensor_size] = [] + + # Add encoded record to batch + self.batches[tensor_size].append(encoded_seq) + + # Get the index of the record we just added in the batch + cur_batch_record_ix = len(self.batches[tensor_size]) - 1 + + # Store a reference so we can pull out the prediction for this item from the batches + batch_lookup_indexes.append( + BatchLookupIndex(var_type, tensor_size, cur_batch_record_ix) + ) + + return batch_lookup_indexes + + def prep_all_variants(self) -> None: + prep_time = time.time() + total_seq_time = 0 + total_encode_time = 0 + total_gene_time = 0 + for variant in self.variants: + prepared_record, timing = prepare_variant( + variant, + self.gene_annotator, + self.fasta, + self.app_config.distance, + ) + if prepared_record.skip_message: + logger.debug(prepared_record.skip_message) + total_seq_time += timing.seq_time + total_encode_time += timing.encode_time + total_gene_time += timing.gene_time + self.prepared_records.append(prepared_record) + self.prep_total_time = time.time() - prep_time + logger.debug(f"Total seq time: {total_seq_time:.5f}s") + logger.debug(f"Total gene time: {total_gene_time:.5f}s") + logger.debug(f"Total encode time: {total_encode_time:.5f}s") + logger.debug(f"Prep variant time: {self.prep_total_time:.5f}s") + + # Put the variants into buckets + for prepped_variant in self.prepared_records: + prepped_variant.locations = self.batch_variant(prepped_variant) + + def add_variant(self, variant: Variant) -> None: + self.total_records += 1 + self.variants.append(variant) + self.did_run_predictions = False + + # Once we fill the batch, process the records + if len(self.variants) >= self.app_config.batch_size: + 
logger.debug(f"Finished collected variants in batch: {len(self.variants)}") + self.run_batch() + self.did_run_predictions = True + + def run_batch(self) -> None: + self.batch_start_time = time.time() + self.prep_all_variants() + self._process_batch() + + def finish(self) -> None: + logger.debug("Finish") + + if len(self.variants) == 0: + logger.debug("No variants left to process") + return + + # Run remaining variants + self.run_batch() + + def run_predictions(self, batch) -> List: + batch_preds = [] + if torch.cuda.is_available(): + batch = batch.to(torch.device("cuda")) + for j in range(4): + for i, model in enumerate(self.models[3 * j : 3 * j + 3]): + with torch.no_grad(): + preds = model(batch) + batch_preds.append(preds) + return batch_preds + + def _process_batch(self) -> None: + start = time.time() + total_batch_predictions = 0 + self.batch_count += 1 + logger.debug(f"Starting process_batch ({self.batch_count})") + + batch_sizes = [ + "{}:{}".format(tensor_size, len(batch)) + for tensor_size, batch in self.batches.items() + ] + logger.debug("Batch Sizes: {}".format(batch_sizes)) + + batch_preds = {} + for tensor_size, batch in self.batches.items(): + # Convert list of encodings into a proper sized numpy matrix + prediction_batch = np.concatenate(batch, axis=0) + torched = torch.from_numpy(prediction_batch).float() + batch_preds[tensor_size] = self.run_predictions(torched) + + for prepped_record in self.prepared_records: + ( + prepped_record.loss_pos, + prepped_record.gain_pos, + ) = self._get_score_from_batch(prepped_record, batch_preds, "+") + ( + prepped_record.loss_neg, + prepped_record.gain_neg, + ) = self._get_score_from_batch(prepped_record, batch_preds, "-") + prepped_record.score = self.calculate_score(prepped_record) + total_batch_predictions += 1 + + duration = time.time() - start + logger.debug(f"Batch time: {duration:0.2f}s") + batch_duration = time.time() - self.batch_start_time + preds_per_sec = total_batch_predictions / batch_duration + preds_per_hour = preds_per_sec * 60 * 60 + logger.info( + f"Finished batch {self.batch_count}: Total Time {batch_duration:0.2f}s, Prep Time: {self.prep_total_time:0.2f}s, Preds/Hour: {preds_per_hour:0.0f}, Records: {self.total_records}" + ) + + def _get_score_from_batch( + self, prepped_record: PreppedVariant, batch_preds: Dict[int, List], strand: str + ) -> Tuple: + if len(prepped_record.locations) == 0: + return None, None + + # Get the lookup locations of the ref and alt values + ref_location = ( + prepped_record.locations[SequenceType.POS_REF.value] + if strand == "+" + else prepped_record.locations[SequenceType.NEG_REF.value] + ) + alt_location = ( + prepped_record.locations[SequenceType.POS_ALT.value] + if strand == "+" + else prepped_record.locations[SequenceType.NEG_ALT.value] + ) + + if ref_location.tensor_size == 0 and alt_location.tensor_size == 0: + return None, None + + ix = 0 + pangolin = [] + for j in range(4): + scores = [] + for _ in self.models[3 * j : 3 * j + 3]: + # Pull out predictions from the batch + ref_prediction = batch_preds[ref_location.tensor_size][ix] + alt_prediction = batch_preds[alt_location.tensor_size][ix] + + # Bring data back to CPU + ref = ( + ref_prediction[ref_location.batch_index][[1, 4, 7, 10][j], :] + .cpu() + .numpy() + ) + alt = ( + alt_prediction[alt_location.batch_index][[1, 4, 7, 10][j], :] + .cpu() + .numpy() + ) + if strand == "-": + ref = ref[::-1] + alt = alt[::-1] + l = 2 * self.app_config.distance + 1 + ndiff = np.abs(len(ref) - len(alt)) + if len(ref) > len(alt): + alt = 
np.concatenate( + [alt[0 : l // 2 + 1], np.zeros(ndiff), alt[l // 2 + 1 :]] + ) + elif len(ref) < len(alt): + alt = np.concatenate( + [ + alt[0 : l // 2], + np.max(alt[l // 2 : l // 2 + ndiff + 1], keepdims=True), + alt[l // 2 + ndiff + 1 :], + ] + ) + score = alt - ref + scores.append(score) + ix += 1 + pangolin.append(np.mean(scores, axis=0)) + + pangolin = np.array(pangolin) + loss = pangolin[np.argmin(pangolin, axis=0), np.arange(pangolin.shape[1])] + gain = pangolin[np.argmax(pangolin, axis=0), np.arange(pangolin.shape[1])] + return loss, gain + + def calculate_score(self, variant: PreppedVariant) -> str: + if len(variant.locations) == 0: + return "" + scores = combine_scores( + variant.variant.pos, + variant.genes_pos, + variant.loss_pos, + variant.gain_pos, + variant.genes_neg, + variant.loss_neg, + variant.gain_neg, + self.app_config, + ) + return scores + + def clear_batch(self) -> None: + self.batches.clear() + del self.variants[:] + del self.prepared_records[:] diff --git a/pangolin/data_models.py b/pangolin/data_models.py new file mode 100644 index 0000000..67d25f6 --- /dev/null +++ b/pangolin/data_models.py @@ -0,0 +1,94 @@ +import dataclasses +from enum import Enum +from typing import Optional, List + +from torch._C._te import Tensor + + +class SequenceType(Enum): + POS_REF = 0 + POS_ALT = 1 + NEG_REF = 2 + NEG_ALT = 3 + + +@dataclasses.dataclass +class TimingDetails: + seq_time: float = 0 + encode_time: float = 0 + gene_time: float = 0 + + +@dataclasses.dataclass +class BatchLookupIndex: + sequence_type: SequenceType + tensor_size: int + batch_index: int + + +@dataclasses.dataclass +class AppConfig: + variant_file: str + output_file: str + reference_file: str + annotation_file: str + batch_size: int + distance: int + mask: str + score_exons: str + column_ids: str + score_cutoff: Optional[float] + enable_gtf_cache: bool + + @classmethod + def from_args(cls, args) -> "AppConfig": + return cls( + variant_file=args.variant_file, + output_file=args.output_file, + reference_file=args.reference_file, + annotation_file=args.annotation_file, + batch_size=args.batch_size, + distance=args.distance, + score_cutoff=args.score_cutoff, + mask=args.mask, + score_exons=args.score_exons, + column_ids=args.column_ids, + enable_gtf_cache=args.enable_gtf_cache, + ) + + +@dataclasses.dataclass +class Variant: + lnum: int + chr: str + pos: int + ref: str + alt: str + id: Optional[int] = None + + +@dataclasses.dataclass +class VariantEncodings: + encoded_ref_pos: Tensor + encoded_alt_pos: Tensor + encoded_ref_neg: Tensor + encoded_alt_neg: Tensor + + +@dataclasses.dataclass +class PreppedVariant: + variant: Variant + score: str = "" + skip_message: str = "" + locations: Optional[List[BatchLookupIndex]] = None + encodings: Optional[VariantEncodings] = None + genes_pos: Optional[List] = None + genes_neg: Optional[List] = None + loss_pos: Optional[List] = None + gain_pos: Optional[List] = None + loss_neg: Optional[List] = None + gain_neg: Optional[List] = None + + @classmethod + def with_skip_message(cls, variant: Variant, skip_message: str) -> "PreppedVariant": + return cls(variant=variant, skip_message=skip_message, locations=[]) diff --git a/pangolin/genes.py b/pangolin/genes.py new file mode 100644 index 0000000..bd8e01b --- /dev/null +++ b/pangolin/genes.py @@ -0,0 +1,70 @@ +import logging +import time +from typing import Dict, Tuple + +import gffutils +from intervaltree import IntervalTree + +logger = logging.getLogger(__name__) + + +class GeneAnnotator: + def __init__(self, annotation_file: 
str, use_cache: bool = True): + self.use_cache = use_cache + self.gtf = None + self.trees = None + + if use_cache: + self.trees = self._load_data(annotation_file) + else: + self.gtf = gffutils.FeatureDB(annotation_file) + + def _load_data(self, annotation_file: str) -> Dict[str, IntervalTree]: + load_time = time.time() + gtf = gffutils.FeatureDB(annotation_file) + trees = {} + for gene in gtf.features_of_type("gene"): + if gene.seqid not in trees: + trees[gene.seqid] = IntervalTree() + exons = [] + for exon in gtf.children(gene, featuretype="exon"): + exons.extend([exon[3], exon[4]]) + trees[gene.seqid][gene.start : gene.stop] = (gene.id, gene.strand, exons) + logger.debug(f"Load cached db: {time.time() - load_time:.5f}s") + return trees + + def get_genes(self, chrom, pos) -> Tuple[Dict, Dict]: + if self.use_cache: + return self.get_cached_genes(chrom, pos) + return self.get_db_genes(chrom, pos) + + def get_cached_genes(self, chrom: str, pos: int) -> Tuple[Dict, Dict]: + genes = self.trees[chrom][pos - 1] + + genes_pos, genes_neg = {}, {} + for gene in genes: + gene_id, strand, exons = gene.data + if strand == "+": + genes_pos[gene_id] = exons + elif strand == "-": + genes_neg[gene_id] = exons + + return genes_pos, genes_neg + + def get_db_genes(self, chrom: str, pos: int) -> Tuple[Dict, Dict]: + genes = self.gtf.region((chrom, pos - 1, pos - 1), featuretype="gene") + genes_pos, genes_neg = {}, {} + + for gene in genes: + if gene[3] > pos or gene[4] < pos: + continue + gene_id = gene["gene_id"][0] + exons = [] + for exon in self.gtf.children(gene, featuretype="exon"): + exons.extend([exon[3], exon[4]]) + if gene[6] == "+": + genes_pos[gene_id] = exons + elif gene[6] == "-": + genes_neg[gene_id] = exons + + return (genes_pos, genes_neg) diff --git a/pangolin/legacy.py b/pangolin/legacy.py new file mode 100644 index 0000000..e4a9698 --- /dev/null +++ b/pangolin/legacy.py @@ -0,0 +1,94 @@ +import pyfastx + +from pangolin.data_models import AppConfig +from pangolin.utils import compute_score, combine_scores + + +def get_genes(chrom, pos, gtf): + genes = gtf.region((chrom, pos - 1, pos - 1), featuretype="gene") + genes_pos, genes_neg = {}, {} + + for gene in genes: + if gene[3] > pos or gene[4] < pos: + continue + gene_id = gene["gene_id"][0] + exons = [] + for exon in gtf.children(gene, featuretype="exon"): + exons.extend([exon[3], exon[4]]) + if gene[6] == "+": + genes_pos[gene_id] = exons + elif gene[6] == "-": + genes_neg[gene_id] = exons + + return (genes_pos, genes_neg) + + +def process_variant_legacy( + lnum, chr, pos, ref, alt, gtf, models, app_config: AppConfig +): + d = app_config.distance + + if ( + len(set("ACGT").intersection(set(ref))) == 0 + or len(set("ACGT").intersection(set(alt))) == 0 + or (len(ref) != 1 and len(alt) != 1 and len(ref) != len(alt)) + ): + print( + "[Line %s]" % lnum, + "WARNING, skipping variant: Variant format not supported.", + ) + return -1 + elif len(ref) > 2 * d: + print("[Line %s]" % lnum, "WARNING, skipping variant: Deletion too large") + return -1 + + fasta = pyfastx.Fasta(app_config.reference_file) + # try to make vcf chromosomes compatible with reference chromosomes + if chr not in fasta.keys() and "chr" + chr in fasta.keys(): + chr = "chr" + chr + elif chr not in fasta.keys() and chr[3:] in fasta.keys(): + chr = chr[3:] + + try: + seq = fasta[chr][pos - 5001 - d : pos + len(ref) + 4999 + d].seq + except Exception as e: + print(e) + print( + "[Line %s]" % lnum, + "WARNING, skipping variant: Could not get sequence, possibly because the variant is too 
close to chromosome ends. " + "See error message above.", + ) + return -1 + + if seq[5000 + d : 5000 + d + len(ref)].upper() != ref: + print( + "[Line %s]" % lnum, + "WARNING, skipping variant: Mismatch between FASTA (ref base: %s) and variant file (ref base: %s)." + % (seq[5000 + d : 5000 + d + len(ref)], ref), + ) + return -1 + + ref_seq = seq + alt_seq = seq[: 5000 + d] + alt + seq[5000 + d + len(ref) :] + + # get genes that intersect variant + genes_pos, genes_neg = get_genes(chr, pos, gtf) + if len(genes_pos) + len(genes_neg) == 0: + print( + "[Line %s]" % lnum, + "WARNING, skipping variant: Variant not contained in a gene body. Do GTF/FASTA chromosome names match?", + ) + return -1 + + # get splice scores + loss_pos, gain_pos = None, None + if len(genes_pos) > 0: + loss_pos, gain_pos = compute_score(ref_seq, alt_seq, "+", d, models) + loss_neg, gain_neg = None, None + if len(genes_neg) > 0: + loss_neg, gain_neg = compute_score(ref_seq, alt_seq, "-", d, models) + + scores = combine_scores( + pos, genes_pos, loss_pos, gain_pos, genes_neg, loss_neg, gain_neg, app_config + ) + return scores diff --git a/pangolin/model.py b/pangolin/model.py index 11dfb43..8f8f304 100755 --- a/pangolin/model.py +++ b/pangolin/model.py @@ -1,16 +1,16 @@ +from typing import List + import numpy as np import torch -import torch.utils.data as data import torch.nn.functional as F import torch.nn as nn +from pkg_resources import resource_filename L = 32 # convolution window size in residual units -W = np.asarray([11, 11, 11, 11, 11, 11, 11, 11, - 21, 21, 21, 21, 41, 41, 41, 41]) +W = np.asarray([11, 11, 11, 11, 11, 11, 11, 11, 21, 21, 21, 21, 41, 41, 41, 41]) # atrous rate in residual units -AR = np.asarray([1, 1, 1, 1, 4, 4, 4, 4, - 10, 10, 10, 10, 25, 25, 25, 25]) +AR = np.asarray([1, 1, 1, 1, 4, 4, 4, 4, 10, 10, 10, 10, 25, 25, 25, 25]) class ResBlock(nn.Module): @@ -47,7 +47,7 @@ def __init__(self, L, W, AR): self.resblocks, self.convs = nn.ModuleList(), nn.ModuleList() for i in range(len(W)): self.resblocks.append(ResBlock(L, W[i], AR[i])) - if (((i + 1) % 4 == 0) or ((i + 1) == len(W))): + if ((i + 1) % 4 == 0) or ((i + 1) == len(W)): self.convs.append(nn.Conv1d(L, L, 1)) self.conv_last1 = nn.Conv1d(L, 2, 1) self.conv_last2 = nn.Conv1d(L, 1, 1) @@ -64,7 +64,7 @@ def forward(self, x): j = 0 for i in range(len(W)): conv = self.resblocks[i](conv) - if (((i + 1) % 4 == 0) or ((i + 1) == len(W))): + if ((i + 1) % 4 == 0) or ((i + 1) == len(W)): dense = self.convs[j](conv) j += 1 skip = skip + dense @@ -81,3 +81,22 @@ def forward(self, x): return torch.cat([out1, out2, out3, out4, out5, out6, out7, out8], 1) +def load_models() -> List: + models = [] + for i in [0, 2, 4, 6]: + for j in range(1, 4): + model = Pangolin(L, W, AR) + if torch.cuda.is_available(): + model.cuda() + weights = torch.load( + resource_filename(__name__, "models/final.%s.%s.3.v2" % (j, i)) + ) + else: + weights = torch.load( + resource_filename(__name__, "models/final.%s.%s.3.v2" % (j, i)), + map_location=torch.device("cpu"), + ) + model.load_state_dict(weights) + model.eval() + models.append(model) + return models diff --git a/pangolin/pangolin.py b/pangolin/pangolin.py index 00a541d..0317ecb 100755 --- a/pangolin/pangolin.py +++ b/pangolin/pangolin.py @@ -1,278 +1,112 @@ import argparse -from pkg_resources import resource_filename -from pangolin.model import * -import vcf -import gffutils -import pandas as pd -import pyfastx -# import time -# startTime = time.time() +import logging +from dataclasses import asdict -IN_MAP = np.asarray([[0, 
0, 0, 0], - [1, 0, 0, 0], - [0, 1, 0, 0], - [0, 0, 1, 0], - [0, 0, 0, 1]]) +import torch +from pangolin.data_models import AppConfig -def one_hot_encode(seq, strand): - seq = seq.upper().replace('A', '1').replace('C', '2') - seq = seq.replace('G', '3').replace('T', '4').replace('N', '0') - if strand == '+': - seq = np.asarray(list(map(int, list(seq)))) - elif strand == '-': - seq = np.asarray(list(map(int, list(seq[::-1])))) - seq = (5 - seq) % 5 # Reverse complement - return IN_MAP[seq.astype('int8')] +import time +from pangolin.processors import process_variants_file -def compute_score(ref_seq, alt_seq, strand, d, models): - ref_seq = one_hot_encode(ref_seq, strand).T - ref_seq = torch.from_numpy(np.expand_dims(ref_seq, axis=0)).float() - alt_seq = one_hot_encode(alt_seq, strand).T - alt_seq = torch.from_numpy(np.expand_dims(alt_seq, axis=0)).float() - if torch.cuda.is_available(): - ref_seq = ref_seq.to(torch.device("cuda")) - alt_seq = alt_seq.to(torch.device("cuda")) - - pangolin = [] - for j in range(4): - score = [] - for model in models[3*j:3*j+3]: - with torch.no_grad(): - ref = model(ref_seq)[0][[1,4,7,10][j],:].cpu().numpy() - alt = model(alt_seq)[0][[1,4,7,10][j],:].cpu().numpy() - if strand == '-': - ref = ref[::-1] - alt = alt[::-1] - l = 2*d+1 - ndiff = np.abs(len(ref)-len(alt)) - if len(ref)>len(alt): - alt = np.concatenate([alt[0:l//2+1],np.zeros(ndiff),alt[l//2+1:]]) - elif len(ref) pos or gene[4] < pos: - continue - gene_id = gene["gene_id"][0] - exons = [] - for exon in gtf.children(gene, featuretype="exon"): - exons.extend([exon[3], exon[4]]) - if gene[6] == '+': - genes_pos[gene_id] = exons - elif gene[6] == '-': - genes_neg[gene_id] = exons - - return (genes_pos, genes_neg) - - -def process_variant(lnum, chr, pos, ref, alt, gtf, models, args): - d = args.distance - cutoff = args.score_cutoff - - if len(set("ACGT").intersection(set(ref))) == 0 or len(set("ACGT").intersection(set(alt))) == 0 \ - or (len(ref) != 1 and len(alt) != 1 and len(ref) != len(alt)): - print("[Line %s]" % lnum, "WARNING, skipping variant: Variant format not supported.") - return -1 - elif len(ref) > 2*d: - print("[Line %s]" % lnum, "WARNING, skipping variant: Deletion too large") - return -1 - - fasta = pyfastx.Fasta(args.reference_file) - # try to make vcf chromosomes compatible with reference chromosomes - if chr not in fasta.keys() and "chr"+chr in fasta.keys(): - chr = "chr"+chr - elif chr not in fasta.keys() and chr[3:] in fasta.keys(): - chr = chr[3:] - - try: - seq = fasta[chr][pos-5001-d:pos+len(ref)+4999+d].seq - except Exception as e: - print(e) - print("[Line %s]" % lnum, "WARNING, skipping variant: Could not get sequence, possibly because the variant is too close to chromosome ends. " - "See error message above.") - return -1 - - if seq[5000+d:5000+d+len(ref)] != ref: - print("[Line %s]" % lnum, "WARNING, skipping variant: Mismatch between FASTA (ref base: %s) and variant file (ref base: %s)." - % (seq[5000+d:5000+d+len(ref)], ref)) - return -1 - - ref_seq = seq - alt_seq = seq[:5000+d] + alt + seq[5000+d+len(ref):] - - # get genes that intersect variant - genes_pos, genes_neg = get_genes(chr, pos, gtf) - if len(genes_pos)+len(genes_neg)==0: - print("[Line %s]" % lnum, "WARNING, skipping variant: Variant not contained in a gene body. 
Do GTF/FASTA chromosome names match?") - return -1 - - # get splice scores - loss_pos, gain_pos = None, None - if len(genes_pos) > 0: - loss_pos, gain_pos = compute_score(ref_seq, alt_seq, '+', d, models) - loss_neg, gain_neg = None, None - if len(genes_neg) > 0: - loss_neg, gain_neg = compute_score(ref_seq, alt_seq, '-', d, models) +logger = logging.getLogger(__name__) - scores = "" - for (genes, loss, gain) in \ - ((genes_pos,loss_pos,gain_pos),(genes_neg,loss_neg,gain_neg)): - for gene, positions in genes.items(): - warnings = "Warnings:" - positions = np.array(positions) - positions = positions - (pos - d) - - if args.mask == "True" and len(positions) != 0: - positions_filt = positions[(positions>=0) & (positions=len(loss): - s1 = "NA" - else: - s1 = [loss[p1],gain[p1]] - s1 = round(s1[np.argmax(np.abs(s1))],2) - if p2<0 or p2>=len(loss): - s2 = "NA" - else: - s2 = [loss[p2],gain[p2]] - s2 = round(s2[np.argmax(np.abs(s2))],2) - if s1 == "NA" and s2 == "NA": - continue - scores1 += "%s:%s|" % (p1-d, s1) - scores2 += "%s:%s|" % (p2-d, s2) - scores = scores+scores1+scores2 - - elif cutoff != None: - scores = scores+gene+'|' - l, g = np.where(loss<=-cutoff)[0], np.where(gain>=cutoff)[0] - for p, s in zip(np.concatenate([g-d,l-d]), np.concatenate([gain[g],loss[l]])): - scores += "%s:%s|" % (p, round(s,2)) - - else: - scores = scores+gene+'|' - l, g = np.argmin(loss), np.argmax(gain), - scores += "%s:%s|%s:%s|" % (g-d, round(gain[g],2), l-d, round(loss[l],2)) - - scores += warnings - - return scores.strip('|') def main(): parser = argparse.ArgumentParser() - parser.add_argument("variant_file", help="VCF or CSV file with a header (see COLUMN_IDS option).") - parser.add_argument("reference_file", help="FASTA file containing a reference genome sequence.") - parser.add_argument("annotation_file", help="gffutils database file. Can be generated using create_db.py.") - parser.add_argument("output_file", help="Prefix for output file. Will be a VCF/CSV if variant_file is VCF/CSV.") - parser.add_argument("-c", "--column_ids", default="CHROM,POS,REF,ALT", help="(If variant_file is a CSV) Column IDs for: chromosome, variant position, reference bases, and alternative bases. " - "Separate IDs by commas. (Default: CHROM,POS,REF,ALT)") - parser.add_argument("-m", "--mask", default="True", choices=["False","True"], help="If True, splice gains (increases in score) at annotated splice sites and splice losses (decreases in score) at unannotated splice sites will be set to 0. (Default: True)") - parser.add_argument("-s", "--score_cutoff", type=float, help="Output all sites with absolute predicted change in score >= cutoff, instead of only the maximum loss/gain sites.") - parser.add_argument("-d", "--distance", type=int, default=50, help="Number of bases on either side of the variant for which splice scores should be calculated. (Default: 50)") - parser.add_argument("--score_exons", default="False", choices=["False","True"], help="Output changes in score for both splice sites of annotated exons, as long as one splice site is within the considered range (specified by -d). Output will be: gene|site1_pos:score|site2_pos:score|...") + parser.add_argument( + "variant_file", help="VCF or CSV file with a header (see COLUMN_IDS option)." + ) + parser.add_argument( + "reference_file", help="FASTA file containing a reference genome sequence." + ) + parser.add_argument( + "annotation_file", + help="gffutils database file. 
Can be generated using create_db.py.", + ) + parser.add_argument("output_file", help="Name of output file") + parser.add_argument( + "-c", + "--column_ids", + default="CHROM,POS,REF,ALT", + help="(If variant_file is a CSV) Column IDs for: chromosome, variant position, reference bases, and alternative bases. " + "Separate IDs by commas. (Default: CHROM,POS,REF,ALT)", + ) + parser.add_argument( + "-m", + "--mask", + default="True", + choices=["False", "True"], + help="If True, splice gains (increases in score) at annotated splice sites and splice losses (decreases in score) at unannotated splice sites will be set to 0. (Default: True)", + ) + parser.add_argument( + "-s", + "--score_cutoff", + type=float, + help="Output all sites with absolute predicted change in score >= cutoff, instead of only the maximum loss/gain sites.", + ) + parser.add_argument( + "-d", + "--distance", + type=int, + default=50, + help="Number of bases on either side of the variant for which splice scores should be calculated. (Default: 50)", + ) + parser.add_argument( + "-b", + "--batch_size", + type=int, + default=0, + help="Number of variants to batch together", + ) + parser.add_argument( + "-v", + "--verbose", + default=False, + action="store_true", + help="Enable additional debugging output", + ) + parser.add_argument( + "--enable_gtf_cache", + default=False, + action="store_true", + help="Enable GTF db in memory caching, useful for large batches", + ) + parser.add_argument( + "--score_exons", + default="False", + choices=["False", "True"], + help="Output changes in score for both splice sites of annotated exons, as long as one splice site is within the considered range (specified by -d). Output will be: gene|site1_pos:score|site2_pos:score|...", + ) args = parser.parse_args() - variants = args.variant_file - gtf = args.annotation_file - try: - gtf = gffutils.FeatureDB(gtf) - except: - print("ERROR, annotation_file could not be opened. Is it a gffutils database file?") - exit() + log_level = logging.INFO + if args.verbose: + log_level = logging.DEBUG - if torch.cuda.is_available(): - print("Using GPU") - else: - print("Using CPU") - - models = [] - for i in [0,2,4,6]: - for j in range(1,4): - model = Pangolin(L, W, AR) - if torch.cuda.is_available(): - model.cuda() - weights = torch.load(resource_filename(__name__,"models/final.%s.%s.3.v2" % (j, i))) - else: - weights = torch.load(resource_filename(__name__,"models/final.%s.%s.3.v2" % (j, i)), map_location=torch.device('cpu')) - model.load_state_dict(weights) - model.eval() - models.append(model) - - if variants.endswith(".vcf"): - lnum = 0 - # count the number of header lines - for line in open(variants, 'r'): - lnum += 1 - if line[0] != '#': - break - - variants = vcf.Reader(filename=variants) - variants.infos["Pangolin"] = vcf.parser._Info( - "Pangolin",'.',"String","Pangolin splice scores. 
" - "Format: gene|pos:score_change|pos:score_change|...",'.','.') - fout = vcf.Writer(open(args.output_file+".vcf", 'w'), variants) + logging.basicConfig( + format="%(processName)s %(threadName)s %(asctime)s %(levelname)s %(name)s: - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=log_level, + ) - for i, variant in enumerate(variants): - scores = process_variant(lnum+i, str(variant.CHROM), int(variant.POS), variant.REF, str(variant.ALT[0]), gtf, models, args) - if scores != -1: - variant.INFO["Pangolin"] = scores - fout.write_record(variant) - fout.flush() + start_time = time.time() - fout.close() + if torch.cuda.is_available(): + logger.info("Using GPU") + else: + logger.info("Using CPU") - elif variants.endswith(".csv"): - col_ids = args.column_ids.split(',') - variants = pd.read_csv(variants, header=0) - fout = open(args.output_file+".csv", 'w') - fout.write(','.join(variants.columns)+',Pangolin\n') - fout.flush() + app_config = AppConfig.from_args(args) - for lnum, variant in variants.iterrows(): - chr, pos, ref, alt = variant[col_ids] - ref, alt = ref.upper(), alt.upper() - scores = process_variant(lnum+1, str(chr), int(pos), ref, alt, gtf, models, args) - if scores == -1: - fout.write(','.join(variant.to_csv(header=False, index=False).split('\n'))+'\n') - else: - fout.write(','.join(variant.to_csv(header=False, index=False).split('\n'))+scores+'\n') - fout.flush() + logger.info(f"Using config : {asdict(app_config)}") - fout.close() + process_variants_file(app_config) - else: - print("ERROR, variant_file needs to be a CSV or VCF.") + print(f"Execution time in seconds: {time.time() - start_time:.2f}") - # executionTime = (time.time() - startTime) - # print('Execution time in seconds: ' + str(executionTime)) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/pangolin/processors.py b/pangolin/processors.py new file mode 100644 index 0000000..cfe1650 --- /dev/null +++ b/pangolin/processors.py @@ -0,0 +1,171 @@ +import logging +from typing import Callable, List, Union + +import gffutils +import pandas as pd +import pysam +import typing + +from pysam import VariantFile + +from pangolin.batch import PredictionBatch +from pangolin.legacy import process_variant_legacy +from pangolin.model import load_models +from pangolin.data_models import Variant, AppConfig + +logger = logging.getLogger(__name__) + + +def process_variants_file(app_config: AppConfig) -> None: + models = load_models() + batch = PredictionBatch(models, app_config) + if app_config.variant_file.endswith(".vcf"): + process_vcf(batch, models, app_config) + elif app_config.variant_file.endswith(".csv"): + process_csv(batch, models, app_config) + else: + raise RuntimeError("ERROR, variant_file needs to be a CSV or VCF.") + + +def handle_batch( + batch: PredictionBatch, + original_records: List, + writer: Callable, + fout: Union[typing.TextIO, VariantFile], +) -> None: + for prepared_record, original_record in zip( + batch.prepared_records, original_records + ): + writer(original_record, prepared_record.score, fout) + + +def vcf_writer(original_record, score: str, fout: VariantFile) -> None: + if score != "": + original_record.info["Pangolin"] = score + fout.write(original_record) + + +def csv_writer(original_record, score: str, fout: typing.TextIO) -> None: + if score == "": + fout.write( + ",".join(original_record.to_csv(header=False, index=False).split("\n")) + + "\n" + ) + else: + fout.write( + ",".join(original_record.to_csv(header=False, index=False).split("\n")) + + score + + "\n" + ) + + +def 
process_vcf(batch: PredictionBatch, models: List, app_config: AppConfig): + input_vcf = pysam.VariantFile(app_config.variant_file) + header = input_vcf.header + header.add_line( + '##INFO=' + ) + fout = pysam.VariantFile(app_config.output_file, "w", header=header) + + # NOTE: Only used in non batching mode + gtf = gffutils.FeatureDB(app_config.annotation_file) + + original_records = [] + for i, variant in enumerate(input_vcf): + if app_config.batch_size > 0: + # Store original VCF row + original_records.append(variant) + # NOTE: Only single alts are supported here + if len(variant.alts) > 1: + raise RuntimeError( + f"Only single ALTs are supported for VCF predictions" + ) + v = Variant( + i, + chr=str(variant.chrom), + pos=int(variant.pos), + ref=variant.ref, + alt=variant.alts[0], + ) + batch.add_variant(v) + if batch.did_run_predictions: + handle_batch(batch, original_records, vcf_writer, fout) + original_records.clear() + batch.clear_batch() + else: + # This is the original path through the code + scores = process_variant_legacy( + i, + str(variant.chrom), + int(variant.pos), + variant.ref, + str(variant.alts[0]), + gtf, + models, + app_config, + ) + if scores != -1: + variant.info["Pangolin"] = scores + fout.write(variant) + + if app_config.batch_size > 0: + batch.finish() + handle_batch(batch, original_records, vcf_writer, fout) + + fout.close() + print(f"Wrote results to: {app_config.output_file}") + + +def process_csv(batch: PredictionBatch, models: List, app_config: AppConfig): + col_ids = app_config.column_ids.split(",") + variants = pd.read_csv(app_config.variant_file, header=0) + fout = open(app_config.output_file, "w") + fout.write(",".join(variants.columns) + ",Pangolin\n") + fout.flush() + + # NOTE: Only used in non batching mode + gtf = gffutils.FeatureDB(app_config.annotation_file) + + # Store original record here to use again when batching is completed + original_records = [] + + for lnum, variant in variants.iterrows(): + lnum = typing.cast(int, lnum) # Used to solve type hinting issues + chr, pos, ref, alt = variant[col_ids] + ref, alt = ref.upper(), alt.upper() + + # Only do the batching if the batch size is set + if app_config.batch_size > 0: + # Store original CSV record + original_records.append(variant) + v = Variant(lnum=lnum, chr=str(chr), pos=int(pos), ref=ref, alt=alt) + + batch.add_variant(v) + if batch.did_run_predictions: + handle_batch(batch, original_records, csv_writer, fout) + original_records.clear() + batch.clear_batch() + else: + scores = process_variant_legacy( + lnum + 1, str(chr), int(pos), ref, alt, gtf, models, app_config + ) + if scores == -1: + fout.write( + ",".join(variant.to_csv(header=False, index=False).split("\n")) + + "\n" + ) + else: + fout.write( + ",".join(variant.to_csv(header=False, index=False).split("\n")) + + scores + + "\n" + ) + fout.flush() + + if app_config.batch_size > 0: + batch.finish() + handle_batch(batch, original_records, csv_writer, fout) + + fout.close() + print(f"Wrote results to: {app_config.output_file}") diff --git a/pangolin/utils.py b/pangolin/utils.py new file mode 100644 index 0000000..ffc539f --- /dev/null +++ b/pangolin/utils.py @@ -0,0 +1,277 @@ +import logging +import time +from typing import Tuple + +import numpy as np +from pyfaidx import Fasta +import torch + +from pangolin.batch import Variant, PreppedVariant +from pangolin.data_models import VariantEncodings, AppConfig, TimingDetails +from pangolin.genes import GeneAnnotator + +logger = logging.getLogger(__name__) + + +IN_MAP = np.asarray( + [[0, 0, 
0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]] +) + + +def compute_score(ref_seq, alt_seq, strand, d, models): + ref_seq = one_hot_encode(ref_seq, strand).T + ref_seq = torch.from_numpy(np.expand_dims(ref_seq, axis=0)).float() + alt_seq = one_hot_encode(alt_seq, strand).T + alt_seq = torch.from_numpy(np.expand_dims(alt_seq, axis=0)).float() + + if torch.cuda.is_available(): + ref_seq = ref_seq.to(torch.device("cuda")) + alt_seq = alt_seq.to(torch.device("cuda")) + + pangolin = [] + for j in range(4): + score = [] + for model in models[3 * j : 3 * j + 3]: + with torch.no_grad(): + ref = model(ref_seq)[0][[1, 4, 7, 10][j], :].cpu().numpy() + alt = model(alt_seq)[0][[1, 4, 7, 10][j], :].cpu().numpy() + if strand == "-": + ref = ref[::-1] + alt = alt[::-1] + l = 2 * d + 1 + ndiff = np.abs(len(ref) - len(alt)) + if len(ref) > len(alt): + alt = np.concatenate( + [alt[0 : l // 2 + 1], np.zeros(ndiff), alt[l // 2 + 1 :]] + ) + elif len(ref) < len(alt): + alt = np.concatenate( + [ + alt[0 : l // 2], + np.max(alt[l // 2 : l // 2 + ndiff + 1], keepdims=True), + alt[l // 2 + ndiff + 1 :], + ] + ) + score.append(alt - ref) + pangolin.append(np.mean(score, axis=0)) + + pangolin = np.array(pangolin) + loss = pangolin[np.argmin(pangolin, axis=0), np.arange(pangolin.shape[1])] + gain = pangolin[np.argmax(pangolin, axis=0), np.arange(pangolin.shape[1])] + return loss, gain + + +def combine_scores( + variant_pos, + genes_pos, + loss_pos, + gain_pos, + genes_neg, + loss_neg, + gain_neg, + app_config: AppConfig, +) -> str: + all_gene_scores = [] + + for genes, loss, gain in ( + (genes_pos, loss_pos, gain_pos), + (genes_neg, loss_neg, gain_neg), + ): + for gene, positions in genes.items(): + warnings = "Warnings:" + positions = np.array(positions) + positions = positions - (variant_pos - app_config.distance) + + if app_config.mask == "True" and len(positions) != 0: + positions_filt = positions[(positions >= 0) & (positions < len(loss))] + # set splice gain at annotated sites to 0 + gain[positions_filt] = np.minimum(gain[positions_filt], 0) + # set splice loss at unannotated sites to 0 + not_positions = ~np.isin(np.arange(len(loss)), positions_filt) + loss[not_positions] = np.maximum(loss[not_positions], 0) + + elif app_config.mask == "True": + warnings += "NoAnnotatedSitesToMaskForThisGene" + loss[:] = np.maximum(loss[:], 0) + + if app_config.score_exons == "True": + scores1 = gene + "_sites1|" + scores2 = gene + "_sites2|" + + for i in range(len(positions) // 2): + p1, p2 = positions[2 * i], positions[2 * i + 1] + if p1 < 0 or p1 >= len(loss): + s1 = "NA" + else: + s1 = [loss[p1], gain[p1]] + s1 = round(s1[np.argmax(np.abs(s1))], 2) + if p2 < 0 or p2 >= len(loss): + s2 = "NA" + else: + s2 = [loss[p2], gain[p2]] + s2 = round(s2[np.argmax(np.abs(s2))], 2) + if s1 == "NA" and s2 == "NA": + continue + scores1 += "%s:%s|" % (p1 - app_config.distance, s1) + scores2 += "%s:%s|" % (p2 - app_config.distance, s2) + score = scores1 + scores2 + + elif app_config.score_cutoff != None: + score = gene + "|" + l, g = ( + np.where(loss <= -app_config.score_cutoff)[0], + np.where(gain >= app_config.score_cutoff)[0], + ) + for p, s in zip( + np.concatenate([g - app_config.distance, l - app_config.distance]), + np.concatenate([gain[g], loss[l]]), + ): + score += "%s:%s|" % (p, round(s, 2)) + + else: + score = gene + "|" + l, g = ( + np.argmin(loss), + np.argmax(gain), + ) + score += "%s:%s|%s:%s|" % ( + g - app_config.distance, + round(gain[g], 2), + l - app_config.distance, + round(loss[l], 2), + ) + + score += 
warnings + all_gene_scores.append(score.strip("|")) + + return "||".join(all_gene_scores) + + +def one_hot_encode(seq, strand): + seq = seq.upper().replace("A", "1").replace("C", "2") + seq = seq.replace("G", "3").replace("T", "4").replace("N", "0") + if strand == "+": + seq = np.asarray(list(map(int, seq))) + elif strand == "-": + seq = np.asarray(list(map(int, seq[::-1]))) + seq = (5 - seq) % 5 # Reverse complement + return IN_MAP[seq.astype("int8")] + + +def encode_seqs(ref_seq, alt_seq, strand): + ref_seq = one_hot_encode(ref_seq, strand).T + ref_seq = torch.from_numpy(np.expand_dims(ref_seq, axis=0)).float() + alt_seq = one_hot_encode(alt_seq, strand).T + alt_seq = torch.from_numpy(np.expand_dims(alt_seq, axis=0)).float() + return ref_seq, alt_seq + + +def prepare_variant( + variant: Variant, gene_annotator: GeneAnnotator, fasta: Fasta, distance: int +) -> Tuple[PreppedVariant, TimingDetails]: + chr = variant.chr + pos = variant.pos + ref = variant.ref + alt = variant.alt + + empty_timing = TimingDetails() + + skip_message = "" + seq_time = time.time() + if ( + len(set("ACGT").intersection(set(ref))) == 0 + or len(set("ACGT").intersection(set(alt))) == 0 + or (len(ref) != 1 and len(alt) != 1 and len(ref) != len(alt)) + ): + skip_message = "Variant format not supported." + elif len(ref) > 2 * distance: + skip_message = "Deletion too large" + + if skip_message: + return ( + PreppedVariant.with_skip_message( + variant=variant, skip_message=skip_message + ), + empty_timing, + ) + + # try to make vcf chromosomes compatible with reference chromosomes + fasta_keys = fasta.keys() + if chr not in fasta_keys and "chr" + chr in fasta_keys: + variant.chr = "chr" + chr + elif chr not in fasta_keys and chr[3:] in fasta_keys: + variant.chr = chr[3:] + + seq = "" + try: + seq = fasta[chr][pos - 5001 - distance : pos + len(ref) + 4999 + distance].seq + except Exception as e: + logger.exception(e) + skip_message = ( + "Could not get sequence, possibly because the variant is too close to chromosome ends. " + "See error message above." + ) + if skip_message: + return ( + PreppedVariant.with_skip_message( + variant=variant, skip_message=skip_message + ), + empty_timing, + ) + + if seq[5000 + distance : 5000 + distance + len(ref)].upper() != ref: + ref_base = seq[5000 + distance : 5000 + distance + len(ref)] + skip_message = f"Mismatch between FASTA (ref base: {ref_base}) and variant file (ref base: {ref})." + return ( + PreppedVariant.with_skip_message( + variant=variant, skip_message=skip_message + ), + empty_timing, + ) + + ref_seq = seq + alt_seq = seq[: 5000 + distance] + alt + seq[5000 + distance + len(ref) :] + total_seq_time = time.time() - seq_time + + gene_time = time.time() + genes_pos, genes_neg = gene_annotator.get_genes(chr, pos) + if len(genes_pos) + len(genes_neg) == 0: + skip_message = ( + "Variant not contained in a gene body. Do GTF/FASTA chromosome names match?" 
+ ) + return ( + PreppedVariant.with_skip_message( + variant=variant, skip_message=skip_message + ), + empty_timing, + ) + total_gene_time = time.time() - gene_time + + encode_time = time.time() + encoded_ref_pos, encoded_alt_pos, encoded_ref_neg, encoded_alt_neg = "", "", "", "" + if len(genes_pos) > 0: + encoded_ref_pos, encoded_alt_pos = encode_seqs(ref_seq, alt_seq, "+") + if len(genes_neg) > 0: + encoded_ref_neg, encoded_alt_neg = encode_seqs(ref_seq, alt_seq, "-") + total_encode_time = time.time() - encode_time + + prep_timing = TimingDetails( + seq_time=total_seq_time, + gene_time=total_gene_time, + encode_time=total_encode_time, + ) + + return ( + PreppedVariant( + variant=variant, + genes_pos=genes_pos, + genes_neg=genes_neg, + encodings=VariantEncodings( + encoded_ref_neg=encoded_ref_neg, + encoded_ref_pos=encoded_ref_pos, + encoded_alt_pos=encoded_alt_pos, + encoded_alt_neg=encoded_alt_neg, + ), + ), + prep_timing, + ) diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..ec0b44c --- /dev/null +++ b/poetry.lock @@ -0,0 +1,863 @@ +[[package]] +name = "argcomplete" +version = "2.1.1" +description = "Bash tab completion for argparse" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +lint = ["flake8", "mypy"] +test = ["coverage", "flake8", "mypy", "pexpect", "wheel"] + +[[package]] +name = "argh" +version = "0.28.1" +description = "An unobtrusive argparse wrapper with natural syntax" +category = "main" +optional = false +python-versions = ">=3.8" + +[package.extras] +completion = ["argcomplete (>=2.0)"] +docs = ["readthedocs-sphinx-search (==0.2.0)", "sphinx (>=6.1)", "sphinx-pyproject (==0.1.0)", "sphinx_rtd_theme (>=1.2.0)"] +linters = ["pre-commit (>=3.0.4)"] +test = ["iocapture (>=0.1.2)", "pytest (>=7.2)", "pytest-cov (>=4.0)", "tox (>=4.4)"] + +[[package]] +name = "attrs" +version = "22.2.0" +description = "Classes Without Boilerplate" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.extras] +cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] +tests = ["attrs[tests-no-zope]", "zope.interface"] +tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] + +[[package]] +name = "biopython" +version = "1.81" +description = "Freely available tools for computational molecular biology." +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +numpy = "*" + +[[package]] +name = "black" +version = "23.3.0" +description = "The uncompromising code formatter." 
+category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "click" +version = "8.1.3" +description = "Composable command line interface toolkit" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" + +[[package]] +name = "coverage" +version = "7.2.3" +description = "Code coverage measurement for Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "exceptiongroup" +version = "1.1.1" +description = "Backport of PEP 654 (exception groups)" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "gffutils" +version = "0.11.1" +description = "Work with GFF and GTF files in a flexible database framework" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +argcomplete = ">=1.9.4" +argh = ">=0.26.2" +pyfaidx = ">=0.5.5.2" +simplejson = "*" +six = ">=1.12.0" + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "intervaltree" +version = "3.1.0" +description = "Editable interval tree data structure for Python 2 and 3" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +sortedcontainers = ">=2.0,<3.0" + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+category = "dev" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "numpy" +version = "1.24.2" +description = "Fundamental package for array computing in Python" +category = "main" +optional = false +python-versions = ">=3.8" + +[[package]] +name = "nvidia-cublas-cu11" +version = "11.10.3.66" +description = "CUBLAS native runtime libraries" +category = "main" +optional = false +python-versions = ">=3" + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "nvidia-cuda-nvrtc-cu11" +version = "11.7.99" +description = "NVRTC native runtime libraries" +category = "main" +optional = false +python-versions = ">=3" + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "nvidia-cuda-runtime-cu11" +version = "11.7.99" +description = "CUDA Runtime native Libraries" +category = "main" +optional = false +python-versions = ">=3" + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "nvidia-cudnn-cu11" +version = "8.5.0.96" +description = "cuDNN runtime libraries" +category = "main" +optional = false +python-versions = ">=3" + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "packaging" +version = "23.0" +description = "Core utilities for Python packages" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "pandas" +version = "1.5.3" +description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" +optional = false +python-versions = ">=3.8" + +[package.dependencies] +numpy = [ + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, +] +python-dateutil = ">=2.8.1" +pytz = ">=2020.1" + +[package.extras] +test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] + +[[package]] +name = "pathspec" +version = "0.11.1" +description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "platformdirs" +version = "3.3.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pyfaidx" +version = "0.7.2.1" +description = "pyfaidx: efficient pythonic random access to fasta subsequences" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +setuptools = "*" +six = "*" + +[[package]] +name = "pyfastx" +version = "0.8.4" +description = "pyfastx is a python module for fast random access to sequences from plain and gzipped FASTA/Q file" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pysam" +version = "0.20.0" +description = "pysam" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pytest" +version = "7.2.2" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2022.7.1" +description = "World timezone definitions, modern and historical" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "setuptools" +version = "67.6.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "simplejson" +version = "3.18.3" +description = "Simple, fast, 
extensible JSON encoder/decoder for Python" +category = "main" +optional = false +python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "sortedcontainers" +version = "2.4.0" +description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "torch" +version = "1.13.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +category = "main" +optional = false +python-versions = ">=3.7.0" + +[package.dependencies] +nvidia-cublas-cu11 = {version = "11.10.3.66", markers = "platform_system == \"Linux\""} +nvidia-cuda-nvrtc-cu11 = {version = "11.7.99", markers = "platform_system == \"Linux\""} +nvidia-cuda-runtime-cu11 = {version = "11.7.99", markers = "platform_system == \"Linux\""} +nvidia-cudnn-cu11 = {version = "8.5.0.96", markers = "platform_system == \"Linux\""} +typing-extensions = "*" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] + +[[package]] +name = "typing-extensions" +version = "4.5.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "wheel" +version = "0.38.4" +description = "A built-package format for Python" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=3.0.0)"] + +[metadata] +lock-version = "1.1" +python-versions = "^3.8" +content-hash = "12a3bc2ba36b6f619cda0069cce9c4ded268a3e560cd847e518433f296f35c4d" + +[metadata.files] +argcomplete = [ + {file = "argcomplete-2.1.1-py3-none-any.whl", hash = "sha256:17041f55b8c45099428df6ce6d0d282b892471a78c71375d24f227e21c13f8c5"}, + {file = "argcomplete-2.1.1.tar.gz", hash = "sha256:72e08340852d32544459c0c19aad1b48aa2c3a96de8c6e5742456b4f538ca52f"}, +] +argh = [ + {file = "argh-0.28.1-py3-none-any.whl", hash = "sha256:10e7311f3ea54a78a366e5456900d8b81049f44d8d653b524eb90cf7d29a71ee"}, + {file = "argh-0.28.1.tar.gz", hash = "sha256:b2093086f0e809a3ecc24b64a2145309ee8f56d034936cd59e57c558a357329d"}, +] +attrs = [ + {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, + {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, +] +biopython = [ + {file = "biopython-1.81-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ef7c79b65b0b3f3c7dc59e20a7f8ae5758d8e852cb8b9cace590dc5617e348ba"}, + {file = "biopython-1.81-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ebfbce0d91796c7aef422ee9dffe8827e07e5abaa94545e006f1f20e965c80b"}, + {file = "biopython-1.81-cp310-cp310-win32.whl", hash = "sha256:919a2c583cabf9c96d2ae4e1245a6b0376932fb342aca302a0fc198b71ab3275"}, + {file = "biopython-1.81-cp310-cp310-win_amd64.whl", hash = "sha256:b37c0d24191e5c96ca02415a5188551980c83a0d518bbc4ffe3c9a5d1fe0ee81"}, + {file = "biopython-1.81-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7a168709694e10b338718c18d967edd5b56c237dc88642c22275796007a70000"}, + {file = "biopython-1.81-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a51d9c1d1b4b634447535da74a644fae59bc234fbbf9001e2dc6b6fbabb98019"}, + {file = "biopython-1.81-cp311-cp311-win32.whl", hash = "sha256:2f9cfaf16d55ab80d514e7aebe5710dabe4e4ff47ede851031202e33b3249da3"}, + {file = "biopython-1.81-cp311-cp311-win_amd64.whl", hash = "sha256:e41b55edcfd448630e77bf4de66a7235324a8a149621499891da6bd1d5085b9a"}, + {file = "biopython-1.81-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3b36ba1bf6395c09a365c53530c9d71f3617763fa2c1d452b3d8948368c0f1de"}, + {file = "biopython-1.81-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c5c07123ff5f44c9e6b5369df854a38afd3c0c50ef58498a0ae8f7eb799f3e8"}, + {file = "biopython-1.81-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97cbdbed01b2512471f36c74b91658d1dfbdcbf39bc038f6ce5a41c3e60a8fc6"}, + {file = "biopython-1.81-cp37-cp37m-win32.whl", hash = "sha256:35506e39822c52d11cf09a3951e82375ca1bb9303960b4286acf02c9a6f6c4cc"}, + {file = "biopython-1.81-cp37-cp37m-win_amd64.whl", hash = "sha256:793c42a376cd63f62f8a088ce39b7dc6b5c55e4e9031d887c434de1595bfa4b8"}, + {file = "biopython-1.81-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:11d673698b3d0d6589292ea951fb62cb24ea27d273eca0d08dbbd956690f97f5"}, + {file = "biopython-1.81-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:655df416936662c0c8a06a549cb25e1560e1fea5067d850f34fb714b8a3fae6c"}, + {file = "biopython-1.81-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:762c6c43a8486b5fcd07f136a3217b87d24755618b9ea9da1f17124ff44c2ad6"}, + {file = "biopython-1.81-cp38-cp38-win32.whl", hash = "sha256:ee51bb1cd7decffd24da6b76d5e01b7e2fd818ab85cf0c180226cbb5793a3abd"}, + {file = "biopython-1.81-cp38-cp38-win_amd64.whl", hash = "sha256:ccd729249fd5f586dd4c2a3507c2ea2456825d7e615e97c07c409c850eaf4594"}, + {file = "biopython-1.81-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9ba33244f0eff830beaa7240065bdb5095d96fded6599b76bbb9ddab45cd2bbd"}, + {file = "biopython-1.81-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bb0c690c7368f255ed45236bf0f5464b476b8c083c8f634533921af78278261"}, + {file = "biopython-1.81-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65b93b513ce9dd7b2ce058720eadf42cd03f312db3409356efeb93123d1320aa"}, + {file = "biopython-1.81-cp39-cp39-win32.whl", hash = "sha256:811796f8d222aa3869a50e31e54ce62b69106b47cd8bb06934867c0d843297b5"}, + {file = "biopython-1.81-cp39-cp39-win_amd64.whl", hash = "sha256:b09efcb4733c8770f25eab5fe555a96a08f5ab9e1bc36939e08ebf2ffbf3e0f1"}, + {file = "biopython-1.81.tar.gz", hash = "sha256:2cf38112b6d8415ad39d6a611988cd11fb5f33eb09346666a87263beba9614e0"}, +] +black = [ + {file = "black-23.3.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915"}, + {file = "black-23.3.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9"}, + {file = "black-23.3.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2"}, + {file = "black-23.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c"}, + {file = "black-23.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c"}, 
+ {file = "black-23.3.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6"}, + {file = "black-23.3.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b"}, + {file = "black-23.3.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d"}, + {file = "black-23.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70"}, + {file = "black-23.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326"}, + {file = "black-23.3.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b"}, + {file = "black-23.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2"}, + {file = "black-23.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5"}, + {file = "black-23.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961"}, + {file = "black-23.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266"}, + {file = "black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab"}, + {file = "black-23.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb"}, + {file = "black-23.3.0-py3-none-any.whl", hash = "sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4"}, + {file = "black-23.3.0.tar.gz", hash = "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940"}, +] +click = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] +colorama = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] +coverage = [ + {file = "coverage-7.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:e58c0d41d336569d63d1b113bd573db8363bc4146f39444125b7f8060e4e04f5"}, + {file = "coverage-7.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:344e714bd0fe921fc72d97404ebbdbf9127bac0ca1ff66d7b79efc143cf7c0c4"}, + {file = "coverage-7.2.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:974bc90d6f6c1e59ceb1516ab00cf1cdfbb2e555795d49fa9571d611f449bcb2"}, + {file = "coverage-7.2.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0743b0035d4b0e32bc1df5de70fba3059662ace5b9a2a86a9f894cfe66569013"}, + {file = "coverage-7.2.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d0391fb4cfc171ce40437f67eb050a340fdbd0f9f49d6353a387f1b7f9dd4fa"}, + {file = "coverage-7.2.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4a42e1eff0ca9a7cb7dc9ecda41dfc7cbc17cb1d02117214be0561bd1134772b"}, + {file = "coverage-7.2.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:be19931a8dcbe6ab464f3339966856996b12a00f9fe53f346ab3be872d03e257"}, + {file = "coverage-7.2.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:72fcae5bcac3333a4cf3b8f34eec99cea1187acd55af723bcbd559adfdcb5535"}, + {file = "coverage-7.2.3-cp310-cp310-win32.whl", hash = "sha256:aeae2aa38395b18106e552833f2a50c27ea0000122bde421c31d11ed7e6f9c91"}, + {file = "coverage-7.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:83957d349838a636e768251c7e9979e899a569794b44c3728eaebd11d848e58e"}, + {file = "coverage-7.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dfd393094cd82ceb9b40df4c77976015a314b267d498268a076e940fe7be6b79"}, + {file = "coverage-7.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:182eb9ac3f2b4874a1f41b78b87db20b66da6b9cdc32737fbbf4fea0c35b23fc"}, + {file = "coverage-7.2.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bb1e77a9a311346294621be905ea8a2c30d3ad371fc15bb72e98bfcfae532df"}, + {file = "coverage-7.2.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca0f34363e2634deffd390a0fef1aa99168ae9ed2af01af4a1f5865e362f8623"}, + {file = "coverage-7.2.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55416d7385774285b6e2a5feca0af9652f7f444a4fa3d29d8ab052fafef9d00d"}, + {file = "coverage-7.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:06ddd9c0249a0546997fdda5a30fbcb40f23926df0a874a60a8a185bc3a87d93"}, + {file = "coverage-7.2.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fff5aaa6becf2c6a1699ae6a39e2e6fb0672c2d42eca8eb0cafa91cf2e9bd312"}, + {file = "coverage-7.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ea53151d87c52e98133eb8ac78f1206498c015849662ca8dc246255265d9c3c4"}, + {file = "coverage-7.2.3-cp311-cp311-win32.whl", hash = "sha256:8f6c930fd70d91ddee53194e93029e3ef2aabe26725aa3c2753df057e296b925"}, + {file = "coverage-7.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:fa546d66639d69aa967bf08156eb8c9d0cd6f6de84be9e8c9819f52ad499c910"}, + {file = "coverage-7.2.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b2317d5ed777bf5a033e83d4f1389fd4ef045763141d8f10eb09a7035cee774c"}, + {file = "coverage-7.2.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be9824c1c874b73b96288c6d3de793bf7f3a597770205068c6163ea1f326e8b9"}, + {file = "coverage-7.2.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:2c3b2803e730dc2797a017335827e9da6da0e84c745ce0f552e66400abdfb9a1"}, + {file = "coverage-7.2.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f69770f5ca1994cb32c38965e95f57504d3aea96b6c024624fdd5bb1aa494a1"}, + {file = "coverage-7.2.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1127b16220f7bfb3f1049ed4a62d26d81970a723544e8252db0efde853268e21"}, + {file = "coverage-7.2.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:aa784405f0c640940595fa0f14064d8e84aff0b0f762fa18393e2760a2cf5841"}, + {file = "coverage-7.2.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3146b8e16fa60427e03884301bf8209221f5761ac754ee6b267642a2fd354c48"}, + {file = "coverage-7.2.3-cp37-cp37m-win32.whl", hash = "sha256:1fd78b911aea9cec3b7e1e2622c8018d51c0d2bbcf8faaf53c2497eb114911c1"}, + {file = "coverage-7.2.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0f3736a5d34e091b0a611964c6262fd68ca4363df56185902528f0b75dbb9c1f"}, + {file = "coverage-7.2.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:981b4df72c93e3bc04478153df516d385317628bd9c10be699c93c26ddcca8ab"}, + {file = "coverage-7.2.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c0045f8f23a5fb30b2eb3b8a83664d8dc4fb58faddf8155d7109166adb9f2040"}, + {file = "coverage-7.2.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f760073fcf8f3d6933178d67754f4f2d4e924e321f4bb0dcef0424ca0215eba1"}, + {file = "coverage-7.2.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c86bd45d1659b1ae3d0ba1909326b03598affbc9ed71520e0ff8c31a993ad911"}, + {file = "coverage-7.2.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:172db976ae6327ed4728e2507daf8a4de73c7cc89796483e0a9198fd2e47b462"}, + {file = "coverage-7.2.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d2a3a6146fe9319926e1d477842ca2a63fe99af5ae690b1f5c11e6af074a6b5c"}, + {file = "coverage-7.2.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f649dd53833b495c3ebd04d6eec58479454a1784987af8afb77540d6c1767abd"}, + {file = "coverage-7.2.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7c4ed4e9f3b123aa403ab424430b426a1992e6f4c8fd3cb56ea520446e04d152"}, + {file = "coverage-7.2.3-cp38-cp38-win32.whl", hash = "sha256:eb0edc3ce9760d2f21637766c3aa04822030e7451981ce569a1b3456b7053f22"}, + {file = "coverage-7.2.3-cp38-cp38-win_amd64.whl", hash = "sha256:63cdeaac4ae85a179a8d6bc09b77b564c096250d759eed343a89d91bce8b6367"}, + {file = "coverage-7.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:20d1a2a76bb4eb00e4d36b9699f9b7aba93271c9c29220ad4c6a9581a0320235"}, + {file = "coverage-7.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ea748802cc0de4de92ef8244dd84ffd793bd2e7be784cd8394d557a3c751e21"}, + {file = "coverage-7.2.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21b154aba06df42e4b96fc915512ab39595105f6c483991287021ed95776d934"}, + {file = "coverage-7.2.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd214917cabdd6f673a29d708574e9fbdb892cb77eb426d0eae3490d95ca7859"}, + {file = "coverage-7.2.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c2e58e45fe53fab81f85474e5d4d226eeab0f27b45aa062856c89389da2f0d9"}, + {file = "coverage-7.2.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:87ecc7c9a1a9f912e306997ffee020297ccb5ea388421fe62a2a02747e4d5539"}, + {file = "coverage-7.2.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:387065e420aed3c71b61af7e82c7b6bc1c592f7e3c7a66e9f78dd178699da4fe"}, + {file = "coverage-7.2.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ea3f5bc91d7d457da7d48c7a732beaf79d0c8131df3ab278e6bba6297e23c6c4"}, + {file = "coverage-7.2.3-cp39-cp39-win32.whl", hash = "sha256:ae7863a1d8db6a014b6f2ff9c1582ab1aad55a6d25bac19710a8df68921b6e30"}, + {file = "coverage-7.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:3f04becd4fcda03c0160d0da9c8f0c246bc78f2f7af0feea1ec0930e7c93fa4a"}, + {file = "coverage-7.2.3-pp37.pp38.pp39-none-any.whl", hash = "sha256:965ee3e782c7892befc25575fa171b521d33798132692df428a09efacaffe8d0"}, + {file = "coverage-7.2.3.tar.gz", hash = "sha256:d298c2815fa4891edd9abe5ad6e6cb4207104c7dd9fd13aea3fdebf6f9b91259"}, +] +exceptiongroup = [ + {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, + {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, +] +gffutils = [ + {file = "gffutils-0.11.1.tar.gz", hash = "sha256:ca7bf814d600b389bb2d5c403dd279755118cb1476c19c6f7aecb8c51a84263c"}, +] +iniconfig = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] +intervaltree = [ + {file = "intervaltree-3.1.0.tar.gz", hash = "sha256:902b1b88936918f9b2a19e0e5eb7ccb430ae45cde4f39ea4b36932920d33952d"}, +] +mypy-extensions = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] +numpy = [ + {file = "numpy-1.24.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eef70b4fc1e872ebddc38cddacc87c19a3709c0e3e5d20bf3954c147b1dd941d"}, + {file = "numpy-1.24.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8d2859428712785e8a8b7d2b3ef0a1d1565892367b32f915c4a4df44d0e64f5"}, + {file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6524630f71631be2dabe0c541e7675db82651eb998496bbe16bc4f77f0772253"}, + {file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a51725a815a6188c662fb66fb32077709a9ca38053f0274640293a14fdd22978"}, + {file = "numpy-1.24.2-cp310-cp310-win32.whl", hash = "sha256:2620e8592136e073bd12ee4536149380695fbe9ebeae845b81237f986479ffc9"}, + {file = "numpy-1.24.2-cp310-cp310-win_amd64.whl", hash = "sha256:97cf27e51fa078078c649a51d7ade3c92d9e709ba2bfb97493007103c741f1d0"}, + {file = "numpy-1.24.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7de8fdde0003f4294655aa5d5f0a89c26b9f22c0a58790c38fae1ed392d44a5a"}, + {file = "numpy-1.24.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4173bde9fa2a005c2c6e2ea8ac1618e2ed2c1c6ec8a7657237854d42094123a0"}, + {file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cecaed30dc14123020f77b03601559fff3e6cd0c048f8b5289f4eeabb0eb281"}, + {file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a23f8440561a633204a67fb44617ce2a299beecf3295f0d13c495518908e910"}, + {file = 
"numpy-1.24.2-cp311-cp311-win32.whl", hash = "sha256:e428c4fbfa085f947b536706a2fc349245d7baa8334f0c5723c56a10595f9b95"}, + {file = "numpy-1.24.2-cp311-cp311-win_amd64.whl", hash = "sha256:557d42778a6869c2162deb40ad82612645e21d79e11c1dc62c6e82a2220ffb04"}, + {file = "numpy-1.24.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d0a2db9d20117bf523dde15858398e7c0858aadca7c0f088ac0d6edd360e9ad2"}, + {file = "numpy-1.24.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c72a6b2f4af1adfe193f7beb91ddf708ff867a3f977ef2ec53c0ffb8283ab9f5"}, + {file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c29e6bd0ec49a44d7690ecb623a8eac5ab8a923bce0bea6293953992edf3a76a"}, + {file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2eabd64ddb96a1239791da78fa5f4e1693ae2dadc82a76bc76a14cbb2b966e96"}, + {file = "numpy-1.24.2-cp38-cp38-win32.whl", hash = "sha256:e3ab5d32784e843fc0dd3ab6dcafc67ef806e6b6828dc6af2f689be0eb4d781d"}, + {file = "numpy-1.24.2-cp38-cp38-win_amd64.whl", hash = "sha256:76807b4063f0002c8532cfeac47a3068a69561e9c8715efdad3c642eb27c0756"}, + {file = "numpy-1.24.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4199e7cfc307a778f72d293372736223e39ec9ac096ff0a2e64853b866a8e18a"}, + {file = "numpy-1.24.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:adbdce121896fd3a17a77ab0b0b5eedf05a9834a18699db6829a64e1dfccca7f"}, + {file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:889b2cc88b837d86eda1b17008ebeb679d82875022200c6e8e4ce6cf549b7acb"}, + {file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64bb98ac59b3ea3bf74b02f13836eb2e24e48e0ab0145bbda646295769bd780"}, + {file = "numpy-1.24.2-cp39-cp39-win32.whl", hash = "sha256:63e45511ee4d9d976637d11e6c9864eae50e12dc9598f531c035265991910468"}, + {file = "numpy-1.24.2-cp39-cp39-win_amd64.whl", hash = "sha256:a77d3e1163a7770164404607b7ba3967fb49b24782a6ef85d9b5f54126cc39e5"}, + {file = "numpy-1.24.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92011118955724465fb6853def593cf397b4a1367495e0b59a7e69d40c4eb71d"}, + {file = "numpy-1.24.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9006288bcf4895917d02583cf3411f98631275bc67cce355a7f39f8c14338fa"}, + {file = "numpy-1.24.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:150947adbdfeceec4e5926d956a06865c1c690f2fd902efede4ca6fe2e657c3f"}, + {file = "numpy-1.24.2.tar.gz", hash = "sha256:003a9f530e880cb2cd177cba1af7220b9aa42def9c4afc2a2fc3ee6be7eb2b22"}, +] +nvidia-cublas-cu11 = [ + {file = "nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl", hash = "sha256:d32e4d75f94ddfb93ea0a5dda08389bcc65d8916a25cb9f37ac89edaeed3bded"}, + {file = "nvidia_cublas_cu11-11.10.3.66-py3-none-win_amd64.whl", hash = "sha256:8ac17ba6ade3ed56ab898a036f9ae0756f1e81052a317bf98f8c6d18dc3ae49e"}, +] +nvidia-cuda-nvrtc-cu11 = [ + {file = "nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:9f1562822ea264b7e34ed5930567e89242d266448e936b85bc97a3370feabb03"}, + {file = "nvidia_cuda_nvrtc_cu11-11.7.99-py3-none-manylinux1_x86_64.whl", hash = "sha256:f7d9610d9b7c331fa0da2d1b2858a4a8315e6d49765091d28711c8946e7425e7"}, + {file = "nvidia_cuda_nvrtc_cu11-11.7.99-py3-none-win_amd64.whl", hash = "sha256:f2effeb1309bdd1b3854fc9b17eaf997808f8b25968ce0c7070945c4265d64a3"}, +] +nvidia-cuda-runtime-cu11 = [ + {file = "nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl", hash = 
"sha256:cc768314ae58d2641f07eac350f40f99dcb35719c4faff4bc458a7cd2b119e31"}, + {file = "nvidia_cuda_runtime_cu11-11.7.99-py3-none-win_amd64.whl", hash = "sha256:bc77fa59a7679310df9d5c70ab13c4e34c64ae2124dd1efd7e5474b71be125c7"}, +] +nvidia-cudnn-cu11 = [ + {file = "nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:402f40adfc6f418f9dae9ab402e773cfed9beae52333f6d86ae3107a1b9527e7"}, + {file = "nvidia_cudnn_cu11-8.5.0.96-py3-none-manylinux1_x86_64.whl", hash = "sha256:71f8111eb830879ff2836db3cccf03bbd735df9b0d17cd93761732ac50a8a108"}, +] +packaging = [ + {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, + {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, +] +pandas = [ + {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3749077d86e3a2f0ed51367f30bf5b82e131cc0f14260c4d3e499186fccc4406"}, + {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:972d8a45395f2a2d26733eb8d0f629b2f90bebe8e8eddbb8829b180c09639572"}, + {file = "pandas-1.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:50869a35cbb0f2e0cd5ec04b191e7b12ed688874bd05dd777c19b28cbea90996"}, + {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ac844a0fe00bfaeb2c9b51ab1424e5c8744f89860b138434a363b1f620f354"}, + {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a0a56cef15fd1586726dace5616db75ebcfec9179a3a55e78f72c5639fa2a23"}, + {file = "pandas-1.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:478ff646ca42b20376e4ed3fa2e8d7341e8a63105586efe54fa2508ee087f328"}, + {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6973549c01ca91ec96199e940495219c887ea815b2083722821f1d7abfa2b4dc"}, + {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c39a8da13cede5adcd3be1182883aea1c925476f4e84b2807a46e2775306305d"}, + {file = "pandas-1.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f76d097d12c82a535fda9dfe5e8dd4127952b45fea9b0276cb30cca5ea313fbc"}, + {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e474390e60ed609cec869b0da796ad94f420bb057d86784191eefc62b65819ae"}, + {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f2b952406a1588ad4cad5b3f55f520e82e902388a6d5a4a91baa8d38d23c7f6"}, + {file = "pandas-1.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:bc4c368f42b551bf72fac35c5128963a171b40dce866fb066540eeaf46faa003"}, + {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:14e45300521902689a81f3f41386dc86f19b8ba8dd5ac5a3c7010ef8d2932813"}, + {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9842b6f4b8479e41968eced654487258ed81df7d1c9b7b870ceea24ed9459b31"}, + {file = "pandas-1.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:26d9c71772c7afb9d5046e6e9cf42d83dd147b5cf5bcb9d97252077118543792"}, + {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fbcb19d6fceb9e946b3e23258757c7b225ba450990d9ed63ccceeb8cae609f7"}, + {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:565fa34a5434d38e9d250af3c12ff931abaf88050551d9fbcdfafca50d62babf"}, + {file = "pandas-1.5.3-cp38-cp38-win32.whl", hash = "sha256:87bd9c03da1ac870a6d2c8902a0e1fd4267ca00f13bc494c9e5a9020920e1d51"}, + 
{file = "pandas-1.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:41179ce559943d83a9b4bbacb736b04c928b095b5f25dd2b7389eda08f46f373"}, + {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c74a62747864ed568f5a82a49a23a8d7fe171d0c69038b38cedf0976831296fa"}, + {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c4c00e0b0597c8e4f59e8d461f797e5d70b4d025880516a8261b2817c47759ee"}, + {file = "pandas-1.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a50d9a4336a9621cab7b8eb3fb11adb82de58f9b91d84c2cd526576b881a0c5a"}, + {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd05f7783b3274aa206a1af06f0ceed3f9b412cf665b7247eacd83be41cf7bf0"}, + {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f69c4029613de47816b1bb30ff5ac778686688751a5e9c99ad8c7031f6508e5"}, + {file = "pandas-1.5.3-cp39-cp39-win32.whl", hash = "sha256:7cec0bee9f294e5de5bbfc14d0573f65526071029d036b753ee6507d2a21480a"}, + {file = "pandas-1.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:dfd681c5dc216037e0b0a2c821f5ed99ba9f03ebcf119c7dac0e9a7b960b9ec9"}, + {file = "pandas-1.5.3.tar.gz", hash = "sha256:74a3fd7e5a7ec052f183273dc7b0acd3a863edf7520f5d3a1765c04ffdb3b0b1"}, +] +pathspec = [ + {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"}, + {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"}, +] +platformdirs = [ + {file = "platformdirs-3.3.0-py3-none-any.whl", hash = "sha256:ea61fd7b85554beecbbd3e9b37fb26689b227ffae38f73353cbcc1cf8bd01878"}, + {file = "platformdirs-3.3.0.tar.gz", hash = "sha256:64370d47dc3fca65b4879f89bdead8197e93e05d696d6d1816243ebae8595da5"}, +] +pluggy = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] +pyfaidx = [ + {file = "pyfaidx-0.7.2.1-py3-none-any.whl", hash = "sha256:eee13d35bb5f2aa65932a9ad9dd74fa695aefe6e0baafc5836cfa869a7695acc"}, + {file = "pyfaidx-0.7.2.1.tar.gz", hash = "sha256:30f0d20a9e3d53353fb20eb69b7e22e6f01a53ed4f21b3e17dd408f0be5051a0"}, +] +pyfastx = [ + {file = "pyfastx-0.8.4-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:bb67f42735e72d8b14b28590fcb0ced1a98cd9005413a0898c7cfa6c5bc1a5c6"}, + {file = "pyfastx-0.8.4-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:85c0edb900848de5a25a0fefab30af314abb8a7b05900173a96508bfd3571ddb"}, + {file = "pyfastx-0.8.4-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:3b4ae3809b71b72d59347830fa87412cf96c2f0b837a44cca1bb91d51b7eebf5"}, + {file = "pyfastx-0.8.4-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:dc4207c19c3dc3ec4ce76661904f31a78ec99756b3ef1b656f5712b0ec7432b9"}, + {file = "pyfastx-0.8.4-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:6b3c71b8acd850fd4b662728cb356d92b4789672efe2f5d2791becbc4705d27d"}, + {file = "pyfastx-0.8.4-cp35-cp35m-win32.whl", hash = "sha256:82fb7340f628cffc13751cb8ddf4539d96b1542e9b9dd21fc368b8f3b35024b7"}, + {file = "pyfastx-0.8.4-cp35-cp35m-win_amd64.whl", hash = "sha256:6bf0dfc3fd1e7af8ef0777dbd20635e6e35db832af5c461bb0e676ff02e7552f"}, + {file = "pyfastx-0.8.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:53a1b2d91c2a9c364579aff6305b413b6a9eac1cf739ffaa6182d0bdf6ec3dd6"}, + {file = "pyfastx-0.8.4-cp36-cp36m-manylinux1_i686.whl", hash = 
"sha256:5966ea5788eac13a41816eb91348c26ac83e0dde677e0a1a875b24e73f88363e"}, + {file = "pyfastx-0.8.4-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1857354f75a5c4dbff2cdf2b885c5eb58221dc72e539320ff75814d591aa0e8b"}, + {file = "pyfastx-0.8.4-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:a12a8046e8585e8159403a95e110cec7020a79a6ceeffe6c1ad814b9365df6b9"}, + {file = "pyfastx-0.8.4-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:ca06602d642d6fce692d9cf76003a2010d0791ca9a4f1c4ad26226a378866787"}, + {file = "pyfastx-0.8.4-cp36-cp36m-win32.whl", hash = "sha256:6364813d2d32bb52c2e8967a3fbeeb063e562942a47ee7966c5d0555aa39245b"}, + {file = "pyfastx-0.8.4-cp36-cp36m-win_amd64.whl", hash = "sha256:45839975776f9217f66925cfb319d6780e1cdbb1bc91c23fff4e43411f1623a9"}, + {file = "pyfastx-0.8.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8d4085c767550ba98e4b93cb1c7fb10fa4948347ae69460fdca22d65223755d0"}, + {file = "pyfastx-0.8.4-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:4f7d4c41cf5597622de4e6e28b7dd242f6ed14a017134076264c2461ef0952ec"}, + {file = "pyfastx-0.8.4-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:ca57156dd6bb8dc03d91ba6bb71513b3cc197d15dcc626b085f384f90ef73f19"}, + {file = "pyfastx-0.8.4-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:5983dd1153485cbd0803b0c1cd2240c9156a5827f26d62d7cb34c74d269885d6"}, + {file = "pyfastx-0.8.4-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:04ac4f5fc5c514e75fdbdaf783df770187221574277a38f5f27aa4d4ee849fd0"}, + {file = "pyfastx-0.8.4-cp37-cp37m-win32.whl", hash = "sha256:e3552a6a80d91355b29fbe562126270210587899a6514374eda4336967a4586e"}, + {file = "pyfastx-0.8.4-cp37-cp37m-win_amd64.whl", hash = "sha256:3fcfde0890142c8f73872f6239bfab5f15d89276a2bb09ef18dabca0555c8038"}, + {file = "pyfastx-0.8.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bebc53c878fa6d2a15d5dd574e760c1ebc93e40c46d27872e2d5f0fb28b63dd2"}, + {file = "pyfastx-0.8.4-cp38-cp38-manylinux1_i686.whl", hash = "sha256:c1693f7448e3e0dbd4fa60e7bea80720b953044d29b3fc191a4a3534d194ff29"}, + {file = "pyfastx-0.8.4-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:ab071008c3a0d5a721891a2510067b475699dfd617446d0ac97a588a3a2d7471"}, + {file = "pyfastx-0.8.4-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:88eaef44817d0932e1abd32c63015cb9a7400a1be783e72b0f9746926d8f3cfb"}, + {file = "pyfastx-0.8.4-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:bd4bfbd9d1a7e796e0e2f558beddfa04f141467a82df76861b7c64d5750d62ac"}, + {file = "pyfastx-0.8.4-cp38-cp38-win32.whl", hash = "sha256:6dbde773e8f1bb7a1a7def1f505519c577edce631e59a00cbf4080c3d80ae9a7"}, + {file = "pyfastx-0.8.4-cp38-cp38-win_amd64.whl", hash = "sha256:cc06070a09a5d55c8a15ab23ec5c119dc25ed3c4fd9956da36c0d252e8746290"}, + {file = "pyfastx-0.8.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0760041447904aefd647df8ce2f4d008059324e6aa275b703c6c2ac1334b40cf"}, + {file = "pyfastx-0.8.4-cp39-cp39-manylinux1_i686.whl", hash = "sha256:51e9fffa705992a4ac558cc399405e003b5b87cbdbe06f366ad4891511777dac"}, + {file = "pyfastx-0.8.4-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:ca38b8c0f97dde4ebf8f221c52da13b4559722e32f209a886f4061597a5df21d"}, + {file = "pyfastx-0.8.4-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:4b0718ea0b64e76588cb6e8a2427559db78cef50ead70097a3cd8994571f4f7d"}, + {file = "pyfastx-0.8.4-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:de6027a1c04c9cf1cab90167fc1f20097b4503485e6a142fa0c473aa9d00d45a"}, + {file = "pyfastx-0.8.4-cp39-cp39-win_amd64.whl", hash = 
"sha256:1563bc974bce54f586689df185aae03aef72ae5ed79ded9340254ea94507d764"}, + {file = "pyfastx-0.8.4.tar.gz", hash = "sha256:20cee9faff140f973c59fbe98121eac2d67acf3eb7fef5fdf69a8b4942b4468c"}, +] +pysam = [ + {file = "pysam-0.20.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d919f40db3027f092bb39177aecbb49a02e2fd746bb5adfbe48eb839b2225e51"}, + {file = "pysam-0.20.0-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:7145694675a0cfe0c04abb5582c70b3f6a19d6b30e6835931016afd57d423719"}, + {file = "pysam-0.20.0-cp310-cp310-manylinux_2_24_i686.whl", hash = "sha256:fa98bd2e6bf1252dac7c275fe7c34bbc125644b781a6196bfe25cc078c6cb341"}, + {file = "pysam-0.20.0-cp310-cp310-manylinux_2_24_x86_64.whl", hash = "sha256:12c56353739f2b76266407502e06127235197030a8e11188cb80693ca46321d1"}, + {file = "pysam-0.20.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d4744e162476a62fab9458aa3d1e2e51614e0f919e4578c14e986c7e7cab377e"}, + {file = "pysam-0.20.0-cp36-cp36m-manylinux_2_24_aarch64.whl", hash = "sha256:e61c3a68fb254ffd2c34ce956277615663c5ecab7a30e6308744873984794330"}, + {file = "pysam-0.20.0-cp36-cp36m-manylinux_2_24_i686.whl", hash = "sha256:ff15d6a6ac29541d5dee30ea8233356c43a5f3a99886451fd0188b80daa0422d"}, + {file = "pysam-0.20.0-cp36-cp36m-manylinux_2_24_x86_64.whl", hash = "sha256:29d1a2c60944f1bc46b9324f9e78dd343fd6a8de039badff71df43df960e223c"}, + {file = "pysam-0.20.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7e017a3b8baeee7c0df7f666138e8bf5a73f9805c2ab6287a2a4d5351f6822d5"}, + {file = "pysam-0.20.0-cp37-cp37m-manylinux_2_24_aarch64.whl", hash = "sha256:7c57867c80af3b5c3a4ae391ec86b914bb6361bb5cd41d985cbe06a75163188c"}, + {file = "pysam-0.20.0-cp37-cp37m-manylinux_2_24_i686.whl", hash = "sha256:38f292478f32cbf6f2981021f7c2a961956e9b49141493557ace79810221c4e4"}, + {file = "pysam-0.20.0-cp37-cp37m-manylinux_2_24_x86_64.whl", hash = "sha256:3d8c86ae4413c25d047aa4e9529b2adc366ecfeb1eb3f0098c525705314a0332"}, + {file = "pysam-0.20.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3b8a07732549ff10dfad8b0db7663076b39727a558e1f6d06ab5c6819f3cae9f"}, + {file = "pysam-0.20.0-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:d958ce70865869f2aa8c8c0880ad451bafd4d5a8c94fb78a269ab913a57d9303"}, + {file = "pysam-0.20.0-cp38-cp38-manylinux_2_24_i686.whl", hash = "sha256:93f0ea6b2050ad470b5b1cdd19fae0b88afd5ae48ee6e66a0dcd054b61e9fba4"}, + {file = "pysam-0.20.0-cp38-cp38-manylinux_2_24_x86_64.whl", hash = "sha256:e14e33703bdb8ed812ab16b5c816ce68ffee2ae2a19906efdc5732c3e446791e"}, + {file = "pysam-0.20.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f5aefffd4ac1fad35b720cb7a1663be9bdb18376d0d361d33a744254a0da8e96"}, + {file = "pysam-0.20.0-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:3ed4dbf8613007daf8b127c32743206126badc35822730de767d86f9ead445ad"}, + {file = "pysam-0.20.0-cp39-cp39-manylinux_2_24_i686.whl", hash = "sha256:7ca81b7e5af5f3cdac460e0ce59a0eab11fdbe1a216ddee6f3172aa16445ae54"}, + {file = "pysam-0.20.0-cp39-cp39-manylinux_2_24_x86_64.whl", hash = "sha256:a2d8f2e15934100ce6b380659af884066d5ebffa69e36025b4029f8c9e8b3adc"}, + {file = "pysam-0.20.0.tar.gz", hash = "sha256:7cc250148ba0ffc9bdc38db6988b91e13b75db0d11c18cf1336467d1c97dd312"}, +] +pytest = [ + {file = "pytest-7.2.2-py3-none-any.whl", hash = "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e"}, + {file = "pytest-7.2.2.tar.gz", hash = "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"}, +] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = 
"sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] +pytz = [ + {file = "pytz-2022.7.1-py2.py3-none-any.whl", hash = "sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a"}, + {file = "pytz-2022.7.1.tar.gz", hash = "sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0"}, +] +setuptools = [ + {file = "setuptools-67.6.0-py3-none-any.whl", hash = "sha256:b78aaa36f6b90a074c1fa651168723acbf45d14cb1196b6f02c0fd07f17623b2"}, + {file = "setuptools-67.6.0.tar.gz", hash = "sha256:2ee892cd5f29f3373097f5a814697e397cf3ce313616df0af11231e2ad118077"}, +] +simplejson = [ + {file = "simplejson-3.18.3-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:50f4b6d52f3a2d1cffd11834a1fe7f9516f0e3f20cbe78027aa88ff990fad7d6"}, + {file = "simplejson-3.18.3-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:169c2c7446ef33439c304a6aa5b7b5a2dbc938c9c2dd882dd3f2553f9518ebf6"}, + {file = "simplejson-3.18.3-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:56f186d44a9f625b5e5d9ba4b9551e263604000a7df60cb373b3e789ca603b2a"}, + {file = "simplejson-3.18.3-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:cf7168b2046db0eceb83d8ed2ee31c0847ce18b2d8baf3e93de9560f3921a8c3"}, + {file = "simplejson-3.18.3-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:55df3dfd8777bf134e1078d2f195352432a77f23ccb90b92b08218123d56adc9"}, + {file = "simplejson-3.18.3-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:6b997739fdbc9b7030ff490fc8e5f8c144b8ec80f3605eff643983672bb8cfde"}, + {file = "simplejson-3.18.3-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:c98fddc374468158778a8afb3fd7296412a2b2fc34cebba64212ac3e018e7382"}, + {file = "simplejson-3.18.3-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:55aa983575b0aef143845f5bfbb35075475eccaebf7d4b30f4037a2fe8414666"}, + {file = "simplejson-3.18.3-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:1b79e2607ac5ba98381c2e068727acc1e4dd385a6d216914c0613f8f568a06a5"}, + {file = "simplejson-3.18.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b0352428b35da859a98770949e7353866ae65463026f1c8e4c89a6395d4b5fd7"}, + {file = "simplejson-3.18.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eb81cfef0c0039010f0212f4e5eb6909641b8a54c761584054ac97fd7bd0c21a"}, + {file = "simplejson-3.18.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e762e9d8556fa9f3a99f8a278eeba50a35b5f554b82deeb282ddbdd85816e638"}, + {file = "simplejson-3.18.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc8df5831b645e96a318ea51a66ce6e2bb869eebc3fa9a860bbf67aecd270055"}, + {file = "simplejson-3.18.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:14b35fb90083218e59df5dba733c7086655f2938f3fcabe36ad849623941d660"}, + {file = "simplejson-3.18.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f89f078114cacedb9a3392615cc099cf02a51efa7507f90e2006bf7ec38c880d"}, + {file = "simplejson-3.18.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a2960b95f3ba822d077d1afa7e1fea9799cfb2990028cf010e666f64195ecb5a"}, + {file = "simplejson-3.18.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:96ade36640734b54176c4765d00a60767bd7fae5b7a5b3574accc055ac18e34c"}, + {file = "simplejson-3.18.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:6c4c56c5abb82e22877b913186e5c0fd7d9eef0c930719e28fa451d3f11defb4"}, + {file = "simplejson-3.18.3-cp310-cp310-win32.whl", hash = "sha256:8209c40279ed9b2cd5fbe2d617a29a074e90ea97fce7c07a0128a01cb3e8afc5"}, + {file = "simplejson-3.18.3-cp310-cp310-win_amd64.whl", hash = "sha256:6a49665169c18f27a0fc10935466332ee7406ee14ced8dc0a1b4d465547299aa"}, + {file = "simplejson-3.18.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:508342d7227ed66beecfbba7a38b46e1a713faeb034216f43f03ec5c175e0622"}, + {file = "simplejson-3.18.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:070ab073ce72f1624107dfd6d095c87ac32aafe7ba54a5c5055a3dd83cb06e51"}, + {file = "simplejson-3.18.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:88f59a07873dc1f06fd9e6712dd71286f1b297a066ad2fd9110ad080d3cb011c"}, + {file = "simplejson-3.18.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5780e3929435a8d39671537174f8ce0ccafb4f6e0c748ffe139916ffbdca39d3"}, + {file = "simplejson-3.18.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2be75f4cb9951efeb2616e16f944ee4f9a09768475a3f5c40a6ac4dc5ee68dfd"}, + {file = "simplejson-3.18.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e49c84df6e71e3c23169d3df481565dd607cbee4aa1e0af15c493cccad7c745"}, + {file = "simplejson-3.18.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ab5bdf0b8d07f7fd603b2d0c1982412cd9f8ade997088ddced251f7e656c7fd4"}, + {file = "simplejson-3.18.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:141782a0a25c1792627575b37b4951583358ccc7137623aa45947f8425ee8d96"}, + {file = "simplejson-3.18.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:24823364fee93bab141621b3a2e10612e31be7ca58788bf9b2cd2b1ce37ab07d"}, + {file = "simplejson-3.18.3-cp311-cp311-win32.whl", hash = "sha256:f73bae5e315adf7bc8cb7f0a13a1e9e33bead42e8ce174be83ac9ecc2513c86a"}, + {file = "simplejson-3.18.3-cp311-cp311-win_amd64.whl", hash = "sha256:063db62a9251e61ea0c17e49c3e7bed465bfcc5359655abcb8c0bc6130a4e0d4"}, + {file = "simplejson-3.18.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:3bab9ea49ff477c926c5787f79ec47cf51c7ffb15c9d8dd0f09e728807d44f4b"}, + {file = "simplejson-3.18.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cf299fbb7d476676dfea372a3262654af98694bd1df35b060ce0fe1b68087f1"}, + {file = "simplejson-3.18.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62628ea5df8c830d00a7417d5ecd949a1b24a8d0a5063a2a77f7ec7522110a0f"}, + {file = "simplejson-3.18.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ff65b475091084e5bdb7f26e9c555956be7355b573ce494fa96f9f8e34541ac"}, + {file = "simplejson-3.18.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:2b0f6de11f5ce4b80f51bc49d08b898602e190547f8efe4e44af8ae3cda7779d"}, + {file = "simplejson-3.18.3-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d990ea42ba908cb57a3df97d283aa26c1822f10a0a60e250b54ee21cd08c48d0"}, + {file = "simplejson-3.18.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:2c7ee643ee93684bf76196e2d84a2090c6df8f01737a016e869b579593827b6e"}, + {file = "simplejson-3.18.3-cp36-cp36m-win32.whl", hash = "sha256:0e7c3fae6c9540064e06a653780b4f263675cd69ca6841345029fee3e27e9bb5"}, + {file = "simplejson-3.18.3-cp36-cp36m-win_amd64.whl", hash = 
"sha256:0baf8c60efef74944ed4adb034d14bcf737731576f0e4c3c56fb875ea256af69"}, + {file = "simplejson-3.18.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:52465a5578cfc2c5e374a574df14dfb75e04c6cb6a100b7abc8bf6c89bea8f5e"}, + {file = "simplejson-3.18.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fe1173b4146641c872bafa6f9a21f3a2012f502d54fbb523a76e6320024fae9"}, + {file = "simplejson-3.18.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23fce984045804194f513a2739dcd82be350198470d5ade5058da019a48cf3f8"}, + {file = "simplejson-3.18.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad323e92cb1bd3b1db6f57c007dca964d13c52247ad844203ce381e94066601"}, + {file = "simplejson-3.18.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7c26fe63755ecc59c502ddde8e58ce8b765bf4fdd3f5858d2b7c8ab28bc2a9c8"}, + {file = "simplejson-3.18.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:502d86fbfe914263642479b87ed61af3b27b9e039df77acd2416cfccfc892e68"}, + {file = "simplejson-3.18.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:44d6c52d4f5c0c087a6e88a92bf9f94234321d21be32c6471ba39856e304bbe3"}, + {file = "simplejson-3.18.3-cp37-cp37m-win32.whl", hash = "sha256:2a1b3222bc8f6ac91b5ebe3263111c7dc4dc4b01c52f0153f5bb1f3ef3bf0023"}, + {file = "simplejson-3.18.3-cp37-cp37m-win_amd64.whl", hash = "sha256:1907d49d70c75530976119c13785db91168d2599288debaca7d25da9cd2f3747"}, + {file = "simplejson-3.18.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:04a4b9a297cccbc9e1d66fe652fbffd55b36d6579c43132e821d315957302194"}, + {file = "simplejson-3.18.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:16cc750d19852fa5ebafd55da86fa357f87991e07b4e2afb37a5975dfdde0153"}, + {file = "simplejson-3.18.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:290bbcdcbb37af3f7e43378f592ab7a9168fca640da6af63d42cdb535f96bbf2"}, + {file = "simplejson-3.18.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:694332fd6fd10fe8868c2508583220d1a1a7be9ff049dab5bd6b9aedfb9edc50"}, + {file = "simplejson-3.18.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9f72d2b539512f382a48cc9ad6cea2d3a572e71e92c40e03d2140041eeaa233"}, + {file = "simplejson-3.18.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcd9eac304a133ee4af58e68c5ded4c5ba663d3ee4602e8613359b776a1f8c8f"}, + {file = "simplejson-3.18.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cde5a3ff5e0bd5d6da676314dfae86c9e99bff77bca03d30223c9718a58f9e83"}, + {file = "simplejson-3.18.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:099bbd3b5b4ea83159a980348cd481a34984dee5fe1b9fac31a9137158f46960"}, + {file = "simplejson-3.18.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4de9fed1166aeedee44150fa83bc059aca6b612940281f8b5a39374781f16196"}, + {file = "simplejson-3.18.3-cp38-cp38-win32.whl", hash = "sha256:59a629240cfbc5b4f390a8578dca74ae77ab617de971862acb946822d2eb1b11"}, + {file = "simplejson-3.18.3-cp38-cp38-win_amd64.whl", hash = "sha256:5b009342e712026ffabe8a471d5b4a4ff2a038687387e74eae601574c04dae33"}, + {file = "simplejson-3.18.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6bd81d10cb3384f64242316da8a2b2f88618776bc1ef38bcc79f1afe8ad36616"}, + {file = "simplejson-3.18.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:c3b696770b504f881f271f97b94a687487ec1ef20bfbd5f20d92bbab7a85952d"}, + {file = "simplejson-3.18.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:75eb555dc349d0cbe2c95ea2be665b306c6ac6d5b64e3a3920af9b805ecdb5f7"}, + {file = "simplejson-3.18.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d774782159347d66563cd7ac18b9dd37010438a825160cde4818caa18110a746"}, + {file = "simplejson-3.18.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b2c4e8b65987f3c6529149495d28e23efe213e94dc3659176c4ab22d18a9ee4a"}, + {file = "simplejson-3.18.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8a4750e8db92109e6f1f7783a7faae4254d6d5dc28a41ff7eff7d2265f0586b"}, + {file = "simplejson-3.18.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4b8d4d958c5ab3489d1174917a7fad82da642560c39ce559a624e63deaaa36b1"}, + {file = "simplejson-3.18.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:efa70fd9b6c7b57b048ecadb909683acd535cddebc5b22f3c05ba3b369739caf"}, + {file = "simplejson-3.18.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7e73d9d6af3c29b60a92e28b3144d951110f59a3d05fc402c3f6c5248b883400"}, + {file = "simplejson-3.18.3-cp39-cp39-win32.whl", hash = "sha256:a80bd9a3db88a76a401155c64e3499376c702307c2206cb381cc2a8dd9cc4f1f"}, + {file = "simplejson-3.18.3-cp39-cp39-win_amd64.whl", hash = "sha256:c4514675f6571da8190fea52a110bca686fa844972e8b2b3bc07ace9e632ee4f"}, + {file = "simplejson-3.18.3-py3-none-any.whl", hash = "sha256:37bdef13412c0bc338db2993a38f3911d5bd2a0ba8d00b3bc66d1063edd7c33e"}, + {file = "simplejson-3.18.3.tar.gz", hash = "sha256:ebb53837c5ffcb6100646018565d3f1afed6f4b185b14b2c9cbccf874fe40157"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +sortedcontainers = [ + {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, + {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, +] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] +torch = [ + {file = "torch-1.13.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:fd12043868a34a8da7d490bf6db66991108b00ffbeecb034228bfcbbd4197143"}, + {file = "torch-1.13.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d9fe785d375f2e26a5d5eba5de91f89e6a3be5d11efb497e76705fdf93fa3c2e"}, + {file = "torch-1.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:98124598cdff4c287dbf50f53fb455f0c1e3a88022b39648102957f3445e9b76"}, + {file = "torch-1.13.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:393a6273c832e047581063fb74335ff50b4c566217019cc6ace318cd79eb0566"}, + {file = "torch-1.13.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:0122806b111b949d21fa1a5f9764d1fd2fcc4a47cb7f8ff914204fd4fc752ed5"}, + {file = "torch-1.13.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:22128502fd8f5b25ac1cd849ecb64a418382ae81dd4ce2b5cebaa09ab15b0d9b"}, + {file = "torch-1.13.1-cp37-cp37m-manylinux1_x86_64.whl", hash = 
"sha256:76024be052b659ac1304ab8475ab03ea0a12124c3e7626282c9c86798ac7bc11"}, + {file = "torch-1.13.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:ea8dda84d796094eb8709df0fcd6b56dc20b58fdd6bc4e8d7109930dafc8e419"}, + {file = "torch-1.13.1-cp37-cp37m-win_amd64.whl", hash = "sha256:2ee7b81e9c457252bddd7d3da66fb1f619a5d12c24d7074de91c4ddafb832c93"}, + {file = "torch-1.13.1-cp37-none-macosx_10_9_x86_64.whl", hash = "sha256:0d9b8061048cfb78e675b9d2ea8503bfe30db43d583599ae8626b1263a0c1380"}, + {file = "torch-1.13.1-cp37-none-macosx_11_0_arm64.whl", hash = "sha256:f402ca80b66e9fbd661ed4287d7553f7f3899d9ab54bf5c67faada1555abde28"}, + {file = "torch-1.13.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:727dbf00e2cf858052364c0e2a496684b9cb5aa01dc8a8bc8bbb7c54502bdcdd"}, + {file = "torch-1.13.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:df8434b0695e9ceb8cc70650afc1310d8ba949e6db2a0525ddd9c3b2b181e5fe"}, + {file = "torch-1.13.1-cp38-cp38-win_amd64.whl", hash = "sha256:5e1e722a41f52a3f26f0c4fcec227e02c6c42f7c094f32e49d4beef7d1e213ea"}, + {file = "torch-1.13.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:33e67eea526e0bbb9151263e65417a9ef2d8fa53cbe628e87310060c9dcfa312"}, + {file = "torch-1.13.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:eeeb204d30fd40af6a2d80879b46a7efbe3cf43cdbeb8838dd4f3d126cc90b2b"}, + {file = "torch-1.13.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:50ff5e76d70074f6653d191fe4f6a42fdbe0cf942fbe2a3af0b75eaa414ac038"}, + {file = "torch-1.13.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:2c3581a3fd81eb1f0f22997cddffea569fea53bafa372b2c0471db373b26aafc"}, + {file = "torch-1.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:0aa46f0ac95050c604bcf9ef71da9f1172e5037fdf2ebe051962d47b123848e7"}, + {file = "torch-1.13.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:6930791efa8757cb6974af73d4996b6b50c592882a324b8fb0589c6a9ba2ddaf"}, + {file = "torch-1.13.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:e0df902a7c7dd6c795698532ee5970ce898672625635d885eade9976e5a04949"}, +] +typing-extensions = [ + {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, + {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, +] +wheel = [ + {file = "wheel-0.38.4-py3-none-any.whl", hash = "sha256:b60533f3f5d530e971d6737ca6d58681ee434818fab630c83a734bb10c083ce8"}, + {file = "wheel-0.38.4.tar.gz", hash = "sha256:965f5259b566725405b05e7cf774052044b1ed30119b5d586b2703aafe8719ac"}, +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d443ffb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,29 @@ +[tool.poetry] +name = "pangolin" +version = "1.3.11" +description = "" +authors = ["Tony Zeng ", "Kevin Kazmierczak "] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.8" +numpy = "^1.24.2" +torch = "^1.13.1" +gffutils = "^0.11.1" +pyfastx = "0.8.4" # This is fixed due to a weird error in the latest version where it complains about missing keys +pandas = "^1.5.3" +biopython = "^1.81" +pysam = "^0.20.0" +intervaltree = "^3.1.0" + +[tool.poetry.group.dev.dependencies] +pytest = "^7.2.2" +black = "^23.3.0" +coverage = "^7.2.3" + +[tool.poetry.scripts] +pangolin = "pangolin.pangolin:main" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/scripts/create_db.py b/scripts/create_db.py index ee89984..d252575 100755 --- a/scripts/create_db.py +++ 
b/scripts/create_db.py @@ -2,8 +2,15 @@ import gffutils parser = argparse.ArgumentParser() -parser.add_argument("annotation_file", help="GTF file containing gene annotations. For example, from https://www.gencodegenes.org/") -parser.add_argument("--filter", default="Ensembl_canonical", help="Only keep GTF features with the specified tags. Format: tag1,tag2,... or None to keep all features. Default: Ensembl_canonical") +parser.add_argument( + "annotation_file", + help="GTF file containing gene annotations. For example, from https://www.gencodegenes.org/", +) +parser.add_argument( + "--filter", + default="Ensembl_canonical", + help="Only keep GTF features with the specified tags. Format: tag1,tag2,... or None to keep all features. Default: Ensembl_canonical", +) args = parser.parse_args() gtf = args.annotation_file @@ -14,20 +21,27 @@ else: exit("ERROR, annotation_file should be a GTF file.") + def filter(feat): - if feat.featuretype not in ["gene","transcript","exon"]: + if feat.featuretype not in ["gene", "transcript", "exon"]: return False - elif args.filter != "None" and feat.featuretype in ["transcript","exon"]: + elif args.filter != "None" and feat.featuretype in ["transcript", "exon"]: present = False - for tag in args.filter.split(','): + for tag in args.filter.split(","): if "tag" in feat.attributes and tag in feat["tag"]: present = True if not present: return False return feat -db = gffutils.create_db(gtf, prefix+".db", force=True, - disable_infer_genes=True, disable_infer_transcripts=True, - transform=filter) + +db = gffutils.create_db( + gtf, + prefix + ".db", + force=True, + disable_infer_genes=True, + disable_infer_transcripts=True, + transform=filter, +) print("Database created: %s.db" % prefix) diff --git a/scripts/custom_usage.py b/scripts/custom_usage.py index 377ad47..735581d 100644 --- a/scripts/custom_usage.py +++ b/scripts/custom_usage.py @@ -15,8 +15,8 @@ # Change this to the desired sequences and strand for each sequence. If the sequence is N bases long, Pangolin will # return scores for the middle N-10000 bases (so if you are interested in the score for a single site, the input should # be: 5000 bases before the site, base at the site, 5000 bases after the site). Sequences < 10001 bases can be padded with 'N'. 
-seqs = [10001*'A'] -strands = ['-'] +seqs = [10001 * "A"] +strands = ["-"] # Load models models = [] @@ -25,32 +25,36 @@ model = Pangolin(L, W, AR) if torch.cuda.is_available(): model.cuda() - weights = torch.load(resource_filename("pangolin","models/final.%s.%s.3" % (j, i))) + weights = torch.load( + resource_filename("pangolin", "models/final.%s.%s.3" % (j, i)) + ) else: - weights = torch.load(resource_filename("pangolin","models/final.%s.%s.3" % (j, i)), - map_location=torch.device('cpu')) + weights = torch.load( + resource_filename("pangolin", "models/final.%s.%s.3" % (j, i)), + map_location=torch.device("cpu"), + ) model.load_state_dict(weights) model.eval() models.append(model) # Get scores -IN_MAP = np.asarray([[0, 0, 0, 0], - [1, 0, 0, 0], - [0, 1, 0, 0], - [0, 0, 1, 0], - [0, 0, 0, 1]]) -INDEX_MAP = {0:1, 1:2, 2:4, 3:5, 4:7, 5:8, 6:10, 7:11} +IN_MAP = np.asarray( + [[0, 0, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]] +) +INDEX_MAP = {0: 1, 1: 2, 2: 4, 3: 5, 4: 7, 5: 8, 6: 10, 7: 11} + def one_hot_encode(seq, strand): - seq = seq.upper().replace('A', '1').replace('C', '2') - seq = seq.replace('G', '3').replace('T', '4').replace('N', '0') - if strand == '+': + seq = seq.upper().replace("A", "1").replace("C", "2") + seq = seq.replace("G", "3").replace("T", "4").replace("N", "0") + if strand == "+": seq = np.asarray(list(map(int, list(seq)))) - elif strand == '-': + elif strand == "-": seq = np.asarray(list(map(int, list(seq[::-1])))) seq = (5 - seq) % 5 # Reverse complement - return IN_MAP[seq.astype('int8')] + return IN_MAP[seq.astype("int8")] + for i, seq in enumerate(seqs): seq = one_hot_encode(seq, strands[i]).T @@ -62,7 +66,7 @@ def one_hot_encode(seq, strand): for j, model_num in enumerate(model_nums): score = [] # Average across 5 models - for model in models[5*j:5*j+5]: + for model in models[5 * j : 5 * j + 5]: with torch.no_grad(): - score.append(model(seq)[0][INDEX_MAP[model_num],:].cpu().numpy()) + score.append(model(seq)[0][INDEX_MAP[model_num], :].cpu().numpy()) print(np.mean(score, axis=0)) diff --git a/setup.py b/setup.py deleted file mode 100755 index 6f86643..0000000 --- a/setup.py +++ /dev/null @@ -1,23 +0,0 @@ -import setuptools - -with open("README.md", "r", encoding="utf-8") as fh: - long_description = fh.read() - -setuptools.setup( - name="pangolin", - version="1.0.2", - author="Tony Zeng", - author_email="tkyzeng@gmail.com", - description="Pangolin", - long_description=long_description, - long_description_content_type="text/markdown", - packages=['pangolin'], - package_data={ - "pangolin": ["models/*"], - }, - entry_points={ - "console_scripts": [ - "pangolin=pangolin.pangolin:main" - ] - } -) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/expected/medium_out.csv b/tests/data/expected/medium_out.csv new file mode 100644 index 0000000..34cf08c --- /dev/null +++ b/tests/data/expected/medium_out.csv @@ -0,0 +1,100 @@ +gene,CHROM,position.dna,REF,ALT,POS,Pangolin +PEX10,chr1,2408451.0,C,T,2339890,PEX10|-3:0.02|1:-0.76|Warnings: +MTHFR,chr1,11790916.0,C,T,11850973,MTHFR|-2:0.7|-18:-0.57|Warnings: +MTHFR,chr1,11794385.0,C,T,11854442,MTHFR|-2:0.4|-27:-0.03|Warnings: +MTHFR,chr1,11794539.0,CT,C,11854596,MTHFR|-62:0.36|-1:-0.83|Warnings: +MTHFR,chr1,11794724.0,C,G,11854781,MTHFR|88:0.04|5:-0.78|Warnings: +PLOD1,chr1,11952620.0,CA,C,12012677,PLOD1|31:0.65|3:-0.83|Warnings: +PLOD1,chr1,11966985.0,A,G,12027042,PLOD1|41:0.12|2:-0.82|Warnings: 
+ATP13A2,chr1,16996381.0,C,T,17322876,ATP13A2|31:0.06|5:-0.73|Warnings: +SDHB,chr1,17028599.0,C,T,17355094,SDHB|55:0.43|1:-0.8|Warnings: +SDHB,chr1,17028737.0,C,G,17355232,SDHB|32:0.03|-1:-0.85|Warnings: +SDHB,chr1,17053947.0,C,A,17380442,SDHB|-195:0.41|1:-0.77|Warnings: +HMGCL,chr1,23817475.0,C,T,24143965,HMGCL|14:0.04|1:-0.87|Warnings: +DHDDS,chr1,26447535.0,A,G,26774026,DHDDS|-5:0.8|24:-0.57|Warnings: +MECR,chr1,29200513.0,T,TA,29527025,MECR|-103:0.37|3:-0.56|Warnings: +AK2,chr1,33014521.0,C,T,33480122,AK2|-3:0.26|1:-0.86|Warnings: +P3H1,chr1,42747254.0,C,T,43212925,P3H1|-1:0.57|18:-0.61|Warnings: +SZT2,chr1,43425950.0,G,A,43891621,SZT2|-11:0.04|-1:-0.88|Warnings: +MUTYH,chr1,45330513.0,T,A,45796185,MUTYH|-96:0.0|44:-0.76|Warnings: +MUTYH,chr1,45331180.0,G,A,45796852,MUTYH|63:0.01|-200:0.0|Warnings: +MUTYH,chr1,45331558.0,T,C,45797230,MUTYH|-14:0.86|-2:-0.89|Warnings: +MUTYH,chr1,45331656.0,G,C,45797328,MUTYH|176:0.01|-100:-0.0|Warnings: +MUTYH,chr1,45331660.0,C,A,45797332,MUTYH|-9:0.01|1:-0.63|Warnings: +MUTYH,chr1,45331809.0,C,T,45797481,MUTYH|-2:0.66|40:-0.32|Warnings: +MUTYH,chr1,45332163.0,T,G,45797835,MUTYH|72:0.11|3:-0.79|Warnings: +MUTYH,chr1,45332619.0,C,T,45798291,MUTYH|-2:0.01|68:-0.14|Warnings: +MUTYH,chr1,45332886.0,TCCTATTTCCCCTA,T,45798558,MUTYH|-2:0.01|32:-0.51|Warnings: +MUTYH,chr1,45333171.0,C,G,45798843,MUTYH|-13:0.79|-1:-0.86|Warnings: +MUTYH,chr1,45340215.0,G,C,45805887,TOE1|10:0.0|89:-0.0|Warnings:MUTYH|-97:0.0|4:-0.06|Warnings: +POMGNT1,chr1,46189457.0,C,G,46655129,POMGNT1|27:0.66|1:-0.87|Warnings: +POMGNT1,chr1,46189457.0,C,A,46655129,POMGNT1|27:0.67|1:-0.87|Warnings: +POMGNT1,chr1,46194860.0,G,A,46660532,POMGNT1|-8:0.0|-16:-0.29|Warnings: +CPT2,chr1,53202430.0,G,A,53668102,CPT2|3:0.8|-1:-0.87|Warnings: +ALG6,chr1,63402344.0,G,GT,63868015,ALG6|-91:0.02|-1:-0.85|Warnings: +ALG6,chr1,63402348.0,G,A,63868019,ALG6|-95:0.02|-5:-0.79|Warnings: +PGM1,chr1,63648295.0,G,T,64113966,PGM1|-2:0.69|-199:0.0|Warnings: +ACADM,chr1,75745788.0,G,A,76211473,ACADM|47:0.12|18:-0.06|Warnings: +ACADM,chr1,75761116.0,T,G,76226801,ACADM|1:0.81|6:-0.67|Warnings: +ABCA4,chr1,94000832.0,T,C,94466388,ABCA4|54:0.08|4:-0.59|Warnings: +ABCA4,chr1,94001046.0,C,T,94466602,ABCA4|3:0.55|-44:-0.5|Warnings: +ABCA4,chr1,94011395.0,A,G,94476951,ABCA4|9:0.02|-10:-0.08|Warnings: +ABCA4,chr1,94018366.0,G,T,94483922,ABCA4|4:0.24|-200:0.0|Warnings: +ABCA4,chr1,94018445.0,C,T,94484001,ABCA4|-3:0.39|-200:0.0|Warnings: +ABCA4,chr1,94019575.0,CACTT,C,94485131,ABCA4|5:0.24|7:-0.84|Warnings: +ABCA4,chr1,94019581.0,C,T,94485137,ABCA4|-5:0.18|1:-0.84|Warnings: +ABCA4,chr1,94025056.0,A,T,94490612,ABCA4|-2:0.52|-8:-0.74|Warnings: +ABCA4,chr1,94027381.0,G,A,94492937,ABCA4|4:0.29|-200:0.0|Warnings: +ABCA4,chr1,94027417.0,G,A,94492973,ABCA4|137:0.02|-200:0.0|Warnings: +ABCA4,chr1,94027444.0,C,T,94493000,ABCA4|110:0.2|-200:0.0|Warnings: +ABCA4,chr1,94030427.0,C,T,94495983,ABCA4|-56:0.07|1:-0.52|Warnings: +ABCA4,chr1,94030427.0,C,G,94495983,ABCA4|-56:0.04|1:-0.53|Warnings: +ABCA4,chr1,94030953.0,C,T,94496509,ABCA4|-2:0.0|43:-0.03|Warnings: +ABCA4,chr1,94030991.0,C,T,94496547,ABCA4|-26:0.01|5:-0.44|Warnings: +ABCA4,chr1,94036737.0,T,C,94502293,ABCA4|-161:0.04|3:-0.71|Warnings: +ABCA4,chr1,94044608.0,C,T,94510164,ABCA4|-8:0.02|5:-0.69|Warnings: +ABCA4,chr1,94056830.0,C,T,94522386,ABCA4|-2:0.56|-8:-0.63|Warnings: +ABCA4,chr1,94062576.0,C,T,94528132,ABCA4|-10:0.23|1:-0.76|Warnings: +ABCA4,chr1,94081224.0,C,G,94546780,ABCA4|-58:0.07|-200:0.0|Warnings: +ABCA4,chr1,94081264.0,C,T,94546820,ABCA4|-1:0.17|-199:0.0|Warnings: 
+ABCA4,chr1,94084225.0,G,A,94549781,ABCA4|-6:0.09|-200:0.0|Warnings: +ALG14,chr1,95027119.0,TCTTA,T,95492675,ALG14|29:0.01|10:-0.17|Warnings: +AGL,chr1,99864592.0,A,G,100330148,AGL|-1:0.06|-3:-0.55|Warnings: +AGL,chr1,99880047.0,G,T,100345603,AGL|-37:0.01|-1:-0.77|Warnings: +AGL,chr1,99884704.0,G,T,100350260,AGL|40:0.14|-1:-0.85|Warnings: +AGL,chr1,99916398.0,A,G,100381954,AGL|1:0.76|12:-0.78|Warnings: +AGL,chr1,99916498.0,G,T,100382054,AGL|-24:0.04|-1:-0.81|Warnings: +DBT,chr1,100207186.0,T,C,100672742,DBT|1:0.8|-200:0.0|Warnings: +DBT,chr1,100210772.0,C,A,100676328,DBT|-25:0.11|-1:-0.88|Warnings: +DBT,chr1,100218750.0,GGTAACAAGGTAA,G,100684306,DBT|19:0.84|-3:-0.85|Warnings: +COL11A1,chr1,102915630.0,C,T,103381186,COL11A1|9:0.27|1:-0.83|Warnings: +COL11A1,chr1,102997075.0,C,A,103462631,COL11A1|-7:0.01|5:-0.8|Warnings: +COL11A1,chr1,103008517.0,CT,C,103474073,COL11A1|71:0.17|-1:-0.83|Warnings: +CASQ2,chr1,115705187.0,C,G,116247808,CASQ2|-68:0.62|5:-0.69|Warnings: +CTSK,chr1,150806225.0,C,T,150778701,CTSK|-2:0.13|-1:-0.78|Warnings: +CTSK,chr1,150806226.0,T,C,150778702,CTSK|-47:0.01|-2:-0.78|Warnings: +ADAR,chr1,154588263.0,A,G,154560739,ADAR|-8:0.02|-5:-0.08|Warnings: +LMNA,chr1,156130615.0,A,G,156100406,LMNA|12:0.74|2:-0.87|Warnings: +LMNA,chr1,156130740.0,C,T,156100531,LMNA|-2:0.31|33:-0.05|Warnings: +LMNA,chr1,156130818.0,T,G,156100609,LMNA|0:0.09|-45:-0.02|Warnings: +LMNA,chr1,156134795.0,A,G,156104586,LMNA|1:0.88|10:-0.84|Warnings: +LMNA,chr1,156134933.0,G,A,156104724,LMNA|-3:0.37|42:-0.07|Warnings: +LMNA,chr1,156134977.0,T,C,156104768,LMNA|-47:0.27|-2:-0.84|Warnings: +LMNA,chr1,156135890.0,C,G,156105681,LMNA|-29:0.69|11:-0.77|Warnings: +LMNA,chr1,156136916.0,G,A,156106707,LMNA|2:0.78|5:-0.13|Warnings: +LMNA,chr1,156137642.0,T,G,156107433,LMNA|1:0.88|12:-0.86|Warnings: +LMNA,chr1,156137651.0,C,G,156107442,LMNA|7:0.36|3:-0.58|Warnings: +NTRK1,chr1,156868250.0,G,T,156838042,NTRK1|10:0.05|-1:-0.75|Warnings: +NTRK1,chr1,156873600.0,T,A,156843392,NTRK1|-104:0.16|33:-0.13|Warnings: +NTRK1,chr1,156879365.0,A,C,156849157,NTRK1|-150:0.04|-3:-0.71|Warnings: +NTRK1,chr1,156881445.0,C,A,156851237,NTRK1|2:0.85|12:-0.72|Warnings: +NTRK1,chr1,156881446.0,G,A,156851238,NTRK1|2:0.85|11:-0.72|Warnings: +SPTA1,chr1,158618068.0,G,A,158587858,SPTA1|2:0.0|-12:-0.06|Warnings: +SPTA1,chr1,158643524.0,G,A,158613314,SPTA1|-29:0.1|-99:-0.01|Warnings: +MPZ,chr1,161306847.0,C,A,161276637,MPZ|6:0.24|-139:-0.03|Warnings: +SDHC,chr1,161318491.0,C,G,161288281,SDHC|-1:0.85|-198:0.0|Warnings: +SDHC,chr1,161328417.0,G,A,161298207,SDHC|2:0.02|-21:-0.02|Warnings: +SDHC,chr1,161340592.0,A,G,161310382,SDHC|46:0.03|2:-0.85|Warnings: +SDHC,chr1,161340658.0,A,G,161310448,SDHC|109:0.06|-3:-0.1|Warnings: +SDHC,chr1,161356841.0,G,C,161326631,SDHC|-61:0.03|-1:-0.86|Warnings: +SDHC,chr1,161356841.0,G,T,161326631,SDHC|-61:0.03|-1:-0.86|Warnings: diff --git a/tests/data/expected/small_out.csv b/tests/data/expected/small_out.csv new file mode 100644 index 0000000..ac355a4 --- /dev/null +++ b/tests/data/expected/small_out.csv @@ -0,0 +1,7 @@ +gene,CHROM,position.dna,REF,ALT,POS,Pangolin +foo,chr19,1.0,A,G,1, +ELANE,chr19,855795.0,G,A,855795,ELANE|-31:0.17|-1:-0.81|Warnings: +ELANE,chr19,855799.0,G,A,855799,ELANE|-35:0.15|-5:-0.76|Warnings: +STK11,chr19,1218501.0,A,G,1218500,STK11|100:0.0|-84:-0.0|Warnings: +STK11,chr19,1228432.0,C,A,1228431,STK11|199:0.01|-1:-0.0|Warnings:||CBARP|6:0.01|-145:-0.0|Warnings: +PNKP,chr19,49861542.0,AGGGGTCAGGGGAGGAGG,A,50364799,PNKP|-184:0.02|-32:-0.12|Warnings: diff --git 
a/tests/data/expected/small_out.vcf b/tests/data/expected/small_out.vcf new file mode 100644 index 0000000..5eac2b3 --- /dev/null +++ b/tests/data/expected/small_out.vcf @@ -0,0 +1,32 @@ +##fileformat=VCFv4.2 +##FILTER= +##fileDate=20191004 +##reference=GRCh37/hg19 +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr19 1228431 . C A . . Pangolin=STK11|199:0.01|-1:-0.0|Warnings:||CBARP|6:0.01|-145:-0.0|Warnings: diff --git a/tests/data/medium.csv b/tests/data/medium.csv new file mode 100644 index 0000000..0e2804a --- /dev/null +++ b/tests/data/medium.csv @@ -0,0 +1,100 @@ +gene,CHROM,position.dna,REF,ALT,POS +PEX10,chr1,2408451.0,C,T,2339890 +MTHFR,chr1,11790916.0,C,T,11850973 +MTHFR,chr1,11794385.0,C,T,11854442 +MTHFR,chr1,11794539.0,CT,C,11854596 +MTHFR,chr1,11794724.0,C,G,11854781 +PLOD1,chr1,11952620.0,CA,C,12012677 +PLOD1,chr1,11966985.0,A,G,12027042 +ATP13A2,chr1,16996381.0,C,T,17322876 +SDHB,chr1,17028599.0,C,T,17355094 +SDHB,chr1,17028737.0,C,G,17355232 +SDHB,chr1,17053947.0,C,A,17380442 +HMGCL,chr1,23817475.0,C,T,24143965 +DHDDS,chr1,26447535.0,A,G,26774026 +MECR,chr1,29200513.0,T,TA,29527025 +AK2,chr1,33014521.0,C,T,33480122 +P3H1,chr1,42747254.0,C,T,43212925 +SZT2,chr1,43425950.0,G,A,43891621 +MUTYH,chr1,45330513.0,T,A,45796185 +MUTYH,chr1,45331180.0,G,A,45796852 +MUTYH,chr1,45331558.0,T,C,45797230 +MUTYH,chr1,45331656.0,G,C,45797328 +MUTYH,chr1,45331660.0,C,A,45797332 +MUTYH,chr1,45331809.0,C,T,45797481 +MUTYH,chr1,45332163.0,T,G,45797835 +MUTYH,chr1,45332619.0,C,T,45798291 +MUTYH,chr1,45332886.0,TCCTATTTCCCCTA,T,45798558 +MUTYH,chr1,45333171.0,C,G,45798843 +MUTYH,chr1,45340215.0,G,C,45805887 +POMGNT1,chr1,46189457.0,C,G,46655129 +POMGNT1,chr1,46189457.0,C,A,46655129 +POMGNT1,chr1,46194860.0,G,A,46660532 +CPT2,chr1,53202430.0,G,A,53668102 +ALG6,chr1,63402344.0,G,GT,63868015 +ALG6,chr1,63402348.0,G,A,63868019 +PGM1,chr1,63648295.0,G,T,64113966 +ACADM,chr1,75745788.0,G,A,76211473 +ACADM,chr1,75761116.0,T,G,76226801 +ABCA4,chr1,94000832.0,T,C,94466388 +ABCA4,chr1,94001046.0,C,T,94466602 +ABCA4,chr1,94011395.0,A,G,94476951 +ABCA4,chr1,94018366.0,G,T,94483922 +ABCA4,chr1,94018445.0,C,T,94484001 +ABCA4,chr1,94019575.0,CACTT,C,94485131 +ABCA4,chr1,94019581.0,C,T,94485137 +ABCA4,chr1,94025056.0,A,T,94490612 +ABCA4,chr1,94027381.0,G,A,94492937 +ABCA4,chr1,94027417.0,G,A,94492973 +ABCA4,chr1,94027444.0,C,T,94493000 +ABCA4,chr1,94030427.0,C,T,94495983 +ABCA4,chr1,94030427.0,C,G,94495983 +ABCA4,chr1,94030953.0,C,T,94496509 +ABCA4,chr1,94030991.0,C,T,94496547 +ABCA4,chr1,94036737.0,T,C,94502293 +ABCA4,chr1,94044608.0,C,T,94510164 +ABCA4,chr1,94056830.0,C,T,94522386 +ABCA4,chr1,94062576.0,C,T,94528132 +ABCA4,chr1,94081224.0,C,G,94546780 +ABCA4,chr1,94081264.0,C,T,94546820 +ABCA4,chr1,94084225.0,G,A,94549781 +ALG14,chr1,95027119.0,TCTTA,T,95492675 +AGL,chr1,99864592.0,A,G,100330148 +AGL,chr1,99880047.0,G,T,100345603 +AGL,chr1,99884704.0,G,T,100350260 +AGL,chr1,99916398.0,A,G,100381954 +AGL,chr1,99916498.0,G,T,100382054 +DBT,chr1,100207186.0,T,C,100672742 +DBT,chr1,100210772.0,C,A,100676328 +DBT,chr1,100218750.0,GGTAACAAGGTAA,G,100684306 +COL11A1,chr1,102915630.0,C,T,103381186 +COL11A1,chr1,102997075.0,C,A,103462631 +COL11A1,chr1,103008517.0,CT,C,103474073 +CASQ2,chr1,115705187.0,C,G,116247808 
+CTSK,chr1,150806225.0,C,T,150778701 +CTSK,chr1,150806226.0,T,C,150778702 +ADAR,chr1,154588263.0,A,G,154560739 +LMNA,chr1,156130615.0,A,G,156100406 +LMNA,chr1,156130740.0,C,T,156100531 +LMNA,chr1,156130818.0,T,G,156100609 +LMNA,chr1,156134795.0,A,G,156104586 +LMNA,chr1,156134933.0,G,A,156104724 +LMNA,chr1,156134977.0,T,C,156104768 +LMNA,chr1,156135890.0,C,G,156105681 +LMNA,chr1,156136916.0,G,A,156106707 +LMNA,chr1,156137642.0,T,G,156107433 +LMNA,chr1,156137651.0,C,G,156107442 +NTRK1,chr1,156868250.0,G,T,156838042 +NTRK1,chr1,156873600.0,T,A,156843392 +NTRK1,chr1,156879365.0,A,C,156849157 +NTRK1,chr1,156881445.0,C,A,156851237 +NTRK1,chr1,156881446.0,G,A,156851238 +SPTA1,chr1,158618068.0,G,A,158587858 +SPTA1,chr1,158643524.0,G,A,158613314 +MPZ,chr1,161306847.0,C,A,161276637 +SDHC,chr1,161318491.0,C,G,161288281 +SDHC,chr1,161328417.0,G,A,161298207 +SDHC,chr1,161340592.0,A,G,161310382 +SDHC,chr1,161340658.0,A,G,161310448 +SDHC,chr1,161356841.0,G,C,161326631 +SDHC,chr1,161356841.0,G,T,161326631 \ No newline at end of file diff --git a/tests/data/reference/chr19.fa.gz b/tests/data/reference/chr19.fa.gz new file mode 100644 index 0000000..7b781fa Binary files /dev/null and b/tests/data/reference/chr19.fa.gz differ diff --git a/tests/data/reference/chr19.fa.gz.fxi b/tests/data/reference/chr19.fa.gz.fxi new file mode 100644 index 0000000..28e1739 Binary files /dev/null and b/tests/data/reference/chr19.fa.gz.fxi differ diff --git a/tests/data/reference/chr19_genes.gtf.gz b/tests/data/reference/chr19_genes.gtf.gz new file mode 100644 index 0000000..8d1537e Binary files /dev/null and b/tests/data/reference/chr19_genes.gtf.gz differ diff --git a/tests/data/reference/chr19_genes_filtered.db b/tests/data/reference/chr19_genes_filtered.db new file mode 100644 index 0000000..7684e24 Binary files /dev/null and b/tests/data/reference/chr19_genes_filtered.db differ diff --git a/tests/data/reference/chr19_genes_filtered.gtf b/tests/data/reference/chr19_genes_filtered.gtf new file mode 100644 index 0000000..1a8b14d --- /dev/null +++ b/tests/data/reference/chr19_genes_filtered.gtf @@ -0,0 +1,223 @@ +chr19 uta gene 852209 856246 . + . gene_id "ELANE"; transcript_id ""; +chr19 uta transcript 852291 856246 . + . gene_id "ELANE"; transcript_id "NM_001972.2"; protein_id "NP_001963.1"; +chr19 uta exon 852291 852395 . + . gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta exon 852876 853032 . + . gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "2"; protein_id "NP_001963.1"; +chr19 uta exon 853262 853403 . + . gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "3"; protein_id "NP_001963.1"; +chr19 uta exon 855564 855794 . + . gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "4"; protein_id "NP_001963.1"; +chr19 uta exon 855958 856246 . + . gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta CDS 852329 852395 . + 0 gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta CDS 852876 853032 . + 2 gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "2"; protein_id "NP_001963.1"; +chr19 uta CDS 853262 853403 . + 1 gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "3"; protein_id "NP_001963.1"; +chr19 uta CDS 855564 855794 . + 0 gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "4"; protein_id "NP_001963.1"; +chr19 uta CDS 855958 856161 . 
+ 0 gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta 5UTR 852291 852328 . + . gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta 3UTR 856165 856246 . + . gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta start_codon 852329 852331 . + 0 gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta stop_codon 856162 856164 . + 0 gene_id "ELANE"; transcript_id "NM_001972.2"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta transcript 852209 856246 . + . gene_id "ELANE"; transcript_id "NM_001972.3"; protein_id "NP_001963.1"; +chr19 uta exon 852209 852395 . + . gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta exon 852876 853032 . + . gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "2"; protein_id "NP_001963.1"; +chr19 uta exon 853262 853403 . + . gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "3"; protein_id "NP_001963.1"; +chr19 uta exon 855564 855794 . + . gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "4"; protein_id "NP_001963.1"; +chr19 uta exon 855958 856246 . + . gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta CDS 852329 852395 . + 0 gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta CDS 852876 853032 . + 2 gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "2"; protein_id "NP_001963.1"; +chr19 uta CDS 853262 853403 . + 1 gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "3"; protein_id "NP_001963.1"; +chr19 uta CDS 855564 855794 . + 0 gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "4"; protein_id "NP_001963.1"; +chr19 uta CDS 855958 856161 . + 0 gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta 5UTR 852209 852328 . + . gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta 3UTR 856165 856246 . + . gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta start_codon 852329 852331 . + 0 gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta stop_codon 856162 856164 . + 0 gene_id "ELANE"; transcript_id "NM_001972.3"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta transcript 852303 856243 . + . gene_id "ELANE"; transcript_id "NM_001972.4"; protein_id "NP_001963.1"; +chr19 uta exon 852303 852395 . + . gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta exon 852876 853032 . + . gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "2"; protein_id "NP_001963.1"; +chr19 uta exon 853262 853403 . + . gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "3"; protein_id "NP_001963.1"; +chr19 uta exon 855564 855794 . + . gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "4"; protein_id "NP_001963.1"; +chr19 uta exon 855958 856243 . + . gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta CDS 852329 852395 . + 0 gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta CDS 852876 853032 . + 2 gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "2"; protein_id "NP_001963.1"; +chr19 uta CDS 853262 853403 . 
+ 1 gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "3"; protein_id "NP_001963.1"; +chr19 uta CDS 855564 855794 . + 0 gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "4"; protein_id "NP_001963.1"; +chr19 uta CDS 855958 856161 . + 0 gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta 5UTR 852303 852328 . + . gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta 3UTR 856165 856243 . + . gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta start_codon 852329 852331 . + 0 gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "1"; protein_id "NP_001963.1"; +chr19 uta stop_codon 856162 856164 . + 0 gene_id "ELANE"; transcript_id "NM_001972.4"; exon_number "5"; protein_id "NP_001963.1"; +chr19 uta gene 1205777 1228434 . + . gene_id "STK11"; transcript_id ""; +chr19 uta transcript 1205798 1228434 . + . gene_id "STK11"; transcript_id "NM_000455.4"; protein_id "NP_000446.1"; +chr19 uta exon 1205798 1207202 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "1"; protein_id "NP_000446.1"; +chr19 uta exon 1218416 1218499 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "2"; protein_id "NP_000446.1"; +chr19 uta exon 1219323 1219412 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "3"; protein_id "NP_000446.1"; +chr19 uta exon 1220372 1220504 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "4"; protein_id "NP_000446.1"; +chr19 uta exon 1220580 1220716 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "5"; protein_id "NP_000446.1"; +chr19 uta exon 1221212 1221339 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "6"; protein_id "NP_000446.1"; +chr19 uta exon 1221948 1222005 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "7"; protein_id "NP_000446.1"; +chr19 uta exon 1222984 1223171 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "8"; protein_id "NP_000446.1"; +chr19 uta exon 1226453 1226662 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "9"; protein_id "NP_000446.1"; +chr19 uta exon 1227592 1228434 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "10"; protein_id "NP_000446.1"; +chr19 uta CDS 1206913 1207202 . + 0 gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "1"; protein_id "NP_000446.1"; +chr19 uta CDS 1218416 1218499 . + 1 gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "2"; protein_id "NP_000446.1"; +chr19 uta CDS 1219323 1219412 . + 1 gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "3"; protein_id "NP_000446.1"; +chr19 uta CDS 1220372 1220504 . + 1 gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "4"; protein_id "NP_000446.1"; +chr19 uta CDS 1220580 1220716 . + 0 gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "5"; protein_id "NP_000446.1"; +chr19 uta CDS 1221212 1221339 . + 1 gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "6"; protein_id "NP_000446.1"; +chr19 uta CDS 1221948 1222005 . + 2 gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "7"; protein_id "NP_000446.1"; +chr19 uta CDS 1222984 1223171 . + 1 gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "8"; protein_id "NP_000446.1"; +chr19 uta CDS 1226453 1226643 . + 2 gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "9"; protein_id "NP_000446.1"; +chr19 uta 5UTR 1205798 1206912 . + . 
gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "1"; protein_id "NP_000446.1"; +chr19 uta 3UTR 1226647 1226662 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "9"; protein_id "NP_000446.1"; +chr19 uta 3UTR 1227592 1228434 . + . gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "10"; protein_id "NP_000446.1"; +chr19 uta start_codon 1206913 1206915 . + 0 gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "1"; protein_id "NP_000446.1"; +chr19 uta stop_codon 1226644 1226646 . + 0 gene_id "STK11"; transcript_id "NM_000455.4"; exon_number "9"; protein_id "NP_000446.1"; +chr19 uta transcript 1205777 1228430 . + . gene_id "STK11"; transcript_id "NM_000455.5"; protein_id "NP_000446.1"; +chr19 uta exon 1205777 1207202 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "1"; protein_id "NP_000446.1"; +chr19 uta exon 1218416 1218499 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "2"; protein_id "NP_000446.1"; +chr19 uta exon 1219323 1219412 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "3"; protein_id "NP_000446.1"; +chr19 uta exon 1220372 1220504 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "4"; protein_id "NP_000446.1"; +chr19 uta exon 1220580 1220716 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "5"; protein_id "NP_000446.1"; +chr19 uta exon 1221212 1221339 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "6"; protein_id "NP_000446.1"; +chr19 uta exon 1221948 1222005 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "7"; protein_id "NP_000446.1"; +chr19 uta exon 1222984 1223171 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "8"; protein_id "NP_000446.1"; +chr19 uta exon 1226453 1226662 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "9"; protein_id "NP_000446.1"; +chr19 uta exon 1227592 1228430 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "10"; protein_id "NP_000446.1"; +chr19 uta CDS 1206913 1207202 . + 0 gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "1"; protein_id "NP_000446.1"; +chr19 uta CDS 1218416 1218499 . + 1 gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "2"; protein_id "NP_000446.1"; +chr19 uta CDS 1219323 1219412 . + 1 gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "3"; protein_id "NP_000446.1"; +chr19 uta CDS 1220372 1220504 . + 1 gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "4"; protein_id "NP_000446.1"; +chr19 uta CDS 1220580 1220716 . + 0 gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "5"; protein_id "NP_000446.1"; +chr19 uta CDS 1221212 1221339 . + 1 gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "6"; protein_id "NP_000446.1"; +chr19 uta CDS 1221948 1222005 . + 2 gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "7"; protein_id "NP_000446.1"; +chr19 uta CDS 1222984 1223171 . + 1 gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "8"; protein_id "NP_000446.1"; +chr19 uta CDS 1226453 1226643 . + 2 gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "9"; protein_id "NP_000446.1"; +chr19 uta 5UTR 1205777 1206912 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "1"; protein_id "NP_000446.1"; +chr19 uta 3UTR 1226647 1226662 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "9"; protein_id "NP_000446.1"; +chr19 uta 3UTR 1227592 1228430 . + . gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "10"; protein_id "NP_000446.1"; +chr19 uta start_codon 1206913 1206915 . 
+ 0 gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "1"; protein_id "NP_000446.1"; +chr19 uta stop_codon 1226644 1226646 . + 0 gene_id "STK11"; transcript_id "NM_000455.5"; exon_number "9"; protein_id "NP_000446.1"; +chr19 uta gene 50364460 50370833 . - . gene_id "PNKP"; transcript_id ""; +chr19 uta transcript 50364460 50370822 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; protein_id "NP_009185.2"; +chr19 uta exon 50370726 50370822 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "1"; protein_id "NP_009185.2"; +chr19 uta exon 50370311 50370474 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "2"; protein_id "NP_009185.2"; +chr19 uta exon 50369656 50369702 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "3"; protein_id "NP_009185.2"; +chr19 uta exon 50368384 50368683 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "4"; protein_id "NP_009185.2"; +chr19 uta exon 50367581 50367660 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "5"; protein_id "NP_009185.2"; +chr19 uta exon 50367436 50367493 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "6"; protein_id "NP_009185.2"; +chr19 uta exon 50367221 50367328 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "7"; protein_id "NP_009185.2"; +chr19 uta exon 50366946 50367017 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "8"; protein_id "NP_009185.2"; +chr19 uta exon 50365947 50365995 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "9"; protein_id "NP_009185.2"; +chr19 uta exon 50365795 50365865 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "10"; protein_id "NP_009185.2"; +chr19 uta exon 50365628 50365720 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "11"; protein_id "NP_009185.2"; +chr19 uta exon 50365442 50365538 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "12"; protein_id "NP_009185.2"; +chr19 uta exon 50365301 50365362 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "13"; protein_id "NP_009185.2"; +chr19 uta exon 50365029 50365138 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "14"; protein_id "NP_009185.2"; +chr19 uta exon 50364865 50364952 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "15"; protein_id "NP_009185.2"; +chr19 uta exon 50364706 50364767 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "16"; protein_id "NP_009185.2"; +chr19 uta exon 50364460 50364622 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "17"; protein_id "NP_009185.2"; +chr19 uta CDS 50370311 50370461 . - 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "2"; protein_id "NP_009185.2"; +chr19 uta CDS 50369656 50369702 . - 2 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "3"; protein_id "NP_009185.2"; +chr19 uta CDS 50368384 50368683 . - 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "4"; protein_id "NP_009185.2"; +chr19 uta CDS 50367581 50367660 . - 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "5"; protein_id "NP_009185.2"; +chr19 uta CDS 50367436 50367493 . - 1 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "6"; protein_id "NP_009185.2"; +chr19 uta CDS 50367221 50367328 . - 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "7"; protein_id "NP_009185.2"; +chr19 uta CDS 50366946 50367017 . - 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "8"; protein_id "NP_009185.2"; +chr19 uta CDS 50365947 50365995 . 
- 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "9"; protein_id "NP_009185.2"; +chr19 uta CDS 50365795 50365865 . - 2 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "10"; protein_id "NP_009185.2"; +chr19 uta CDS 50365628 50365720 . - 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "11"; protein_id "NP_009185.2"; +chr19 uta CDS 50365442 50365538 . - 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "12"; protein_id "NP_009185.2"; +chr19 uta CDS 50365301 50365362 . - 2 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "13"; protein_id "NP_009185.2"; +chr19 uta CDS 50365029 50365138 . - 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "14"; protein_id "NP_009185.2"; +chr19 uta CDS 50364865 50364952 . - 1 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "15"; protein_id "NP_009185.2"; +chr19 uta CDS 50364706 50364767 . - 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "16"; protein_id "NP_009185.2"; +chr19 uta CDS 50364508 50364622 . - 1 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "17"; protein_id "NP_009185.2"; +chr19 uta 5UTR 50370726 50370822 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "1"; protein_id "NP_009185.2"; +chr19 uta 5UTR 50370462 50370474 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "2"; protein_id "NP_009185.2"; +chr19 uta 3UTR 50364460 50364504 . - . gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "17"; protein_id "NP_009185.2"; +chr19 uta start_codon 50370459 50370461 . - 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "2"; protein_id "NP_009185.2"; +chr19 uta stop_codon 50364505 50364507 . - 0 gene_id "PNKP"; transcript_id "NM_007254.3"; exon_number "17"; protein_id "NP_009185.2"; +chr19 uta transcript 50364461 50370833 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; protein_id "NP_009185.2"; +chr19 uta exon 50370726 50370833 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "1"; protein_id "NP_009185.2"; +chr19 uta exon 50370311 50370474 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "2"; protein_id "NP_009185.2"; +chr19 uta exon 50369656 50369702 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "3"; protein_id "NP_009185.2"; +chr19 uta exon 50368384 50368683 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "4"; protein_id "NP_009185.2"; +chr19 uta exon 50367581 50367660 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "5"; protein_id "NP_009185.2"; +chr19 uta exon 50367436 50367493 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "6"; protein_id "NP_009185.2"; +chr19 uta exon 50367221 50367328 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "7"; protein_id "NP_009185.2"; +chr19 uta exon 50366946 50367017 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "8"; protein_id "NP_009185.2"; +chr19 uta exon 50365947 50365995 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "9"; protein_id "NP_009185.2"; +chr19 uta exon 50365795 50365865 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "10"; protein_id "NP_009185.2"; +chr19 uta exon 50365628 50365720 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "11"; protein_id "NP_009185.2"; +chr19 uta exon 50365442 50365538 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "12"; protein_id "NP_009185.2"; +chr19 uta exon 50365301 50365362 . - . 
gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "13"; protein_id "NP_009185.2"; +chr19 uta exon 50365029 50365138 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "14"; protein_id "NP_009185.2"; +chr19 uta exon 50364865 50364952 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "15"; protein_id "NP_009185.2"; +chr19 uta exon 50364706 50364767 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "16"; protein_id "NP_009185.2"; +chr19 uta exon 50364461 50364622 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "17"; protein_id "NP_009185.2"; +chr19 uta CDS 50370311 50370461 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "2"; protein_id "NP_009185.2"; +chr19 uta CDS 50369656 50369702 . - 2 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "3"; protein_id "NP_009185.2"; +chr19 uta CDS 50368384 50368683 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "4"; protein_id "NP_009185.2"; +chr19 uta CDS 50367581 50367660 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "5"; protein_id "NP_009185.2"; +chr19 uta CDS 50367436 50367493 . - 1 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "6"; protein_id "NP_009185.2"; +chr19 uta CDS 50367221 50367328 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "7"; protein_id "NP_009185.2"; +chr19 uta CDS 50366946 50367017 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "8"; protein_id "NP_009185.2"; +chr19 uta CDS 50365947 50365995 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "9"; protein_id "NP_009185.2"; +chr19 uta CDS 50365795 50365865 . - 2 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "10"; protein_id "NP_009185.2"; +chr19 uta CDS 50365628 50365720 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "11"; protein_id "NP_009185.2"; +chr19 uta CDS 50365442 50365538 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "12"; protein_id "NP_009185.2"; +chr19 uta CDS 50365301 50365362 . - 2 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "13"; protein_id "NP_009185.2"; +chr19 uta CDS 50365029 50365138 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "14"; protein_id "NP_009185.2"; +chr19 uta CDS 50364865 50364952 . - 1 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "15"; protein_id "NP_009185.2"; +chr19 uta CDS 50364706 50364767 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "16"; protein_id "NP_009185.2"; +chr19 uta CDS 50364508 50364622 . - 1 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "17"; protein_id "NP_009185.2"; +chr19 uta 5UTR 50370726 50370833 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "1"; protein_id "NP_009185.2"; +chr19 uta 5UTR 50370462 50370474 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "2"; protein_id "NP_009185.2"; +chr19 uta 3UTR 50364461 50364504 . - . gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "17"; protein_id "NP_009185.2"; +chr19 uta start_codon 50370459 50370461 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "2"; protein_id "NP_009185.2"; +chr19 uta stop_codon 50364505 50364507 . - 0 gene_id "PNKP"; transcript_id "NM_007254.4"; exon_number "17"; protein_id "NP_009185.2"; +chr19 uta gene 1228286 1238004 . - . gene_id "CBARP"; transcript_id ""; +chr19 uta transcript 1229947 1237990 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; protein_id "NP_689982.3"; +chr19 uta exon 1237755 1237990 . - . 
gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "1"; protein_id "NP_689982.3"; +chr19 uta exon 1235995 1236120 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "2"; protein_id "NP_689982.3"; +chr19 uta exon 1235778 1235917 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "3"; protein_id "NP_689982.3"; +chr19 uta exon 1235500 1235564 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "4"; protein_id "NP_689982.3"; +chr19 uta exon 1235000 1235144 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "5"; protein_id "NP_689982.3"; +chr19 uta exon 1234570 1234741 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "6"; protein_id "NP_689982.3"; +chr19 uta exon 1234190 1234330 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "7"; protein_id "NP_689982.3"; +chr19 uta exon 1233425 1233635 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "8"; protein_id "NP_689982.3"; +chr19 uta exon 1229947 1231274 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "9"; protein_id "NP_689982.3"; +chr19 uta CDS 1235995 1236099 . - 0 gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "2"; protein_id "NP_689982.3"; +chr19 uta CDS 1235778 1235917 . - 0 gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "3"; protein_id "NP_689982.3"; +chr19 uta CDS 1235500 1235564 . - 1 gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "4"; protein_id "NP_689982.3"; +chr19 uta CDS 1235000 1235144 . - 2 gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "5"; protein_id "NP_689982.3"; +chr19 uta CDS 1234570 1234741 . - 1 gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "6"; protein_id "NP_689982.3"; +chr19 uta CDS 1234190 1234330 . - 0 gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "7"; protein_id "NP_689982.3"; +chr19 uta CDS 1233425 1233635 . - 0 gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "8"; protein_id "NP_689982.3"; +chr19 uta CDS 1230895 1231274 . - 2 gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "9"; protein_id "NP_689982.3"; +chr19 uta 5UTR 1237755 1237990 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "1"; protein_id "NP_689982.3"; +chr19 uta 5UTR 1236100 1236120 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "2"; protein_id "NP_689982.3"; +chr19 uta 3UTR 1229947 1230891 . - . gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "9"; protein_id "NP_689982.3"; +chr19 uta start_codon 1236097 1236099 . - 0 gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "2"; protein_id "NP_689982.3"; +chr19 uta stop_codon 1230892 1230894 . - 0 gene_id "CBARP"; transcript_id "NM_152769.2"; exon_number "9"; protein_id "NP_689982.3"; +chr19 uta transcript 1228286 1238004 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; protein_id "NP_689982.3"; +chr19 uta exon 1237755 1238004 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "1"; protein_id "NP_689982.3"; +chr19 uta exon 1235995 1236120 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "2"; protein_id "NP_689982.3"; +chr19 uta exon 1235778 1235917 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "3"; protein_id "NP_689982.3"; +chr19 uta exon 1235500 1235564 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "4"; protein_id "NP_689982.3"; +chr19 uta exon 1235000 1235144 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "5"; protein_id "NP_689982.3"; +chr19 uta exon 1234570 1234741 . - . 
gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "6"; protein_id "NP_689982.3"; +chr19 uta exon 1234190 1234330 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "7"; protein_id "NP_689982.3"; +chr19 uta exon 1233425 1233635 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "8"; protein_id "NP_689982.3"; +chr19 uta exon 1228286 1231274 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "9"; protein_id "NP_689982.3"; +chr19 uta CDS 1235995 1236099 . - 0 gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "2"; protein_id "NP_689982.3"; +chr19 uta CDS 1235778 1235917 . - 0 gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "3"; protein_id "NP_689982.3"; +chr19 uta CDS 1235500 1235564 . - 1 gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "4"; protein_id "NP_689982.3"; +chr19 uta CDS 1235000 1235144 . - 2 gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "5"; protein_id "NP_689982.3"; +chr19 uta CDS 1234570 1234741 . - 1 gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "6"; protein_id "NP_689982.3"; +chr19 uta CDS 1234190 1234330 . - 0 gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "7"; protein_id "NP_689982.3"; +chr19 uta CDS 1233425 1233635 . - 0 gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "8"; protein_id "NP_689982.3"; +chr19 uta CDS 1230895 1231274 . - 2 gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "9"; protein_id "NP_689982.3"; +chr19 uta 5UTR 1237755 1238004 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "1"; protein_id "NP_689982.3"; +chr19 uta 5UTR 1236100 1236120 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "2"; protein_id "NP_689982.3"; +chr19 uta 3UTR 1228286 1230891 . - . gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "9"; protein_id "NP_689982.3"; +chr19 uta start_codon 1236097 1236099 . - 0 gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "2"; protein_id "NP_689982.3"; +chr19 uta stop_codon 1230892 1230894 . - 0 gene_id "CBARP"; transcript_id "NM_152769.3"; exon_number "9"; protein_id "NP_689982.3"; diff --git a/tests/data/small.csv b/tests/data/small.csv new file mode 100644 index 0000000..a02d92e --- /dev/null +++ b/tests/data/small.csv @@ -0,0 +1,7 @@ +gene,CHROM,position.dna,REF,ALT,POS +foo,chr19,1.0,A,G,1 +ELANE,chr19,855795.0,G,A,855795 +ELANE,chr19,855799.0,G,A,855799 +STK11,chr19,1218501.0,A,G,1218500 +STK11,chr19,1228432,C,A,1228431 +PNKP,chr19,49861542.0,AGGGGTCAGGGGAGGAGG,A,50364799 \ No newline at end of file diff --git a/tests/data/small.vcf b/tests/data/small.vcf new file mode 100644 index 0000000..b91d5dd --- /dev/null +++ b/tests/data/small.vcf @@ -0,0 +1,30 @@ +##fileformat=VCFv4.2 +##fileDate=20191004 +##reference=GRCh37/hg19 +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr19 1228431 . C A . . . 
\ No newline at end of file diff --git a/tests/test_processors.py b/tests/test_processors.py new file mode 100644 index 0000000..83ebf8f --- /dev/null +++ b/tests/test_processors.py @@ -0,0 +1,76 @@ +import difflib +import sys +import tempfile + +import pytest + +from pangolin.data_models import AppConfig +from pangolin.processors import process_variants_file + + +def build_config(input_file: str, output_file: str, batch_size: int = 1) -> AppConfig: + # This is just a download of the chr19 reference genome + ref_file = "tests/data/reference/chr19.fa.gz" + + # The chr19_genes_filtered.gtf file used below was derived from chr19_genes.gtf.gz + # with the following set of commands. I hardcoded the explicit gene names used + # in the test files to reduce the size of the database and files + # gzcat tests/data/reference/chr19_genes.gtf.gz | grep 'PNKP\|ELANE\|STK11\|CBARP' > tests/data/reference/chr19_genes_filtered.gtf + # python scripts/create_db.py tests/data/reference/chr19_genes_filtered.gtf --filter None + gtf_file = "tests/data/reference/chr19_genes_filtered.db" + + app_config = AppConfig( + variant_file=input_file, + output_file=output_file, + reference_file=ref_file, + annotation_file=gtf_file, + batch_size=batch_size, + distance=200, + score_cutoff=None, + mask="True", + score_exons="False", + column_ids="CHROM,POS,REF,ALT", + enable_gtf_cache=True, + ) + return app_config + + +def run_pangolin(input_file, expected_file, batch_size: int = 0, suffix: str = ""): + with tempfile.NamedTemporaryFile(suffix=suffix) as fh: + output_file = fh.name + config = build_config(input_file, output_file, batch_size) + process_variants_file(config) + with open(output_file) as out_fh: + batch_file_contents = out_fh.readlines() + + with open(expected_file) as fh: + expected_file_contents = fh.readlines() + + if expected_file_contents != batch_file_contents: + sys.stdout.writelines( + difflib.unified_diff(expected_file_contents, batch_file_contents) + ) + assert expected_file_contents == batch_file_contents + + +@pytest.mark.parametrize( + "batch_size", + [ + 0, + 1, + ], +) +def test_batch_vcf(batch_size): + input_file = "tests/data/small.vcf" + expected_file = "tests/data/expected/small_out.vcf" + run_pangolin(input_file, expected_file, batch_size=batch_size, suffix=".vcf") + + +@pytest.mark.parametrize( + "batch_size", + [0, 2, 3, 5], +) +def test_batch_csv(batch_size): + input_file = "tests/data/small.csv" + expected_file = "tests/data/expected/small_out.csv" + run_pangolin(input_file, expected_file, batch_size=batch_size, suffix=".csv")