From 10bddc92a342a496fc4d2d41072a265c16f00979 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Tue, 10 Sep 2024 14:36:32 -0600 Subject: [PATCH] 1.0.0 (#62) * Adjusted the name of the TSV output flag ("--ultra" -> "--tsv" * Adjusted the default memory window sizing algorithm. * Adjusting example fasta files. * Adjusting example fasta files. * Fixed an repeat splitting output bug in TabFileWriter * Removing json11 lib files, adjusting default tuning parameters, fixing bugs with --fdr * Including a new example fasta * Adjusting JSOn output and ULTRA TSV output extension (.ultra -> .tsv) * Improving README and adding in an example tune_file. * Update README.md Fixing README typos * Update README.md README wordsmithing * Update README.md Typo fix in example 2 of the README * Update README.md More improvements to example 2 * Update README.md Improvements to example 3 * Update README.md More improvements to example 3 * Update README.md Improvements to the output format section * Update README.md Improvements to json format section of tuning guide * Added a --cite flag * Adding a workflow for building/storing binaries. 
--- .github/workflows/build.yml | 43 ++ CMakeLists.txt | 5 +- README.md | 210 +++++++--- examples/ex1.fa | 91 ---- examples/ex2.fa | 152 ------- examples/ex3.fa | 37 -- examples/ex4.fa | 6 - examples/ex_at80.fa | 51 --- examples/ex_at90.fa | 51 --- examples/ex_large.fa | 284 ------------- examples/example_1.fa | 23 ++ examples/example_2.fa | 301 ++++++++++++++ examples/example_3.fa | 165 ++++++++ examples/run-all.sh | 6 +- examples/tune_file | 8 + lib/json11.cpp | 797 ------------------------------------ lib/json11.hpp | 233 ----------- src/JSONFileWriter.cpp | 6 +- src/RepeatSplitter.cpp | 1 + src/TabFileWriter.cpp | 16 +- src/cli.cpp | 47 ++- src/cli.hpp | 12 +- src/main.cpp | 3 +- src/ultra.cpp | 2 +- 24 files changed, 739 insertions(+), 1811 deletions(-) create mode 100644 .github/workflows/build.yml delete mode 100644 examples/ex1.fa delete mode 100644 examples/ex2.fa delete mode 100644 examples/ex3.fa delete mode 100644 examples/ex4.fa delete mode 100644 examples/ex_at80.fa delete mode 100644 examples/ex_at90.fa delete mode 100644 examples/ex_large.fa create mode 100644 examples/example_1.fa create mode 100644 examples/example_2.fa create mode 100644 examples/example_3.fa create mode 100644 examples/tune_file delete mode 100644 lib/json11.cpp delete mode 100644 lib/json11.hpp diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..7428c66 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,43 @@ +name: Build and Release + +on: + push: + tags: + - 'v*.*.*' # Trigger only on version tags + pull_request: + tags: + - 'v*.*.*' # Trigger only on version tags + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up CMake + uses: lukka/get-cmake@v3 + + - name: Install dependencies + run: sudo apt-get update && sudo apt-get install -y build-essential + + - name: Configure CMake + run: cmake -S . 
-B build + + - name: Build with CMake + run: cmake --build build --config Release + + - name: Create zip archive of binaries + run: | + mkdir build_output + cp build/ultra build_output/ + zip -r binaries_${{ github.ref_name }}.zip build_output/ + shell: bash + + - name: Upload binaries to GitHub Release + uses: softprops/action-gh-release@v1 + with: + files: binaries_${{ github.ref_name }}.zip + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c31c5c..6620c74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,8 +16,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set( LIB_CPP_FILES - - lib/json11.cpp ) set( HPP_FILES @@ -57,7 +55,8 @@ set( src/JSONFileWriter.cpp src/RepeatSplitter.cpp src/mask.cpp - src/cli.cpp src/TabFileWriter.hpp) + src/cli.cpp + ) find_package(Threads REQUIRED) diff --git a/README.md b/README.md index 1c36ca8..00799c6 100644 --- a/README.md +++ b/README.md @@ -1,100 +1,176 @@ # ULTRA **U**LTRA **L**ocates **T**andemly **R**epetitive **A**reas +## About +ULTRA is a tool for finding and annotating tandem repeats within genomic sequence. Model details and evaluation can be found in our release paper, [ULTRA-Effective Labeling of Tandem Repeats in Genomic Sequence](https://www.biorxiv.org/content/10.1101/2024.06.03.597269v1) -(Note: The current stable release is 1.0.0 beta. As we move out of beta we will update the README to be more informative.) - -ULTRA is a tool to find and annotate tandem repeats inside genomic sequence. It -is able to find repeats of any length and of any period (up to a maximum period -of 4000). It can find highly decayed repeats missed by other software, and it -will also be able to find very large repeats in highly repetitive sequence, -regardless of the size of sequence or length of repeats. ULTRA offers meaningful -annotation scores and can produce annotation P-values at user request. - -## Install - -ULTRA uses the CMake build system (version 3.12 or greater). 
This makes building -the application pretty straightforward. - - 1. Generate the Makefile: `cmake .` - 2. Build the `ultra` executable: `make` - -Note that on some systems you may need to install CMake 3 as a separate package -and run `cmake3` specifically. - -## Run - -After building ULTRA you can use it by running: - +## Building +ULTRA requires a compiler supporting C++11 or higher and CMake 3.12 or higher. To download and build ULTRA run the following commands: ``` -./ultra [options] +git clone https://github.com/TravisWheelerLab/ULTRA +cd ULTRA +cmake . +make ``` +## Basic usage +A list of all flags and options can be seen with `ultra -h`. To annotate tandem repeats with ULTRA use `ultra [options] `. The following examples demonstrate common use cases. + +
+Example 1 - Default settings -ULTRA will produce high quality annotations out of the box, and so we can -analyze one of our example files by simply running. - +`examples/example_1.fa` contains randomly generated sequence with three inserted tandem repeats. We can use ULTRA to annotate the file by running: ``` -ultra examples/ex1.fa +ultra examples/example_1.fa ``` -If you want to direct the output to a file you can do so with the -o flag: +The above command will cause ULTRA to output (to stdout) the settings used by ULTRA followed by a TSV formatted annotation of the repeats found within `examples/example_1.fa`. +ULTRA annotations can be directed to an output file using the `-o ` option. For example: ``` -ultra -o ex1_output.bed examples/ex1.fa +ultra -o examples/example_1_ultra.tsv examples/at_repeat.fa ``` -Now your current working directory will contain an `ex1_output.bed` file. At -the top of this file will be a complete description of the parameters used to -run ULTRA, and then there will be a list of repeats found, and their associated -annotation information. Output is in JSON format. +Running this command will result in two files, `examples/example_1_ultra.tsv` (containing the repeat annotations) and `examples/example_1_ultra.tsv.settings` (containing the ULTRA settings used). ULTRA's default TSV format consists of a header row followed by one row per annotated tandem repeat. Looking inside `examples/example_1_ultra.tsv` we find: +``` +SeqID Start End Period Score Consensus #Subrepeats SubrepeatStarts SubrepeatConsensi +Rand_seq_1 447 592 2 175.659958 AT 2 0,81 AT,CG +Rand_seq_1 859 920 4 49.729370 AGGC 1 0 AGGC +Rand_seq_1 1104 1167 3 71.850250 AGT 1 0 AGT + +``` +Columns 1-6 (`SeqID`, `Start`, `End`, `Period`, `Score`, and `Consensus`) describe the overall repetitive region. Columns 7-9 (`#Subrepeats`, `SubrepeatStarts`, and `SubrepeatConsensi`) describe changes in repetitive pattern. 
Running ULTRA with the `--show_seq` flag will result in an additional column, `Sequence`, that contains the complete repetitive sequence. + +The second and third repeats (AGGC and AGT respectively) have no changes in repetitive pattern and so their `#Subrepeats` field is "1". The first repeat contains two unique repetitive patterns though, and so its `#Subrepeats` field is "2". The `SubrepeatStarts` field contains a comma separated list of start locations (relative to the repeat `Start` field) and the `SubrepeatConsensi` field contains a comma separated list of subrepeat consensus patterns. When a letter cannot be resolved for a subrepeat consensus pattern the letter will be displayed as "\*". -By default, ULTRA has a maximum detectable repeat period of 15. The sequence in -`examples/ex_large.fa` has a period 1000 repeat with a 10% substitution rate. To -detect larger period repeats, we will use the -p argument. +ULTRA can be used to mask repeats with the `--mask ` option. For example: ``` -ultra -p 1000 --json -o ex_large.json examples/ex_large.fa +ultra --mask examples/example_1_masked.fa -o examples/example_1_ultra.tsv examples/at_repeat.fa ``` +By default ULTRA will use lower-case masking, although ULTRA will use N-masking if supplied with the `--nmask` flag. -It's important to note that ULTRA's runtime grows linearly with maximum period -size. It’s also important to note that as a repeat increases in period, it -becomes harder to pin down the exact repeat period. +
-Notice how ULTRA labels the period 1000 repeat inside `examples/ex_large.fa` as -being a period 999 repeat with 3 insertions. Period 999 ends up being the most -probable explanation of the `examples/ex_large.fa` repeat given the high -substitution rate, even though we have artificially created the repeat and -therefore know that it is in fact a period 1000 repeat with no indels. +
+Example 2 - Large period repeats + +`examples/example_2.fa` contains a period 1000 repeat, which is larger than ULTRA's default maximum detectable repetitive period (100). To find the large period repeat we must adjust ULTRA's maximum detectable repetitive period using the `-p ` option. -ULTRA's default parameters are robust against different levels of AT richness. -For example, running ULTRA on ex_at80.fa (random sequence with 80% AT bias and a -single tandem repeat in the middle) with default parameters (run: `ultra -examples/ex_at80.fa`) yields a correct repeat annotation with one single tandem -repeat found. +`ultra -o examples/example_2_ultra.tsv -p 1000 examples/example_2.fa` -However, running ULTRA with default parameters on `examples/ex_at90.fa` (random -sequence with 90% AT bias and a single tandem repeat in the middle) will result -in many false positive repeat annotations. We can clean up our annotation by -adjusting ULTRA's expected nucleotide distribution using the `-at` flag: + After running the above command `examples/example_2_ultra.tsv` will contain the following output: ``` -ultra -at 0.9 examples/ex_at90.fa +SeqID Start End Period Score Consensus #Subrepeats SubrepeatStarts SubrepeatConsensi +period_1000_repeat 0 17999 1000 22938.433594 
AAAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCG 1 0 
AAAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCG ``` +The large consensus can be cumbersome and some users may prefer ULTRA's output to not include consensus patterns for large repeats. Consensus patterns can be limited to smaller period repeats using the `--max_consensus ` option. For example: -Using `-at 0.9`, ULTRA will correctly find the only tandem repeat inside the -sequence without reporting false positives. +``` +ultra -o examples/example_2_ultra.tsv -p 1000 --max_consensus 10 examples/example_2.fa +``` -To see a full list of ULTRA arguments you can use ‘ultra -h’. +results in the following output: +``` +SeqID Start End Period Score Consensus #Subrepeats SubrepeatStarts SubrepeatConsensi +period_1000_repeat 0 17999 1000 22938.433594 . 1 0 . +``` +
+
+Example 3 - Tuning and FDR + +`examples/example_3.fa` contains randomly generated 80% AT rich sequence along with two inserted tandem repeats (an "AAAGC" repeat and an "AAAATAC" repeat). The large AT bias is far outside ULTRA's default expectation, and as a result ULTRA will have a high false discovery rate, as seen by running: +``` +ultra --fdr -o examples/example_3_def.tsv examples/example_3.fa +``` +After this command runs, ULTRA will print to standard output: `Estimated false discovery rate: 0.58`, meaning that ULTRA expects 58% of the repetitive coverage in `examples/example_3_def.tsv` to be falsely labeled. The high false discovery rate is caused by the % AT-content in `examples/example_3.fa` being much higher than ULTRA expects by default. Note that ULTRA uses random sequence shuffling to estimate false discovery rate (see the tuning section of [our paper](https://www.biorxiv.org/content/10.1101/2024.06.03.597269v1)), and so the exact estimated false discovery rate will be different each time the command is run. -## License +We can improve our results by automatically tuning ULTRA's parameters using the `--tune` flag: +``` +ultra --tune --tune_indel --fdr -o examples/example_3_tuned.tsv examples/example_3.fa +``` +`--tune` causes ULTRA to perform a parameter search that maximizes coverage while keeping the estimated false discovery rate under some threshold (by default 0.05 and adjustable via `--tune_fdr `). By default ULTRA will tune itself using a lighter-weight repeat model (one without indel states) in an attempt to decrease tuning time. The `--tune_indel` flag will enable the full ULTRA model and result in the highest quality tuning results. ULTRA's tuned annotation, `examples/example_3_tuned.tsv`, contains only the inserted "AAAGC" and "AAAATAC" repeats: +``` +SeqID Start End Period Score Consensus #Subrepeats SubrepeatStarts SubrepeatConsensi +80_AT 4404 4535 5 132.823410 AAAGC 1 0 AAAGC +80_AT 8406 9134 7 716.489990 AAAATAC 1 0 AAAATAC +``` +
-BSD 3-clause license. See LICENSE file. +## Output formats and tuning guide +
+ULTRA TSV format -## Authors +ULTRA's default output format is a tab-separated-values format that includes a descriptive header. Default settings will result in the following columns: +``` +SeqID Start End Period Score Consensus #Subrepeats SubrepeatStarts SubrepeatConsensi +``` +`SeqID` describes the SequenceID, `Start` describes the repeat starting location (starting with 0), `End` describes the ending location (total repeat length is `End` - `Start`), `Period` describes the repetitive period, `Score` gives the ULTRA score, `Consensus` gives the repetitive consensus, `#Subrepeats` describes the number of repetitive patterns found within the region, `SubrepeatStarts` contains a comma separated list of where subrepeats begin (the first will always be 0), `SubrepeatConsensi` contains a comma separated list of subrepeat consensus patterns. + +Using `--pval` will cause the `Score` column to change to "`Score,PValue`". +Using `--max_split -1` will remove the columns `#Subrepeats SubrepeatStarts SubrepeatConsensi` columns. +Using `--max_consensus -1` will remove the columns `Consensus SubrepeatConsensi`. +Using `--show_seq` will include an additional `Sequence` column containing the complete repetitive sequence. +
+
+JSON format + +Using ULTRA with `--json` will result in JSON formatted output. The JSON output will contain an object with a `Repeats` array. Each object in the `Repeats` array will contain descriptive fields such as "Start", "Length", "Period", "Score", "Substitutions" (the number of mismatches), "Insertions" (the number of insertions), "Deletions" (the number of deletions), "Consensus", and additional fields depending on the specific settings being used. + +Repeats that contain subrepeats will have a "Subrepeats" array, each object in the array containing a "Start" field (that describes the subrepeat's starting location relative to the overall repeat), and a "Consensus" field. +
+
+BED format + +Using ULTRA with `--bed` will result in a BED file with four columns (`Sequence ID`, `Start`, `End`, `Consensus`). +
+
+Multiformat output + +When using the `-o ` option for saving output it is possible to provide ULTRA with multiple output formats. For example: +``` +ultra --tsv --json --bed -o examples/example_1_multi examples/example_1.fa +``` +Running the above command will result in four output files: +``` +examples/example_1_multi.settings +examples/example_1_multi.tsv +examples/example_1_multi.json +examples/example_1_multi.bed +``` +Note that when using multiple output formats ULTRA will automatically choose the extension based on the output format type. +
-See `AUTHORS` file for up-to-date authors list. +
+ Tuning guide + +ULTRA's automatic parameter tuning (via `--tune`) can greatly improve annotation performance by testing several parameter sets and then using the parameter set that achieved the highest coverage within some threshold of estimated false discovery rate (by default 0.05, and adjusted with `--tune_fdr `). By default `--tune` will test 18 parameter sets, `--tune_medium` will test 40 parameter sets, and `--tune_large` will test 252 parameter sets. Each parameter set contains different emission probabilities and transition probabilities, but does not affect the repeat period. By default tuning will disable indel states in order to decrease the overall runtime. Best tuning performance will be achieved by tuning with indel states, using the `--tune_indel` flag. - - Daniel Olson - - Travis Wheeler - - George Lesica +Users can also specify their own parameter sets to tune ULTRA against by using the `--tune_file ` option. Each line in the tune file should contain arguments to run ULTRA against. An example can be seen in `examples/tune_file` which contains: +``` +-p 1 +-p 4 +-p 16 +-p 64 +-p 256 +-p 10 -m 0.5 --at 0.3 +-p 10 -m 0.55 --at 0.2 +-p 10 -m 0.5 --at 0.1 -i 2 -d 2 +``` +Here `examples/tune_file` tests different repeat periods (`-p `), different repeat emission probabilities (`-m `), different background AT frequencies (`--at `), and different indel states (`-i -d `). +
+## Citing +``` +@article {Olson2024ultra, + author = {Olson, Daniel R. and Wheeler, Travis J.}, + title = {ULTRA-Effective Labeling of Repetitive Genomic Sequence}, + elocation-id = {2024.06.03.597269}, + year = {2024}, + doi = {10.1101/2024.06.03.597269}, + publisher = {Cold Spring Harbor Laboratory}, + URL = {https://www.biorxiv.org/content/early/2024/06/04/2024.06.03.597269}, + eprint = {https://www.biorxiv.org/content/early/2024/06/04/2024.06.03.597269.full.pdf}, + journal = {bioRxiv} +} +``` diff --git a/examples/ex1.fa b/examples/ex1.fa deleted file mode 100644 index 74d264c..0000000 --- a/examples/ex1.fa +++ /dev/null @@ -1,91 +0,0 @@ ->chr3 184796623-184801995 -GGCGGTGAGGTCGCTGGACTCCTCAAGCAGAGGTTTTTCTGCCGCCCCAGAGTGCAGGCA -GATGCAAGGGCTTTCTTTGTCACCTCCTTCTTCCTGACAGAACAGCAGCCTTGGACAATG -GCGAACAGCACTCTTGAAATCTGATCCCGGACCTCACACTTCTCTCCACACTTCTGTCAC -CATGTCTTCTTATCTTCTCACTGTCTCCTCTGTCTCACCCTCATGTGTCAAGACTCTGCC -CCTGAATTCCCTCGCTTGACTGTCTCCTTGATTTCTCCATGGACTCCCACACCATCTGCT -AGTTAGTACCTTGCTGGGAGCAATTTCCAGAAACTCCCTGAGGCCCTGATTCTGCTCCTG -agctgcccattggatatctctcagatgtcctgctagctacctcaagtcaacatgtcccaa -agggacctcaccaccttttcctcccaaaccagtcctcccctgtgaccttgctatgcctgt -aagtggtaccaccgttcttAATCCTCCTGGACCACACGGACCACCCGCTGGCACAGTGCC -CATAATGGCCATCCACCTGCTGTGGCCTAATGCTTTGGAGACCCCAGTGAAGATCAGTTC -TGAAGTTGAGACTGAGGCTGGGTCTGGCTTCCTGCAGCCCTTGTCCTACCCTGAGATCAT -GACCCCATCTTCCTTTGCACTGGTGTGTTCATTTTTTTGTAAATATAATGCtatttttat -tgaaataattcatatgtactacttaaaaccaaatagtcctactaagtatataatgaaaaa -acagattgctccctgtgtcatttctttttacttgtgattactactccttagaggtaattg -caacattttttgctgtttcttcaggtatttacctccatttttcaaatgcggtaaatggta -atcttagattttttcattctggacattatctattgatttTTAATAAagatgctgtcaata -tcccatctgtatgccttagactaagcttgtccaaccttcagccataggcaacatgtggct -caggacggctttgaatgtagcccaacataaatttgtaaactttcttaaaacgttgagttt -ttgtgtgatttatttatttatttatttatttttgatggagtctcttgctttgtcgcccag -gctggagtgcagtggcacgatctcagctcactgcaagctccgcctcccaggttcacgcca -ttcttctgtctcagcctcctgagttgctgggactacaggcgcccaccaccacgcttggct 
-aatttttttttttttttgtatttttagtagagacggggtttcaccgtgtaagctaggatg -gtctggatctcctgacttcgtgatccgccctccttggcctcccaaagtgctgggattaca -ggcatgagccaccacgcctggcctttttttttttttcctcaccatctattgttagtgtta -gtgtattttatgtgtggcccaagacaattattcttcttccaatgtggcccagagaagcca -aaagtttggacatccctgccttagacatttctaagcagcattcttccttacttctaactg -ccgggggctgcatctctgtatctgaaagcttctggggtctgccctcacagactagacata -cctagaaattaatatgccatctcccccaatcccagcagtcctcaactaatggctgtcagg -agctggagtataaatacctcagctccctcccccatgggtgcgataattccaaagcatgtg -ttttgccccatttcccagagttttcccagtaagattaagctccaatcgcccacagctgtg -cctggcttaatcacacattctttattggctgttttcccttccctacatcactttcccaac -tccctgtgcaagctgtcccctgcatttcccaaataaactacttaagtctcagagtctgct -tctggggaaacccaaactaagataacttccttataggaagatgaggatttaccttcctgc -caacttctttccctgaacacattctcttaatatcattgtagcatggtttttagttaaatc -aacattcagtatatacattattatgactatgtaaatattgcttgcaaccgagcaacataa -actaggattatattttttaaaaaagcatttccctttagagttaataataccttctttctt -ttctttgcttagttttctatgtacttatcactaatttgtccccaactctcttatagagta -gaactctatatagtaaaataaatcatgttatctctatgttctattatttccttggaaacc -actcctgaagcctggagttttcctgctccaatgtggactggttgctttcctggtttgcta -cgcaactattatcctggggctcccttcattatcattctagaaagttcttttccctctctc -tcctatatgaattcccctattgtctgtgtcccatgtcttctttcatgatttactttctct -tcacgtcttttatgagaatgggtacatcagagacaaattatttgaaatctggaatgtctg -caaataactttgttcgtgttttattgacaatttgacttgctagagaattctagaaaaaaa -attgtctcccagaatttggtggaaatttgtctactgtcttctagcttcTTCTTTTTTTTA -AATAATTTTTAAAATGATTTAAAAATTTTTAAATGATTTATTTGGTAATTATTCCCCAGA -CTTTTCTCAGTTCCTCCTTCTGGAGCTCCTGCTTTTATTTGTTTCTATTTTACTTTACAT -AGTAATTTATTTCAAAAGTATATTTAATACTTACACATGTATTATTTTATTAGCTACTTT -GTAATATGATTATACATTTTTGCATTTGAAATGCTTTATATATTTAATTCTGCAATTTTT -TAATTTTATGAATTCTGTTTTTTacaatgttattgaggtatatttgagttttataaaatt -tccccattttgagtgtacaatttaatggcttcactgggcgcagtggcacctgcctataat -cttagctactcacgaggctgaagtgggaagactgcttgaatccaggggtttgagaccagc -ctgggcaatatatgaagactccaccttaaaaaaaaaaaaagacttttaaattcaccaagt -tctacaaccatcaccacaaacaaattttagaatgtttccatcaccccagtaagatccctc 
-atgcccattccagcattttttcccAGTGTattctgaaccccaatcatttgtaggtgacct -agtttttcctctctacaaggttctgctctttatttctgatatgttacaatgatgtgcctt -aatgtgatttttttttactatcctggaaaattttcaaagaatttatttggtaattattcc -ccaaacttttctcagtttcttcctacgggaattcctgttattctggcattgagccttctg -gagtaattgtttgatgctttaatctttttcttttattaattctttttgtctttatttgcc -ttatttttaggagattttctagttttttgtcttctaaacctatacaatattaaaacttta -gcaatcttgatttccaagagcccattcttattctctgaaggttttttttccatagcccct -gatattgtttcctgggttcaatggttttctagccctctaaggatattaaatatgccgttt -aaaattgcagtgaattctctgaattgtctctgcttcgcctgagttttcctccctccctgc -ctccctcccttcgtccctccttctttccttcttctcccttctccacctcttcttccccct -ttttcttctactgggttttgctttagatcctctcttcctttcctcaccatccgttcatat -tgaagagtgaggcactgaaaagctcattgaaatctctgtgtgttggggcagggctggttt -agtggcaggccgcaccttaaggttatcaggtaggaagctaatgttttactggggattccc -caaccaaggtcaaccaatgccattatcaacaggtctttctcttgagccagtcagtttacc -cagcaggaccttccccagcctctttcctctagaatgtaagccaggcttccagaattctgg -gCTCCCCCGCTTTCCCTTCCCATCAGAAGGAGTGGGTGATCACATTGCtcaagaatgaga -cttctgcctgaccaacatggtggaactccctctctactaaaaatacaaaaaaatttagtt -agcatggtggcacgtgcctgtaatcctagctactcaggaggctgaggcaggagaatcatt -tgaacctgggaggcagaggttgcagtgagccgagatcacaccactgcactccagcctggg -tgacacagcaagactccatctcaaaaaaaaaaaaaaaaaaaaaaaagaaTGagactttca -cttgatcatcctgctttctctatgctgccttacctcttcatcttccatgcctggtgcctt -ccccagatccagaatctctccttcagcctctccagagattaagcctgtgttttgcagggt -gggatgaacaaagctgtttgccagggctggagaggagaagctgactttcagctcatcctt -atgtttgcagcccactccatcccattaacttcacagttacccatacctccattttctaaa -ctcttccgttcttattttttgttgtttttcatgtcaaattggcttcctttttgaaagcat -ctctgccccttgcctgccctgcagccagcttttttttatctgctaagtcagttaatactc -atccaatgggatctttcttccaaaattctttgacatcatccatcagctgttgtcttttct -cttgttctgatagttcttgtgggtttagactttactactttactgtcattgtaggaacag -agtcaaacacatgggctaaatgtgccaagtctaaccagaaatTCTCTCCCTGTCCCTGCT -GCCACACTCTTCAGCCTGACTCACGTTtccagcctggaatgcttactactccccagacaa -acccagctcttcttggcctccttacctttgctgataacgtttctggaatgtactccttgt -ctcctttccatcaagactcatcttagggatcatacccatgaaacctcctccaATTTACCG 
-CTGCTAGAGAAACATCTTGTCCTTTCTAAAGCTCATGTCATGTGCATGTTTCACCTCTCC -TGAGGCACCTATGGTATGAACATCCATCGCTCTGCTATACAAGTTCTTTGAGGGAAGGGA -CAACAGCTTACCGTCTTTGAATCCTCTTCAGCCAGCAGTTGATCTACCTATCTATCTATT -ATAACAAGAACCATAAAAATTAAAtgtctgccataatgtgtacaggcattttaaccacaa -tggtctctaacacctacagtaattctacaaggt diff --git a/examples/ex2.fa b/examples/ex2.fa deleted file mode 100644 index 64fd196..0000000 --- a/examples/ex2.fa +++ /dev/null @@ -1,152 +0,0 @@ ->chr2 128345832-128354866 -TTACCCAGGGACGCGCGCGGGGATCATCGTCTCCCGGGGGCGAGCATATAAGACGGACCC -TGCCCCAGACCCCTTGCCTTCCTCCACAGGGGGATGGGAGCTAGGGCTGGGGGCGCTGAG -AGGAGGGGGTCTGCACGCTGGCTTGGTGGTCCCCTCTGGCGGGGACAGTGAAGGCTTGGG -GGCCTTCGAGACAGGTGCCCACGTTGCCCAAACCACGTGTTGGTGCCACGTGGCAGTGAA -GGTTTTAAAATGGGGGTTGGGGTTGTGAGGCTGGGCCGGCGGGGTGAGATGGGGCTTGGG -CATCTGCACTTGCCCGGGCGTGCGGGGGCTCCTAAGGAGGAGGCTGAACCGGTTGTTGGG -GTGGCCTAGGGGGAGGAAGGCGCCCAGGCCCAGGAACAGCCTGGGAGGCCCCAGGGGCAG -GGGCAGGTGGGCTGGAAGGAAGTCGTCCGTTGTGGTGGAGACAGGAAAGGGCTGGAAAGG -TGAGGACCCGTTCCCGCCCCGTCCCTTCTTCTCCGCGGCCTGCAGACCCTGGGCCAGGTG -GGAATCAGCCTGCTCGCGGCGCTCGGAAGGCGGGCTCCTCCGTGAGAAGCCGGCCCCTGA -GGTGCCAGGAGGACCGCGCCGCCTGCTTGGCATCCATCCATTTGTTCTAATGAGAGAAGG -AATGAACGGGTTGACTCTGCAGGGCTGCGCCAAGGCCTGCGTCTGCGGGGCAGCGGTGCG -GTGTGGGGTCTGTGCGCTGAGCCCCGCCAGGGTTTCTTCTAGATGAAGCCCGCAGGCCTT -CTCTCCTGCGCACGTGTCCCACGGCAACCGGGAGTGCGAGGTCCTCACGGCTCAGCCCTG -TCCCTCCGTTCCTGGAGAGCGCATCGCCCCATGGGCAGAGCGGGCAGACAGAGGGTAGAA -GTAGGCTTGCCCCTGGGGTGGGTCAGGGAGGTCTCTCTGAAGAGCTGAGCTGTCTGTCAA -GCAAGCCTAGAATGATGAGGACCAGCAGCAGAGAGCAGCCTTGGAAACACCCCCGCCTCA -CCCCGGGCCGGGGAGCGGCTGGAGCAAAGATCCTGGGGCGCAGATAAGTGTGGGCTGCCC -ATGTGGAGCCCCCTGAGCAGAGGGAAGCAGCGGCGGGCGGGGGGCGGGTGCCAGGGCTCG -GATCCGGGAAGAGGGCCTTCTGGGCGATGGCGAGTGGTTTGGCTTTATCCCAAGCCTGAG -AGGAAGCCGCCGCGCGGGGCTAGGCAGAGGGAGAACTCCTGTACTGTACTCTCTTGTTTT -TCCCCTCTGGAAACTAGGTGGGGTGGGGAGGACAGCCATGTGGTTCGTCCTCCAGGCAGG -AGCGGGCCTGGTGGGTGAGGGCAGACGTGGTGAGTGAGGAGGGCAAGCCTCGTGGTTGCT -GCGGCTCAGTGAGGCCTCCTGGAGGGTGAGGCTCCTGTGAACTGGGCACACACAGGCGGC 
-CACCACGGTGCCTCAGCTGGGCTGTCACCTCTCACCCTGGGCCTCTCCACCTGCCCCAAG -CCAGGCGCTGTGTGGGACGGTGGGCCACAAGCCCTGCCTATGGTCAGCCCATGGGGGAAA -ATCATGCACTTTTAGGACCATCAGCTAGGCCAGAGAGGAAGCCAGAAAAGAAGGCACAGA -CTCCAGCCACTGCGGCCCACTCCCTGGCGTCGCCGTCACCGAGCTGCTGCCTCTCTGCTT -CGTATTTTCAGTGACTCCTCGCCATAACCTTGTGTTACACATAAAGCGACTAAAGTTAAA -GGACTAACAGAATTTTCTAGAGACCCAGCTAACAAATGACAAAGCAGGGTTGGCCAAGCA -GGGGTTGGAATCCACAGAAGAATCACATGCCTGTTCCCAACACCCCTTGAGGCGTGGAGC -CCCAGAGGCAGGCCTGCTGCACAGCCAGGCATCTCCCACAAGCACCAGGGGGTCTGTTCC -ACATATGCACATGTACCCAAatgtgcacacacatgtgcacacacaCGCCCGCTCACCCCG -ACCCTCCTGTACGCACCCATTCACGTGCACACCCACAGTTCCCCTACGTCTGTCTGATCC -TGCGTCACAAGGCTGTGTTTTCTGAGCACACCTCTGCACTCCCAGCTACCACCTCAGCCA -CAGATGTTTGCCAGGAAGCTGGACGTGCTCTTCCCTGCAGTCTTCTTTCTCTGCTCCTCG -TTGCTTGGCTTGGCTGCCACAGGGAGAGAGGATTTGCAGTGCAGAGGTAAAGAGGTTTTT -CCTAATTTGCTCCATGTCCAAACCATTGTCTGCAGATGCAGCCTCACGGCCACCACATTT -TTCACAGACGGTGGCTTGCCAAGAAAAAGAGCCTCCTCTGCCAGCCACCAAGGGGGAGGA -GAAATTCCAGGGAAGCTGCACGGCCCAGTGGGCCTGGCTTCCGCAGGCCTCCTGTGAGGG -CTCCTGCCAGGGCAGCTTTAGGAAGGCAGGGACTGGGAGGCAGGGCTGGCCTCCTGCCCA -GGATGGGCACAGCCCCCTCTTCATGCTCCTGGGTTCTCCCGGTCCTGGCCCAGCTGGGTA -GATGTGCTCCCTCAGGCCGTCTACGCCTTGTTCTTTCTTTTCTTTTTTTCTTAAAACAAT -TGTTTTGGttttgtgagcaggcaggctgttcacatggctccaaagctgaaagctgccaga -agctcccagggaagggtttctcctgccctgaacctccagcaggccttcctccctgctagg -gccaatatcacacagcttttgcaggtccttttagagctattttctgcacattcaagcaaa -tatgtgcatacatttctctttccctccaattttgtagaaaagatgacacatgtcacatgc -ttgtttcatgttgcctttttcactcgatggtgtgacagtggggcccactctagaccagga -tataaggagtttccttaggaatttgagacgcatagttcacagtgtggagaagtcataatt -tatctaacctgtAATGCatccagttgttcaaaactcctttgggcagcacccactgcgtgc -ccagctctgtgcccaggtcctgagggcTGCTGTTGGTTGTAACGTTTCATGGAAAGAACA -GGGCTTGCTGCTGTACAGCAGCTCAGATGACGCTGGGCCAAAGACCCCACGTTTGTGTGG -GTGAGGTCCAGCTGGACACAGGCAAAATGGGAACTGACAGCAGCTTAGATCACTCTTGCA -GGAGTGACTGTTGCAGTTATGCTGAGGGGGAGTTATAGGATGTTTATAGTTTTAACACAT -CACATTGTTTCCCTTTTAtttttaaagtttttgtgggtacagagtaagtatttatgggtt -acatgagatattttgatgcaggcatgcattgcataataattatatcagggtaaatggggt 
-atccatcacctcaagtatttattatttctttgtgttgcaaacaatccaattacactcttt -tagttattttgaaatgtacgacaaattattgttgactgtagtcactctattatgccatca -aatactagatattattcattctttctaaccatatttttgtaccctttaaccattctcact -cctcctccacgacccttcccagcctctggtaaccatccttctactctctagctccatgag -ttcattgttttaatttttaactcccacaaatgagtgagaacatgtgaagtttgtctttct -gtgcctggcttatttcactcaacacaccaatctccagttccatccatgctgctgTGTAAA -GCTCTTCCTCTTGACCCCTCTGCCCCTATGGGATTGGGACTATCTCTGCTCTCCTCCTCC -ATGCCGGTGTCTATGTGGTATGCTGAGTTCAGGTGAACATCACGTGGCCTGCGCAATCCA -CATGGTGATAGGCTTCCCAACCCACCATGTTCTGATGATCAATCCCTCTACCCATCCCTC -TTCAACAAAGTTGCCAGGggctgggcgtggtggctcacacctgtaattccagcactttgg -gaggccgaagtgggtggatcacctgaggtcaggagttcgagaccagcctggccaacatgg -tgaaaccctgtctctaattaaaatacaaaaattagctgagtttggtggtgggcacttgta -atcccagctactcgaaaggctgaggcaggagaatcacttgaacctgggaggtggaggctg -cagtgagctgagacgacaccattgcactccagcctaagcaacaagagcaaaactccatct -caaaaacaaaaacaaaacaaaataaaacaaaATCAAAGTCACCAGGACTTCTGGGGACCA -CTTCACTACCTCCTGTCTCCCCTAGCAGCCCTCCTGTAGCTCAATTCTAGGGCTCTCCCT -GTTTTCCAAAGCAGAGGAAAGGGAGGACACCAAGATTTCATAAACAGCCACTTTGTATGC -CACAGTagagggaaaatgtgggctgggccatcagtcaaggccagtggtaaattggtgttc -agaaatcacgtcataaagtccaacacaattgttgaaaatggaaagcaaatttgaaaacta -atttcttggtggccaaagcagaTCAGCTGAAAACACTcagacctgtgaatatgtgcactt -ttgatttatgacaaagctggccaagcagtaggaaaggaaaggctttttaataaatggtgc -tgggattattgagtatctaaatagaagaaagcaaaacaaaattcttgacccctacctcac -accatccacaaaaaaagcaactccaggtagatttagatttaaatgagaaaggcaaaacaa -tgtagctttagaagataatataggagtatcagtgagggctcaaccagagaagcagggcca -gtcagtgatcctctgtgtgtgtgtttacatggaattggctcatacaattgtgggcgcagg -ctaaggacatctgaagtttgatgggcaggctgtcaggGGCTGTTTGGAGTGTGGCTcagg -gaatgcctaaacccttttaacaaagcctttcaattaactgagtcaggcccatccaggata -atctctcCCCACTtttttttagattaatcaagtgcagtagtgagaagtggaggatgagtg -gaacaaggagttcgatctgtaactgactgtgagcaataatctcccttttgattatcttaa -tgtcaactgacttgggactttaatcacattgcaaaatctctttgcagcagcatgaaggcc -agggctcaagtgagtaactggagaaagtgtgtgcaTGCTTGAAAGGCCACTGCTTCCCTG -TTGTCCTCCTGCTCTCAGTAGAGAATACCCCTGGTTGCCCATCCCATCCCTGTAccctcc 
-ctaactgaatgcacactagaaagggcattccagggaaatatagctcagcctaggcaaggt -cacacatcagaaagccaccCAAataggaaaatagctttatgactttgcttcttaagcaag -atataaaaagcacagtccataaagaaaaagactgataatttaggctatgctaaaattaag -aaagtttgtttatcaaagagaccattaagaatgtacaaaaagcaatctaggagtgggaga -aagtatttgcagcacatataactgacaagagactcatatctagaatatagaaagaactct -tacaaatcaatgagaaaaactaaaaaaaaaaaaaaaacaaaccagaaaattatgaaagag -acttgaacaggtactgtgtgaaagagttaatggccagtaaacacatgaaaaagtgttcaa -cctcatttagtcagagaaatacaaattaaaaccatagtgagtttcctgtggtagtgttaa -aatatgtccacaaattctttagtactcttcatttcaagaggtggaacttaattcccctcc -ccttgagtgtggctggacttgagtgattcaccttaataaataggatatggcaaaagtaat -ggtgtatcacttgcaacactaagtcgtgagacaatgtggcctctctctctctttctctct -cttccccacacccctctctctcacagcacttgctgatgccatgaggcatgactctcatga -ggacagtcaagcagacctgctaagaggtgcatacagcgagaaactgaggcctcttgccat -cagatcatgtgagtgagccctcttggaagcaaatcctatagccccagttgagccttcaga -tgacagcagccctggataacatcttgatggaaccttgtgagagaccagaaccacccagac -aagccactcctggctttctgacccacacaaaccttgaaataataagtgtatattgtttta -ggctactacattttgaggataatttgttatgcaacactagataactaacacactaccttt -actcaccagaatggctaatattaagaagattgatagtaacaagtgttgatgagaatgtgg -agctactggaactctcacggactgctgctggaaatgtaaattgaaacaaccattttggaa -aacaatttggtattatttctattactttctaaataattgaaggtatgcatttccaatgac -ccatccctgatgacccagcaattacacttatgggaatatacccaacggaaaggtgtatgt -gtaaatgtgcgtatcaggatacatatatagaacattcctaacagcattatttataatagc -cccaagcaaaaaatggaccaaaatttcattcacagttcattcatacaatggaatactgta -agctatgaaaattaaggaattatgactatacacaataccaggaataggtctcagaagcct -aatttgagtgaaagcagtctgacacagaaaagtccatgttatataattctatttaaatga -agttACCCTTGGTGAGGTGGAGGGGCCTCAGGGAGTTGGCCTCCCACCTCACCAAATAAG -ACTCCatgtcttattctttacctggctagtggttacatggctgttcactacatggtctgc -acatttttgttttgtacctttttgtgcaaatgtACACTTTTAAAATGAATTACAAAACCA -CACAACCCAACAGCATAACACCAAAAACCACAGTCCCATGAGTCTGTCTTAACAGCCCTC -AGAGTGGGCTCAGCAGATCATCCTGGTGTGAGTTCAGTGAGTGCTGTTTTATCAGGGCCA -AACCATGCAGTTGGCCTGTGGTTCTCTAGGGGGCTTGACCTTCaggagtccatggaatgt -gtatttcttgggtaattctccatgaaaaatattgctttaattaaaactggagaacagaga 
-gtttgaggtaacgttctctggtgatagcctgacgtcctgttcctcagctcggctgattca -aggtcaattgctattgtttgagtgtttgtcccctctaaaactcatgttgaggctgggtgt -ggtggctcatgcctgtaatcccagcaccttgggaggctgaggcaggcagatcacttgagg -ccaggagtttgagaccagcctggccaacatggtgaaaccctgtctctacaaaaatacaaa -aatttgcttggcatgatggcaggtgcctgtaatcccagctacttgggaggctgaggcagg -agaatcgcttgaacctgggaggtggaatctacagtgagccgagattgtgtcactgtactc -caacctggacgacagagtgagactctgtctcaaaacaaacaaacaaacaaaaaacaaaaa -aaaactcatgttgaaactttgatccccaatgtggcagtattgagaggtggagtctttaag -aggtgattgggtcaggagggttctgccctcaggaatggattaacccattcgtggattaat -gggctaatggattaatagttatcatgagagtgggactggtggctttacaagaagaggaag -ggaggcctgagttagcacctccgtcccctcaccatgcaatgccctgtgttgcctcagaat -gctgtagagtccccaccagcaagaaggctctcaccatgtgctcccttggccttggagttt -ccagcctccataactgtaggaaatatatttcttttctttataagttacctagtttcagat -attctgttatagacaacagaaaacaaattaagacaccaatctgcccacttgagagaagga -atggtgtgatggtagagctgacacccttgaaagaaggccagggagtctcagcctgattct -tacctgaacatgtaacaacatcctgccctggactggggctgcctgcgatgtgtctgccaa -tggggccatgagtgcctggattgtgtgctcccccactgtggatgaggggatccgaaagct -ggccaccacgcgatgtgggtggtggggatttaggctggatatttccaaagctggagttac -caattgccccaaggcccaagaaacatctcaacatacagttctcatcatgtctttaatttc -aaacggtgtaattgggtctcttcaggatactgctgtggagtgaccgaggtgactgtatgg -gGTCATGGCTCCAGTCACTTCCCACATCCTTCTGCTCATGGAAGCAGCCTCAGCTGTTCT -GGCCCAGCGGACACCACTGATTTTACCACAAGGTCACCTGATCCCAGGCAGCCTGGCTCC -CGGCTGCCCAGTGACCCATGGAGCAGGTGATGGTGGTTCCTGAACTGCTCTGATGTGATC -CCTCTGTCATTCCTGATCCTTCAGGGCCAGAGAGGCAGCAGCCCCCACCCACAGCAAGAC -CCCGAAAATTTGGGCAAGAGCTTGGGGCTGTGTTCTCTCTCAGAAGCTGGGGGTACTCTG -GTTCTGCACACAGCTGTCCCCAGTCTGTCCcatatttcctgggtatataccacagagtgg -aattgcttggccaaattgtcactttatggttagttttttgagggactgctagactgtttt -ccaaagtggctgcaccattttacgttcccaccagcagtgtatgaaggttccagtttctcc -acatctgcaccaacactcgtgaatttttatctgtttgattaGTCATAGCTGGGCCTCATC -TCCACCCATGCTCTAAGAGAAAGCCTGCCTGGGAGGAGAAGAAGTCAGCCAGCAGGGGAG -GGTGTCCCACCAAGTAtcaggagctttggtttgca diff --git a/examples/ex3.fa b/examples/ex3.fa deleted file mode 100644 index c54c3e4..0000000 --- 
a/examples/ex3.fa +++ /dev/null @@ -1,37 +0,0 @@ ->chr11 11989996-11992119 -CTGGGACATGTTGTAGCCCTTGATGTTATTTTAAAAACAAAACAAAATGGACTCAATGAG -TTTTTTGGAATGATGCCAACAGGTAAAGAAGGACAGGGCCACAAATTTGGCAGCCAGGAT -ACAAAGACCCTATGGTGAAAGTTATCCAGGCTTGTCTGGAAGAATCCCCAGGTCCTGAGA -GCAGTGGACAGCCACACACCTGCATGGAGGAGCTGTTGGGCCCCTGACATGCCCAGGCTA -CTCAGAGACCACAGAAATCAGCTTGTTCTGCAGCCGACACCATCAACCTAATGAGAGTTC -AAGTTCAATTACTGTCACTTATCTGTCCTGTCTACTGGCACCAAGCAAAAGCAAAGTGCT -CCATTTCTAAGTAACGGTTTCCAGATTTCAAGGCCAGAAGCCAGGCTTTTTGCAGGGTAC -AGGCTTTGCAGCCCAAAGGGGCTCAAGCCTTCAGCTTCAGCAGGATTAAGCAAAGCTCCA -TCGCCACGAATGTCTGTCTTCCAAACCACATCTTCCTGGCTCTTTCCTCTAGGGATTGCA -TGAAGTCTCATTACAGTATACAGTACAGGACATGGTAAAGCACTGCAGATGCAGGATATG -TTCCTAACTCAGCATGTATTCCTAACTCCTTGGCATGGCTCTCAAAGGCctgcctgatct -catccctgcctctcccccagctttacttgctgctcccccacgcacatctccagctctggc -attctaaattgtttagcgttcctccaacatattcttatttccacatgcctccttgacttt -gaacaagaaatacttgttcaacgaataccctttttccccacaactgagaattcctagaca -acattcaaaatacagctcaactgccacctcttccagggagcctgtcaagaagtgtgaagg -gtctaagattctattctacttgcaagatgatacatgttcctgccacggtttcatggctgc -tagcagaagacatgagacttctgggtcagagacaaaggactttatttcagcaacacggaa -acagcataagcctcttgtctgtatcagttctccttgcctgcaggttccacactgccccat -ggaggggtccagacggatgctgcacatgcattgggtttacatcacacctaagggatctca -agctcagagaacccaaacctttgataatggactgcaagcaaacctgccctctgccaagga -gggagagctagctctgtttttcaaggccaccgcctagaaaagatggcctggaacaagaga -tcaattagtgTCAgatatagtttggatatttgtcctctttaaatctcatgttgaaatttg -atctctagtgttggaggtgggacctagcagaaggtagctgggtcatgggggcagatccct -tatgagtggcttgctgctgttctcgctctattagttcccatgtgttctgattgttaaaaa -gaggctggacatagtggctcatgcctatagtcccagcactttgggaggccgaggtgggag -gatctcctggggccaggagttcaaaaccagcttgggcaatatagtgagacactgtctcta -caaaaacaaaaaaacagaaaaaagcctggcacctctcctctctctctctctgtctctctc -ctccctcccctcacttcctctcacaatgtgatgcctgtgcaccttcatcttctgccatga -gtgagggctttccgaagccgtcaccaggtgcagacactggtgccatgctcgtacagcccg -cagaacggcaagccaaataaacctttctttgtaaattacccaggcttgtgtactacttta -tagcaatgcaaatggactgagaagtgcctttgcttataaaatgagcagaaacagagaccc 
-gtagagaactgtctcccaaGCaagtcttccatggctacctggttttcccctgcaccatgc -ccccacaatgccctgtgcaacctttgcctttcaaaattatgcatccatattgtaatgact -tgtttattttttttgtccactatagcctggatgcttctccgatgttagagactgtctttt -acacttctctgccaaaaacgtagcaggtactcaaggaatgtgcaaataaatgaatgaatT -AATACTGATAATTTTGATAGCCAA diff --git a/examples/ex4.fa b/examples/ex4.fa deleted file mode 100644 index 005df35..0000000 --- a/examples/ex4.fa +++ /dev/null @@ -1,6 +0,0 @@ ->chr1 84212532-84212827 -gtaataaagagcaaaattcatgaagctacaaatcatgactagattactggtcccaactct -gccactttatggcctgatgactggggaagctatacccagtaacattgctgaatcttagtt -tctccatctttaaattggagttaataccatattcactgaatttgtcctatccaactagaa -tgtaagttccatgaaatcagagacttgccaggttttgctcactgctatatatccagcacc -taaaatagtgcctggcatatagtaggtattcaataaataattgtaaaatggatgaa diff --git a/examples/ex_at80.fa b/examples/ex_at80.fa deleted file mode 100644 index 7b4a116..0000000 --- a/examples/ex_at80.fa +++ /dev/null @@ -1,51 +0,0 @@ ->80_AT 80% AT rich random sequence -GATGTTTCACTGTATTAATAATTGTCAATAAATCATTAAGATTAAAAAATAATAACAGTATTATTA -AAAGTTCTCTAATTAATATGGATAACTGTATTTTCACTATGTTGTGATTACTCATTATTTAAAAATCAATTGATGTTAAGCGATTTCCTGTTTAAAATGCCATATGTAATAAAATATATACATT -CATCGGATTTACTTAGATTAGATTTGAATCGATTAAAGATACATTCAAATTTCAATTTTTATTATCAAGAAATTTATATCCTACCCCCATAATTTACTAGCTTTTTAAATATTATAGAACTTAT -AATTGAATAAGTTCAATAACAAACTTTAATATAATAAAGTTTTTCTTTCGTGATTTTCAGAAAAAAAGATTACTGAATTGAGCATTTCTAGTACTTTAACTAAGAAAGTTATCAATTTTTAGTT -TTTAAGTGATACATTTAATTTTATAAATAGATTGCCAAATATAGAACTTATGAATCAGAAAAAACAATAAGACCTATTTAAATAAAAGGAATAAGTGATTAAACGCTTTAAATATAACTATATA -CTTAAATTAGATTTTCTCCTGTGAATTTTAATCCTCATCAATTATAAAATATAAGATAAGCCAAAAAAGTACGTAATAACGTTCTTTAACTGTTCCCAAATTGTAACTTATTGTTTTGTAAAAA -TCAAAGTTATTTCTTAATTCTTTTTATGTACGTACCATATTTTTTTAATTCTTTGGTTATTTTTCTGAAGTAGGAGTGAATAAACTTTTGTTTATGTCTTATTATTAATGATATAACTATGCAC -TTTGTATAAGGTATCAATAAATTTCATAATTTATAAATAGTGAAGATCCCAACAAAAGATTTATATTTACGGTTCAACTTAAGTATAAACCTTGATGCTATTTATTTAGATCTACTTTACACAA 
-AGTAAATAAAGCATTTATTCCAGCTGGTTTTTGGTGTTTTACATAATGATATATTTATTAATAGTTATTTGATAACACAAAATTTTTATTATAGTCATAGAAATTCTCCAGAATTAATTTATAC -CTAATGTCATAAATATGATAGAACACTAATGAGTATTAAATTTTAAATTGAGTACAGTTTGGTAATGAAGAAACTTTACGACATACTTTATTATATATATGAAATATGTTTAAATGATACTAAA -CAAATTTTAGCTAATTTCTATGTTAGTTTAAAGATAAATATACAAGTTGATTGAATATGGGTTGGGGGTTTTAAATCATTGAAGACTCTATAATATCTTGAAGATTAAGTAGAGTAACCTATAG -TTTAAAATAAAATTATTTCGAAAAGTAAGCTCTTATTGTTTTTTTTGCGAATGACTTAACATGTTAAGAATATAAAATTGATTCTTTTGATAGTTATAAATCAAAAATTCAGAATGTTATTTAA -AGTATAAATCTAACAATATATATCTCGATTACTTAATCATTTTTTGTACTTTGCGAAAGTTTGTACTGCTTATTCACTAAATTGCGAAACTTATGCTGATATATGAATTTAAACTAAAGCAGGA -TTTTTAAGATTTGAAGTTGTAAATATTTAATATTCTAATCGGCTTTTATATGCACTATTACAGGCAATTGACAAGGATCTCACATCAAGAAATAAAACAATTTTAGGTTGGAAATAGTATTAAC -TAAAAAAGATGCTTGGTACAATAAAACTATGAAACCAGTATAAAGACAACATCCTTACTTAAGTGAATAAAAACACAGTATTATAATTACTTTTTAGATATGTGAAACATATTCCATAATAATT -TTTAGATTTGGAAATTTATCACTTCTAGGATTCATATCTAAAAATAGATGTTAAGTTGAATTTATATTTAGAAGTATAATAAAATAATCTAGATGAGATGTGTTTCATTTATATCCTACACAAG -TTGGATCGAAGACCACAAAGTGTAGTAGTACATAAGCAATTAATAATTAACTACTGTGTATTTATTATAATATCAGAAACTTTAAGTCGAGTTAATAAAACTCACATTACAGTATTTATCACAT -TTTATCATTATTCACAAATTATGATTTATCACAAATCAAATCATTACTTTTTTGATATGCTGTAAAAATTAATATGTAAACTTTATGCAAATTAACTATGATTTATTTAATTTCATTTGAAAGT -ATAATCTTATTTCTATTTATATGTTCAGTTAACTTTTACTCATCCTCCAAAATTAAGTAAGTTGTGAGATGCTATAAAAGTTCTTATTTATCTCATAGGACATTAAGTTTTTCTTTAATAAAGC -ATTTTATTCGAGTATAACAAAAAAAACGCTTTCTAAATTATATAATTTTTTTATCTTATTTAAGTTTACTATTAATAATTAGCATTTTTGCTTTGCAATAAATTTCTTATTTAGATTGTTACAT -ATTGAGTTAGTCAATAAATAATAAACTTGATACACTTTCAAAGGTCACAAATACGTAAATTAAGACTCTGAACAGGATTATATATTTGAATTTAATTTCACCTTAATAACTGCAAACTTCAATT -TTTTTAGATTATATGATTAGCTGAAGTTATACGAGATAATATCCGTAAACTGCTCTTCGAGTGTGGTTCTTTTATTTGATAATATGCAACCCCTATCATAATTGATTATTTTTACAAATGATGT -TATTTTCATAGTTTAAGCATATTTTTTTTATAGATATAAAATCATTTAATTTCGTGCCGTAGTCTTAATAAAAAATTTATAGACTTTATTTTAAGTAATACTAAAAATCTAAACCATATAAATA -gtagtcttaatagtagtcttaatagtagtcttaatagtagtcttaatagtagtcttaatagtagtcttaatagtagtcttaatagtagtcttaata 
-TGAATGTCATATGCGTAAATTTTTATCTTTAATTCAAACTAAAAATGTGACTATATATTATATTATTACATTTATACTACTTAAAATAAATATATGAAGTGCACAGCTTAGCCAGAAATGTTTC -GTATCATTATGTATTCGTTAATTATTAATTGATGACATATAAACAATATTGTAGTTTGTCAAATTCAACTTTATTATCCTGGGTATTATATATTAAATAGTGTAAAATAAAATTAACTGTTTAA -TGGTAATTACTAATTGGTATAATAATAAAAAATCTATTAAACTATATCACTAATACTTTCCAAATACCTTATATCAATATTGAATAAATTGATATAAACTTTCATTTTTAAAAAGAGATAAATA -TACAAATGAATAGATAATGAGTAAAAATTTCTAATATATTTAACACTCTACGTCCTTTTTAAGAATTAAAAGAGTATTCTGCAATTGAAAAAGAAATTATTTCGTAAAATAAATTTATACTATT -ATTCATGTGAAAAATTTAATATAATAAAAAATAAGAATAATTTTAAATGACAATTATTAATTCTAATAACGAAACATTATTTAAAGAATAAGTATGAAAAAGTATAATTCAATGAACTATCTAA -TAATTGAACTAAATGAGACATTCCAGTATTAATGTATTCAATCTTAAGACTTGACCCAACATATTAGCTTAGATTAAAGTGAAATTATTGACGTTTAAGATTAAATAAATCTATTGTATTAATC -AACTTCAATATATCCTATAAATGGCATTTAAAGAGAAGCCCATACCAAGGAAATAAATTATTATATGTCAGCGATGATAATAATTAATTATATTATTTATCACTATAATAGTATTTAAATCGTG -TAAAAAAAATATCTATATTTTAGTAAATTAAAGCACGGCTTCAGAATACTGTTTTAACTATCTGATATAAAGTTCATTATGATTTTCAAATATCGTGTATGTAAAATTACATTATATGCATATA -AAATTAGTAATAAAATATGATTTTAAGACTGATATATAATATAATTATACAAATATGATAAATTTTTTTTATATACTTCACCTATCGAATCGGTCTTATGATAGCATAGTTACACATAAGCAAA -TAATAATTAATAAGTATATACTTGTTTTATCATTTAACAGTTAAAGTTGGAAAAATAGGAGCCATAACATATAATTTATCACATTTAAATTTAATTAAGATATTACCATAAATAATTAACTATT -TTTCTAATTTTTAAATAATTAAATATAGTGATTTTGAAAGATTTATGAAATACAGTTATAACTTACTTAACTGTATATGAAAGAAAGAATTTTTGTATGTTAATATATTAACTCGTATATTATA -TATATAAATAGATTATATAAATTAAAAAATGCAGGAAAAATTTTTAACTTTTTTATAAAAGGTAAACTTATTCACTATTGGAACATTTTATTTAAACATGATAATAAGTACACTTTTTTCATTT -TAATAATTTTTATTCCTATTAGAATTTAAAGTTAATGATCAAGGTTATTTCTTTGAAATAAATTTTTTATTCATAGTTAATCATATTAGATTACTTCATAGATTTTCGACTTAATTTAATCTGT -TAGGCTATAATTGCATAAGTTTCAATCCTATGCTGTGTTGTATAATCGATTCTTATTCAACTTTTATATCTGGAAATTCTAATTTATTTAGATAACATTATTAATTGAAGTTATTAAGTATATT -TATTGTAAAATTCTTTTCGGACATCTTTTCTTTAAATAATGATAAATACTGGCAAGTATTATTAATCAACGTAAAAATTAGTGATATTAACAAAAATTTAGCACATTATTTTTATTAATATAGA 
-ATCGTTTAATTACATAGTGAAGCCTTATGACAAAATTGATAAGAAATAATTCAAATAATGCTAAAAGTTCATAACACATACATTCTAATCTAATATACGTATAATTTGACAATTATTTTACACT -AAAATGCTGATCATGTATTAAATTAATGTGATTGTATTATTTTAAAGACAATTATAGAATAGATAGTTTAACGAAAATATTATCCTATCAATGTATACTTGAATATTATATATTATTCTTATAT -GAACAAATTAGTAAAATGCTGCTTTTAATCTCTCTGTCGTTGATAACGTTTATAAAATAGTATTAATATAAATCAATTTTATAGAGGCATTTATTAATACATAAAAAGATTTGAAATATATTAA -TTTATATCACTGAAATTGTCTAAATATTTTATGTCATTATTGAATGTATTGACATATACCTTTTTTGTTAAGAACTTACAATTATATAATTAAGGATATAATTTTTATATTTATCTTCTATACT -TGTTTTTATGAGTCCAATTTAAATATTGAGAAAATATTTTTCATAAAATCTAATGGTATGGTAATATAATAAATTTGTACTATAATACATATAACAAGAGTAAAATTATCAATAATAAAGATAA -TGCTAAAATTCATTCACCACTACCTATAAAAAGTATTTACTTTTAAAATAAGTATCAATTAATATAATCTTATAAAGAATCAAAAAGGTAAATCAAATTAGATAATTTGAAAACACCGAGTTCA -AAGTTACGATACAATAGTTCACAGTCGGTAAGAATAACTTTAAAATAAATATTTATAAGACTAGATAAAATTGCCCTGATGACTATTATCAATAATTCAAATATATAATATTTTAAAAAATGAC -TGAAAAAGAGATATTTTTTACTATTTAAGATCGTTCATTATAATTTGTTGCATTTTTAATCGTTATATTTATCTTTTTATTAACTAATAAAAATAATTATATCATAACAATTTAACGTAATATC -TTGGAATACTGTATCAACAGATGTCTCATAAGATTATTACGAATACCAAGTATTATGTAAGTAACATAAGCTTATATGCGAAATAAACTGTTAAAAAATCGTGTCTTTACAATTAGTACTTAAT -TTAATTTCACTAACATAATAATATTTCAGGCAGTATCTTTATTATTAAGTTTTTTTTATAATTAAATAATTAGATATGCTTTTGAAAAATATAGTAAAAACTTGCCTATCTATTAATATTATGA -CGAAAATTAGAGAGACAACAACAATTGCAAATATTATATTAAAATAATATTAAGATAAAATAACTCTGTAACTAATTATATCTTTTTCTAAACTTTATTTGATTAA \ No newline at end of file diff --git a/examples/ex_at90.fa b/examples/ex_at90.fa deleted file mode 100644 index 97b747f..0000000 --- a/examples/ex_at90.fa +++ /dev/null @@ -1,51 +0,0 @@ ->90_AT 90% AT rich random sequence (with one tandem repeat) -ATATAATATTTTTAACATATTTATAAATTATAATAATTATATTTAAGATTAATTATATATAAAAT -AATTCTTAAATATTAAAATATTAATTTTTATATTTAAAATATAAATAATAGATAACTACAAGAATTCTAATATTAAATATATTATTCTTTTATAAACAACATTTTAAATTATGTTATTGTTATA -TTATCTAAATATAATATCATTTCTATTATATTTATCTTAATAGTTATTATTTTTATAATTTTTTTAAATATCCAATAGTAAATATCTTTTATAAATGAAAATATTTTTTGTTAATAATTATAAT 
-AAAACGTTTTTAAATTTTTTTATTTAGTTAAACTAACTAAATTTTTCTGTTTGAAATTTTAATATAATATTATTAATAGCCAAATATTATTTTTAAAATATTATTCAAAAGCATTTAGATTTAA -CTTATTGAAACTACCTAAAATAATCATCAAATTTATATAATAAAAAACTTAATATTTAATTTTTCATTATGGAAAGTTATTAATTTTAATAAATAATTAAAAATAATATAAAAATTATGAATAT -AAATAGAAAAATAATCAAATATATCTATTTATATTATATTAATAAATAATTTTATAAAATTATGTATTATACTTATCTATAGAGTACTTTAATAATATATATAATTTACAAAATATTTGTTATT -TTTGAATATATACTTTATGTAATAGTTTTTTAATATACAAATATTTAATATTATTTAAATTAGATTAATTTTAAATTTATAAAATTTGCCATATAAATAATAATTCATAAAAATTATTGATTAA -TTTATTTAAATGATGATCATTTGTGTGATATTAGAGTAGAAAGATAATTATGATATTATTTATAATTTTTTTATTATTATTAATATTTATAGTCTAATTTCATTATAATTATATGTTATTTAAA -AATTGATTAATATTGGAAAATATTTTAAATAAAGTAATTTATACAATAAAGATCATATACCTATTAAATTTTTTTAATATAAATTTAAATTAATTTATATTTTATTTAAAATTTATATTTTAAT -AATAAAAAATACATGTTTTATTTTTATAATTTATTAATATTCTTAAAATTATAATATATATAACAAAGTAATATTTGTAGTAAATCTTAATATAATAACAATTTTTTACTTTTATCTTAGAGAA -AAAAAAATGTTAATGTAGTTATTATTAATGTAAAAATTAAAAGGATTTTTGACTTTTCATTTTTAAATAATATTTATTCTTTTATAAAAGAGTTATTAATTATGATTTGTAAAAATTTAAAAAA -ATAAGTAAATTTGACTGATTTAAAAAAATAAATTTTAAAATTATTTAATAATAATTACAAATTTATATTAAAATTTTTTTAATAAGTCTTCGTAATTATTAGTTGAATAATTTAAAATGAATTA -ACTAAAAGTTTAATTATATTATTTAATGATTTTTAAATTAGAATATATTACTTTACAAGAATTAAAATTATCTATTAATTTCTTTTATATTTATATTATATATTTTTATTTTTTTATTTGTTTA -CTTAGATAAAAATGATATAATTTCTTACTAAAATATTTTTATACATAATACAATTGTATATTCTATAAAATTAATATATATATTAAATGTTTTATGAACAATAAAAATTTAAATAAAAAATACG -TCATCAAAAGTTAATATAAATATAAATTATAACAAATTTGGTCTAATTATAATTTAAATATTTTAAATGATATATTTATTTTAATTTATTTTTAATAGCTTATTAAATAATTTAATTACTAGTA -AATACTTTATAATTTCAATTTTTCATTAATACTATAATAATTTTTAAAAAACTAATAATAATTATAAATGTGAAATTAAAATAATATTAGTAAATAATAAATTTTTAATTAGTTGTAATCTTTT -ATAAAATGAATTTTATAATATATGTTATTTCAAGTATATGAATAATTTGAATAAATTAAATTTAAATTTAATCAATTTTTATATAGATTTAAAATTTAAAGCAATGATTTTTCACGTGTAAAAT -AAAAATATTAAGTAATTAAAAAAATTATAATATTCTAAATGTTGATTCATTAGAAATATTATCTAAAATTATATTATTTTAAAAAAAAAATATTAGAAACTTTCTTTTTTTACAATTAAATTTA 
-TAAAAACTACAAAACTAATTTTCTTAGTAACTAAAAAATAAGAATTTATTATATATAAAAAGATATTTATTTAATAATTAATACTTTAAATATTTAATTTTTCTTATTGTTTTAAATTAATTTA -TTTTTTTTATTTATAAATCTATTAAATTAATATTAATGTATAAATATAATTCGAAAAAAATTATCTAGAAATAATAATAATCAGTATTTTAAATTTTATTTAATCCATTTATATATTGTTATAA -TAAATCAATAGAAATTTAATGTTATATGATAAAATATTCTTAATTTTAATAGATAATTAAAGGAAATATATATATAATATAAAGAAGTTAGAAAATTAATAAGAGAATTTTCTTTCTGTTTATT -AAAGAATGATTATATAATAATAATCATTATGTTAACATATAAGATATATTTACTAATTATTTAAGATATGAAATATTTACTATTTTTAAAATTATTTTTTATACACAAATATTGAATTATGTTT -ATATCTAATATTGTTACAAATAAATTTATATTAAATATATTATACTTATTATATAAATAAAATTAATTAAAAATTATTAAATATTTTATTATGTTAATAATTACTAATAAATTATTAAAATTAA -AGAATAATGTTAATATTTTTAAAAATTTTTTGATTAATATAAGTAATTACATTTCTATTATATCATAATCATTTATTATTTAAAACTTAATCAACATTAAATTGTTTTTTATTTAAAATATTAT -atccatttatatatccatttatatatccatttatatatccatttatatatccatttatatatccatttatatatccatttatatatccatttatatatccatttatatatccatttatat -GCATAATAAGATATATATATTATATAAACTTATTATAATTTATCTTAAATTAGTTAAAAATGTAGTTAAATTTTAACTTACGTTATTAAAAAGTTTACATTTAGTTTTTAAAATTTATTAATTT -TTTAAATATTATAAGATTCATAATTTAATATATTTAATATTAGATTTTAATATAGCAAAATTTTTTTTTTAAAATTATTAAAAGAAAAAATTATTAAATTAAATAGTTTTGATATTTTGATCAA -AAAAATCATTAATTTCTAATTTTTAAATTTTATACTTAATTTTATAAATAAATCATTGAATACAAAATTTATAAATTAATAAGAAATATAAAATATTTTTAGATATAATAAATAAATAGTTTGA -TATTTTAAATATAATCATATATTTAAATTTAGCTTTATTTTAGAGATTTTAAATTTTAATAATCATAAATTATAAAATTAATTAAAAGAATATATAACAATATTTATTAGTTATCTTTAAATTA -TAATTTACTATTTTATTAAAATTTAATTTATTTTATAATTTTTTTTATATATAAGTTATATTACTTCAGTCTCCTAATTATACCTTTAACACGAAAATAAATTTATTTTTATTTATATTTTATT -ATTAGTAAGGAAATTATATATTTTACATAAAATATTATTAAATTCTTAAGTTTTTAAATGATAAAAATTTTAAAAAAAAATTTATATTTATAATTTAATTTAAATATAGATTTTTATATCATTT -AATTAAATTTATTTTATATAAAATGAATATAATATTTATTTTAATTAATTTTCAATATTTTATATTATTATACATTTATCAATGACTATTTAACAATATAAAAATATTGTTATAATTTTAAAAA -CCTAATATAATTAATTTAAATTTAACATTATAGATAAAATAAAGTATTGATAGATATTGAATATTAAATAAAACTTAATTTAACAGTAAAAAAGTCTTATAATACGTTTGACTGTATATGTAGA 
-ATATATTTAAATAGTATCAAATAGATTTTAATAAATTTTTATATCAATTTAAAAATGGGTATGTGAATTAATTAAATATAAACCAAAAATCTTGAACTATTTAATATTTTATAATATTTTAAGA -ATTAAATAAAATATTTTATAACTTAAAATAAAATTATTTTAGAAAATAAATTTATATTATTTTTTTTATAAATAATTCAATATATTAAAAATATAAAATTAGTTTTATTAATAATTATAAATTA -AAAGCTTAAAGTATCATTTAAAAAACAGATTTAGTAACATACGTTTTAATTATTTGATTATCTTTTACATTATATAAAAATCTATATTATTTATATATCTAATTTTAAAATTTACTTTAGTATA -TAAATTTGAGTTAAAATAAAATCTTTAATATTCAAAATCATAGATGCTCAATACTTTAATTAATTTTTATATATATTATTATAAATAATTAATAAAAAATTTATATTAAAAAGTAAAGTAATTA -TAAATTAAAAATAATATTAATTAAAAAAAGTATTTAATATGATAAAATCATAGGGTTAATATAGAAATAGTACTTTTATTCTAATAGATTAAAATATAATACTAAAATTATTCCTTAAATATTT -AAAATAAGTTTTACTATAACTTTTAAATTTAAAAATTTTTTATTTTTAAAATTTATACTCAAAAATAAATATTAGATTTAATTTACATTTTAATATATAATAAAATAATATAAGTACAGTAGAT -CCTATTTATATTTTCTATATATTAAATTAAAACTTATAAAATATAATAATATATAAATAGTTAATGATTGATTGTTATATATTTATTATAATATTAAAAAATTCGGATTAAATTAATAAAATTT -ATAATGTGAAATTTATTATGTTCTATTACAATCAATGGGTTATAATTAATTATGAATTAAATTATTATTTTTCGAGTATATTACTATAACAATTACATAAATCTTATATAATATAATCGTAGTC -TATTTAATTATATTTAAAAATATAATTTTGCTTTTATCCATACACTTGATTATTTTTTATTTATTTTTAAGAACTAGACAAATCATAGAGTATTAAAAAAATTTTTAATTATTTTATAAAATAT -TAATTTTTTTTTTAATAAAATGTTTTATTTAAATGTAATAAAAAAGATATTTTTTAAGTTATATCATTTCTTAATTTCACTAAAATTTATTATTAATAATTAAAATCGCTATTTCATAATAAAT -TTTTCGTCTAAATTATTTTGTGTTAAATTAACTAACAAATAGTAAATCTAATTTATTTTTTAAAATTATAAATATATAAATCAAAATTTTAAATCAAATTATATATTTAGATTCAATTTTATTT -TAATAATTATAAGTTTTAATTTCTTCGAGTAATGTAATTAATTAAAATTATATAAAATATTACTTATAAATTATTTTTTAAATATAATTTTTTTATTCAATAATACATGATTTTCATTATTGCT -AATTAGTTTCATAAATAATACCATTATTATAATTCAAATATATTTTTTTTATAAATATAAGGTTATCTGATTTTATATTATAATTCCAAAAAAAAATTTATAAATTATATTCTAAATAATATTG -AAAATTTAAAATGTATAAATATAAGTATAATATATATATATTTTAATTTTTATTCTATATTAAAAATTTAATTATATATTATATTAATATATTCATGTTATTTAAGAAAAATATGTAAGATAAA -TAATTTAATTAAAATTATATTATGCTACTATATATTTATTTATTATTGATTAATAATATATAAATAATACCATGATTTATTAAGTTTGATTTTATTACTTCTAATATTATATATTAAATAATAC 
-AAAATCAAATTAATTATATAATAATATTTATTAATTAATATAATAATTAAAAGTTTGTTAAATTATATTATTAATATTTTTTAAATATTTTATACTAACATTAGGTAAGTTAGTATATATTTTT -CTTTTTAGAAATATGTAAATATATAAACAAATAAATAATAAATATAAATTTTCAATATGTTTAGTATTTCATATTTCTTCTGAAAATTAAAAAAATATTTTATAATT \ No newline at end of file diff --git a/examples/ex_large.fa b/examples/ex_large.fa deleted file mode 100644 index 5129297..0000000 --- a/examples/ex_large.fa +++ /dev/null @@ -1,284 +0,0 @@ ->ex_large -CGAAACCTGCGGAGACTCGTATCACGTTGAATTGTATCTATTCGAACAGGGGAATTCAAA -GTGGCGAAGGTTCTCAGTGTTCGCCTGAGTTCGGTTCAAACAGGGTAGAGCGCAATAGCG -CTTGTGTGTCCGGGAAGGTGTGTTGGAGCGATTTGCATTACTGTACCTCATGATGGATAC -CCTAAGCAGCGTTGGTTGAGAGTGGGAACACTCGTAAGCAGTGAAGTCACTGGAGGGGGA -TTGATACTAAGAATTGTTTCAAGGCCTGTGAAAGCTCAGGTGGTTTGAGTCGGGTCAATG -GTTGCCCTTCATCCCTGAGCTCGTATCTCGGTACCGATAAGGTTCTATTAATTAGAAGGG -CATGTGCTATCATCGCCATGTAAAGTAACTAAGACAATGAGGTCGATGACCACCTCTAAA -GTGCTGTGACGCCGTTTCCGGTAGCAGACAGTCCGGTATACGGCATAAGACTAAGGGAAG -GTCCCGTTCAAGCGTACGACCCCATGCGTCATTGGATACCTACACCATGGCGTATGCAGG -ATACCCGGCCAGAGTTGATCGAGAAGTGCGCGTATTGACACCCCACGGAATCCGCAGCAC -GCCGACACGACTCCTTTCAGTAGCTGTATATTCCGACGATCCGAGGCATATAACCCTCCT -GACGAGGCGTCAGTACTACCACGTTATCCGTTTAAACGTAATGCCGAGGATTGGCATCTT -TTTAATCTGGGGACATATGGTAGAGACCCGATCCCTACATGCTCTAGGCGCTTCCACGGG -CCTGATTGGTCTCGCTAGTTGGCAGTGAGCCGAGTCGTAAATACTTGAAGGCTACTTCAC -CAGCTTACGGTGGGGGAGAGTCGGTAGGAAATCGAGTTGTGGGCGGGTCTACCAAAAGTT -GACGAGGGGCTTAGGCAGCCTGCACCTCAAGCAGGCGAGCTCGAATGGCCCAGGGGAACT -CGAGACTAGTGGAACGGCGCGTAGGGCGTCGGTATATGTAGTACGGCGTGTTTCTGCGTT -CCTTTTTATGAGCTCTGACTAGGTAAGACGTTACCTGCTAAGAAACCGGCTCAACTGGCT -TGAGTGCTCTTGGATAAGGACGATGCCTTACCGGGCCCCAGGCGACAGCCAACAGCGTGC -CCCTATCCGAAACTCGCAAGTCACTAGTCAGTGCTTCTGTCGAACTCGGCAGAGAGCGCT -GAAGTAAAGGCTATATCAAGGCAGTCATGTCAACCTAAGCACATGAGCACTGCAGTGTCC -GTGAGGGCTCCTACCGTTAAACCTGGCGACCAGAGTACCTGGCTCTCGTACGCAATCTAC -GTTCTGTAGTGTAACTTGCCCTCTCTGGCTAACAAAAACGTATTGCTCTACTGTATAAAA -CACTAAGTATTTGATGCCACAGTACAGTCTCCTAGGCTGGCCGCGCCTCGATGCGCTTGT -GCGATTAAGCCTTCACGGGCCTTCGATCCCCTATCTCCATCAAGCGATGAGGTTTAGACC 
-TGTTAATTGATTTAATGTAACGCCCCCGTTCCTACGAGACGCTCCTATGAATCCATTGGG -CATGGCGTATGGAGTGTGGTTACGCATCGGTCCTGACATCGCCCACTTGGTTCAGCACAT -GAACGGAACAGGTGGTCTGGGATTGTTGCCCAAATTTACTAGTCTGGCGTTAAATTGCAT -TCCGGTCGAGCGGATTACTCTCAACCGCCCGAAGGAGGTTACCCTAGGTAAGCGTGACCT -CAGAAGGTAGAGTCGTCATCTACACGGCCTCAACCATCCCTAAATACGGCCTACTTAGTC -TTAATGAGCAATAGCCAAACCTCTGGATATCGATTTTTTCGCTATGAGTTCTTGACTTGT -ATACGACATCTACGGCGGCAAGTGCGTACATATTCTAAACGGACGATTTCCCGGCAGGAC -TGTAACCACCGTGAGGCCTGTGGTAGTCATTTGCGTGGAAGCCCGGACCCATTTTCTATT -CCGAAGTGGCTTGGGTTTTTTGCTCATGAATGTCAGGGCCGTAGTACTAATAAAGCCCCT -GCCACAAAGTATGCCACATCAAAGCTTAGCCTACACCCCGTGGTACTTTGGATGGCCCCG -CACACGTCTCTAGAGGATCCTGTCGACTAGGCGGGTGGTGTATAGCCTGGTGCACCACAC -CGCACCAGCTGTTAGATCGCAGATTACTCCCCTAAACGCCAGAGACACGGATAGATCCGC -GAATCGGGACAGACAAGCGGCAGTGATCAAGCTCATATTGCTAGATGCATTCGGCGAATA -ACGGGGTACGGCAAGCGATATTGAAGGAATCCCCTGAGCTGCATGACGCAGTCAAAACCC -AAGGCGGGATACACCATAGAGTAATTAGACAGTAAGTTTGCAATTGCGCCATGGGCCCTT -TAGAGTAGCGAATGGACCCAGGTTTGTACAGACCAGACCTCATAGCTCCGCTGCCAACCA -CCATGGTCCCAGACTGAACTGAAGTGGCACTACCCTGATAGCGCTGTGCCGGATGACAGA -TCAATAGATCTCCCTAACGGGCTAGCTGGCATGCTTCTTATCCTGAGGTCCAAACACTAC -CGGCCCCGGGTGTGCACTTAAGGCTAGATACGAAGGGGAGGCCCGGTATCTCCCTGGTTG -CTATCTCGGGGCTTTGGGGGCTCATGGATATAAGAGAGAGAGACTAGCCTGGGTGATTAT -TGCGGCCTCCCCTAAGAACGAAACACGACGGTCGGAGGCTGTACCGCGGCTTGACCCGGT -GGAAACTCTATAAACATATATGCCTTCCTGCTATTAGCTGAGCTGATCTATCGCCCACAT -ACCATAGAGGAGAGTCTACCACGCGTAATTCTATCCGATACAACGCTGCCTCCAATGATA -atcgatgtgggacctccacgtgccttctgaagttcgtgggatgttttgctttaaacgtgg -cccgttggagcatcgcggaaggtcgagtggttccacgatcgcggtctgtgggggacgtta -cggactatgatcccgaggatccattcatccaggtagcttccgtgttcccccgctccacgc -cttctctgtctagatcgcctcgtatcatatactgtgggctgttttggtgtcacagaggtc -gggttacgatctggatttgggtgcccacgaaatggttttaaagcctcgtggacccgtaga -aggtaatcagttgacaggttcgccgcgtacccactgttcgggcatctgcgcaggtctctt -agcgcctgtcttataacagtggggtagactgagccggctcttggagggccaagcatctat -gatgacaagaatcatagcagaattttttttggcaaatgtggccatttacagattcttttg -tcggcttccatacgtcaccccctaaaattgcgactccagcgatggccggtattaaactgc 
-cccttgatggtctagcgacatgcaacagaataattccccagatcaagttcgttgtattcg -cccacatagagatgggattgtatgtaatcaagtaatacctttccacgtcccgaagttatg -actgcagccgtgataaccatacagagcattatattagtacgttattgtcccaggtatttt -tcagtaagccctgagatacttgcgggtgctaaacaattagatcgacagatcggccatgtg -aatcagatcatatgcattggaccccaacactctcattaaaacggcggccgcaacagccga -atgctcttaccatttaatccgggaggcacatgccttggccctaaggtgtatgtcgaatat -ctccgtggaaatgtggaccacaatgagcaaagctttatagggggcggtgcattttcgatc -gttcataaactgtgcactcaggggtatgtgcccaaagaga -atcgatgtcggacccccacgtgccttctgaatttcgtgggatgtatcgctttaaacgtgg -cccgttggaccatcgcagaaggtcgagtggttccaagatcgcggtctgggggggacgtta -cggactatgatcccgaggatccatacaccgaggtaggttccgtgaccccccgttccatgc -ctcctcagtctagatcgccccgtatcatatactgtgggctgttttggtgtcacagaggtc -ggtttaagatctggatattggtgcccacagaatggttttaaaccttcgttgaccagtaga -acgtgatcggttgacaggttcgcggcgtacccactgttcgggcatctgcgcaggtctcgt -agcgcctgtcttataacagtgggggagcctgagccggctcttgaagggccaaacattttt -gatgactagaatcatagaagaattttttttcgcaaatggggccatttatagattcttttg -tctgcttccatacgtcaccccttaaaatttctgctccagcgatggccggtggaaaactgc -cccttgatggtctagcaacatgcaacagaataattccccatatcaagttcgttgtattcg -cccacatatagatgcaattgaatctcatcaagtaatacctttccacgtcccgaagttatg -actgtagccgggataaccatacagagcacgatattagtacgttattgtcccaggtatatt -tcagtacgacctgagttacttgcgggtgcaaaacaattaggtcgacagatcggccatatg -aatcagaacaaatgcattggcccgcaacgctctcattaaaacggcgggcgcaacagccga -atgttcttaccattttatccgggaggcacatgccttgccacttaggggtatgtcgaatat -ctccgtggaaatgtcgaccacaataagaaaagctttatagggggcggtgcagtctcgatc -gttcataaacagtgcacacatgggtatgtgcccaaagaga -ctcgatgtgggacctccacgtgccttctgaagttcgtgggatgttttgctataaacgtgg -cacgttggagcatcgcggatcgtcgagtggttccacgaccgcggtctgtgggggacgtta -cggactgtgattccgaggatcctttcatcaagttggcttccgtgttcccccgctccacgc -cttatctgtctagatcccctcgtatcatatactgtcggctgttttggtgttactgaggtc -gggtttcgatctggatgtcggtgcccacggaatggttttaaagcatctcggacccgtaga -aggtaatcagttgacaggttcgccgcgtacccactgttcggacatctgagaagctcgctt -agcggatgtcttataacagtagggtagattgaaccggctgttgaagggccaagcatctat 
-gatgacgagaatcatcgcagaattttttgtggcaaatgtggccatttacagcttcttttg -tcggcctccttacgtcaccccctaaaattgcgactccatcgatggccggtattaaactgc -cccctgatggtctagcgatatgcaacggaataatttcccagatcaaattcgtaggatgtt -gccacatagaggtgcgatggtatgttatcaagtaatacctttccacatcccgaaggtatg -actgcagccgtgattaccacacagtgcattatatcagtacgttattgtcccaggtatttt -tcagtaagcccctagatacttgcgggttcttaacaattagatcgacatactggccttgta -catcccatcatatgcatgggatcccaacactctaattaaaacggcggcctcaacagcaga -atgctcttaccatttcatgcgggtggcacatgccttggacctaaggtgtatctcgaatat -ctcagtgaaaatgaggaccacaatgagcaaagcattagtgggagcggtgcattttcgatc -gttcgtaaactgtgcactcaggggtatgtgcccaaagaga -atcgatgtgggacctccacgtgccttgtgaagttcgtggaatgttttgatttatacgtgg -ccagttggagcatcgcggaaagtcgagtggttccacgatcgcggccggtgggggacgttt -cgtactatgaccccgaggatccattcctccaggtagcttccgtgttcccccgctccccgc -tttctctgtctagatcgcctcgtatcatatactgtgggctgttttggtgtcacagaggtc -gagaaaagatctggatctgggtgcccacgaaatgtttttaaagcctcgaggacccgtaga -aggtaatcagttgacaggttcgccgcgttcccactattcaggcatctgtgcaggtctctt -agcgcctgtcttataaccgtggggtagactgagtcggctgttggagggcaaagcatctct -gatgacaagaatcatatcagaatttatgttggcaaatgtggccatttacagatactcttg -gcggcttccatacggcaccgcgtaaaattacgactccagcgatggccggtattaaactgc -cccttgatgttctagcggcatgcaacaaaataattccccagagcatgttcgttgtattcg -cccacatagacatggggttgcatataatcaactaatacctctccacatcacgaagttgtg -actgtagccgtgttacacatacggagcgttatattagtaccttattgtcccaggtattta -tcggtaagccctcagagacttgcgggtgctacacaattagatcggcagatcggccacgtg -aatcagatcaaatgcattggaccccaacactctcattaaaacggcggccgcatcagccga -atgctgtaaccatttaatccggaaggcacatcccttggcactaaggtgtttgtccaatgt -ctacgtggaaatgtggcccacaacgagcaaagctaaatagggggcggggcattttcgatg -gttcataaactgtgcgcttaggggtatgtgcccagagaga -GCGAAAGCGCAAGCCAATAGAAGTGAGAGTGAAGTCGGACACATAGGCTTTAGAGAGTTC -CAGCCACTTATCGAGTTCAACGCAGACCATAAGTGCGATTACAACGCGCCTATTCATGAT -TTATCGATTTTAAATACGCTCTACTCTGTTGCCGCACACCTCCCGTATGAGCAGCTTCCC -AGATTAAGTAATGTATTTTGTAACCGAAGTTGTGTCAGTTTGCGCGAGGGATATCTTCCA -GACATAGTTTTGTCCCGTTCGTCGATTCGGTTAGCTCAATCAGCTACTAGTCAAGGCACC 
-CGGATTTCACTACGAACGAGCTGACGCTGGCCCCGTCGACGACACGGGATCCCTGCGTTA -GGAAATTGAACAAAAAATAACCAGAAGATGCACGCCGCCATTATCCGTTGCGGGCACTTC -ACGGCGTTCCAGTCCCTAAGGCTATTATTAGTCACACCACACTCAAACGCTAGACCCCCA -GCTAACGAGTCTGCTACCTGCCGAATTGCAAACGGTGTGATTAATTAGAGGCACTAGCCA -ACGCTGTAACTTTTTCGCAGGTTCTGACAAGTTATCATCCCTGGTCTAAGCGGCCTCCTC -ATATAGGTCGGTCAAACGATGAGAATCGTCGGCTAGCAAATTCGTGTGGAGAGTAATCTT -GGAGATCACTTGAGGTTTAGAATCAGTCCGTAACACCACTGCCAAAATGCCACTGAGCAC -AACAGGTCGAGGATGCCAACGTAACTCCATGACTAACGTATAGCGTCGATTGTAACAGCG -CTCGAAAATGAGTGGGTCTGCCCAAGAGGAGTACAACTAAGAAGGATTTTAAGCGGCTGC -GTAAGGACAATTCTTTACAATTTGACATGCGTTAGTGGCCTCGGACAGTTACGCATGTTA -TTGACCTTCACTTATTAGTATTTGCGTGCTGCGGCTCCTGTCCCACAAAGACGCGTGTTC -GTCTTATACAGACTCGGCATCAATAGCCTTTCAGAAAAGAGTAAAAGCGCGAGCACGTGC -TATCAGCCCAATACAAGCAAAGTTCCCTCTAAGGGTGGAAATCGCACGCCACCCCTACAG -CCGAGCCTAAAAATGGCCGTAGAACGTCGCCGAGGACACTGTGGCAAACTGTACGTTCCT -TAGAGACCGACACCTCTCACTCCAATACAGTTTCTGGCACGCTCAGCGGACACGCCGGTA -ACATGGATTTGCACGTTTGGCATCGCCTGTTCTCATGGGTTGTAAGTGGACCCTTGCATT -TCCATCTTCTGAAGGGCCCAACATAAAATAAGACTACACATCTATGTATCACGCGCCTTA -TCCGAAATGACACCCCTACCGCGGGACTGAGGAGTTTTCATCATGAGACGGGATCATTGG -CCGAACCGTGCTCTAATGACATACCACGTGTTATTTGTGCTCGCTTCTTTCTGAAGTGGG -CGGGGCAGTACTCCTCTCTGATGCACATTTTAGTCCGCGTCCCAGCGCTGGCAACCGCAA -GCGTTCCACCTGTTTCAGTCCTTACGCTACTTTTTCTCATGCGATATGCAGCATGCTAAT -ATGTAGCTGACAACTTCTCTAATCGCAGAGCATATCAGCCGGAAGGGAGGCCCAACGTAG -GTGTAAGCATTTATGTCACTACTGTCGGCAGATGGTGCATGCCTTGTGGTTGGGTCTTGA -GAGACGATTAATACACAAACGTTAGTACAACAATTATTAATCTGCTCCGAGGGCGGCCCG -AGCAATCTTTGAAGAGGTTCACCATCCCGATTTGCGAATAAGTCTCGGACCAACGCCGAG -CCAGATACGAGGTATTTGTGTGTGATATGTAGTGCGGTTTAAATGATCCCTCATGCGAAG -TCCCGATGAAACCGATGGTATCTGACTTGCTGTAGCGCGATGATGCATGCCTGGATAATA -AGTCTGCCTCCGGCATGATCTTAAGTATTTGACGGGACTCTTATTCGTCACGCAGAACTG -GCCTTCTACAACTTTCCTTTTGTTGTGCAACAGCCTGCATGTGCTCGCCGCATTCCGCTG -ACTCGCAGAACCCAATCCGTCGACGTGTTCTTTTGCCGACCATTTAATCCGCTATGCGTA -GTAGGACGACCCTATCAGGAACCTTACCCCTTCCGTGTCCCCGGTCGCTTGAGGAGCACG -TTACCCCACTTACTCGTACCGGAAACAGACGCCCCATCATCATGTTAGTTGTAAAAAGTT 
-CTCGATTAAACGCCGCTGTCTCTGGACGTAGCAGGGATGATATAAAATTTTACGTTTGTG -AGCTGGCAGGCCCGGACGTTCCGACGATCAGTGTCAATACACGGTCGCCGCTGTTTCGTC -TGGATCCAACTGGCAAGGGACTATTCGTTTGGGTGTATGTGACGGACAGGGTGCACGCAG -TACCGATCGGCGCCAGGGATGGCTTTTAGATGCCACGTAGTTCTCGCATTATCATACGCT -TACCTAAAAGGCGTACTTACGTAAACCATAACACCTGCGCTTTAACTGTGATGCAGTAGA -GTTTAGGCATGTGAACGGCAGTCTGTGGACGGGGTGCATGCATATCACCCGGATTGTGGG -TACCAGTGGGCGGATTTACAAAGGGTCTTCGCTACTTCATTACCGCATCACAAAACAACA -CGGCATGCATGCTTCCACCTTAGAGGCTACTTAAGTACGATCGCAGTCTTCGACCAGTTC -CAGTTTTCGGGGTTATTATGTCATTTCCGCCGGCCCTGTATTCAGCCCAAGCTGCTTCGC -AATAAATAATATGTAACCAACGCCGGCATGATTGGGTTTGGATGCCCCCTCGTCAACGTG -TGTAATAGACATGGAGTACTACAACCTTCTGCCTTAACTACGGGGCGCTATGTTGTGCCC -GCTCTCTTCGTTAACTGATGGACGTAGCTCCCAAGCTCGTCCTCACATCCGTAATTTGGG -GTACTTTACCTTTCAGACGCAACGCGGCAGTCGACACCTTCGCAAGAGTTTCCTCCCGTT -GACGCCATTCGTGCCCGATACGATCCCTGAAAATAATGTGGTAGTGGACATGATCTGCCG -CCTTGAGAGAGCCAACCGCTCGGAGCAAGAGTCCGAGTTAAATTATACTGCGGAAGGGAT -TGAACGCAGGAGTCATCCCACCCCATGATGTTCCCTAGCAGCTGACCGCAAGTCCAAAAT -TATTAGCCGCTCATTATTCCTAGTCACCTAAAAGATGCGCCGTCGACCGCTTTATGCGAC -GAGTAGTGCGCGAGATACGAATAGATCTCCATAACTAGGCTGACGGAAAATCCACGAGGC -TAACGGAGCTTGTCAATGTCTAAGTAGCCATGACCAATTGGGAAACTAGAATTTCAAAGG -TTATGCCGATCTACGACTATTGGTGTGCTGGGGGTACTGCTAAGGTGTTCACCCTGTATC -CTAAGGTAAGAAAATACTATAAGAAATGGCACTGTTAACGACTCATTTCGCCATTGTCTT -ATCAGGTATTCGCAACCCGACATACCTCTGCCCCATGAATACTCGATCGCGCTTGGAATC -CCCTTCAAGTGCCTTCACTTTTCTAAGTACAGTTGGTTATATTACCGGATACCCTGTCTA -GTCCGCATACATCTCGGCCACTCCCGTCATATTCCATGTATGAGTGTTTAACTTAACTAA -AAGGCTACAGGCAGTTAAAAGGCAACCGTAACCAAGCCCGGCCTAATTTAGAAGATTGCT -GCTAGCCAACAGTAGATCGAACCCATGCCTTGTATCAAACTAGAATTGGTAATTGCTTAC -TCGACTATATGGGGTCATGAACCACCATTTTATACCGGTCGGGTGCACGTCTCGAAGCAG -TGCCCGGTATTTGATTGGGATAAGTAACTCCACTTGGCGGGTACTTATCGAAGGTGTTTA -TGATGTGCTGGGAATCAGAAAGTCGATGTGAGAGTCTACCTCGGACGTCCTGCGAGCTTA -GTCCAGTCAAATAGGCGGTCAGGATACAATATCTGGTTACACCTTCAAGTTACATCTTCT -ACCACAGCCACATTCCAGCATATGAGCACTGGACGGGTTAGGCAGACTTGTGGACTGATT -GGAAAGGAGCTAGATAGTACCTGTAACTGAGCCGTACCCTCATCGGACTCAATCGAGACA 
-TCTGTCGTAATAGGGTATAATAGGACCGATTCTGTTCGACTCTTACAATTACCTGCCCGA -CCCGTAAGTCGGATTTGACCAAAAAGACATATTCTCATTCACACGTAAAATGTCCCCATA -ACACCATTATACGTGTCCGCCATTAATTGGTCGGACCTACATTAATATGTGACATCATAA -CGATAGGGTTTAGCGGCCTTACATGATAGTCGTAACAACCAACACGATTTTCCTCCATCG -AGCAAGTGGCTCTTACCACAGGGGGGGAGATAATTATTGCTTTAGCATGCATCGAAGAGG -TAAGGTCTGGCGCAGGTGTGGTAGAACTTCCCTCGGACTCTTCTGTTGGAAGATATATCG -ATACATCGCCTTCCTGGCCGTGTTCGTGGTGGGGCCGCATCTCTTACAGCGTTCCGGCTT -CATATCTTTAGTCGCAGCGCCAACAACAGCTAGGGCGCTATAGATTGTGGTCGGCGCGTA -TATATCTCCCTTTGGTATTCATGCTTACATTTTTACTAGCAACGAACGACGGTCGCGAAT -AGTGGAAATCAAACGTGTGCGGAATATTTAGATCTCCTTCAGCGAAAGGGTTAATACCGA -GTTCATTCTTCATGTCACGCAGAAGACAGATAGTCTTGCTAATGAATAGCACTAGGACTG -CCTTTCTACAGCTTTACAACGGTATGCTTAGGGTGTCACGCGCTGCCCATGCCAAGAGTT -CTAAGAACGGATTCGATTCTGTGTTACAAATTGAAGTATCACCGTTTTCCCCTGACGGAC -ATTATTTCAGGATTGCGTACAGCTGAATGAAAGCCTAGTTGTCAGAGCCTGCGTCAACGT -GAGTATCATTACGGGGTACCGGGGACATAGGCGTGAGTGTACTTCCTGACTGAGCTTGCT -ACGCCGTGAGCTATCCAGCGTTAATTTCCACGTGAATACTGATAATAAGCCGCAAAAAAG -TTCTCACCGGGGCATCACCGGTTCACGTGGTATTAAGTTGCACTCTATGCGACTGCTCCA -TTGTTTACCCTAATGTCCATTTCTAACGCTAGTATCTGCTCGCCGTATAGCACTGCACAT -AGGGTCAGGGCACATGCCCTCTTTTGTAGACAAAATGAGTTGATGCATGTCTTTCAGACC -AGTATGGAAAGACGCTCACGTGACGAATCGTGTTTTCCGTCTTAGCTAACAATCAATATA -CTGGTCGGGACATACACAAATTACTAGGAGAGGCTAGTTCGTATCCGTATTTGACATACT -GTTGAGGTGTCATTGCCGGTCGTGTAGTCGGGCAGTACTCTATCGCGAACACGCCTCAGG -GCATTTCAGGCCCGGTCTGATGTAGTCCTCGCCCCAACTTGGAGGCCCACCTATCATGAG -AGGACACCGCGGCACTCGAAGGATAGCACTGAGCTGAGCTACTACTTCGTCACTGACAAC -TACCACTTCACGGTTACGTAAATGCTCGAATCCTGCGTATCTGCGGTAGTAAAGTGCATC -GTCATCCATCATCTGCTCTCATACGAACATCAATCCTGACTGGCTGGGCAAGCTGGAGCG -TCATATTAATCCCCCCTCTTATTCTTCGCAGACGTATAGGGTGTCGTAATAAGGCGTATC -TTTGGCGAGGCTACCAAGCCGAGTTAACCCTTGAACATCTGGGAATTCCTTCCGCTTTTT -TTTTGTCAGGTTGTATCAACACACACAAACCGATCATGGGCCGCACGAGGACACCGTCGC -CATGTCGCTACTGGCGAGCCGCGTATCACAATTAACTAATAGGCGTAAGAACTTGCCCAC -AATGGCTTGCTTTCCCCGCTTTTAGATGCCTACGAGTCTAAGTGCCTCTCCCCACTAGAG -CTGATTGTCAACGAATATCCAAAGGGCACTATCTCAGGGTGTCCAGTAGGGGACGTTCTC 
-AGACCTTAACGCCCTCTTTGATGACAGCTTACCTATCCATGGGTTGCCTATATCTGACAA -CATAGGGTCCGGGTCTGGTTATCGTTAGATCCCCCGTACTGATTGTGCGTGGACCAGTCT -CTATCTCCACGAGAAACGACTCACAGCAGGTGATCTCAGTTGCGTCATTGGAATAATGCC -ATTGACTCGAATTCGACTTAGCTACCCCGCAGCGCCATAGCGCATGTTTTGTTGAGGAGC -ACTTGCGAGATAATTTCGTATGCGATGACACAGTGCACCACTGCGCTGCTTCAGGGCCTG -CAAATCAAGTTGTACCCATCATTCGCACGTATAGTGGGGCAGCTGCCAAACCTTTCCTAA -CTTAATGGACAACAATATGGCGGGCAGAGGCGAATGTCGTAGTCTGAAAGAAAACCACTC -ACATTCAAACAGCTGGCGACCAATAATACGCAGGATGTAGCTTGTTCGTGGGACCATGTA -TCGCCCAGGAATAACTCCAGCGAAGCTATAGGCTTCCTGTTAGAAGTACTCTTTCCCACG -CACAAAGGCTTAATCGTAAGAGTAGACATAACGCAATGTCTCGCTACGCGCACCGGACAA -GGGTTACGGAGGAGAGAATATTTTAGCCAGATAGACTCGCTAGCGTCTCCGCATATGCAC -GCACATTGGACCATCCTTTCCTGAGTCGCCCACTGTTCGTCTATTGGAGATTAGAGTAGG -ACATAGGAATAGCGTCATCGTCAGGCCTTGGACGGGCAGGTTAGCTCACACTAAGATAGG -CTCTCCCCGCCTCAAGCCGTCGTTGCGTGCGCAAATTCTTGTCCTTAGGGGGCTCGTGTG -AGTTTAATTCGGCACACAGAAGAAAACCACATGTTATATATCTCCATTAGTAATGAGCAC -GTTGTTTGGCGTCATTAAGTCAGGGGGTTTGATAGTGCGGTATTTCCAAGTTAGGTCGGT -CTTTCTCTCGATGGCTATCAGACATCTTGGCAAATGTTATAAGGTAAAGTAACTACGTCA -GGCCACCTGAGAGAGGGAGTGGGGGACGGAGTAATAAAGTCAGCCGTGCAACGGAAGCCT -AGTCCTCGAGTCGGAGAAAAATAGTCTCGGTTAACGTGTCACCCATAGTCAGGGTTATAA -CTGATACTGCGCGCCCAGGGTTCCCTATTTTGCATTGAGTGGGTGCACTGAACAGGATGC -GGGGCCAGGGTGTATAGACTACCCATGGTCCCCGCGCAACTCCGGTGGCTTCTTACGTAA -GAGAGGATTTCTATTAGCCTGTTAGTGGGCAACGAGCGTCATCCTACCCTCAATATTTAA -CCGTGACGAGACTATCGTGCTTCCAGGTGGACCACATCACTATCCCATGTGCTAGTGCAT -ATCTTATACGTCTCGATCATCGCGTCTCCCCTGTGCTGCCGGATTGACTAAGCGGCCCTA -CCGGGAAATGCGTACTCCAGCAAGTTCGCATCTTCCTGAATCTCCCTATTTTAAAGCTTA -GGTGCGGATGGACCAAACGCAGGAACTAGATATGGCATATTACCACATCCGCACTGTCCA -GGTGGATGTATTGTTTAATTCATTATGGAGGCCTTCGCAGCGCCTCATAGCTTAAAGACG -TTCGCAAGAGCACGGCGCGTCCCGTATGTGTGAACCTCAACTCCTTTCATTTGACAAAAT -TAATGGCGGTACCCCTCAGCCTGAGTAATAAGCCTACAACGCGAAATCTTATGTTAGCCA -TGTTTGTTGCGGCACTCTTTAATAAGTCAGCTAGCACGACACTGGGTCACTGCACGTGCT -TCCTTTCGCTTCGGTCAACTTCCTATATAGTGTAGTCACACAGACTACGAGGAACTGAAC -GGCGCCCTAACCTCACAAATCTGCGGTACAAGTTTTGTGTATGTCACGCTGTGTTGAATT 
-CAACATCTTAGACCACCCGTACCATCGTATTTTGTAGTATCGGGCGAGTGAATAAGGAAC -TATCTTCTGACGAAAAAAGGCATTGCATAACTTCCGACTACTATGCCAGGCTGACTCCTG -ACGGTTTACAACTACTATCGGGTCTACGACTTGGCGTATTTAAGGACTGTCAGTTGGGAC -ATTCTCCATGCACCCGCCGTTCACGATAGGGCCTAAATAGATTGGCTGAACGCCAGTCTC -CATTGCACTAAGTTGCGTCGTGTTTCAGACGCTCATAGAGTCGACGGATACGCCCATGCT -GTCCTCCTTCCGGGGAGAGATGCCGCTGTCAGAGTGATACAAAGTGTGTGGTGATCGGGT -GTCCGATAAAGATACAGCATAACCAGCACAGTTTCACGATGAGTTAGTTGATCCAAAGTA -GCGGGGACTGCTGCGTCTTGACCTTGCGAGGCCCGGACAGTCGCCCTTACACCAATGGGG -GGTAATCGGTCAGGTGGTACTCTGAGCACCCTTACAGTGGCACAGATATACGCACACAAC -TCCAACTTCTTATCTAGATCCCCTGAACGGAGTCGAACGCCATTAATTTCTCCATTATCG -CGCGTATCTCGCAAATACTCGTTAGCTCTATAGGGCGCGTGTCTGAGGGATATAAACTTG -CGGGGGCATAGCGAGAACTTGCAAGCCTCCCTGTTAGTGATAGTGACTTAGGAAGTGTCC -CCTGCAAAGCACTTTGCAGCCCTGTAAATACGCCGTCACGAAAGTGGACGGTAAGATGAA -AAACCATTGAGCTATTGCCTTATCGGGACGCCTCCATACAATTCTTATCCCGATAGATGC -GTCGGCCTAACAAGTGGCCCGGTCACCTATCGAAGGCCTTGCGAAGCTGGACTCTAAAGA -AGCGCCTCCTATCCAGGCCCAAGTATTACATAACTAATAACTACCAACTAGATAGTCTCA -GGTAGTCTCCGAGAATGATGAGTTGGTAGATTATGAAAGGGTCCGTCTACTTATGAGGTA -GTGCGTCTTCTTAGGATTTAACCTCTCTTCGCGCTGCCGTGCTCATACTGGAGCTACCAA -TAACGTGTACCCCCCAGCGGTCTGAGGCAACCTGGCATCTCCTATGTGCGTGCACATTGG -CGGATCAAAACCAATTCTAACGGACCACCCGCAATGTGGTGCACAGCAGCGTGGCACAAG -CATTCTGTAGCTACGATCCGGCGCCGGGGGTTCTTGTAGCAACTCGACTATAAGTCACTA -ACTCCACGCAATGCTATAGCATATTATATAAAGTTGGGTAGGATTGATAGGGAATCGGGT -AGTGTCCAGGTGGACGAGCTGGGTGTGCCTCTGACATGATCAGATGTCAGAGGTCGGCGC -TTCCGCCGTGTTCTTTTCAACCGGATCCCTGCGGGATAGTCCCTAGCTTAGCACGTGAGT -CAGGATGCCCAAAGGCAACGATACGGATTGGCGATGAATGAAGAGGGTGCCGACCCGGCC -TACTGGTGACTGGTCACTGATTCGGCCGAGATCCATACCGCCCGGGAGATCACACCGGCC -TTATCTCATTCTTTGACAGCGCCTAGATAGTTAGATCAAATGCGACAGATAGGACCGAGG -CTGTTGCGCACCCTACCGCTAAGCTCCAGAGTGGATTATAGTACTGCTATAATACTCGAG -GCTACCACTCCATCGTCGAACACAGGAAAGCATTCTGCAACATTATATCAGACTCTGTAC -CAGCCAGGGGATGGGCGTCACACTCGGGGCGGTCACATTATCCTTTTTAAGCTGGCACCC -GAAGTGTCAACAGTTGATCCATTGCTTTTCACGTGGACTAGACAAAAAAGCGTCATTTCC -TTTCCTATTGCATATTTAAGAATCTGTTTACTCTCCAGATGAGTGCCCCAAATTTTTATA 
-GCCCACTACGGCGCGTCCACAGCTCGGGCTCCTGTGGTAACTATAACCTAGTTAATGATG -TAGACCACGGGCCGGTGTGAACCTGGATGATGTTAACCTG diff --git a/examples/example_1.fa b/examples/example_1.fa new file mode 100644 index 0000000..261dca8 --- /dev/null +++ b/examples/example_1.fa @@ -0,0 +1,23 @@ +>Rand_seq_1 +GCTTAGTTCGAGATGAAATGTCCGGGGCCAAAGACAACCAGCATCTCGCGTCTTGCCTAA +CCCCCCTACATGTTGTTATAGACAATCAGTGGAAACCCAGTGCCAGACGATGGAATGACC +TTAAGTCAGGCAGGAAATTAAAGGGAACGTATATTCAACGCAATGAAGCTGGAGGATTGG +CGTGGGAATCGTGCTTCTGTCTAAGCAAGAATGGGTATGAGGTGGCAACCGTCCCCCTAG +CGTACAGGGTGCACTTTGTAACGATTTGGGAGTCCAGAGACTCGCTGTTTTCGAAATTTC +CCTTTAAGCGCGGGTATTGAACCAGGCTTATGCCCAAGATCGTAGCAAGCAGACTCAAAC +AAAATATATTTTGCCCGCGTTACAGACGAAACTAGTTGGAGGTTATGGAGCATACTAACA +CGTGGACGGCCACTGGTGAGTTGCTACATATATATATATATATATATATATATATATATA +TATATATATATATATATATATATATATATATATATATATATATATATACGCGCGCGCGCG +CGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCCCCTGCGGCAA +CGTTGAAGCTCCTAAATTACTCTGGCTGGATCCAAGCCGTAACACCCGTCTCACTTCATA +ACCGTTTGCAATTCACGGCTTGATCTAGACTGGATTGCCATTCTCTCAAAGTATTATGCA +GGCCGGCGTACGCGTCCCATATAAACCTGTCATAACTTACCTGAGACTACTTGGAAATGT +GGCTAGATCTTTGCCCACGCACCTAATCGGTCCACGTTTGGTTTTTAGGACCCGATGATC +TTCAAACACTGCAAGATTCCAGGCAGGCAGGCAGGCAGGCAGGCAGGCAGGCTCGCAGGC +AGGCAGGCACGGCAGGCAGGTCAACCTGCTTTACAAAGCGCTGGATCCTACTCCAGCGGG +ATCTTTTATCTAAAGACGATGAGAGGAGTATTCGTCAGGCCACATAGCTTTCATGTTCTG +ATCGGAACGATCGTTGGCGCCCGACCCTCGGATTCCGTAGTGAGTTCTTTGTCCGAGCCA +TTGTATGCGAGATCGGTAGACTGATAGTAGTAGTAGTAGTAGTAGTAGTAGTAGTAGTAG +TAGTAGTAGTAGTAGTAGTAGTAGTAGGGGATGCAGTATATCCCTGGATGCAATAGACGG +ACAGGTTGGAATCCTAAGTGAAGTCGCGCGTCCGAACCCAGCTCTATTTTAGAGGTCACG + diff --git a/examples/example_2.fa b/examples/example_2.fa new file mode 100644 index 0000000..3332511 --- /dev/null +++ b/examples/example_2.fa @@ -0,0 +1,301 @@ +>period_1000_repeat +GTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGT +CGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTG +CCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGG 
+AGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCA +CCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTC +TCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGA +TCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGG +TCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACAC +TGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATA +CTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGG +CAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCC +ATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTT +ACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCC +CTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGAT +CAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGA +TGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATC +TTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCC +GCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGG +AGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGG +TCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCG +GGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAG +CCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAAC +ACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGT +CTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGA +AAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTG +GAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTG +CACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAA +AGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCT +GCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAAC +GTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATA +TAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCC +CCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGT +TGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAG +CTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCG +ATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTT 
+CACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTC +AGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGG +ACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCA +GGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCG +GTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGG +CACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCA +CCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTC +GGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGAC +AGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTAC +GCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGAC +GGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAG +GACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCG +CATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGG +CTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTT +TTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACC +GTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGT +CGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTG +CCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGG +AGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCA +CCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTC +TCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGA +TCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGG +TCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACAC +TGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATA +CTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGG +CAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCC +ATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTT +ACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCC +CTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGAT +CAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGA +TGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATC +TTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCC 
+GCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGG +AGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGG +TCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCG +GGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAG +CCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAAC +ACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGT +CTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGA +AAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTG +GAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTG +CACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAA +AGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCT +GCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAAC +GTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATA +TAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCC +CCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGT +TGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAG +CTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCG +ATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTT +CACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTC +AGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGG +ACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCA +GGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCG +GTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGG +CACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCA +CCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTC +GGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGAC +AGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTAC +GCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGAC +GGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAG +GACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCG +CATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGG +CTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTT 
+TTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACC +GTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGT +CGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTG +CCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGG +AGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCA +CCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTC +TCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGA +TCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGG +TCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACAC +TGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATA +CTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGG +CAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCC +ATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTT +ACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCC +CTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGAT +CAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGA +TGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATC +TTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCC +GCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGG +AGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGG +TCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCG +GGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAG +CCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAAC +ACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGT +CTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGA +AAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTG +GAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTG +CACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAA +AGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCT +GCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAAC +GTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATA +TAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCC 
+CCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGT +TGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAG +CTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCG +ATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTT +CACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTC +AGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGG +ACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCA +GGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCG +GTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGG +CACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCA +CCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTC +GGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGAC +AGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTAC +GCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGAC +GGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAG +GACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCG +CATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGG +CTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTT +TTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACC +GTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGT +CGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTG +CCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGG +AGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCA +CCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTC +TCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGA +TCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGG +TCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACAC +TGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATA +CTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGG +CAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCC +ATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTT +ACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCC 
+CTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGAT +CAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGA +TGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATC +TTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCC +GCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGG +AGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGG +TCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCG +GGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAG +CCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAAC +ACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGT +CTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGA +AAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTG +GAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTG +CACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAA +AGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCT +GCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAAC +GTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATA +TAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCC +CCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGT +TGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAG +CTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCG +ATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTT +CACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTC +AGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGG +ACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCA +GGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCG +GTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGG +CACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCA +CCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTC +GGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGAC +AGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTAC +GCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGAC 
+GGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAG +GACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCG +CATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGG +CTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTT +TTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACC +GTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGT +CGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTG +CCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGG +AGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCA +CCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTC +TCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGA +TCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGG +TCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACAC +TGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATA +CTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGG +CAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCC +ATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTT +ACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCC +CTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGAT +CAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGA +TGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATC +TTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCC +GCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGG +AGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGG +TCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCG +GGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAG +CCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAAC +ACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGT +CTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGA +AAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTG +GAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTG +CACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAA 
+AGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCT +GCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAAC +GTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATA +TAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCC +CCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGT +TGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAG +CTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCG +ATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTT +CACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTC +AGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGG +ACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCA +GGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCG +GTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGG +CACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCA +CCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTC +GGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGAC +AGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTAC +GCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGAC +GGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAG +GACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCG +CATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGG +CTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTT +TTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACC +GTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGT +CGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTG +CCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGG +AGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCA +CCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTC +TCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGA +TCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGG +TCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACAC +TGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATA 
+CTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGG +CAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCC +ATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTT +ACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCC +CTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGAT +CAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGA +TGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATC +TTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCC +GCAGAGGTGACACGGCGCCGATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGG +AGAGGCCCTGCGGCGCGCTTCACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGG +TCTCAGCCGGTTCCTGCGTCAGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCG +GGGGCCTCGAACCGTCGAGGACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAG +CCTGAAGCAGTACCGTTTCAGGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAAC +ACGCCAGGGGCGTGAAGCCGGTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGT +CTGGAGCGCCCATCTAGCGGCACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGA +AAATCCATACCGCTCATTCACCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTG +GAGCAGGGCCCCTAAAATTCGGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTG +CACCACCGCGGGTGGCTGACAGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAA +AGTAGCGCCGGGTAAGGTACGCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCT +GCGCCCTCACTCGGGTGGACGGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAAC +GTGTCCCACGGTAGCCCAAGGACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATA +TAGACGCCAGGTTGAATCCGCATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCC +CCACTTATACCCTGCGCAGGCTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGT +TGACGACAGACCGTCGTGTTTTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAG +CTCGGATTGCGGTGTCTACCGTTAGATCATACCCAACGCCGCAGAGGTGACACGGCGCCG +ATGGGTACCGGACTTTGGGTCGGCCGCAGTTCGGCAGGGGAGAGGCCCTGCGGCGCGCTT +CACTCTGTATGTGCAACGTGCCCAAGTGGCGCCAGGCAGGTCTCAGCCGGTTCCTGCGTC +AGCTCGAGGCTGGGCGCGGGAGCTGATCGAACATGGGCCGGGGGCCTCGAACCGTCGAGG +ACCCCATAGTACCCCGGGCACCAAGTAGGGCAGCCTATAGCCTGAAGCAGTACCGTTTCA +GGGGGGGAGCCCTCATGGTCTCCTCTACTGATGACTCAACACGCCAGGGGCGTGAAGCCG +GTTCCTTCGGTGGTTATAGATCAAAGGCTCAGAGTGCGGTCTGGAGCGCCCATCTAGCGG +CACGCGTCTCGATTGCTCGGTCGCCCTTCACACTCCGCGAAAATCCATACCGCTCATTCA 
+CCAGGCTGCGAAGCCTACACTGGTATATGAATCCGAGCTGGAGCAGGGCCCCTAAAATTC +GGAGTCGTAGGTGCTCAATACTCCAATCGGTTTTCTCGTGCACCACCGCGGGTGGCTGAC +AGGGGTTTGACATCGAGAGGCAAGGCAGTTCCGGGCTGAAAGTAGCGCCGGGTAAGGTAC +GCGCCCGGTATGGCGGGGCCATGGGGCCAATACAGAGGCTGCGCCCTCACTCGGGTGGAC +GGAAACGCAGAACTATGGTTACTCCTTGGATACGTGAAACGTGTCCCACGGTAGCCCAAG +GACTCGGGAGTCTATCACCCCTAGGGCCCATTCCCGGATATAGACGCCAGGTTGAATCCG +CATTTGGAGGTACGGTGGATCAGTCTGGGTGGGGCGCGCCCCACTTATACCCTGCGCAGG +CTGGACCGAGGGCCGCAAGATGCGACGGTGCACAAGTAGTTGACGACAGACCGTCGTGTT +TTCATTACGGTACCAGGATCTTCGGGCCGAGTCAATCAAGCTCGGATTGCGGTGTCTACC \ No newline at end of file diff --git a/examples/example_3.fa b/examples/example_3.fa new file mode 100644 index 0000000..4521c8f --- /dev/null +++ b/examples/example_3.fa @@ -0,0 +1,165 @@ +>80_AT +AACTTTTTCTTTAATAAAGCATTTTATTTAAATATGACAATAAGTACACTTTTTAAATTA +TATTATTTTTTATTCTTATTAAAATTTACTATTAATAATTAAGATTATTGCTTTGTAATA +AATTTCTTATTCACACTTTTTCATATTAAGTTACTTGATAGATAATTAACTTGATTTACT +CTGTAAGGTCATAATTACATAAGTTAGAATTCTGAACTGGGTTGTATAATCGAATCTAAT +TTCACTTTAATAACTGCAAATTCTAATTTATTTAGATAACATAATTAGTTGAAGTTATAT +GGGATATTAACCGTAAATTCCTCTTCGGGTATGGTTCCTTTATTTGATAATATACAATCG +CTACCATTATTAATTAATTTAATAAATAGTGTTATTATCATAAATTTAGCACATTTTTTT +TATAGATATAAAATCATTTAATTTCGTGCCGTAGTCTTATGACAAAATTGATAGACTATA +TTTCAAGTAATACTAAAAGTTTATAGCACATACATTTTAATGTAATATACGTATATTTTA +ATCATTATTTTATACTAAAAATCTGACTATATATTATATTAATATGTTTATACTACTTAA +AAAAAATATATGAAGTGGATAGCTTAGCCAGAATACTATCGTATCAATGTGTATTCGTTT +ATTATTAATTGTTGACATATGAACAATATTGTAAATTGTCAATTTCAACCTTTTTATCCT +CGGTATTGTATATTAAATAGTGTAAATTTAAATTAATTCTATAATGGTATTTACTAATTG +ATAAAAAGATTAAAAATTTATTAAATTATATCACTAAAACTTTCTAAATACTTTATGTCA +ATATTGAATGAATTGATATATACTTTCTTTCTTAAAAACATACAATTATACAAATGAGCA +TATAATATATATATTTATCTAATATATTTAACTTTTTACGTCCTTTTTAAGAATTGAAAA +AATATTTTGCATTTGAATAAGTGATAACCTTGTAAAATAAATTTGTACTATTATTCATGT +GAAAAAATTAAAATTATTAAAAATAAGGATAATTTTAAATTTCAATTACTAGTTCCAATA +AAGAAACTTTATTTAAAAAATAAGTATCAATTAGTATAATCTAATGAAGTATCTAAAAGT +TGAACTAAATTAGACAATCCGATATTAACGTATTCAAACTTAGGACTCGACACAACATAT 
+TAGCTAAGAATAAGTTGAAAATATAGACCTTTAAGATTAAATAAATCTATTGTATTAATT +AACTTCAATAAATCATATAAATGACATTTTAAGAAAAGCCTGTAGGAAAGAAATAAATTA +CTATTTATCACCGTTGATAATAATTAATTGTATTTTTTATCACTATAATTGTATTTAAAT +CGTGTAATAAAAATAATTATATCTTAGCAAATTAATGTATCACTTCGGAATACTGTTTTA +ACTATTCATTATTAAGTTTATTACGATTTTCAAGTATTGTGTATGTAAGATTAGATTATA +TGCATATTAAACTGCTAATAAAATGTGATTTTACGACTAGTATATAATTTAATTACACTA +ACATAATAAATTTTCTGTTAATATTTCACCTATCAAATTGCTCTTATAATAGGATAGTTA +CACATGAACTTATAATAAATAATAAGAATATACTTGTTTAATCATTTTACGATGAAAATT +AGAAAAACAGCAACTATTACATATATTTTATCATAATTATATTTAGTTAAAATATTTCCA +TAAATAATTATGTATTTTTCTAAACTTTATATAATTAAATATAGTGACTTTAACAGATTT +ATAAAATACAGTAATAACTTACATAACTGTATATGGAAAAAAGAATTCTTGTATGTTAAT +ATATTAATTCCTATATTATAAATATAAATAGAAGATATGAACAAAAATACTCAGGTTAAA +TTTTTAACTTTTTTATAAAAGGTATTTTAGTTCACCATACGATTATATTATTTAAACATG +ATATTATGTATATTGTTCTCATTTTAATAGTTATTATTTCTATTACGATTTTAAGTAAGT +GGTGATGGATATTTTTCATAAATGAATATTTTATTCATAGTTAATTATATTAGAATATTT +CTTAGTTTTTCCATTTAGTTTAATCTATTAAACTTTTGTAGCATAAGTTTCAATCCTATG +TTATCTAGTGTCAGCCATTCTTATTGAAATTTTATTTTTAGATATTCTGATCTATTTTAA +CGGCACTATTGATAAAAGTTATTAAGTTTATATATTATAAAATTTTTTACAGACTTTTTC +TCTATAAAAAATGATAAATTCTAGCAAGTAATATTAATCAACGTAAAAATTAGCAATATA +AATAGAAAAATAATTGATTATTTTTATTAATATAGTATTGTTAAATTGCATTATAGAACC +TTATGACATAGTTGTCTACAGAGTATTCTAATAATGCTTATGGTTCATAATACATTCATT +GTATTCGAATATACGCTTTATTTGACAATTTTTTAGCACAGAAATGCTAATCATGTATTA +AATTAAAGTGATTGTATTATTATAAAGTCCGTCATAGAAATGATAGTTCAAAGAAAATAT +TAATTTATTAATCTATACGAAAACATTTTATATCATTTTTGAACGGACAGATTACTAAAA +TACTGCTTTTAATCTCTCTGTTGTTGTTAACGTTTGAAATAAACTAATTTATATTACAGA +AATTGTGTAAATATTTAATGTTATTAATATAAGTTCTGACTAATATGTATTTTGTTAAGA +ACTTACAATTTTATAATTAAGGATATAAATTTTATATTTATCATCTATTGATGTTCTTAA +GATTTCAATTTATATAATGAGAAAATATTTGTCGTAAAATCTAATGCAATGGTAATATAA +TAAATTTGTATTATAGTAAAGATAACATGAGTAGAATTATCAATAATAAAGATAATGGAC +TAACGATTCATTTAAAATCTAAATTCTAATAATAAGAAATATGTATCTTATTCCCATAAT +TTACTAGCATTTTTAAAATTATAGTATATATGGTAGAATGACTCTTATAACAAACTTTAA +TATAATAATATTTTCCCTTTTTAATTTCTAGAAAAAAAAAGTATTGATATGAGCATTCCT 
+AGTACAATAACTAAAGAAGTTTCCAATTTTTTATTTTTAAATGATATGTGTCTCTTTGTA +GATAAATTGCCAATCATAAAACTTAAGAATCAGAGAAAACAAATAGGTCTAATTAACTTA +AGGGAATAAATCTTAAAATCGTTCAGTTATAACTATATACTTATATTAGAACTTCTCCAG +TGAATTTTTATTGTCATCAATTATAAAATTTAAAATAAACCAATTAAGCACATAATCACG +TTATTCGACAATTTCCAAATTATAACATATTGTTTTATGAAAATCAAAATTATTTATCGA +TTCTTTTTATGTATGTACCATATTCTTTTATTTCTTTGCTTATTTATCTGAAGAAAGAGT +GTATAATCTTTTGTTAAGATATTTTTATATATGATATAGCTATGTATTTTGTAAAGGTAA +CGAATATATTAAATATTTTACAAATAGTGAAGATTCGGACAAAAGACATATAATTGGGGA +TTAACATAAGCATAAACTATGATGGTATTAATTTAATCGTAGTTTGTACATTCTAAATAA +TGTATTTATCTCAACTGATTCTTGGTAATCTACAAAGTGACTTAATTATTAACAGTTATT +TAGTAATTCTAATTTTTTATTAATATCATAATAATTTTCAAGAGATTAATTATACCTATT +GACATAAAAGTGATACAACACCAATGAGTAATAAATTTTTAGTTAACTACAATTCCCTAA +AGGACAAATTTTACGGTATACATTATTTTATATATGTAAGTAACCTAAATGAATCTAATC +TAAACTTAGCTAATTTCTATGTAAGTTTAAAGTTATTAAATTTATATTATTAAAATTATA +TAAATGTTTGGTACTATCGATACGAATCTCAGTAAACGTTCTTCTTATTAGAAATCTGTA +ATCATATAATTAAAAATATGATTTTTATATTTGTTGTTTATTTATACTTTTATAATCTTG +ATCTGTATGGTAAAAAGACACTTTTTATAAAGTTCGACAACATAATAATGCGACGAATCT +ACATCATAACAAGTATAGTAAAAATAAAATTATTGACAGCAGAAATAATATAGAAATTTG +TTAATTATTATTTATAAAAAATATCTGTCTCCAAGAAGAATAATTACTAATGTAATTTTA +TAAAAGTCTAGGAAAACAAATTAGTTTAAATAACTTAAGAATGTAAATTTTGAAATTATA +GTGTAATAATTTTTAATTAACGATAATAGTTTTAAGATAAATTTTTATAAAATTAAATAA +AATAATTTTAACAAATTTTATTATAAAGCAAAGCAAAGCAAAGCAAAGCAAAGCAAAGCA +AAGCAAAGCAAAGCAAAGCAAAGCAAAGCAAAGCAAAGCAAAGCAAAGCAAAGCAAAGCA +AAGCAAAGCAAAGCAAAGCAAAGCAAAGCAAAGCATGATTTAGACGTATAATGCCTTAAA +AAATAATTAAAAAAATAATATTCTTTGTTGCTTAAAATTATTTACCGGCGTTTACTATAT +CTTCAATTATTACACCTATTTTTTTATTAATCAATACAAATAAATATATTATGATAATTC +GTTAAGATATTTTAAGGTATAATATAGATAAATATTTTATAAAGTTATCATAGACGTTAA +ACATTTCACAGATAATAGAAATTTGCGTATAAAACGAATTATTGAAGAATTATATACTCA +TAATTAACATTTAATCCAACTCTATCAATATATCAATGTTTTAAATAACATATTCATCAT +TAAACTTTCTTAACAATCAAACGACCAAGTACATTTTTAAGAAATGTAATGAAGATTTAT +TCTTTTATCAATATTGTAATAAATATTAAAAGAATAATGATAATTATAAATACTATACTA +AAACAATGTCAAAATAAAATAGTCTTATGGTCAATTATATTTCCTTTTGAGTCTTATTCA 
+GTTGAATATATTATTTTTAATATATTAGCAAGCCAAGAAAACTATATTTAGAGTTAATTG +GATGTAGAATAATTTTTAGACATTAGATTATTATTCTTTATACTTAAAATATAAACATCA +AATAATCATAAAAATTTTAGTATCAGATATACCATTTTACTGTGAATATTATTTGAAACT +GTACTATTATTATAGAGCAAAGATATAGTACTGTTTTTATCATATTTATATTGATAACAA +TCGTCTTTATGATTTCTTCAAACATTTAAAAATAAAAATTTATTATAGACAGAGATACAT +CTTTTTAATGGTTACAATAAAATATTTTTAGATCCTTTTGTTTAACCAAATTAATTGAAT +TCTCTCATTTAAAATTTTAATATGTCATTATTAATATTTAAATATTATCTTGAAAATACC +ACTTAAAAATATCTATAATTAATTTATTAAAATTGATTAAAATAATTGCTAAATCTGCGT +AATAAGGTATTTAAAGTTTAATTTTGTATTACAAGATACTTTTAATTTTAATAAATAGCT +AGAAATGATACAAATGTAATAAATGTAGATAAAAAAATAATTAAATACGTTTATTTATGT +TATATTAATAAGCAATTCTATAAAAATATATATTATATCGATTCATAAAATATTTCGATT +GCATGTGCAATTTATAAAATGTCTATTATTTCTAAGCGTATATTTTGCATAATAACCCCT +AAATATATTGGTATTTGATGTTACCTAAGTTGAATTAGTATTAAACTTACAAAATTTATT +ATATAAGTAGAGATTTGTAAGAACTATTAGTTAATTCACTCAATTAATAATTGTCTATAT +AACATTAAAATTAGAAAATAATTATAACATTATTAATAGTTCTCTAATTATTATCAATAT +CTATGATTTGACTTTGTTGTAGTTACATATTATCTGAAAATCAGTTAATGTTAAGGAATA +CTCTGATTAAAACAACATATGTAATAAAAATTATATATTCATTGGATTTTTTTAATTTAG +ATTTGAATTGATAATAGTTCTTTAAATATAGGATGGGGGTATTAAATGATCGAAAAATTT +ATAATATCTTGAATACCAACTTATTCAAGTTATTGTTTGAAATTATATTATTTCAAAAAG +AAAGCACTAAAAGTCTTTTTTTCTAATGACTAAACTCGTAAAGATCATGAAATTGATTCT +TTCAATAGTTAAAAAACAAAAATTTACTATGTAAACTGAAATATTTATCTAACGGTTTAT +ATCTTGAATACTTAGTCTTTTTTGTTATTCCGGATAAATTTATTTTCCTTATTCACTAAT +TTGCGAAATTTATATTGATATATGAATATAATCTAAAAGAGGACACTTAAAATTAGGAGT +AGTTAATATTTTATATTCTATTTGGTTTTTTCATGCATTATTGCAATAAATTGACAAGGG +TTTAACATTGAATAACAAAACATTTTTAGTTTCAATAAAGAATTAAGAAAAATACATGCA +TGGTATAAAAAAATTAAGAAACCAATAAAAAGACTTCTTCCTCACTTATTAGAAAACAAA +TACAGAATAATAATTACTATATTGATACGTGAAACATATCCAATAGTTATTTAAAGTATT +AAATATTTATCACTTCTAGGGTTGTTTTCTAAATATAAATGCCAAGTTGAATTCATATTT +GAAACTACCATAAATAAATCTGGATCAAATGTGTTACATTTATTACGTAAATAGGGTCGA +CCAAAAACCACTAAATGTATTACTATATAAATAATTATCAATAAACTATTGAGTTTAAAA +AATAATATCAGTATTTTTAAGAGATCTTAATTAAATATGGATTACAGTATTTACACTATC +TTGTGATTACTCATAATTTAAAATTTAACTCATGTCAAACCATTACTTCTTTGAAATGCT 
+GTATGAAATAATATATATACTTTATACAAATTTACTATGATTTGTTTAAAATCGATTAAA +GATACAATCAAATTTCAATTTATATGTTCAACTAACTTATACCCAACCCCCAAAATTTAG +TAACTTCTGAGATATTATAGAACTTGTAATTCATCTCATTGGATATCAAATTTTATTTTA +ATAAAGCTTTTCATTCGAGAATATCAAAAAAAATGTTTACTGAATTGTACAATTCTTATA +TTTTAACTAAGAAAACTATCAATATTTAGTTTTTAAGTCTTACAATAAATTTCATATATA +GATTGTTATATATAGAGCTAATGAATTAGTAAAAAACATGAAACGCTTTTAAACATGACG +AATAAGTGATTTAACGCTTTGAATATGACTATATACTTAAATTTGATTTCGTCCTAAAAA +TTCTAAACCTCAACATTTATAAATTATAAGATTAGCCGAAAAAATACGTGATAATGTCCA +TTAACTGTTCCTAGACTGTAGCTCTTTATTTTGTTAAAATCCAACCTTTATCATAATTGA +TTTTTTCTACGAACCATATTATTTTGATACTTTGGTCATATTTCTGTTGTAGGAGTGAAT +TCACTTATTTTTGTGTCTTAATATTAATGAAAAATCTATACACTTTGTATAAGGTATTAT +TAAAATTCTAAACCTTTAAATAGTGAAGATCCTAAGTATAGATTTTTATCTACAATTCAA +CTTAAATATAAATCTTCATATTATTTTATTAGATCTACTCTACACAAAGTAAATATAGGA +TGTGTTCAACCTAGCTTCTGGTGTTTTACATCATCATGTATTCGTTAATTGTTAATTGAT +AACACAAAAATAATATTATAGTCTTTGAAATTCAGCTCAATTAATTTGAGTGTAATGTCA +TAAATAGCGTAAAATAGTAATAAGTGTTTAATACTAAATAGTGTTTAGTTTAGTAATGAA +AAAACTATACGACATACTTAATTATACATTTGAAATACGTTTAAATGATACTAAATAAAT +TAAAGTAAATTTTCATGTTAGTTTAAAGATAAATATACAAGTCAATTGAAAATGGGTAGG +AGGTTTTAATTCATTCAACACTCTACGATATTTTCAAGAATAAATAGAGTATCCTGTAAT +TCAAAATGAAATTATTTCGTAAAATAAGCTCATATTGTTTTTTTTGCGAATGACTTAATA +TGTTAAGAAAATAGAATTAATTCAAATGATAATTATAAATCAAAAAAACGGAACGTTATT +TAAAGAATAAATCTAACAATGTATAACTCAATCACTTATTTATTATTTGAACTATGTGAA +AGTTTCCAGTGTTTATACATTTAATTCTGAGACTTGTCCTAATATATAAACTTAAATTAA +AGTGGGATTATTGACGTTTGAAGTTAAAAAAATCTAATATTCTAATCGGCTTCAATATGC +ACTATTATAGGAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACA +AAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAAT +ACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAA +AATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATA +CAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAA +ATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATAC +AAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAA +TACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACA 
+AAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAAT +ACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAA +AATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATA +CAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAAATACAAA +ATACAAAATACCAATTGACGAGAAGCTCACACCAAGAAAATAAACTATTATACGTTGGGG +ATAGTATTAACTAATAAAAATGTTTAGTACAATAAAAGTATCAAATCCGTATAAAAACAA +TATCCTTATTTTAGTAAATTAAAGCACGGCATCAGAATTACTTTTTAAATATCTGAAACA +AAATTCATTATAATTTTTAGATTTGGTATATTTACACTTACAGTATACGCATTTAAAAAT +AGAAATTAAGTTTGATTTATACTCTGATATATAATATAATAATGTAAATATGATGAATTT +CATTTATATTCTTCACGTGTCGAATCGGTCTTCACAAAGCATAGTAATACATAAGCAATT +AATAATTAACTACTGTATATTTGTTATAACATCAAACAGTTTAAGTTGAAATAATAGGAC +TCACAATATATAATTTATCACATTTTATTTTAATTGACAAATTATGATTAATCATTAACC +ATATTATTACTTTTTAGATATTTTGATATAGTGATTATGAAAGGTTTATGGAATATAGTT +ATGACTTATTTAACTATATTTGAAAGTAAAAATTTATCTCTATTTATATGTTTACTTATC +TATTACTCATTTTTAAAGATTAAATAAATTGTGAGATGCAGGAAAAATTCTTAATTTTCT +CATAAGACGTT \ No newline at end of file diff --git a/examples/run-all.sh b/examples/run-all.sh index 5a0845c..5e7da00 100755 --- a/examples/run-all.sh +++ b/examples/run-all.sh @@ -14,7 +14,5 @@ run_one() { echo "" } -run_one examples/ex1.fa -run_one examples/ex2.fa -run_one examples/ex3.fa -run_one examples/ex4.fa +run_one examples/example_1.fa +run_one examples/example_2.fa diff --git a/examples/tune_file b/examples/tune_file new file mode 100644 index 0000000..9f46726 --- /dev/null +++ b/examples/tune_file @@ -0,0 +1,8 @@ +-p 1 +-p 4 +-p 16 +-p 64 +-p 256 +-p 10 -m 0.5 --at 0.3 +-p 10 -m 0.55 --at 0.2 +-p 10 -m 0.5 --at 0.1 -i 2 -d 2 \ No newline at end of file diff --git a/lib/json11.cpp b/lib/json11.cpp deleted file mode 100644 index b34bfe6..0000000 --- a/lib/json11.cpp +++ /dev/null @@ -1,797 +0,0 @@ -/* Copyright (c) 2013 Dropbox, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "json11.hpp" -#include -#include -#include -#include -#include -#pragma warning(push, 0) -namespace json11 { - -static const int max_depth = 200; - -using std::string; -using std::vector; -using std::map; -using std::make_shared; -using std::initializer_list; -using std::move; - -/* Helper for representing null - just a do-nothing struct, plus comparison - * operators so the helpers in JsonValue work. We can't use nullptr_t because - * it may not be orderable. 
- */ -struct NullStruct { - bool operator==(NullStruct) const { return true; } - bool operator<(NullStruct) const { return false; } -}; - -/* * * * * * * * * * * * * * * * * * * * - * Serialization - */ - - -[[maybe_unused]] static void dump(NullStruct, string &out) { - out += "null"; -} - - -[[maybe_unused]] static void dump(double value, string &out) { - if (std::isfinite(value)) { - char buf[32]; - snprintf(buf, sizeof buf, "%.17g", value); - out += buf; - } else { - out += "null"; - } -} - - -[[maybe_unused]] static void dump(int value, string &out) { - char buf[32]; - snprintf(buf, sizeof buf, "%d", value); - out += buf; -} - -[[maybe_unused]] static void dump(bool value, string &out) { - out += value ? "true" : "false"; -} - - -[[maybe_unused]] static void dump(const string &value, string &out) { - out += '"'; - for (size_t i = 0; i < value.length(); i++) { - const char ch = value[i]; - if (ch == '\\') { - out += "\\\\"; - } else if (ch == '"') { - out += "\\\""; - } else if (ch == '\b') { - out += "\\b"; - } else if (ch == '\f') { - out += "\\f"; - } else if (ch == '\n') { - out += "\\n"; - } else if (ch == '\r') { - out += "\\r"; - } else if (ch == '\t') { - out += "\\t"; - } else if (static_cast(ch) <= 0x1f) { - char buf[8]; - snprintf(buf, sizeof buf, "\\u%04x", ch); - out += buf; - } else if (static_cast(ch) == 0xe2 && static_cast(value[i+1]) == 0x80 - && static_cast(value[i+2]) == 0xa8) { - out += "\\u2028"; - i += 2; - } else if (static_cast(ch) == 0xe2 && static_cast(value[i+1]) == 0x80 - && static_cast(value[i+2]) == 0xa9) { - out += "\\u2029"; - i += 2; - } else { - out += ch; - } - } - out += '"'; -} - - -[[maybe_unused]] static void dump(const Json::array &values, string &out) { - bool first = true; - out += "["; - for (const auto &value : values) { - if (!first) - out += ", "; - value.dump(out); - first = false; - } - out += "]"; -} - - -[[maybe_unused]] static void dump(const Json::object &values, string &out) { - bool first = true; - out += 
"{"; - for (const auto &kv : values) { - if (!first) - out += ", "; - dump(kv.first, out); - out += ": "; - kv.second.dump(out); - first = false; - } - out += "}"; -} - - -[[maybe_unused]] void Json::dump(string &out) const { - m_ptr->dump(out); -} - -/* * * * * * * * * * * * * * * * * * * * - * Value wrappers - */ - -template -class Value : public JsonValue { -protected: - - // Constructors - explicit Value(const T &value) : m_value(value) {} - explicit Value(T &&value) : m_value(std::move(value)) {} - - // Get type tag - Json::Type type() const override { - return tag; - } - - // Comparisons - bool equals(const JsonValue * other) const override { - return m_value == static_cast *>(other)->m_value; - } - bool less(const JsonValue * other) const override { - return m_value < static_cast *>(other)->m_value; - } - - const T m_value; - void dump(string &out) const override { json11::dump(m_value, out); } -}; - -class JsonDouble final : public Value { - double number_value() const override { return m_value; } - int int_value() const override { return static_cast(m_value); } - bool equals(const JsonValue * other) const override { return m_value == other->number_value(); } - bool less(const JsonValue * other) const override { return m_value < other->number_value(); } -public: - explicit JsonDouble(double value) : Value(value) {} -}; - -class JsonInt final : public Value { - double number_value() const override { return m_value; } - int int_value() const override { return m_value; } - bool equals(const JsonValue * other) const override { return m_value == other->number_value(); } - bool less(const JsonValue * other) const override { return m_value < other->number_value(); } -public: - explicit JsonInt(int value) : Value(value) {} -}; - -class JsonBoolean final : public Value { - bool bool_value() const override { return m_value; } -public: - explicit JsonBoolean(bool value) : Value(value) {} -}; - -class JsonString final : public Value { - const string &string_value() 
const override { return m_value; } -public: - explicit JsonString(const string &value) : Value(value) {} - explicit JsonString(string &&value) : Value(std::move(value)) {} -}; - -class JsonArray final : public Value { - const Json::array &array_items() const override { return m_value; } - const Json & operator[](size_t i) const override; -public: - explicit JsonArray(const Json::array &value) : Value(value) {} - explicit JsonArray(Json::array &&value) : Value(std::move(value)) {} -}; - -class JsonObject final : public Value { - const Json::object &object_items() const override { return m_value; } - const Json & operator[](const string &key) const override; -public: - explicit JsonObject(const Json::object &value) : Value(value) {} - explicit JsonObject(Json::object &&value) : Value(std::move(value)) {} -}; - -class JsonNull final : public Value { -public: - JsonNull() : Value({}) {} -}; - -/* * * * * * * * * * * * * * * * * * * * - * Static globals - static-init-safe - */ -struct Statics { - const std::shared_ptr null = make_shared(); - const std::shared_ptr t = make_shared(true); - const std::shared_ptr f = make_shared(false); - const string empty_string; - const vector empty_vector; - const map empty_map; - Statics() {} -}; - -static const Statics & statics() { - static const Statics s {}; - return s; -} - -static const Json & static_null() { - // This has to be separate, not in Statics, because Json() accesses statics().null. - static const Json json_null; - return json_null; -} - -/* * * * * * * * * * * * * * * * * * * * - * Constructors - */ - -Json::Json() noexcept : m_ptr(statics().null) {} -Json::Json(std::nullptr_t) noexcept : m_ptr(statics().null) {} -Json::Json(double value) : m_ptr(make_shared(value)) {} -Json::Json(int value) : m_ptr(make_shared(value)) {} -Json::Json(bool value) : m_ptr(value ? 
statics().t : statics().f) {} -Json::Json(const string &value) : m_ptr(make_shared(value)) {} -Json::Json(string &&value) : m_ptr(make_shared(std::move(value))) {} -Json::Json(const char * value) : m_ptr(make_shared(value)) {} -Json::Json(const Json::array &values) : m_ptr(make_shared(values)) {} -Json::Json(Json::array &&values) : m_ptr(make_shared(std::move(values))) {} -Json::Json(const Json::object &values) : m_ptr(make_shared(values)) {} -Json::Json(Json::object &&values) : m_ptr(make_shared(std::move(values))) {} - -/* * * * * * * * * * * * * * * * * * * * - * Accessors - */ - -Json::Type Json::type() const { return m_ptr->type(); } -double Json::number_value() const { return m_ptr->number_value(); } -int Json::int_value() const { return m_ptr->int_value(); } -bool Json::bool_value() const { return m_ptr->bool_value(); } -const string & Json::string_value() const { return m_ptr->string_value(); } -const vector & Json::array_items() const { return m_ptr->array_items(); } -const map & Json::object_items() const { return m_ptr->object_items(); } -const Json & Json::operator[] (size_t i) const { return (*m_ptr)[i]; } -const Json & Json::operator[] (const string &key) const { return (*m_ptr)[key]; } - -double JsonValue::number_value() const { return 0; } -int JsonValue::int_value() const { return 0; } -bool JsonValue::bool_value() const { return false; } -const string & JsonValue::string_value() const { return statics().empty_string; } -const vector & JsonValue::array_items() const { return statics().empty_vector; } -const map & JsonValue::object_items() const { return statics().empty_map; } -const Json & JsonValue::operator[] (size_t) const { return static_null(); } -const Json & JsonValue::operator[] (const string &) const { return static_null(); } - -const Json & JsonObject::operator[] (const string &key) const { - auto iter = m_value.find(key); - return (iter == m_value.end()) ? 
static_null() : iter->second; -} -const Json & JsonArray::operator[] (size_t i) const { - if (i >= m_value.size()) return static_null(); - else return m_value[i]; -} - -/* * * * * * * * * * * * * * * * * * * * - * Comparison - */ - -bool Json::operator== (const Json &other) const { - if (m_ptr == other.m_ptr) - return true; - if (m_ptr->type() != other.m_ptr->type()) - return false; - - return m_ptr->equals(other.m_ptr.get()); -} - -bool Json::operator< (const Json &other) const { - if (m_ptr == other.m_ptr) - return false; - if (m_ptr->type() != other.m_ptr->type()) - return m_ptr->type() < other.m_ptr->type(); - - return m_ptr->less(other.m_ptr.get()); -} - -/* * * * * * * * * * * * * * * * * * * * - * Parsing - */ - -/* esc(c) - * - * Format char c suitable for printing in an error message. - */ -static inline string esc(char c) { - char buf[12]; - if (static_cast(c) >= 0x20 && static_cast(c) <= 0x7f) { - snprintf(buf, sizeof buf, "'%c' (%d)", c, c); - } else { - snprintf(buf, sizeof buf, "(%d)", c); - } - return string(buf); -} - -static inline bool in_range(long x, long lower, long upper) { - return (x >= lower && x <= upper); -} - -namespace { -/* JsonParser - * - * Object that tracks all state of an in-progress parse. - */ -struct JsonParser final { - - /* State - */ - const string &str; - size_t i; - string &err; - bool failed; - const JsonParse strategy; - - /* fail(msg, err_ret = Json()) - * - * Mark this parse as failed. - */ - Json fail(string &&msg) { - return fail(std::move(msg), Json()); - } - - template - T fail(string &&msg, const T err_ret) { - if (!failed) - err = std::move(msg); - failed = true; - return err_ret; - } - - /* consume_whitespace() - * - * Advance until the current character is non-whitespace. - */ - void consume_whitespace() { - while (str[i] == ' ' || str[i] == '\r' || str[i] == '\n' || str[i] == '\t') - i++; - } - - /* consume_comment() - * - * Advance comments (c-style inline and multiline). 
- */ - bool consume_comment() { - bool comment_found = false; - if (str[i] == '/') { - i++; - if (i == str.size()) - return fail("unexpected end of input after start of comment", false); - if (str[i] == '/') { // inline comment - i++; - // advance until next line, or end of input - while (i < str.size() && str[i] != '\n') { - i++; - } - comment_found = true; - } - else if (str[i] == '*') { // multiline comment - i++; - if (i > str.size()-2) - return fail("unexpected end of input inside multi-line comment", false); - // advance until closing tokens - while (!(str[i] == '*' && str[i+1] == '/')) { - i++; - if (i > str.size()-2) - return fail( - "unexpected end of input inside multi-line comment", false); - } - i += 2; - comment_found = true; - } - else - return fail("malformed comment", false); - } - return comment_found; - } - - /* consume_garbage() - * - * Advance until the current character is non-whitespace and non-comment. - */ - void consume_garbage() { - consume_whitespace(); - if(strategy == JsonParse::COMMENTS) { - bool comment_found = false; - do { - comment_found = consume_comment(); - if (failed) return; - consume_whitespace(); - } - while(comment_found); - } - } - - /* get_next_token() - * - * Return the next non-whitespace character. If the end of the input is reached, - * flag an error and return 0. - */ - char get_next_token() { - consume_garbage(); - if (failed) return static_cast(0); - if (i == str.size()) - return fail("unexpected end of input", static_cast(0)); - - return str[i++]; - } - - /* encode_utf8(pt, out) - * - * Encode pt as UTF-8 and add it to out. 
- */ - void encode_utf8(long pt, string & out) { - if (pt < 0) - return; - - if (pt < 0x80) { - out += static_cast(pt); - } else if (pt < 0x800) { - out += static_cast((pt >> 6) | 0xC0); - out += static_cast((pt & 0x3F) | 0x80); - } else if (pt < 0x10000) { - out += static_cast((pt >> 12) | 0xE0); - out += static_cast(((pt >> 6) & 0x3F) | 0x80); - out += static_cast((pt & 0x3F) | 0x80); - } else { - out += static_cast((pt >> 18) | 0xF0); - out += static_cast(((pt >> 12) & 0x3F) | 0x80); - out += static_cast(((pt >> 6) & 0x3F) | 0x80); - out += static_cast((pt & 0x3F) | 0x80); - } - } - - /* parse_string() - * - * Parse a string, starting at the current position. - */ - string parse_string() { - string out; - long last_escaped_codepoint = -1; - while (true) { - if (i == str.size()) - return fail("unexpected end of input in string", ""); - - char ch = str[i++]; - - if (ch == '"') { - encode_utf8(last_escaped_codepoint, out); - return out; - } - - if (in_range(ch, 0, 0x1f)) - return fail("unescaped " + esc(ch) + " in string", ""); - - // The usual case: non-escaped characters - if (ch != '\\') { - encode_utf8(last_escaped_codepoint, out); - last_escaped_codepoint = -1; - out += ch; - continue; - } - - // Handle escapes - if (i == str.size()) - return fail("unexpected end of input in string", ""); - - ch = str[i++]; - - if (ch == 'u') { - // Extract 4-byte escape sequence - string esc = str.substr(i, 4); - // Explicitly check length of the substring. The following loop - // relies on std::string returning the terminating NUL when - // accessing str[length]. Checking here reduces brittleness. 
- if (esc.length() < 4) { - return fail("bad \\u escape: " + esc, ""); - } - for (size_t j = 0; j < 4; j++) { - if (!in_range(esc[j], 'a', 'f') && !in_range(esc[j], 'A', 'F') - && !in_range(esc[j], '0', '9')) - return fail("bad \\u escape: " + esc, ""); - } - - long codepoint = strtol(esc.data(), nullptr, 16); - - // JSON specifies that characters outside the BMP shall be encoded as a pair - // of 4-hex-digit \u escapes encoding their surrogate pair components. Check - // whether we're in the middle of such a beast: the previous codepoint was an - // escaped lead (high) surrogate, and this is a trail (low) surrogate. - if (in_range(last_escaped_codepoint, 0xD800, 0xDBFF) - && in_range(codepoint, 0xDC00, 0xDFFF)) { - // Reassemble the two surrogate pairs into one astral-plane character, per - // the UTF-16 algorithm. - encode_utf8((((last_escaped_codepoint - 0xD800) << 10) - | (codepoint - 0xDC00)) + 0x10000, out); - last_escaped_codepoint = -1; - } else { - encode_utf8(last_escaped_codepoint, out); - last_escaped_codepoint = codepoint; - } - - i += 4; - continue; - } - - encode_utf8(last_escaped_codepoint, out); - last_escaped_codepoint = -1; - - if (ch == 'b') { - out += '\b'; - } else if (ch == 'f') { - out += '\f'; - } else if (ch == 'n') { - out += '\n'; - } else if (ch == 'r') { - out += '\r'; - } else if (ch == 't') { - out += '\t'; - } else if (ch == '"' || ch == '\\' || ch == '/') { - out += ch; - } else { - return fail("invalid escape character " + esc(ch), ""); - } - } - } - - /* parse_number() - * - * Parse a double. - */ - Json parse_number() { - size_t start_pos = i; - - if (str[i] == '-') - i++; - - // Integer part - if (str[i] == '0') { - i++; - if (in_range(str[i], '0', '9')) - return fail("leading 0s not permitted in numbers"); - } else if (in_range(str[i], '1', '9')) { - i++; - while (in_range(str[i], '0', '9')) - i++; - } else { - return fail("invalid " + esc(str[i]) + " in number"); - } - - if (str[i] != '.' 
&& str[i] != 'e' && str[i] != 'E' - && (i - start_pos) <= static_cast(std::numeric_limits::digits10)) { - return std::atoi(str.c_str() + start_pos); - } - - // Decimal part - if (str[i] == '.') { - i++; - if (!in_range(str[i], '0', '9')) - return fail("at least one digit required in fractional part"); - - while (in_range(str[i], '0', '9')) - i++; - } - - // Exponent part - if (str[i] == 'e' || str[i] == 'E') { - i++; - - if (str[i] == '+' || str[i] == '-') - i++; - - if (!in_range(str[i], '0', '9')) - return fail("at least one digit required in exponent"); - - while (in_range(str[i], '0', '9')) - i++; - } - - return std::strtod(str.c_str() + start_pos, nullptr); - } - - /* expect(str, res) - * - * Expect that 'str' starts at the character that was just read. If it does, advance - * the input and return res. If not, flag an error. - */ - Json expect(const string &expected, Json res) { - assert(i != 0); - i--; - if (str.compare(i, expected.length(), expected) == 0) { - i += expected.length(); - return res; - } else { - return fail("parse error: expected " + expected + ", got " + str.substr(i, expected.length())); - } - } - - /* parse_json() - * - * Parse a JSON object. 
- */ - Json parse_json(int depth) { - if (depth > max_depth) { - return fail("exceeded maximum nesting depth"); - } - - char ch = get_next_token(); - if (failed) - return Json(); - - if (ch == '-' || (ch >= '0' && ch <= '9')) { - i--; - return parse_number(); - } - - if (ch == 't') - return expect("true", true); - - if (ch == 'f') - return expect("false", false); - - if (ch == 'n') - return expect("null", Json()); - - if (ch == '"') - return parse_string(); - - if (ch == '{') { - map data; - ch = get_next_token(); - if (ch == '}') - return data; - - while (1) { - if (ch != '"') - return fail("expected '\"' in object, got " + esc(ch)); - - string key = parse_string(); - if (failed) - return Json(); - - ch = get_next_token(); - if (ch != ':') - return fail("expected ':' in object, got " + esc(ch)); - - data[std::move(key)] = parse_json(depth + 1); - if (failed) - return Json(); - - ch = get_next_token(); - if (ch == '}') - break; - if (ch != ',') - return fail("expected ',' in object, got " + esc(ch)); - - ch = get_next_token(); - } - return data; - } - - if (ch == '[') { - vector data; - ch = get_next_token(); - if (ch == ']') - return data; - - while (1) { - i--; - data.push_back(parse_json(depth + 1)); - if (failed) - return Json(); - - ch = get_next_token(); - if (ch == ']') - break; - if (ch != ',') - return fail("expected ',' in list, got " + esc(ch)); - - ch = get_next_token(); - (void)ch; - } - return data; - } - - return fail("expected value, got " + esc(ch)); - } -}; -}//namespace { - -Json Json::parse(const string &in, string &err, JsonParse strategy) { - JsonParser parser { in, 0, err, false, strategy }; - Json result = parser.parse_json(0); - - // Check for any trailing garbage - parser.consume_garbage(); - if (parser.failed) - return Json(); - if (parser.i != in.size()) - return parser.fail("unexpected trailing " + esc(in[parser.i])); - - return result; -} - -// Documented in json11.hpp -vector Json::parse_multi(const string &in, - 
std::string::size_type &parser_stop_pos, - string &err, - JsonParse strategy) { - JsonParser parser { in, 0, err, false, strategy }; - parser_stop_pos = 0; - vector json_vec; - while (parser.i != in.size() && !parser.failed) { - json_vec.push_back(parser.parse_json(0)); - if (parser.failed) - break; - - // Check for another object - parser.consume_garbage(); - if (parser.failed) - break; - parser_stop_pos = parser.i; - } - return json_vec; -} - -/* * * * * * * * * * * * * * * * * * * * - * Shape-checking - */ - -bool Json::has_shape(const shape & types, string & err) const { - if (!is_object()) { - err = "expected JSON object, got " + dump(); - return false; - } - - for (auto & item : types) { - if ((*this)[item.first].type() != item.second) { - err = "bad type for " + item.first + " in " + dump(); - return false; - } - } - - return true; -} - -} // namespace json11 - -#pragma warning(pop) \ No newline at end of file diff --git a/lib/json11.hpp b/lib/json11.hpp deleted file mode 100644 index 9d311c5..0000000 --- a/lib/json11.hpp +++ /dev/null @@ -1,233 +0,0 @@ -/* json11 - * - * json11 is a tiny JSON library for C++11, providing JSON parsing and serialization. - * - * The core object provided by the library is json11::Json. A Json object represents any JSON - * value: null, bool, number (int or double), string (std::string), array (std::vector), or - * object (std::map). - * - * Json objects act like values: they can be assigned, copied, moved, compared for equality or - * order, etc. There are also helper methods Json::dump, to serialize a Json to a string, and - * Json::parse (static) to parse a std::string as a Json object. - * - * Internally, the various types of Json object are represented by the JsonValue class - * hierarchy. - * - * A note on numbers - JSON specifies the syntax of number formatting but not its semantics, - * so some JSON implementations distinguish between integers and floating-point numbers, while - * some don't. 
In json11, we choose the latter. Because some JSON implementations (namely - * Javascript itself) treat all numbers as the same type, distinguishing the two leads - * to JSON that will be *silently* changed by a round-trip through those implementations. - * Dangerous! To avoid that risk, json11 stores all numbers as double internally, but also - * provides integer helpers. - * - * Fortunately, double-precision IEEE754 ('double') can precisely store any integer in the - * range +/-2^53, which includes every 'int' on most systems. (Timestamps often use int64 - * or long long to avoid the Y2038K problem; a double storing microseconds since some epoch - * will be exact for +/- 275 years.) - */ - -/* Copyright (c) 2013 Dropbox, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#pragma once - -#include -#include -#include -#include -#include - -#ifdef _MSC_VER - #if _MSC_VER <= 1800 // VS 2013 - #ifndef noexcept - #define noexcept throw() - #endif - - #ifndef snprintf - #define snprintf _snprintf_s - #endif - #endif -#endif -#pragma warning(push, 0) -namespace json11 { - -enum JsonParse { - STANDARD, COMMENTS -}; - -class JsonValue; - -class Json final { -public: - // Types - enum Type { - NUL, NUMBER, BOOL, STRING, ARRAY, OBJECT - }; - - // Array and object typedefs - typedef std::vector array; - typedef std::map object; - - // Constructors for the various types of JSON value. - Json() noexcept; // NUL - Json(std::nullptr_t) noexcept; // NUL - Json(double value); // NUMBER - Json(int value); // NUMBER - Json(bool value); // BOOL - Json(const std::string &value); // STRING - Json(std::string &&value); // STRING - Json(const char * value); // STRING - Json(const array &values); // ARRAY - Json(array &&values); // ARRAY - Json(const object &values); // OBJECT - Json(object &&values); // OBJECT - - // Implicit constructor: anything with a to_json() function. - template - Json(const T & t) : Json(t.to_json()) {} - - // Implicit constructor: map-like objects (std::map, std::unordered_map, etc) - template ().begin()->first)>::value - && std::is_constructible().begin()->second)>::value, - int>::type = 0> - Json(const M & m) : Json(object(m.begin(), m.end())) {} - - // Implicit constructor: vector-like objects (std::list, std::vector, std::set, etc) - template ().begin())>::value, - int>::type = 0> - Json(const V & v) : Json(array(v.begin(), v.end())) {} - - // This prevents Json(some_pointer) from accidentally producing a bool. Use - // Json(bool(some_pointer)) if that behavior is desired. 
- Json(void *) = delete; - - // Accessors - Type type() const; - - bool is_null() const { return type() == NUL; } - bool is_number() const { return type() == NUMBER; } - bool is_bool() const { return type() == BOOL; } - bool is_string() const { return type() == STRING; } - bool is_array() const { return type() == ARRAY; } - bool is_object() const { return type() == OBJECT; } - - // Return the enclosed value if this is a number, 0 otherwise. Note that json11 does not - // distinguish between integer and non-integer numbers - number_value() and int_value() - // can both be applied to a NUMBER-typed object. - double number_value() const; - int int_value() const; - - // Return the enclosed value if this is a boolean, false otherwise. - bool bool_value() const; - // Return the enclosed string if this is a string, "" otherwise. - const std::string &string_value() const; - // Return the enclosed std::vector if this is an array, or an empty vector otherwise. - const array &array_items() const; - // Return the enclosed std::map if this is an object, or an empty map otherwise. - const object &object_items() const; - - // Return a reference to arr[i] if this is an array, Json() otherwise. - const Json & operator[](size_t i) const; - // Return a reference to obj[key] if this is an object, Json() otherwise. - const Json & operator[](const std::string &key) const; - - // Serialize. - void dump(std::string &out) const; - std::string dump() const { - std::string out; - dump(out); - return out; - } - - // Parse. If parse fails, return Json() and assign an error message to err. 
- static Json parse(const std::string & in, - std::string & err, - JsonParse strategy = JsonParse::STANDARD); - static Json parse(const char * in, - std::string & err, - JsonParse strategy = JsonParse::STANDARD) { - if (in) { - return parse(std::string(in), err, strategy); - } else { - err = "null input"; - return nullptr; - } - } - // Parse multiple objects, concatenated or separated by whitespace - static std::vector parse_multi( - const std::string & in, - std::string::size_type & parser_stop_pos, - std::string & err, - JsonParse strategy = JsonParse::STANDARD); - - static inline std::vector parse_multi( - const std::string & in, - std::string & err, - JsonParse strategy = JsonParse::STANDARD) { - std::string::size_type parser_stop_pos; - return parse_multi(in, parser_stop_pos, err, strategy); - } - - bool operator== (const Json &rhs) const; - bool operator< (const Json &rhs) const; - bool operator!= (const Json &rhs) const { return !(*this == rhs); } - bool operator<= (const Json &rhs) const { return !(rhs < *this); } - bool operator> (const Json &rhs) const { return (rhs < *this); } - bool operator>= (const Json &rhs) const { return !(*this < rhs); } - - /* has_shape(types, err) - * - * Return true if this is a JSON object and, for each item in types, has a field of - * the given type. If not, return false and set err to a descriptive message. - */ - typedef std::initializer_list> shape; - bool has_shape(const shape & types, std::string & err) const; - -private: - std::shared_ptr m_ptr; -}; - -// Internal class hierarchy - JsonValue objects are not exposed to users of this API. 
-class JsonValue { -protected: - friend class Json; - friend class JsonInt; - friend class JsonDouble; - virtual Json::Type type() const = 0; - virtual bool equals(const JsonValue * other) const = 0; - virtual bool less(const JsonValue * other) const = 0; - virtual void dump(std::string &out) const = 0; - virtual double number_value() const; - virtual int int_value() const; - virtual bool bool_value() const; - virtual const std::string &string_value() const; - virtual const Json::array &array_items() const; - virtual const Json &operator[](size_t i) const; - virtual const Json::object &object_items() const; - virtual const Json &operator[](const std::string &key) const; - virtual ~JsonValue() {} -}; - -} // namespace json11 -#pragma warning(pop) \ No newline at end of file diff --git a/src/JSONFileWriter.cpp b/src/JSONFileWriter.cpp index 30552d7..1608b41 100644 --- a/src/JSONFileWriter.cpp +++ b/src/JSONFileWriter.cpp @@ -24,7 +24,7 @@ std::string JSONFileWriter::StringForSubRepeat(RepeatRegion *r, int split_index, int start_pos) { std::string repeatString = "{"; - int start = start_pos + r->sequenceStart; + int start = start_pos; int end = r->repeatLength; int consensusPosition = r->splits->size(); @@ -39,10 +39,6 @@ std::string JSONFileWriter::StringForSubRepeat(RepeatRegion *r, int split_index, repeatString += "\"Start\": "; repeatString += std::to_string(start); - repeatString += ",\n\"End\": "; - repeatString += std::to_string(end); - repeatString += ",\n\"Score\": "; - repeatString += std::to_string(subScore); repeatString += ",\n\"Consensus\": \""; // Converting to a c string is important. 
// ...Dunno why, but here we are diff --git a/src/RepeatSplitter.cpp b/src/RepeatSplitter.cpp index 3830bae..aa0d687 100644 --- a/src/RepeatSplitter.cpp +++ b/src/RepeatSplitter.cpp @@ -423,6 +423,7 @@ void ValidateSplits(std::vector *consensi, join_threshold)) { consensi->at(i) = consensi->at(i - 1); splits->at(i - 1) = -1; + } } } \ No newline at end of file diff --git a/src/TabFileWriter.cpp b/src/TabFileWriter.cpp index 8401390..7a76c92 100644 --- a/src/TabFileWriter.cpp +++ b/src/TabFileWriter.cpp @@ -18,12 +18,12 @@ void TabFileWriter::InitializeWriter(Ultra *ultra, FILE *out_f) { fprintf(out, "\tScore"); if (owner->settings->pval) fprintf(out, ",PValue"); - if (owner->settings->max_consensus_period != 0) + if (owner->settings->max_consensus_period >= 0) fprintf(out, "\tConsensus"); - if (owner->settings->max_split > 0) { + if (owner->settings->max_split >= 0) { fprintf(out, "\t#Subrepeats"); fprintf(out, "\tSubrepeatStarts"); - if (owner->settings->max_consensus_period != 0) + if (owner->settings->max_consensus_period >= 0) fprintf(out, "\tSubrepeatConsensi"); } @@ -75,7 +75,7 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { fprintf(out, "\t%s", rep_con.c_str()); } - if (owner->settings->max_split > 0) { + if (owner->settings->max_split >= 0) { std::string sizes = ""; std::string starts = "0"; std::string consensi = ""; @@ -99,11 +99,13 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { } } - if (owner->settings->max_consensus_period != 0) { + if (owner->settings->max_consensus_period >= 0) { for (int i = 0; i < repeat->consensi->size(); ++i) { std::string con = "."; if (owner->settings->max_consensus_period >= repeat->repeatPeriod) { if (repeat->consensi != nullptr && repeat->consensi->size() > i) { + if (i > 0 && repeat->consensi->at(i) == repeat->consensi->at(i - 1)) + continue; con = repeat->consensi->at(i); } } @@ -119,7 +121,7 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { fprintf(out, "\t%i\t%s", numberOfValidSplits + 1, 
starts.c_str()); - if (owner->settings->max_consensus_period != 0) { + if (owner->settings->max_consensus_period >= 0) { fprintf(out, "\t%s", consensi.c_str()); } @@ -127,7 +129,7 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { else { fprintf(out, "\t1\t0"); - if (owner->settings->max_consensus_period != 0) { + if (owner->settings->max_consensus_period >= 0) { fprintf(out, "\t%s", rep_con.c_str()); } } diff --git a/src/cli.cpp b/src/cli.cpp index eaf2457..ffdfe14 100644 --- a/src/cli.cpp +++ b/src/cli.cpp @@ -50,8 +50,8 @@ void Settings::prepare_settings() { "The exponential scale used for converting scores to p-values") ->group("Output"); - app.add_flag("--ultra", this->ultra_out, - "Use ULTRA output format") + app.add_flag("--tsv", this->ultra_out, + "Use TSV output format") ->group("Output"); app.add_flag("--json", this->json_out, @@ -207,7 +207,6 @@ void Settings::prepare_settings() { app.add_option("--tune_fdr", this->tune_fdr, "FDR to be tuned against (see README)") - ->default_val("0.1") ->group("Parameter Tuning"); app.add_flag("--tune_only", this->tune_only, @@ -325,6 +324,8 @@ void Settings::prepare_settings() { "Minimum repeat split window size") ->default_val(this->min_split_window) ->group(""); + + app.add_flag("--cite", this->cite)->group(""); } void Settings::set_multi_option() { @@ -364,6 +365,22 @@ bool Settings::parse_input(int argc, const char **argv) { exit(0); // or any other error handling } + if (this->cite) { + printf("BibTeX: \n" + "@article {Olson2024ultra,\n" + " author = {Olson, Daniel R. 
and Wheeler, Travis J.},\n" + " title = {ULTRA-Effective Labeling of Repetitive Genomic Sequence},\n" + " elocation-id = {2024.06.03.597269},\n" + " year = {2024},\n" + " doi = {10.1101/2024.06.03.597269},\n" + " publisher = {Cold Spring Harbor Laboratory},\n" + " URL = {https://www.biorxiv.org/content/early/2024/06/04/2024.06.03.597269},\n" + " eprint = {https://www.biorxiv.org/content/early/2024/06/04/2024.06.03.597269.full.pdf},\n" + " journal = {bioRxiv}\n" + "}\n"); + exit(0); + } + bool passed = true; if (this->in_file.empty() && !this->show_memory) { fprintf(stderr, "Input file required.\n"); @@ -389,7 +406,7 @@ bool Settings::parse_input(int argc, const char **argv) { } if ((this->ultra_out || this->json_out || this->bed_out) && this->suppress_out) { - fprintf(stderr, "--suppress is incompatible with --ultra, --json, and --bed\n"); + fprintf(stderr, "--suppress is incompatible with --tsv, --json, and --bed\n"); passed = false; } @@ -595,37 +612,33 @@ void Settings::assign_settings() { if (this->window_size == -1) { int num_states = this->calculate_num_states(); - unsigned long cmem = num_states * this->max_period; + unsigned long period_memory = num_states * this->max_period; // itty bitty models use ~40 mb per thread - if (cmem <= 1000) { + if (period_memory <= 1000) { this->window_size = 10000 * this->max_period; } // tiny models use 80 mb per thread - else if (cmem <= 10000) { + else if (period_memory <= 10000) { this->window_size = 2000 * this->max_period; } // Small models use less than 160 mb per thread - else if (cmem <= 200000) { - this->window_size = 200 * this->max_period; + else if (period_memory <= 200000) { + this->window_size = 400 * this->max_period; } // medium models use less than 1 GB per thread - else if (cmem <= 2000000) { - this->window_size = 50 * this->max_period; + else if (period_memory <= 2000000) { + this->window_size = 100 * this->max_period; } // Large models use less than 4 GB per thread - else if (cmem <= 20000000) { - 
this->window_size = 10 * this->max_period; - } - - // Massive models use a lot of memory else { - this->window_size = 2 * this->max_period; + this->window_size = 50 * this->max_period; } + } this->a_freq = this->at / 2.0; diff --git a/src/cli.hpp b/src/cli.hpp index 36a5b56..e82023e 100644 --- a/src/cli.hpp +++ b/src/cli.hpp @@ -5,7 +5,7 @@ #ifndef ULTRA_CLI_HPP #define ULTRA_CLI_HPP -#define ULTRA_VERSION_STRING "1.0.0 (beta 19)" +#define ULTRA_VERSION_STRING "1.0.0" #include "../lib/CLI11.hpp" @@ -15,6 +15,9 @@ struct Settings { std::string args = ""; + // Are we displaying the citation text? + bool cite = false; + // Input settings std::string in_file = ""; bool read_all = false; @@ -60,7 +63,7 @@ struct Settings { // Tuning parameters - double tune_fdr = 0.1; + double tune_fdr = 0.05; bool tune = false; bool tune_medium = false; bool tune_large = false; @@ -92,7 +95,7 @@ struct Settings { // Split and naming parameters bool no_split = false; - unsigned long long max_split = 10; + int max_split = 10; float split_threshold = 3.5; unsigned long long split_depth = 5; unsigned long long min_split_window = 15; @@ -105,7 +108,8 @@ struct Settings { "=================================================\n" "(U)ltra (L)ocates (T)andemly (R)epetitive (A)reas\n" " Daniel R. Olson and Travis J. 
Wheeler\n" - " Version " ULTRA_VERSION_STRING "\n" + " Version " ULTRA_VERSION_STRING "\n" + " Use '--cite' for citation instructions\n" "=================================================\n"}; void prepare_settings(); diff --git a/src/main.cpp b/src/main.cpp index 4416ea0..1f8c0d7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -160,13 +160,14 @@ int main(int argc, const char *argv[]) { settings->ultra_out = false; settings->json_out = false; settings->bed_out = false; + settings->out_file = ""; ultra = new Ultra(settings); ultra->shuffleSequence = true; ultra->AnalyzeFile(); ultra->OutputRepeats(true); shuff_coverage = ultra->Coverage(); float fdr = (float)shuff_coverage / (float)true_coverage; - printf("Estimated false discovery rate: %g = (%llu / %llu)\n", fdr, shuff_coverage, true_coverage); + printf("Estimated false discovery rate: %g\n", fdr); } return 0; diff --git a/src/ultra.cpp b/src/ultra.cpp index 472193e..5621dcc 100644 --- a/src/ultra.cpp +++ b/src/ultra.cpp @@ -359,7 +359,7 @@ Ultra::Ultra(Settings *s) { if (settings->bed_out) c++; if (c > 1) { if (settings->ultra_out) { - std::string ultra_path = settings->out_file + ".ultra"; + std::string ultra_path = settings->out_file + ".tsv"; FILE *out = fopen(ultra_path.c_str(), "w"); if (out == NULL) { fprintf(stderr, "Unable to open output file %s\n", ultra_path.c_str());