From 88be598bc45295a571715cf7d57df0fc80921442 Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Mon, 8 Jun 2020 14:19:57 +0200 Subject: [PATCH 01/21] add tests for data export --- test-data/exported.gff3 | 2918 +++++++++++++++++++++++++++++++++ test-data/exported.vcf | 5 + test-data/exported_cdna.fa | 103 ++ test-data/exported_cds.fa | 68 + test-data/exported_peptide.fa | 29 + test/io_test.py | 90 + 6 files changed, 3213 insertions(+) create mode 100644 test-data/exported.gff3 create mode 100644 test-data/exported.vcf create mode 100644 test-data/exported_cdna.fa create mode 100644 test-data/exported_cds.fa create mode 100644 test-data/exported_peptide.fa create mode 100644 test/io_test.py diff --git a/test-data/exported.gff3 b/test-data/exported.gff3 new file mode 100644 index 00000000..8c7cc61c --- /dev/null +++ b/test-data/exported.gff3 @@ -0,0 +1,2918 @@ +##gff-version 3 +##sequence-region Merlin 1 172788 +Merlin . gene 2 691 . + . owner=admin@local.host;score=["-856.563659"];gene_product=;ID=290ad7f1-fa0b-4e41-bdd5-73b184b9bdac;seqid=["Merlin"];date_last_modified=2020-06-08;Name=Unknown;date_creation=2020-06-08 +Merlin . mRNA 2 691 . + . owner=admin@local.host;Parent=290ad7f1-fa0b-4e41-bdd5-73b184b9bdac;gene_product=;ID=c7ce0a38-beee-4aa3-8f34-5f35f549f287;date_last_modified=2020-06-08;Name=Unknown;date_creation=2020-06-08 +Merlin . exon 2 691 . + . Parent=c7ce0a38-beee-4aa3-8f34-5f35f549f287;ID=68c6743e-83c4-4654-89f2-49efcb87ecbe;Name=68c6743e-83c4-4654-89f2-49efcb87ecbe +Merlin . CDS 2 691 . + 0 Parent=c7ce0a38-beee-4aa3-8f34-5f35f549f287;ID=c7ce0a38-beee-4aa3-8f34-5f35f549f287-CDS;Name=c7ce0a38-beee-4aa3-8f34-5f35f549f287-CDS +### +Merlin . gene 752 1039 . + . owner=admin@local.host;score=["-339.046618"];gene_product=;ID=ab178020-44cd-4489-9cc6-4c450ddc4b8c;seqid=["Merlin"];date_last_modified=2020-06-08;Name=Unknown;date_creation=2020-06-08 +Merlin . mRNA 752 1039 . + . owner=admin@local.host;Parent=ab178020-44cd-4489-9cc6-4c450ddc4b8c;gene_product=;ID=74f8e03d-f003-490c-9eeb-15b3b68763c0;date_last_modified=2020-06-08;Name=Unknown;date_creation=2020-06-08 +Merlin . exon 752 1039 . + . Parent=74f8e03d-f003-490c-9eeb-15b3b68763c0;ID=2d83a16e-34e2-438c-8a9c-45b0fde83025;Name=2d83a16e-34e2-438c-8a9c-45b0fde83025 +Merlin . CDS 852 860 . + 0 Parent=74f8e03d-f003-490c-9eeb-15b3b68763c0;ID=74f8e03d-f003-490c-9eeb-15b3b68763c0-CDS;Name=74f8e03d-f003-490c-9eeb-15b3b68763c0-CDS +### +Merlin . gene 1067 2011 . - . owner=admin@local.host;score=["-1229.683915"];gene_product=;ID=a4cbf7a4-2422-4db6-bc69-5be2d5ecd83d;seqid=["Merlin"];date_last_modified=2020-06-08;Name=Unknown;date_creation=2020-06-08 +Merlin . mRNA 1067 2011 . - . owner=admin@local.host;Parent=a4cbf7a4-2422-4db6-bc69-5be2d5ecd83d;gene_product=;ID=5280a04b-53f0-4ae6-ae5c-2c358e5c5a93;date_last_modified=2020-06-08;Name=Unknown;date_creation=2020-06-08 +Merlin . CDS 1704 1811 . - 0 Parent=5280a04b-53f0-4ae6-ae5c-2c358e5c5a93;ID=5280a04b-53f0-4ae6-ae5c-2c358e5c5a93-CDS;Name=5280a04b-53f0-4ae6-ae5c-2c358e5c5a93-CDS +Merlin . exon 1067 2011 . - . Parent=5280a04b-53f0-4ae6-ae5c-2c358e5c5a93;ID=3c5d9d4c-eb4a-4c4c-a6ec-670cc0bc03ce;Name=3c5d9d4c-eb4a-4c4c-a6ec-670cc0bc03ce +### +Merlin . gene 2011 3066 . - . owner=admin@local.host;score=["-1335.034872"];gene_product=;ID=c74a20c6-1241-4ffa-be05-f8538b6b6879;seqid=["Merlin"];date_last_modified=2020-06-08;Name=Unknown;date_creation=2020-06-08 +Merlin . mRNA 2011 3066 . - . owner=admin@local.host;Parent=c74a20c6-1241-4ffa-be05-f8538b6b6879;gene_product=;ID=8d6e6288-a8d4-4b81-b7fe-766119917628;date_last_modified=2020-06-08;Name=Unknown;date_creation=2020-06-08 +Merlin . CDS 2011 3066 . - 0 Parent=8d6e6288-a8d4-4b81-b7fe-766119917628;ID=8d6e6288-a8d4-4b81-b7fe-766119917628-CDS;Name=8d6e6288-a8d4-4b81-b7fe-766119917628-CDS +Merlin . exon 2011 3066 . - . Parent=8d6e6288-a8d4-4b81-b7fe-766119917628;ID=78754e10-6b1f-4e97-9c7b-a50bc2100c49;Name=78754e10-6b1f-4e97-9c7b-a50bc2100c49 +### +Merlin . gene 3066 4796 . - . owner=admin@local.host;score=["-2177.374893"];gene_product=;ID=9fc896ad-91b2-46e2-8b07-4ecfc68387d0;seqid=["Merlin"];date_last_modified=2020-06-08;Name=multiexongene;date_creation=2020-06-08 +Merlin . mRNA 3066 4796 . - . owner=admin@local.host;Parent=9fc896ad-91b2-46e2-8b07-4ecfc68387d0;gene_product=;ID=154a6d4e-dc94-4de2-9403-63aa47a01d82;date_last_modified=2020-06-08;Name=multiexongene;date_creation=2020-06-08 +Merlin . exon 4366 4796 . - . Parent=154a6d4e-dc94-4de2-9403-63aa47a01d82;ID=8fa02768-0a5f-441e-91c3-d4779d41b459;Name=8fa02768-0a5f-441e-91c3-d4779d41b459 +Merlin . CDS 4366 4796 . - 0 Parent=154a6d4e-dc94-4de2-9403-63aa47a01d82;ID=154a6d4e-dc94-4de2-9403-63aa47a01d82-CDS;Name=154a6d4e-dc94-4de2-9403-63aa47a01d82-CDS +Merlin . CDS 3066 4296 . - 1 Parent=154a6d4e-dc94-4de2-9403-63aa47a01d82;ID=154a6d4e-dc94-4de2-9403-63aa47a01d82-CDS;Name=154a6d4e-dc94-4de2-9403-63aa47a01d82-CDS +Merlin . non_canonical_three_prime_splice_site 4297 4297 . - . Parent=154a6d4e-dc94-4de2-9403-63aa47a01d82;ID=154a6d4e-dc94-4de2-9403-63aa47a01d82-non_canonical_three_prime_splice_site-4296;Name=154a6d4e-dc94-4de2-9403-63aa47a01d82-non_canonical_three_prime_splice_site-4296 +Merlin . exon 3066 4296 . - . Parent=154a6d4e-dc94-4de2-9403-63aa47a01d82;ID=41c1e328-b122-4b73-8c09-cf1d3c612e62;Name=41c1e328-b122-4b73-8c09-cf1d3c612e62 +Merlin . non_canonical_five_prime_splice_site 4364 4364 . - . Parent=154a6d4e-dc94-4de2-9403-63aa47a01d82;ID=154a6d4e-dc94-4de2-9403-63aa47a01d82-non_canonical_five_prime_splice_site-4363;Name=154a6d4e-dc94-4de2-9403-63aa47a01d82-non_canonical_five_prime_splice_site-4363 +### +Merlin . gene 5011 6066 . - . owner=admin@local.host;score=["-1335.034872"];gene_product=;ID=f5829bec-d121-4db4-a8b8-8ba050ea6b93;seqid=["Merlin"];date_last_modified=2020-06-08;Name=cds-not-under-exon;date_creation=2020-06-08 +Merlin . mRNA 5011 6066 . - . owner=admin@local.host;Parent=f5829bec-d121-4db4-a8b8-8ba050ea6b93;gene_product=;ID=ce047673-3c00-425c-862b-20fd004eca42;date_last_modified=2020-06-08;Name=cds-not-under-exon;date_creation=2020-06-08 +Merlin . exon 5011 6066 . - . Parent=ce047673-3c00-425c-862b-20fd004eca42;ID=4f0e0e07-3dbf-4e91-bc08-ee46fe52a997;Name=4f0e0e07-3dbf-4e91-bc08-ee46fe52a997 +Merlin . CDS 6061 6066 . - 0 Parent=ce047673-3c00-425c-862b-20fd004eca42;ID=ce047673-3c00-425c-862b-20fd004eca42-CDS;Name=ce047673-3c00-425c-862b-20fd004eca42-CDS +### +##FASTA +>Merlin +TCGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGA +GCATAACGCAGAGAATAAGTTGTTCTATTTCAGAAACTACGTTTCAACTTCATTAAAGCC +TCTGATCTTTGGTGAATTTGGTCGTATGTTTATGGCACTAGATGACGATACTACAATTTA +TACTGCTGAGACGCCTGATGATTATAATCGTTTCGCAAACCCAGAAGATATAATTGATAT +TGGCGCTACTCAAAAAGACTCATTTGACGATAACAATAATGATGGAACATCTATTAATAT +CGGCAAACAAGTTAATTTAGGATTCGTTATTTCCGGTGCTGAAAATGTTCGAGTTATTGT +TCCAGGTTCTTTAACTGAATATCCAGAAGAAGCGGAAGTTATTCTGCCTCGTGGTACTCT +TTTGAAGATCAATAAAATCACTACTCAAGTAGATAAACGCTCGAATAAGTTCATGGTTGA +AGGTTCAATCGTTCCGCCTTCTGAGCAAATTGATGAATCTGTTGAGATTTATGACGGTGA +TCTGTTCATGGAAACAGGTGAAGTAGTAAAACTGTCCGGATTCATGCAGTTCGTCAACGA +ATCTGCATACGATGAAGAGCAAAACCAGATGGCTGCTGAGATTCTGTCTGGATTCTTGGA +CATTGATGACATGCCACGTAAGTTCCGCTAGCCGTTTACATCCACATGGAAGTGGATTAT +AATGGCTCTACGTTAACAAGAGGAAAACAACATGAAATCAATTTTTCGTATCAACGGTGT +AGAAATTGTAGTTGAAGATGTAGTTCCTATGTCTTATGAATTCAATGAAGTTGTTTTCAA +AGAGCTTAAGAAAATTTTAGGCGATAAGAAGCTTCAAAGTACTCCAATTGGACGTTTTGG +AATGAAAGAAAACGTTGATACTTATATTGAAAGTGTAGTGACAGGGCAGTTAGAAGGTGA +ATTTTCTGTAGCAGTTCAAACTGTAGAAAATGATGAAGTTATTTTAACTTTACCAGCTTT +CGTAATTTTCCGCAAATAAAACAATGGGGAGCTATGCTCCCCATTTTTACAATCCAAGTA +TTTTCGAAGTAGAGTTTCGGGTCGAATTAATGACGTGAGACAACCCTCCAGCAGCTCCTC +CAAGTCTAGATAATCTACTTAAACTTCCATTAAGAGACATTTCACTATTAATTCCAGTTA +TAGAATTAACAGCTCTATCTTCAATCCAATCAAGAGCAGCTTGACGTCCAACAGCACCCG +TTTGCATTACTCTGTAAGCAAATGTAACATCGAAAACCGCAATTTGGTTATCTCCTTCAT +ATGTAAGCTCAGGAGCTCCACACGCAACAGGAACACAACCTGTGAACATTATCACAGTAT +GAGGTAATCCATTTCGAGCATGAAGGTTAACCTGAATGTCAGCTTCGACGTCAGTTGGTA +ATGCTCGCAATCCAGTAACCGGGTCTTGAACGGAGTTCACCCAATCTTGCATTGCACGAT +AGTTACTTGCTTCGGGATCCATTCTGAATGATATAGTTAACGGATCGAGTTCACGTCCAG +TTATTCTAATATTCGGTGAGTTATGGTTGAAATCCATTTCATGAGACAATCTGTTCTCTG +GAATTTTGACCGAATAAATCATCAATCCAGATTGCGGATAAGCCATGTTAAAGAAGTCTA +ACAAATAAGTTCCGACTTCAAATTCACCTAATAAAGACTGAACAACACGATTGCTCATTG +CTCCAATAAGATATTTCGATACACCAGACTTTCTTACCAGCTGTTGAGTACCGGCAGTGA +TAATTGAGGTGAGTCCTGATGTGAACTCACCTTGTGTTAATCCAAGCCAGTCATTATTCA +ACGGAAGGTTATTAAAGAGCATACCGCCAAATTGATCGAGTAATTGTTGAGACTTTGCTG +ACGGAGTAGTTGCAAATACACAACTAAACATATTAGTACGCTGAAAGTCTATATTACCCG +CTTGGTTTTTAAATTCATCTAAAGTTAGCATCAGAATCCTTCCGCATATACTGAAGCTCG +GTTCAATGTCAAGATTTCACGCATAGTAATTTCTAATGTGAATGTACTTGGCAGGTTTGG +AGCTATAGCTAAACCGTTAAAGTTTCCATTTGGAGTTTTATCAAAACGGATACTCTGAAT +TTGACATGGACCGAATACTTCAGCACGTCCATCGAATTTACTTGTGGTTCCAAAGTTTCT +GACGAACCACACAGTAGGGTTACTTACAACAATAACATTACTTAAGAATGAAGTTATTTT +CTCAAAAACAGTGTCATTTTTATTAGCTTCATCTGGAGTTAATGTATCAAGGAAAGTTGA +TTTATACCATTCATCTAATTGAGACTTAACTTCTTTTGCATAAGTAGACGTTCCCGTTTC +GCCATAACTATAGTAGTTAAAGTATTCATAGATCTCGATAATAGCAATAAGATCTTGTAC +TGATCGAGGAGTTAAATCCCACGTGAATACCTTCGTACGGTTATCTGCGCCGCCATACAT +TGATCGAGCAGTGTTATAGATCTGCTCGTTATGGTCAGCCATTAATCCTTGAGTCAATGA +CTCTAATCCGCCAAAGACAGCAGTAGATGCAACGTTACTTAATACCCCTGTAGCAGTACC +GCCGCCACGAGAAATAAGTGAATCTCCAACGTCATTAAATTTATGAGAAACTGATTCAAC +ATCTGATTTCGAGCGTGGAAGTAAAATATTCACTACTGGAATTTTATCAACTTTATTAGT +ATTTGTTCCAGTGATTGATTTCACTACACTATTTGCAGTACGTTTCATTTCACCTAAACG +CATGCTACGCATATCACCGGTTGTACGAGAATTCATATCATACGCAGTGAACAACAACCC +GTTCTTATAAAGATCATGAACTCGTAAAGAACCAGATGTGTCATTACCAGCTGAACGTTC +AGACGGATATTGCGCAGTTATAGTGGATTTTATTTTTGCTGATTGTGAACTTTGACCAGC +GGAGGTTTTAACTCCGCTAATTAAAGCATCAGTCTTATCATCTAATTCTCTGACTTTAAT +GCTCATTAATTAACTCCTGTTGCCCCGAATACTCCAGGAGCTGGAGTAGCCGTGACTGTT +TGAACCTGGTGAATAGTCTTACTATTATTTACGTTATTAACCTGAGTGTTAGCAACATTC +ATATCACCGGTTGATTTTTTAGATTGCTCTTTAGCATTTTCAGCTTTTTGAATATTTTGA +ACTCGTTGATTATCTTCCGAAGTAGCTGGAGCCGCAGGCTTCGGAGTGTTATCTTCTTTG +AGCTTCTGATACTTGGATTCTACTCGTTGGAATCTTTTATCGAGTTCCTTTTTAGTAGCT +GGTTGATCACTTATAGCAGAATCACTAATAGACTTTTTGGCACTGTTATATGCCTTCTCT +AAAGATTGCATATTAGTTGGATTCTCTGGATCAACATCACCAATATATTTTTCTAAACGC +TGAACAGCTGCACGAGCTTCGTTTTGTTTGATCAGTGTTTCTTCGCGTTTTTCAGGAGCC +ATTGCTTTAAGATTCTGAGTCTCTTGATCACGGTCAGATGCTTGTGTAGAATCGATTTTA +TTCTCTCTTCCTAGTACCCAATCAAATGCACGAGTTTTAAATTCGCCAGCTTTATCAATA +ATTCCAGGACCTTCTTCAATACGCTTACTCTGATATTTAGCCAAAGCTTTTTGATCATCT +TCAGACAATGAATTACCAGTGCGTTCCTGGAATCCTTCTAGTGCTGAACCACGAATAGTA +GTTGCTGCATTTTCAAAGCCAAGTGCATCGAGTATAGAAGCAGATATCTTTGAAATTCCC +AAAGACATTATTTCGCTCAGGTTGTAAATCACATCAGCTAATCCTTTGACAATAGCTACT +GCTAATCCACTCCAGTCTCCAGCTTCCCAGAACTTTTTAATATCTCCTAACATTCCAAAA +ATTGATTGAAGCAGTCCACCCCATTCACCAGCTTCAGCACTGAATTCATCGAAGTTACTC +ATGAATTTATCAGTCCAATACTTAAAATGAATACGAAGTAAATCTATGCCTAATACTAAT +GCAAATAGCATAGCAGCCATTTTAGCAGCTTCAGCTAATGCTGTTACGGTGTATTTGAAA +AGCATAGACGATATTTTATCAGTGATTGATATCGTAGCTTTAAATCCGCCCTTCGTAGTC +TTCAGCAAGTCACCTAACATATCGGTAGGTTTCTTATCATCTTTCTTTTGATCTTTCTTG +TTGTTCTCGGGTTCCTGAGGGACAGGTGGAAAGAAATCCTCATCAGGTAAATTGTTATTA +TCATCTAATGGAGGAAGTATTCTCTCAGGTAATCCAGGGGACTCTGGCTCATCAGGATCT +GGAAGCTTATCTTCTATCACTGACAGCCCAGTTGAAGCTGAGGCACCAGATTCTTGTACT +TTTTGCTCCACAGCCTGAACCTTCGACTCGAGCATCGAAGCGAGTTTACTTAATTTGTCG +CTGATTGTTGACGCTACGCCAGTAAGAGTCTTGATAGACTCAGTAGTTCGTTCAGATGCT +TCAGCAGCTAATTCAGTACCTTCAGCGACGTTATCAACTGAATCAATTAAGTTATTGCCC +TTTTCTTCAATTACTTCAGCAACTAGTTCATTAGCGCTTTGTACATCATCAAGTTTAAGG +CCGATTAATTCTAAAGAGTCTACTTGATCAGATGCAGTCGAGGCTGCATCTCTTTCACCC +TTTGAATCAGCGATAACTTTACGACGTCTCATTGTGGACATGTTTTCGCTTTTCATTCAA +ATAATCCTAGTACTTTAGCTGTTCCCCTTATATTGTGTGGACCGGGAATAGCAATAAGAT +CTGTGATTTCTTCTGCCCACTTTAAAACAAACGCAGGCATCTTAAGGAAATTAGGAACCT +CAGTGGAGTCATTCACTGATATGAAACATTCCGTGAGCATCTTATCGATAGTAGTAAATG +TTTCATATCGCTCAGGAGAGCGGAACTTAAATGTCTTTCCTTGGAACTGGAATTCAAGGC +GTTGGCAAATATAAATGTCATTGATATTGTAAGTGTATCCATCTTTAACGACACTTGATT +TTAACTTGCCATTGAACTCTAGCAAATGAATAGACACTAAATCTGATTCAGCAGCGTTTA +GCCCAGGATGAATTGAGTCCAAAAGAATTCCCATGTTCTCATCCATTGCACGAACATCTT +TAATCAACGTGTGATGCTTCAAGCCAAGCTTAGGAATAGAAATGGTTTTGCCATTGACTT +GTATTTTCTTAAGTGGTAGTATCAGATTTAAGTTCATTTTTAACCTTTATTGGTTCTACT +GTTTCCAACTTACGAGAGTTAGTAAACATATAAAGTGTAGTTATGCTTTGGTTATTACTT +ACTTCATGAATTACTTCATCAATGTAAAAATCAGCTTTGAATTGTTTCTTTGGATCAAAG +AAATTAATCTTATCTCCAGGAGTCATTTCAAAGTCTCCAACCATTTTACAAGTTGCATAA +CCGTCGTATTGTGCCATAGTCTGAAGCCTGATAGCTTCTTCATATCCATTTCGATAAGTC +ATTTCAGAATATGCACCCGAGCGAGAAACGAAGATGCTATTCTGTCCATCACCTGTTACT +ATTCTAGGAAGGTTATTATCCAAGAATGAGTGTGAATAGATTGTAGCGTTAAAAATAGGA +TCGCGTGTATGGGCATTAGCCTTCGTTAACCACTGGAAGTCATAAGCGAGATTATATTCC +AGCTCACCGACAAATTGACCTATTGTGCGTGGTTCACCGACAATAACCTTGATTGATTCT +TGGTTAATCATTGCGTCATAATCCATCATGTTCAAGCCATAGATATCTTCCCATACAAAA +ACGAATTGGTCATTATCTACTGCTAATGCAACATCTCGAACATATTTCTTGTATTCAGTT +ATGTTACTCGTCCATGGAACTCGAGGAACATATGTGTTGATTGTGTTCATCGCAGGTGCT +ATAAGAGGTTTTGATTCATAAATTGCACCGATCATCTCTTTTATTGATTCACCAGCGTCA +TTAAAGAAACAGCGGCTAAATTTAAGATTCTCTATTTCATGAACCAGTCCTAAGTTAATA +GCAATGATATTATCACCCTTAGAATCTACTGAAACACTGAAGTGTTTGCAGCCATAAATT +CGGTTCAATGTTTTCTTAGAGTTCGCATTAGCTACTGAGATTTGTATAATTTGATTTCCA +TCCATTAACGTATGAAGGTTCTTTGAATCAAAGAATTGTAAAACACCTTCATTACGTCCA +AATAAACCATCTCTCATTGTTAGTGTAGTAATTGTAGCTGCTAACTCAATGTATCTGTTA +GATTCCCATGCATCGTAATCCTGATACAACTTAATGCTCAGGTTCGGGAATCCCGGAAGT +TGTTGTATCATTTAGTGTTATCCTTCTCTATCAGAGATAATGCAATAGATCTTTCAACTG +GGATCATCTGCATTATTGACTCTAAATTGTAATGATTTTTAACCAGCATATGATTTATTT +GATAGAAGCTAAATACTTCGTCTGGATTCAGCAAAAGATTAAAGATATGCAAAAAATCAT +CGTAAATTATTTTCTTAGTTTCACAACATTTCATTCGAAGCTCAAGATGAATTGGTGTCA +TCTGCTTAATAATTGATTCCAAAGCGTTCAAATCTATAGCATCTATAACTTGAATTTGAT +TCTCTTCTGATAACTCGTTCCAAGGTATTATTTTCCCTTGGTATTTAATTGATTTCACAC +ATTCAGCTACTTGTGTAGCTTTATCATCGTAAAACTTATCCGGGTAATTGAAGTAAATCG +TAATACCAGCAACTTCGACGGTAGGCTCAGTAAGATCTTTGGTATTCAAACTGAATAAAG +TTTGCTTATTCTTTTCGCAAACTGGACAAGTAAAAGCAATAGGAATTTTAGTTTTACCGA +TTGACCCAGTGAACACCCTTAAGAATATGTACGGTCTCCATGTTGACGGAAACTCTTCGA +AGTATTCTTCGAGCAACTCATTTAAGATAATGTTTTGCTCTTCTGGTGTTTTTTGGACTA +AGTCGTTTCGGACTAACAGAAAATCTCGATAATCTGCTACTGTGAACGGTTTGAAACGAT +GCACACCATCAGGCAAAACACAACGAATAATATTAGCCATTATGTCTCCTTTATCATATT +TATAAATATTTCAATAAAGGAGTTGATATGAAATATGAATACACTTTTGAGGCTCGTATT +GGTGATGAAGTAATCCAATGCCGAGCGTTTACACTAGAAGAATATCGCGATCTCATCAAA +GCTAAAGCTGATGGGACTATCAAAGAGTATGTTCCAGAGCTGATTAAAAATTGTACTAAT +GCTCGTGGATTAAATCGTCAAGAGTCAGAGCTATTGTTAGTTCACTTATGGGCTAACAGT +TTAGGTGAGGTTAGTCACCAGAATACATGGGTGTGTTCCTGCGGACACGAAATACCAATG +TCAATTAATCTTACATTCGCTCAGATCGATGAACCAGATGATCTCTGGTATTCCCTAGGT +GGTTTCCGTATTAAGCTAAGATATCCTAATCTTTTTGAAGATTCAAATATCTCTATGATG +ATCGCATCATGTATTGAATACATTCATGTGAACGGTGAAACCATTTCAGTCGATGAACTG +AATGACAAAGAGATTGATGACCTTTACTCTGCAATAACAGAAGATGACATCGCTCGCATT +AAAAGCTTATTGCTTAAGCCTACAGTTAAACTCGCTATTCCAGTTAAATGCGATAAATGT +GGGGAAAATCATATCCACGTAATCGAGGGGCTTAAAGAATTCTTTAAGTTAATTTAATGG +CAGATATAAAAAATCTTTATTCAGATATCGATCCCGAGTTCAAAATGGACTGGGATCATG +ATGTAGCGCGGGCGCGTGGCCTACGGGCCATTAAAAACTCTCTTTTGGGTATTATAACAA +CTCGCAAAGGAAGCCGTCCATTTGACCCTAACTTCGGGTGCTCATTAACAGATCAGTTGT +TTGAAAATATGACGCCATTGACCGCTGATACTGTAAAGCGTAATATCGAGTCTTCAGTTC +GTGCTTATGAACCACGCATTCGATATTTAGCAGTTAATGTAACTCCAGTGTATGATGATT +ATACGCTTATTGTAGAAGTGCAATTCAGTGTAATTGATAACCCAGATGATATAGAACAGA +TTAAGCTACAACTTGCTTCAAGCAATCGATGATCGCTTACACACGCCGTTGTTATAATGG +TTCTAGTTCCTTTCAATCAATAACAACCAGAGTTACTAAAGAACAAGAAAGAGGGTTAAA +TGAAACTTGAAGATCTACAAGAAGAGTTGAAAAACGATCTACAATTAGATTCAACTAAAT +TACAATATGAAGCTGCAAATAACCCAGTGCTCTATGGCAAATGGTTGAATAAGCACTCGA +GTATCCGCAAGGAGATGCTTCGCATCGAGGCTCAGAAGAAAACGTCTTTAAAGCGTAAGT +TGGACTACTACACAGGCCGTGGTGACGGTGATGAATTTAGCATGGACCGTTATGAAAAGT +CTGAAATGAAAACTGTTTTAAGCGCTGATCAAGAAGTACTTAAGTTAGATACATCTTTAC +AATATTGGGCTATTCTGTTGGAATTTTGTTCTGGTGCGATGGATGCTATTAAATCTCGTG +GTTTCGGGATTAAGCATGTAATTGAAATGAGACAATTTGAGGCAGGAAAGTAGTATAAAT +AAGATAGTAAACTAGAGGAGACAACCATGTCTGAGATCTGTACTGTATGTAAGCAACCGA +TTGATTCGGCATTGGTTGTTCATACTTCAAGTGGTCCGGTTCACCCGGGTCCATGCTATA +ATTATATCGTTGAATTGCCGGTATCTGAAAATACACAAGAGCATTTAAACGAAACCGAAC +TTTTGCTTTAGTCTAGTGTTGATAGCCAACTTGTTGGTTTTTGCCCCTTCCTTTCGGTTG +GGGCCTTTTTGTATTAGAAGTCTTCTTCCGACTCAGATTCATCTTCTGATTCAAATTCCA +AACGCTTACCGGCCAACGCATCGCGGATAGAAATATCATCGGTGTCCTTCAATTCTGTAT +CTTTAACACGCTTTTTATAATAAGCCTCGAGTTCTTCCAGACCATCAAGAGTCTGGCATT +GAGCGATCTTACTCATAAACGTATCAATAGATGCTTCATAAAGAAATTCTTTAAAATCCA +TAATTCCTCACAGATTGATTGTTTTCATGATGTAGTTAAATTTTTCGTCTGCATATCTTT +GAATGCGTTCTAAAGCGTGTTTTAAAGCATAGTTTAAATGCACGTACTTCTTTTTGGCAT +TCGCTGATTTAGGCTTAACACCCATGTCATCCACGATATCCCAAACTTGTGCCAATGATT +TAGAGTCATGCTTACGAAGAACACGACCAATACTCTGGAGCACTGTAACTTTTGATTTAA +CTGGATGAGCGAAAATAACATGGTGTAAGTTTTTGACTGAAATACCAGTAGAGAATACTC +CATAAGAAGCAACTACTACTAATCCGGTATCTTTTTCAGCCATAGCTTTAAGAGCGTTAC +GAGTGTCAGTGTCAATTTCACCGTTAATGAAATGAACATTTTCATGACCGAGCTCTTTAA +CCATCGCATAAAGCTCTTTACCGTGTTTAGCATTTTTAAACATCAAGAAGACGTTTTCAT +TTTTCTTTGCTAATTTAACACCAAGAGCTGCAATCCATTGGTTTCTTCGTTTATATCCTG +TGATAGCTGCAATTTCTTCTTGATAGGTTTTACCCTTCATTTTAACTGTAGCTGCATCTG +GATAACGAAGGAAAATACAGTTGATCTTTAATTCAGTTACTTGGCCATCTTCCATTAGCT +GAGCAGTTGAAACAGGCTTAAAGATTTCACCGAACATTCCAACATATTGCATGAGGTTGG +CTTTGCCATCTTTCAGGGAACCAGAAAGACCGAATTTAAACATGCAATTATTTAAACCGG +AAATGATGGTTGAAATACTTTTACCTGTCGCGAGGTGACATTCATCGTTCATCATTAAAC +CAAATTGGCTGAACCATTCTTTTGGTTGTTTAACTGCTGTCTGCCAAGTACTAACATAGA +TCATTGCGTTCGAATCACGAGCGGTACCGCCTCGAATTCCAAGACAATGTTTCTTACCAA +ATAAGCGATAATCACAGAAGTCATTGATCATCTGGTCTACTAGTGCAGTAGTCGGAACGA +TGATTAAAATTTTACCCTCATAGTTCTCAACATAATAACGAGCTAAAAGAGCCTGAATAA +GAGATTTACCAGCAGAAGTCGGAAGGTTCAGGATTCGTCTGCGGTTCACTAGCCCTTCAT +ATACTGCATCCTTTTGATACCAATGCGGTTCAATTTGAGTGTTACCAGAGTATATCTTGA +GCTTCCCTAACCAACTGTCAAAGTCCTTACGGGAGAGTTCTTCTTTATCAAATATTTTAG +GGTCAATCCATGCTTTGTAACCCATATTGTCGCAGAACTTTTTAATTTGTCCAACTAAAC +CAAAAGGCAGAAGACGGTTATAATCCAGAAGACGAATACGTCCATCCCATTGTCCATATT +TGAATTTGGGATTGAATCGATAACCATCTGCTTCAAAGCTAAAAAAGTCTCTTAGTTCAT +GGAACACGGATTCATCGCATTCGATATGAACATGACTAAAGTCGTAAAAGTTTACTCGGA +TATCCATTTTGATTCCTTAGTTATAAATACAATCATATTTATACACAAGAGGCGATACCA +TGCTAGACATGCAATATATTGAAGAAATCCGCGTACTTGATAAGAAAGAAGCTAAAGACA +AGCTCGACGAGTATGCATCTCAGTTTGGAATCAAACTGAAGAAAACCAAGTCATTTGAAA +ATATGCTGGCTGATCTTGAAACAGAATTTAAAGCTCTATCTGATGAACCATTACCTGAAG +ATAACGTAGGTATTTCAATTTCTGATCTTATTGACGATGAGAAAGAAGAACTTCCTGATT +TGGTTCAATTAGAAGATTCTCCTGAAGCTGTTATTACTGTTATAGAAGTAGATAACATTC +AGGAAGATGCCATCTGTATTCCAGAAACTGCAACCGTTGAAGAAATATCTAAAATAATTG +AAGATAGCGCAGTTGTAATTGAATCAGAAAAGTTTGAATTACCTGAAGGATTTTCTCCTC +ATTTTGAACTAATTGGAAAGGCTCCAGGATATTGTACTCTTCCTTGGTGGATTTATGAGT +GGATTTCTAAAAATCCAGATTGGAAAGAAAAGCCTACATCATTTGAGCATGCAAGTGCCC +ATCAAACTCTGCTTAGTTTGATTTATTATATTAATCGCGACGGATCAGTTATGGTACGCG +AAACCCGCAATTCATCATTCGTTAAAATTAAATAAGAGGGTTAATCCCTCTTTAACTTAG +GAAACTATATGGCTACATCAATTGCATTATCGCCAACAAACCCCACAATTAAAATTGGAG +ATTCACAACAATTCACCGCAACTTTAACAGGAGCTCCAGAAGGTTCCACTACTGAATATA +AATGGACCGTAGATAATATTCAGCAAAGCTCTACTAGTGCAACTATGAATTATGTTGCAT +CGACACCAGGTAATAAAGTTATCAAGGTAGAATCTACCACTAAAGTAGACTCTCAACCTG +ATGACGTTCAAAGCGCGACTACATCTTTGACAGTCAAAGATGTTATGACTTTGAATGTTA +CGATATCCGCTCAGTCACAAACTATAAAAGTTGGAGAAAGTTATACGGCAGCGTGTGGAG +TAACAGGACAACCATCTGGATCAACTATTGCATATAAATGGTCAACTGGAGAAACCACTG +AAACAGTTTCTAAAGTTGCAACGCAAGAAGGCAATATATCACTGACTTGTGAAGTTACCG +TAACTGCTACAGGGTTTGAAGATGCTGTTAAAACCTCAAATGTTCTTAGCATTACAGTTA +CAGCAGCGGATCCAGTTGTTCCGCCTGAATGTCCATTGATTTATGTTCATCCTCTTCCAT +GGAGAAGCTCAGCTTATATTTGGGCAGGTTGGTGGGTTATGGACGCTATCCAGCGTTTAA +CAATTGAAGGAAAAGATTGGAAAACTGCAACTAAAGAAGATACTCCATATTATTGTCATT +TAGCAGTTCTTGCTAAAATGATTAATGACTATCCAGAAGTGGATGTTCAAGAATCACGCA +ATGGAAGAATTGTTCATCGTACAGCTTTAGAGGCTGGTATTATTTATTAATAAAAAGGGC +TCCCTTGGGAGCCCTTTTTTGCTTTTATATTTTATGATAGAATCACCACACAAACGAGGT +GAACATGAAAACTGAAATTAAAGTGCACATGATGCACGAACGTGGTGAAAGCTTTAAAGA +TATTGCAAAAGCAATAGGTGGAATGTCTGCATATGATGCAGCTTTGATGTATACGAAGGT +TGAATCTCTAAGAGAGAAAGCGAAGAACAAAGAGAAAATTGTTTATCGTAAACGCTTATC +AAATGTTGGTGTTAAAATTCGTCATAAGAAACTCGTCAATAAAATGAAGGAATTAGTATG +ACAAACTTTTATGAACAGATAACAGAATCTCAACTATTTGTAACTGATATGCTAGATCAT +ATGATGTATGAATCTAAATTCAGTCCTGCTGCGCATGGTGTAAACAAATGGCTTCCAGTA +AACGAATTCATCAAACGTTTATCTCCATTTGATGCTAAATCGCAAAATTTAGCAGATAAA +AATGCGTGGGTAATTATTCGGCAAGTATTAGCTACCCGCTTTGCGGTAGAAATTGATCAT +ATTGATTCAGGAATTCCACTCATTATTGAAGTCGGCGATAAAAACCAATTTGAAATATAC +ATCACTACATGGGGATTAACGAAAGCTCGTGTAGTTCCAAGTGATTTGTAATATGCTTAA +AGTTTAAGCAGTGTATAATGGTTTCAAGGATTCTCTACCAGATAGATTACGTTCCAGAAG +GAAAGGATAGACCTTGACTATGTATTCTTATTTATTATGGTGAAATATGACTCGAGTAAC +AACTTTGGCCAGTCACTATGGCCAAATAGCTGATAATGTAACGTATAATCTTTTCAGTAA +AATAAAAGAAGAAATTGAAAAAGAATTTAATAAAAACGCTGAAGTTGGGTGTCGTAGCTT +CATTTGGTATCCAAGCCCAAAAGCTGGCATAATCAAAGAAGAAATTATTAAATGGCTGCA +AGATGAAGGTTGCGCAGTTGTTTGGAACTATGACCAGAAAGATGGTAATTGGGTTGAAAT +CGCTTACTAAGGAATAATTATGTTTGAGAAATATAGCACGCTAGAAAACCACTACAACAA +TAAATTCATTGAACGTATCCGTAGTGCTGGATTTGATTTGACAGAAACATGGGTAGCTCG +TGAAAAGATTCATGGTACTAACTTTTCTATTATCATCACCAAAGACACAGTAACGTGTGC +GAAGCGCACCGGACCTATTCTTGAAGCTGAAGACTTCTTTGGTTACGAGATTATTCTTAA +GAAGTATGATAAGTCTATTAAAGCTCTCCAGGACACAATGAAGAATATGACCACAGAATC +TTATCAGTTATTTGGTGAGTTCGCGGGTGGTGGTATTCAGAAAGGTGTTAACTATGGTGA +AAAAGACTTTTATGTCTTTGACTGCCTGGTCAAAACTCCAGGTGGAATTGTAGAATATTC +TGATGATTACATTCTAACAGCATTTTGTAACGTGTTCGGATTTAAAATGGCTCCGTTGCT +GGGTCGTGGTAAGTTTGACGACCTTATTCAGATGTCCAATATGCTCGATGTTGTTGTTAA +TGACTATAATAAGCTAGCGGAAGCTGATTTGGAAGCTGCTAACCTAAAAGTATGGCCGGT +TGTTGTATCAGAAGATAATATTGCAGAAGGTTATGTTCTGAAGCCTTGTTATCCGAAGTT +CTTTAATAATGGTGCTCGTGTAGCGATTAAGTGTAAGAACTCCAAGTTCAGTGAAAAATC +TAAATCTGATAAGCTGATTAAAGCGAAAGTGGAATTAACTGAAGCTGATAAGAAATGCTT +GTCTGCTTTCTCTGAGTATGTTACTATCAACCGTGTCAATAACGTTATTTCTAAGATTGG +CACAGTAACAACTAAAGATTTTGGTCGAGTACTTGGTCTGACGATGAAAGATATCTTGGA +AGAAGCAGCCCGTGAAGAAGTTGTATTGACTTCTGCTGATAATCCCGATATCGTCAAGAA +AGAACTAACTCGTATCCTTCAAGAAACTTTACGTCCAGCATGGATCGAATTAATAAGTTG +AGATTTGCATTAATAGGGTCCAGGGAAACTCCGAGACGAGTCCTGGATTTAATGAGTCTT +ATGGGGTTGGCCTTTTCGGAGGCTGGCCATTTTTCATATTCAGGTGGAGCACCTGCCGCA +GATGAAGCATGGTTAAGTAGATATGATAGAGCGAATTCTCTTAGGATTATTCCTTACAAC +GGCTTTAATGGGCTTGTATCTGGTACTGGGGTTGCCACCTGGGAATCTATGAGTAATGAA +GCTCGAATAAAAAGTTACATAAAAGCTAAGCAAGTATTCCCGGATTTAGACAATCAACGA +GACATCGTAAAGACTTTATTTTGCCGTAATGCTATGCAGGTTTTAGGTGAAGATTGTATG +TCACCCGTAGATAAAGTTTATTTTTGGGCAACAGTAAGAAATGGAGAAGAAGCCGGTGGA +ACTCGTATCGCTGTGCGAATAGCTAGAGCCCATGGAATAGAGTGTATTAACTTGAATGAT +AAAAGAGTGTTTGCTGATCTTCAAGAAGAATATGCACCAAAGTTTGACATCTTCTCTCTA +TAAACAACAAAAGGGCCTTTCGGCCCTTTATCATTCTTCAATGATGATTTTTGGTAACTT +AACACCAAGAAGAACAGACAAATCAGAACGTCCTGCCATTTTGTCCATATCTCCCCCATC +AATAACTCGAGCTTCTTTATCGTCTTTAGCTACAGTATATGGGTTTGCTGAAAGTGCATA +ACGTACCAGCAAGGCGATAGATGGTTGCAAACTTTCAGGGTCTGTTATGACCTTGAATGC +TCCTACATGCTCTGGGTCATCCAGATCGGCTCCTTCAGTGTATGGAGCGTAGAAAATAGA +ACCAATCAGCTCTTTCTCCCCTAACTTCTCGACCACCCCTACGATCACGTAATCCAATGG +ACTGTTAGTATCACAGTACAAAGGCAATCCATTAGCTAAGAATCCGTAAGCATTTTGTGA +CAAATACTGATCATCTTCAGGTTTATGCTTCAACCAACCACTTGCAGCCAGAACAGCTGC +AGCACGAGTTGACGCTACACAGAATGATGCGGTATAAGTTGACTCACGCTGAATATGAGA +AACCATTTCACAAACCATTCGATACAGAGAGCGACCGGCTTCTGGAGCTGAAGCATAACT +CAAATCGATAAATCCAGTATCTGTGATGCCTTCTACTTTATAACGTTTAGAAACAGTAAT +CAAAGACTGCAGAATATCTTTGTTGATTTCATCAGCCATTTCAGTAGCTAGCAGATCTTC +AATAAAGCTTGGTGCATCAAAGCCGTTGGCTTCTAAGTCTTGAGCAAGTTCAACTGTGAC +GCTAGTTTTTAATTTACGAGACTTAACATGAGTCTGCCATTTGTTGACCAAGAAACGTGC +TTCAGCGATTTCAGTATCTTCACCACCTTCAAACTTTTCAGTGCGAGCAGCATCAGAGAA +AATACGAACTGTTAAAAGAACTACAGCAATTTGAGTAGCCAGCTCCATATCAGTTTCGGT +AATAGAAGCAAATGGATTATCTACCAGAGACTTATAAACGATTTTATTCAACTGAAACAG +ATCGCCTTTCATCAGCGAGCCTTTATTAGCTGCTGTTACTTCGGGAATAGTTTTACGATC +GACAAATCCAGCTTCACCTGCGTATGTAGCACCAGTACGCCATGTGAATTCATTATCTTG +GTTTAGATATTTGATTCCATAAAATGCTGCCATTGGTTGAGTAGTACGTTGAGTAGCAAC +AATATCAGAATAGATTAATTTAGTGGTAGCGCGAGTCAAGGCAACGAGATTTGGGCGACC +AATTTGGTTGCTCGTTGAAATAGTTGATTCGCGCAGGAGTTCGTTGATTTTAGCCATCGC +GCTTTTTCCTTCGTTAGAATATATGAATATTTATTACGTCCAGAAACAACAAAAGGAGCC +CGAAGGCTCCTTATAGCATTAAATGCCTTTAACGTAAACGCGACGGAAGTAAGCGTTCTT +CGAGAGAGTTCAGCAGAGACGGCATACCAGATACGATACGGCCTTTAGGCTGTTGAGCCT +GAGAATCAGCGAACGGGTTAATACCAATACCGTAACGGGTTTTGAATCCCATGACCGGTT +GGAAGTTCTTCGGATCGGAACCACGCAGCGGGGTCAGCGCAACATAAGGAGCGTAGTAAA +TACCAGCATCCATTTCGTTGGCACCTTTGTAACCGATGGTGAAGTAATCTTGGCGAGCAT +ACTGGTCGATATAGACACGGTACTTACCACCCAGAACACCAGCGAAAACAGCTTTAGTGG +TGTCGGTTTCGAAACCACGACCCAGACCTTGAGCAGCCGGAGAAACGTTAACATCGACAG +CAGCCAGAACGTTAACAACGTTACGAGATGCGATCAGGAAGTTACCAGCACCACGACCGG +TCTGACGAGCGATTTCAGCAGCTTCTTTGTCGATCTGGAACAGCAGAGCTTTGAAGCTTT +CACCTGCCCAACGAGCACCGCGGATATCAATCGGATCCTGGAAGTCAAATACACCAGCTT +TAGCACCAGTAGTCAGAGTCATACCAGTTTTACCAACCTGTGCAGAGTAGTTGATCCAGT +CAACAACTTCACGGTTGATTTCCAGCATAATTTCTGTTGCCAGAATACCGGACAGCTCAG +CATCAGCATCCATACCGTGAACAGCACGAAGGTCTTGCGCCAGTTCGATAGAATAAGCAG +CTTTCAGCTGACGAGATTTAGCTTCGATAACTTGTTTATCGATACGGAATCCCATTTCAT +TCCAAGGGTTATCCTGGGAACCATTAAATTGTTCTTGCAGTTCTGCAACAGAGGTAGCCA +TACCTTCAGCAATTTCAGCCAGTTGACCAGCTTCGATCAGCTTAGTTACTTCAGCATCCA +GTTTACCAGCATCGGTAGCACCAGCATCTGGGGTTACAGCAACTACGGCCTGGAGGTGAG +CACGTCCGGTAGCAACGAAGTCGTGAACTACGATAGCACCTTCAGCAATAGCAACACCAG +CAGTCAGTTTGTCAAATTTCTCGGCAGCACCCTGACCAGAATACATTGCGTCCGGAGAGT +ACATCGGGTGGAATGCTTCTTTAGCACCAGCAGCCAGAGGATCGGTACCGTAAACTGCGC +GCAGAGCGAATACCTGACCAGTAGGAGTACTCATCGGCTGAACGCCACAGATATCAAACG +CGATCAGGTTAGGGATTGCACGACGTACCATACCCATAACAGCCGGTCCAATCTGAGTTA +CAGCACCGGAAGTTTGACCAGCAGCGATATTCTGAGCATCGTAACCATGGTCACCGCCGA +TTTCAGCTTCAGTCAAGAAATCACCGAATGCCTGAGCGATTTTTTCATCGCGATATTCTG +GAGAAACGGCGAAGTCTGCTTCCTGGTTTTCAAGAATTTTTGCAATCAGAGCCTTTTTAG +AAGCTCCAACAATTTCCGGCAGTTCTTCGTTTTCCAGCAGCGGCTGCCATTTTTCTACGA +GTTTGTTCTTTTTCATGTGTTGTATAACCTTGTTAAATTAAGAGAGACGTGTTGCACCAG +CAATGTAAGCGTTCATCATAGAGCTTGTCTGATTTGCTGGATTCGGTTTTTCTTCAACCG +CTTCAGTTACGAAATTAAGGCCGCTGGCCTCAGGGTCAACAGTATTTATACTTTCATTTA +CTGCCGGTTTTTCAGCAGAACCTTTAACCATTTCTACGATAGCATTCAGTTTTCCGCCAA +ATTCATCAGAATACGACATACCTTCAATCAGAGATTCGACTTTTTCTTTTTGAGATTCAG +TCAATTCGCGAACAGCTTCATTTACTGCAACTTCGCGCTGAACATAATTGATGTATTCGT +TCTGCTTTGTTACTTCTTCGAACAGTTTAGCCGTAGCTTCTTTCTGTTCTGCCAGTTCTT +CTTCCATTTCAGCAACAACATCTACTGCTTCTTCTGGAATAACAACGTTATGTTCTACGA +ACAGTTCTTTCATTCCACCAAGCATAGATTCGAACAGATCGGCTTTAATACCGCGATCTA +CTGCCAGCTGATTTTCAGCCATCCATTTAGTTGCCAGGTGATCGAAGAATTTAGCAGCAG +CTTCGGTGATTTTCTTTTCAGCTTTTTCTTCTGCTTCTTCTTCTTTTTCTTTTACCTTTT +CTTCCGCTTTTTCAGCGATAGCTTCGATACGTTTTTCTGCCAGCGTCGCGGCACCTTTTT +TAACTGCGGCTTCGAATACAGTGCTGAAGTTAGCTTTTACTTCCGGAGAAAGTTCAACTG +ATTCGAAAACGCTGTCCAGTTCTACCGCGATGTCCAGATTAGTGGATTCAGCGATTAGTT +CTTGTTTCAGCATTTTGATGTTCCTGTTGTTAAGTTACATTATTATTTATAATGCTTTTA +ATGACTCAGTAAGAGCCTTAAATGCTTCATCAGCACTTTTCTTGGCAACCGCTGGTGCCG +CTTCATGAGACTCACTAATTTGTTTAGGAGTAACCCATGCATCTGGAGCGGAAGGTCCCC +ATACTGCGTCAACACCGACAGTAAGTTTAAACCCTTCGTTCACAATCTTGTAGCCTTTAT +TAGTCTCAGTCAGAGAACCAAGTCCTCGAGATGAAACTCCTGGAATCCAACCAGCACGGA +TATTGGCAGCTAGTTTATCGCCTGGGCCATGATCGCCTTCGATAATGCGTGCTCGTCCGT +ATACGTCATTTCCTTTCCACCACATATCTTCAATGATAATGGCAGCTTGCATCGGGTCAA +CGTTTGCACGAGGTGGATGGTTTAATTCTCCTAATGCTTGCTTAGTAACAACTTGTTCTT +TTATATAGTTGGAAACCGCTTTTTCAAGAATACGCTTCGGATAGAGACGTTTATTTCGGT +TAACAACTTCAGCTTGCATAAAAATTCCTTCGATGTAAAGACCTGGAGCTAATCCAGAAT +CTGTTCCATCATGGGATTCAAGCATTGGAACTCCGTCGATACATTCGCCCGGTTGACCCC +AATGCTCAATAAGTAATTGGGGTTCATTCATCAGCTTAATCCTAGTGCTTCGCGACGTTT +CATCGCTTTCTTACGCTTACGAAGTCCACGAGCTTGGCCGCTTGGGTTAGCGCGTTTTGA +TTTGGTCGCTTTACGAGCGATTGAACGACGTTTAGCTTTAGACAATCCGGTAGTTTGAAA +TGCGTTACGAGCCCGAGTCTTACGATCTTTGGTGCGGGTAATTTCACCGCGACTTGAAAC +ATGCTTAACGATAAATTCGTCGAGCTGCATTTCTTCATTAATGGAACCAAGTGCAACCGC +GAGGTCGGTCTCAGTTTCCATCATGTTTTCTACAATTGTATTTATATCGTCTTTGCTTAA +TGCGCCGGATAATGCATCGTAACGACCCTGAGCTTCTGGTAAAAGTAATTCTACGCTTTC +AATTACTAATTCATGGTTATCAGGGATCAGAAACATTATTCATCATCCTCTTTTTCGTCT +TCGTCATCTTCATGATCTTCATCTTTTTCTTCTTCAGGCTCTTCGCCTTCGATCATGACT +GAACGTGCAATTTCGATTTTGCGCTTTTCTACTAAATCAACGATTCGCGGAGCCATGGCT +GCTTCAAACATTTTAGTTGCTTGAACGAGGTCGTTGGATAAGCAGGCGGAAATGAATTCA +TTTTCCATTAGAAATCCTCTTGATTTTCTGGGTCTTGGTAACGAGCCTCTTTAGACTCTA +ATTCAATTTGCTTAGCCTCTTTATCAATTTCATCATCGTTCATATGAAGAATATCTTTCA +TAGCTGTCTGATGAGAAATGTACTTACCAATAAATGGTTCGGCCATTTGAAGCATATTGA +TTCTGCGTTCCATGATTTCAGCATCTTTAAGTTCTGTAAAATATGAATCGCGGTGGAATG +AAATCTTAATATTATTTATTTCATCGTTCCACTCATCTTCTGTGATAACGCCTTTAAGAA +CGAGGTTCGTTTTAAGCGGATCTAAGAAAATTTCTTCAAATTTGTGCTGCAATCCACGGA +TGAATTTGGCAAAAGTTAATTCGTCGCGGGTAATTGAAGTACCGGCATCAAATTGAACTC +CACCTTGTTGATCATTTGGAATACGAGACAGAGGAACACGAAGTGCCATATAAAGAGCAT +TTCTGAACCAACGGACGTCATCCATATCACTCATACCCTGCATACCAGGAAGAGTATCAA +TTTCTGTTACTGCTTTACCGTCACGACGTTGTAACCAATAGTCTTCTGTCATTGACATAT +TATGTTGCTGGTTTTTAATCTTACCAGTCGTGGCATCATATACGACACGGTTTTTCATCG +TGTTCATGATATTCTGCATATGTTGTGCAGCCTTACGTGAAGGCATATTCCCTGTATCGA +TATAAAATACTCGACGATCAGGAGCACGGGTAATACGGTAGATAACTAAAGCATCTTCCA +TTAATTTTAGTTGGTTTGCAGGTTTAACAGCGCGGTGCAAATAGCCAACGATGTTTTGAC +CTGTACAATCCACTAATCCGGAATGAGCGTATACTACTGCTGCTTTAGGGATTTTAATTT +TAGTTCCAGCGTCATATATTCTGCCGTCACATTGATAAGATTCATGACCAGTATCGTAGA +TAAAATATTCTTTATATCCTTTTACGATCTTGACACCTTTTTCCATTTCTGTGACTATTT +CACGAACGTATTGAATATTGCGAGGATCTAAGCGACGAAGTTCTTTAATACCTTCTTTCG +GTTTATGCGGATTAATTATTTTGTGAAAGAAAATACGAGAATCGACATACCAACGCTGGA +AGTGGTCTGCTCCTTTACGTTGGAAATTTAATGTATTCAGCACTCCGTCGAATTCTTCTA +GCATTCGATCTTTGATTTTTTGGCTGAATTCGGTAGCATCTAAATCTAACGCTACTACAT +CATGGTCATCTTCATAAACAATTGAGTCCATCACGATTTCTTGAACCGCGTTATCGACTT +CATAGTTGTTCATCAGGTTACGATAAGTGTTGATTAATTCCCTGGTTGTTTTCATGCCAG +GTTCATGATTGCCCAACATTTTCTGGAAGAAGCCACTAGAAGCTTCATTCACAGACGATT +CAATTTCGTTAGCGCCGTCATCGAATTTAGGGGCGGTTATCGACTCTAAATCGTTATTGA +GTTGTTCTTTATAATTCCGTTCGTCTTCCTTTGCCCAAGGAGCGAACAGACTTAAAATGT +CGTAATTCATTAGAGTCTCCGTGAAAGGGTAATAAGCATATACTTATTTATATCTTAAAA +CTGGCGGATTTCTCCGCCAATTAATTACAACCACCAGTCCATTGCAAACGTAACTTCGAA +GGTTTCTACCTCGTTATTAGAATCCCAATCCATCTGAACTTCACCAACGTTAGTCGGCCA +CAATCCAGTGATTGTTACTTCTTTAGTTACAGTTTTGCCATCACGGTGATATTGACGCAC +AATTGCAGTCTTTTTATACTCTGCTGGAGTACCACCAGTAATTTCGTTGCCTTGGCCATG +ACAAATACCTTGCCAATCAACGATTTGCTGACGAGTTGTATGTGCGTCATCGTTATAAAT +GGTGACAGTCCAGTCATCAAACGTACGGTCGCCCGCAAGGTTAATTTTACGGTTCATATA +ACCGACTGGAACTTTTTCTACGATACCTGCTGGCATAGGAGCAGCTTTACATTTAAAACT +GAAATTTCTGCCGAGGTACGGAATTTCTACTTCAAACAAGTTAGGACGAGCTAAGTCGCC +TGACTCAAATGCACGAGTCATGTCATCAAGAAACATAATAACCTCTATGTGTTTATTTAT +ATGGCTCCAGATTCACCAGAGCCATCGAGACGAATTAAATTGTGTGATCTGTATATATTT +ATGGACCCTCGTTAGAAGGTCCCTAAGTTAATTCTATGATCACTGAGGTCCGATCAGCTC +GTCGAAGTCGGCGCCTGTTGCAGTTGCAACGAAGTTCAGAGTGATGTAGTTAATGCTTCT +TGCCGGCTTGATGTAGAACGAAGCAACAAATTCGTTACGATCGATAACTGCCGGGGTGTT +ATTCGTAGTATCACAAACAACACGGAAGTCATACATTCCACCCAATGCCTTAATACCTTG +GAGATATTGGCTAGTTTCCATACGGAAGCTTGAACGAGTGAAGTTATCGTTCATTTCAAA +CAGACGGTATTTAGAAGCATTACCGATGTTTGACTTCAACATGTTAAACAGACGACGAAC +GTTAACACGGTCAAATGGAGTAGGAACTTTAGTAGCAGTTTTATCGCCAAACAGAACGAA +TCCTTCACCACCGGTTCCAGTCACTGGGTTAATAGCTTCTTGGTACAGACGATCACGCTG +TGACTGACGCGCTTCGATTGCCAACTTAATTACGTTCAGAATCTGACCGCGATTATAACC +AGCTGGAGACATCCAAGGTTGGGCAATATTATCAGTACGAGCACACAGGCCTGCAATATC +AGCCGCTAATGGAACCCAACGGTTGACATCATTATATTTGTCGTATTGATATTTGTAGTT +ACCATCAATAAATGAATAAGTACTAGAAATATTCATATTGTTGTCGGTATAAGTACCGGT +CGCGGTACGCCAATCAACCAAGTTATCAATTGCACGAGTCAACGGAATATTGACAATAGT +TTCACGCGGAGGAGAAATAAGAACCAGACAGTCCTGACGCTCATCACCGATTGCAGAAAC +ATGTTTCTGAACTGTAGAAGCAAATTCTAAACCTTCACCAGCACATGCACCAGCAATAAA +CAGTTGAACCGCGATAGATTCACGATCAGCGAACAGATCCCATGCTTGCATTAAATCACC +AGCAGTAACGCTAGAGTTTGCAGACACTCCGCCATTTAATTTGACAATACCAGAGAAGCC +TTTAGGCCAACCAACTGATGTGCCAAAAATGTAGTTACTTGAACCTTTAGCAAAATAATC +ATCCATAAAGATGTTATTGCTATATACGTCTTTTTCACCAGGCTTAGTAGACAAAACTAC +AGATTCTACAATTGCTCCATCACGACGGACAATAATTGCATACTGATCATCGGTCTGAGG +ACCATAACCAAACACAGCCCGCGCGGTTGATGCACGAGTACCGCCTGCAGGATAAATCGG +AAGTTCAGCCGATGCTCCTTTTTCATACTGGGATTTAGATACGATTTCAATTTCCAGTTG +AGATCCAATTTCACCTGGATACAGAGCAACAACCCCTGGAAGTCCGTATTTTTTCAAATT +GGCTTGAAATTCAGTTGCAGTCATAGCTGCGTCAGCATTTTCGGGTTCAGTCAACAGAAT +ACCAGAATCTGTGATAATTTTACCTACAGATATTGCACCAGCTACGCCAGATGATGCTGA +AGTTACTTCAGCAGTCCAGTTTGGACCAAGATCAGGATATTGGTTAGTACTTTTCGCGTA +AGCAACTATTTTACCAGTAGGAATATAAATTGCTTTAATTTGCCCTTGAGTATCAACTTC +AGTAACTTTACCTGCGGTTTCTACAACAGTCTGATTGTATTTAACACGAATAGTATCTCC +AACTGCATAGTTACTACCAGCAGCTGAAATAGTAGTTTCGATGTTTTCTGCGATAGGAGA +AGAGTTTTTAGCTACATCTCTATTAACAGCTCGAACGATACGAAGATCGTTACCATATTG +CAAGAAGTTCATTGCTGACATAAAGTAATCAGCAGTTTCGCTATTCGGAGTACCAAACAA +ATCAACGAGTTCAACTTCGTTGGTGACCTGAATCATCTGATAAGCAGGACCCCATTGAAA +TTTACCAGCGATTGCAGCACGCCCGGTAGCATTATTAACAATAGTGCTTTGTACACTAGT +TTCTTTGAGCTCAACGCCCGGCGACAATAAAGCCATTTTTAAATCCTCTACTTGTATGCT +TTAATATATTTATACAAATGACACGCCACGATCAAATGACTCGGTGTTGTATTCAGCACC +ATTAACTGCATCCAGAATTACTACTGGAGCGTAATCATCGTTCATATCTTCCAATTCGCG +ACCAAATACTTCGGCTGCTAAACGCATATCGTCTTTTTCAGCATATTCCACGAATTTGGA +TTGGGTCGAAAGCCAAGAGAAAATAACGAGTGACATAACCAAGTCATCATGATACCCTTC +TTCTGCCGCCCAAGATACACCTTTTTCACTAAACGTTCTAAATTCTTGAATAGTGGCTTT +ATGGTGTAGAACTAATTTGTCTTTTTCAATTAAATCTTTTAAAGCTGAACAACCAACTGC +TTTAGTTCTCTTCGTCTGTTTCATACCAAGATCAACAAACGAGTCGCAAATTACACCCTC +ATATTCGAGGTCCATGTAAAGAGATTTAGCAACTGAAACTCCAGTAGAGTTCAACTCAAT +ATAAATTGGAGCTTCGTTATATTCTATCAAGTATCTCATTACGATATCAGGAAGAATAAG +GTGAGATGTTGTGTTATTATGAAACACAGCAACTTGTTCCCAGGTTGAATTCGTGATATC +GATTATATGCATTGCATGATAATCTTGACCACGACCTTCAGAACAGTCCAGCGCCGCTAT +ATATTTATGCCCTTCTTCTGGTTCTTTGAATTTGGTAAATCCATTTGAATCTGGAGTAAC +TTCAATCCAGTCCATATTCGCCAATTTCATACCGGAAATTAATGTTCCTGATGTTCCATG +GAATTCTGCACAGTGTTCTTGCTTAAATTGCTCAAGAGATGAAGCACTAATAGTTTGAGA +CGACCATTGCCATCCATCATCAAACATATCATTATCATCATACAGACGTTCTTTTACTGA +GTTCCAAATAGCTGTATAAGGAGCAAAGCCCGATTTACCTGTTATTGCTGCATCCCAAAT +ATCATAGAAGTGGTTCAATCCATTTGGCGTAGTAGTAATAATAATTTTAGAACGACGCCC +AGATGAAATAACTGGCTGAATAGCAAGCCAAGCATCAATAAAGTTTGGAATAAATGCACA +TTCGTCAATATAGATCATTGCGAATGAGTTACCACGAACAGCATCAGGAGATGACGCATA +AGCACCGATAGAAGAACCATTATCAAGCTCAATAGAGCCTTTGTTCCATTCTACAATACC +AGGTTGTAAAAAGTCTGGAAGTAATTCAATAGCCTGCTTAGTACGATCTAATACTTCAGC +TGACATACTTCCTTTATGCGCAAGAATACCGATTGCTTTATCTTTGTTGAAACAAGCAAA +ATGCGCTAGAAATATTGCTACTACAGTTGTTTTACCAAGCTGACGAGACAGGTTACATAC +TGTCATACGCTTAGAGTGCATGATTTTAAGCATATCGCGCTGATAATCACGAAGTTGGAC +CTTGATAGTCCCGTAGTCAATGTGTGTGATAGCGCAATACTTCTCTGCGAAATAGACTAT +GTCATCACGACATTTCTTCCATTCAGCAATCATTTCAGCTGTATAGTTAACTTTAACGTT +TGCTCGTTTAAGGTTCGGAAGACCCATATAACGAGATCGTTTTACGTTTTTATTTTTAAA +CGTTTGAAATAATTCTGGATTCTCGCCTTGTAATTTAATTTTAACTATTTTATTAATGCG +AAGATAATCATCAAATTTCTCTGGATACCACTTATCATCCCATTGAGATTTGATCCAGTT +TATTCCTTGATCTACCTTTCGTTCTAATTCGCCTGGTGGGCGAATTATAATTTTATTTGT +GGTATTTAGCGGATGTGTGTCCGCTAATACGTTATACGGCTGAGTCTGTTCCATTTACAA +TTTTCTCCTGACGCTCTTGGGATTCATATGAATCTCCCAATTCATCCATTAAATCAGTAG +GAGAACTCATAAAAATGGTAGCGTTTTCAATATTATTAGTTTGATTACCACCTTGTGTAC +CGACGTTCTCAGCAGTAATTTCTTTCATTTCTTTATGAAGTTTAAGAATTTCCTTATTGG +TAGTAGTCATTTGCCCCATCAGCGTGGCGAATACTTCCATATGACGAGGAGAATCAGCGT +TCTTTGCAGTCTCTAGGAAAATCTTAGCTGCATCCATCAACATCTGTTGTTGGAAGTGCA +TGTTTTTACGAACAACTGTATAGTCATCTTCTAAGTCAGGTTTACGATCATTGGGGTTAC +TTTTAACCTCAACTAATTCCAGCTTTTCATATACTGGAATTTCTTCTCCTCCGATACCAG +GGAGGTCCCCGATATCGAGAAGTTTAGCTATATCTAATTGATCTGTCATTGTTATGTCCT +TGGCCCAGGTGGTACAGGAGCAACCGGAATAGGAATATCGGCGGTGTAAGTTTGTTTAGA +CGATCCGTCCCAATCTTGTATTTCAACATCCCGTGGTTCTACTTCAGAATCAACTGATTC +AAATACACCTTCAGGCTCGAGTACTTTACTATTTGCATGGAAATCAAGATAAACAGTACG +AATTTCGCCTTTAATATCGTTCTGAGGTGGGTACATCCACCCTTGAACCTCAAACATAAT +TGACCATTCAAGTCTACGACGACTTATATTATCTCCGTCAATTTGTTCGTCTACTGATAT +AGATTGAAATACTACTCTAATATCTCGGCTAAATTCAATATCTTCACCGTATTGCTCAGT +CATAGTAGTATTGAAGTGTGGCTGAAAATACGGCAGAATCTGTTCAACAATTTGATACAT +GTCATCTTCATAACGTGTAAAAATTCCTAACTCATATATCATTTTATATGGGGTAGGGTT +GAACTGCGAAATCATAGCAGATTGACCATTTTGAATAGCGGTTCTATTTACTATATTCGT +TTTATATGTGCCGTTATACATTACATCCACAAGATGCAAATTCATGCGAGGGAGAATAGT +CTCAACCTTCGCTACATCGTCTTGTGAGTTAATTGAAGTCCACTTATTGAGCTTCATCAT +AAAGTGTTCTTTAGATGCGTATGTAATAGGGACTTTAATGAATCGGGTTCCAGTGTCTTC +CCTTGTTCTAGCTACTTGAATATTGGAGAATAAATCACCCATCAACAATGCGTAGCGGCG +GAAAGACGAATGATAAAAGTGACCAAACATTTTTTCTCCTACGGCCCCGAAGGGCCAATA +TGCTTTATATTATTTATGACATAAAATTATCATCAAATGGACTAGTCTTTTCGGGGTCAT +TAACACCACGGCCATTGATTACAATAAAGTCTTCAACAAACTCAGACGCTTCAGCATTAA +TTGCATCGACTTCTTCATATTGAACTTTAGATATATCAGCAAGTCCATCAAGATTTTTAA +CTGGTTCAAGATCAAGTTCGCTAAATTCAGGTATTTGAATACCGGCATTTCGTTGCAATT +CAGGCTTAAGCTGCTCCCCAGAGTAAATAAATTTGCCTGCAGTAATTTTACGAGTAGCGT +TACGTCCTACTTGATAGAACGGATCATATGGCTCAACCCAGTTAATTTCAAACAAACTGT +TGTCCATTTTGAAATAAATTAAATCACCTTCACGAGGTTCACTTCCATCGCATTGATGCT +TAAACAACCCAGGGTTTATTGTTAAAGTGACTTCATCTGATACTTGCATGCCAAACTTAC +TAAAGAAACTATTTGCTCCTTCATATCCCTCAAATGAGTTCAAATATGCAGCGAATAACC +AAGCTTTGGTAAATTTGTTCTGAAGATCTTCACCGAATAATTGATCTGGTTTAACGTATT +CGCGAGGAATGAAATAACACTCTACCCCTCTCATTTGAATGGCCTCAGCCACTAGCGTAT +CCGCCAACGACTGAGTATTTTCATGACCATTGAAATTTACATAAGGGTTAAGTATATTTG +CTACGTTAGTTTTCTCATAACCAGAACGATTTTCGAGCTTCGCAAATAACTTATTGTTCA +TCATCCGATTAATATTCCAAATGGAGGATCAAGTAAGTATAATTCTTCGCGGAGACGTTC +TTTTTCTCGTTCTGCTTCTTCGAGCAAGCGTTCACCGTTCAGAGTAACACCGCCAGGTAA +TTGAAGACCTTGATGCTTAGCTAATACTTGACCGAGCAATTCTTTTGATAACGTGGTTGC +ATAATCTTTAACCCATCGGTTATTATATGCACCTTGTTTAACCATTGCATCTTCACCAGC +AACTCGCTGAGGTAAAGATCGGTCCATGTTATCCCATTGTTCTGCTACTGACCACTGATC +AGCTGATCCAGCATATCCATAGCCTGCAGTATTTCCAACTGTTTTATCTACACCGGAATA +AGCTAAAGTGAATACTTCAACAATGATAACATCGCGTTTTTGGAAGTTACCCATTACTTT +AAGTTGTTCATTAGCTCCATTATACCAAAAATCTGGTAATGGGCTTAGCATATCTTGCAT +CATACTCATATAAGTCATGAGTTGAGTGAAATAACCAAGATCAGCGCCAAAGGCGTTCGG +GCCATAAAATTTATTACATGAACTTCCCATTCCACCATTAATTCCAGCCATTCCCAAGAG +AAAATCTGTGAACCATGGATAAGTGGCATTACCGTCCATCGAAGTAATAGAACCGATATT +CGTGCGAACTATTTTAGTTACAGCAAAGATATTTGAGCCGCGAAGGTCGAATACACCATG +CTTGTATAATTCATCATCGTCACCAATATAAAATGCGTGATATCCTTTGTTTAAACCATC +ATAATGATATTCGCCAAATAGTTCGAGTGCTCGTTGAATGCAGTTGTAGATCATGTCTTC +TGTTAATTCAACGTTTAAAATTGGTGCGCCTAATCTAGTTAAGATAGCGTCTTTTAATTC +TTTTGGGTTTTGTGGTGCATGAATAGACATAATAAACCTCTAAGGGCCCGAAGGCCCTCT +ATTAAGGAAGAAGAGTTTCTAAATCAACCCAAGCTCCATCTTTACGAACGTAAGCTTTTC +CATCGCGTGGGGCTTCAGGAATATAAGTAACAGCTGCACTTTCAAGCTGGTTTATACGGG +TTTGATGTTTGTTTAAAGTATTTAATACACCAGCAGTTTCAATATTAGGGCTATTTGGAT +TAGTTCCGTTCATTTGATTACTCAAACGAAGAACTTGTCCTTTAAGACCTTCATTGTTAT +TACCGATTTCCACTTGAAGATCTTGAATGGAATCGTTCATAGTACTCTGTTGTTGCTCCA +TTGCAGTCATTTTAAAAATCAATGAAGTCTGCGGAGGTTGAGTACCCTCTGGAACAATAC +CAACAACCTGATTTATCCATGCAACCTGACCACGAAGACCAGATGAAGTATCAGCTCCAA +CTATTTGTTGAAGAGTTGCAATTGAATCAGTGTTAGTTTTTATTTTTCCATTAATAGTCA +ATGGAGCTGAAGGAACACCTATTTGCTGCTCAATAGAATCAAGACGCGGTATTACGCCGT +TTTGTCCACTTAACTTATTATTAATTAAGTTAATTGACGTGATATTGGTTTGAACGAGTG +CGTTGATATTAGAAACACCATCATTCATTCCTATTGAATCCATCACGTCTTCCATTGACA +GTGACACTCCGTTTATACTAGCATTCATACGATTCAATCTAGTATAGACGTTATCAACGG +TAGAAGAAGATCGCGGACCTAATTCTTTACGTAAGTTAGTAACTTCAATAGTTAATGATC +CAACGTCAGAATCATTGAATTGATCTTCTAATATTCCGACACGCTCGATAGTCCTAGAAA +TAGCAGATGAGTTGTCAATAATTCGACGCTTCATGCCAGTAGCTTCAGTACCCTGGACCG +ATTGTCCATTTATATCTTGACCTGGATATTGGCCCATTTCTTTTTTAATCCAGTACAAAT +CTCCACGCACCGGACGATAAAAAGTATCTAAACTTGGATTATAAGTACCAATATCAGTTT +CTATAGAATCCACACGATTATCAAGATCAGTCAAAGAAACTTTATTTTCCGCAAGTGTAG +TATCAATTCGCTCTATAGATTCAGTATTTTTACCGATTTGAGTCATTGCATCGATATTTT +CAGACACCGCAAGAATGCTATTAATTGTTTCAATTTCAGCAGTATGTTTATTTGTGGCTT +CCGCAACTAAACGAATATTTTCGTCTAATACAACCACGTTCATCTGTACCTGATTTGGTC +CTCGATTCAATTGCCCATCGAATCCATATTTAGTAGAAGCCCCATTGAGAGGCTCTCCAT +CTTTAATCCAGTTGATGCGTTGCTGACCTTCAGCTGGAAGTCCATTTACATATGGTAAAT +CTTTTAATTCCATAAGTCCTCTCTTATTTTACGCGAATAACGTAGTTTATCGCGACGTTT +TTCATTCTGTTTTCATTTGCGGTTCTAACTACTCTAGATGAATCAAAGTGCATTCTTAAT +GCAGTCCAACGATTATCCTTTGATCCTGCGTCATAGTTAATACTTCCCCAGCCATCAGCA +TAATATGCTCCATCAGGAGGATAGTTCGACCAAGCGGCCTGATCATCAATAGTCCAGTTA +CCCCAGATTCTTTGTTGAGCATCTTCCTGATATGTTCCAAAACCTCGTCCTGGATCAACA +CCACGCCCAGCATCAAATCCACGAGCTGTCACGCCGCGCATGTCTGGTAAAGCAAAATGA +TCTGATCCATTGCCGCCATAAGTAAAACCTATTCGCTGAAATAAAGTTGGATAACCAGAT +ACAGGCAACCATCTGCCGTTTGCGATCATTAAATTACCGTGATCACCATTGAAAGCAGCC +ATGAACATTCCACCAACTGGAACCGTTGTATCCAATTCTCCCTTTGTCATATATTTCTGA +TTAGGAGATGTATTTCCTTCATAAACACCACCAGTTGATACTGCAGCTCTATCTGACGGA +AGAACGGCTGCTGATCCTGATAAAGCTCTTGAGCCATCTCCTATATTGTTGGCTAATAAA +GTTGTGCCAACACGATTCGTTGTTGCACGTTGACTATTAAATTTAGCAGGTGTTACAGCA +GTATTATTAGAAGATCCATCCATATTAGCTTGCGAAGCTAATTTAATTGCACCAATATTA +CTTTCTGTTGCTTGCCATTGATTTAAAGTAAATGGAGATGCACTAAATCCTTCTCTGATA +TTAGGATCACGTAACTGAGCTAATGTAGCTAATCTAACAACACCTTGTGCTGATTCTGTA +GCTGGTCCATATGCTGGAATAAGAGCTGTGGCGGCGGCGATGGCTTGTTTAGTTTTCAAT +GGAGTCATTGCAGTTGTGTCATCTACACCAGCTTGGGCAGCTGGTGTAGATGACAATTTA +AGAACACCATTTCTAGATTCAGACGACGTTCTATTTGTAAATGTCCAATCAATTACATGC +TTTAAAGCCGATGCTACGATACTTCTGTCGGTCGCTGTACCAGTCAAAGCTTCATCATTT +GTTGCGTATCTAGTTACTCCAAATACGTCGGTTGTAGCTTGAGGGCGCTGTAATCGAAGT +TCGAGAGTTTTTGGAGTTACGGCGACATCTCCAAGAATACCATTATTTACTTCTTCCTGA +GTACCTATTCTAATTTTTCCTTGAACAGTCTCAGTTGCAGAGGGAGTTCCTGACACAGCA +TCTGGACTTAACAAACTTAAAGCCTCTTGGACGTTGTCAACGCTAGAGTTAAAGTTGCTT +CCCGCTGGATCAAATCTCACATAAATCGCTTCATCTGATATGTGTCTATTAGTATTATTC +ATTATGCGATTCTCTTAAAGTAGTGAAGTAACACAGGATCTGTTAAACCTTCTAAAGTTT +CAGTTTTAGTTCCGATACGGTTCCAAACACCATATCCCTCTTTTCCTGGAGTAAGAGTAG +TAGATGATACTGTTATTCCATATGATGACAATGGTTTCAAATTATGCGATTGATTGTCAA +TATAAGTAACAACTAATTCATTACCATTAGTTCCATCTGCGACAGTATTGACAACAAAGT +TATTTGTAACAGCATCTTCTAAAACTAATTTAGCTTTAGCTTTAACTTCTTCTGCGGTAT +CTCCAACAGAAACTAAAACATTAAATCCAAATACTGAAATAATACAATCTGAGGTCTCTC +CGTCTTGACGAGCTACAGTTCCAGTAAAAGTCCATTGATCTGTTTGAGATACACCTTCAG +GAGAAATACCGTCAGTATTAATTTCAATTGAGTTTATTGGACGAATACTGAAACTGCGAA +TATCTTGTATAGCTGATTGAATGTTAGAATATTCAACACCCTTAGCTAATTGAGTAATAG +TGATACTGCCATAAGGCTGTTTATTCATCACATCATTATTTTGTGTCGTATTGAACTGAA +GATAATCAGCTAAGCGGGATACGACACCCGCTTTATTATTCAATAAACTCATTATGCAAT +CCTTAACCAGCGATAAACTGTAATTGAAGGTTGAACTACATCAATTGAAATAGGACTTTG +ATGTGTTTCATTGGTCTTCGCATAATCTTCACGGTATTTAGTGTAAACTGGTCCAGTATC +ATCAGGATCAAACTGGCATCCGCCTACAATAATTGGTCCATTATCATCTGAAATTAAAAC +TTTAGAATCAGTTTGAGTTGGAGGAAGATTATTGTTATTCAATTGTACTGACGTAGAACC +GGTGGTTCCGCCAGCTGTATGTGATGGATTTCCTGTAATGTCTAAATCATTATTATTCAT +TGCAAATCGAGGATCATTTACGTCTGAATTCCATCCTACTGTAACTTGACCTTGTCCCCA +TAGAGTCCATCTTCCGAATCCCATATAAGTAGCTGGGTTGTTCGGGTTAATAGCATTTTC +ATAAATTGTTCCAATTGGATAAATCAAATCAAATATTGCAAATGGACTATTAACAATAAC +TTCGCTAGGAGGAACAGCCTCTGTGTTTGGAATACGAGGTTTGTCGAAATCAGTGATTAC +AACTTGACCAGTAAGACTAATTGGAGATCCTTGAGAAATATACTTAGAATCTGTCTCTTG +GATAATGTCTTCTAAATCAATAGTAGTTCCGATGTTATTATTGAACCAAGTCAAAGTTAT +TATGTCACCATGCTCCATAACTCTGTCAGTTTCAATTCCAGTGATAACGTTATTTTCATC +TATTACGACAAAATAATCAGTTAATCCTGTTCCCCATGTTCCACCGAGTAAAGCACAGCT +TTCTGGAGTATCAGCATCAGCACCATGACAAAATACATCAGGTTGTCCAGTTGTTCCAGC +TTGAACTTGAGTAATTCCGTTAAATTTAACCTCAAGAGAATTTGGATTAATTTTTTCGTG +GGCTGAAATACCAAACATCTGGACAGTGAATTCTTTGGTATTTTTTAAGTCGCCAACAAA +CGATGTTCCTGGGATAGATTTTTCAGTAGTTCTAGAAGAATCTTTCAAGATAATTTGGCG +TTTATTATATGAGCTTCTCCATTGACTTAATCCATCAACATAAGAAACTATGATAACGGT +ATCGCCAATATTACATTTTTGACGCAACCTAATATCTCGACCGTTCAGCGGAACTATTTC +ATTACCCGTTCCAGGAGAGCCATAATCGGAGTTTTCACTAAAATTCGGTCCGTAATAAAG +AATATTACCTCTATGGTAAACTTCAGTATTTGCAATGTTATATTCATTGCCATCAAATAT +ATCTAAAAAATCTACTTGATCCTGAACCTCTACTAAGTATTCTTTGCGTATAACTGACGC +CATATCGCTATTACTAATACGGTCAATTTGCTTATTTTCAACATATTCCCAACGACCAGG +CGGGCAATACACGAGTTCTAAGTCGGAAAGCGGAATATTGAATTCTTTTGGATTAGGATC +ACCCTTTAAAGTATCTCCCGCTGCAGGAATAATAGTAACTGGGTTTTTCTGCCAAGTATT +AAACACGTCACGGAAGCGAACTACAGTGTTGTAATCTTGAACTCGTCCTTTAGGAAGTTC +AACTGTCATTCTTCCGGCAGTTGTATCTAATGCGTATGAATGACCAATCAAAGCTTTTAA +AGTTGACTGATCAGAAGTTTTAAACGTTTTCCAAGCACCAGCTGCATGTGGATTATTGCC +GTCGCCAAGTTGATAATAAAGATCGTTAAAATTATTATTAATTTTTTGACCACCTTGGCG +AAGGTAATCTCCTGAACCATCGTCAACAACGGCGCCAATTTTTAATTCTTGTTTCATTGT +GCTGCTCCAATTTTTTGAGTTGCTATTGATTTAATTGCAACTCTTAGTCCAGGAGCACCG +GTTGCAGTAACAGAAACTAAACCAGTGCTTTCAATTTGAAATGAGTAAACTGCAATATCA +TCGTCTTCGTTTAAAGCTCCGACTCTCATTACTGCGTATTCGGTCGAAATTATTTCACTA +TTGACAGTGTCTACCATGATATTTATTTCTGATGATTTCATTCTTTTCCCGTCGGAAGTA +CGAGATGTAATCAAAAATTTAGCAGCAATGTACTCGCTTTTATCAAATAACATTGCTTTA +ACTGAACCAGATCCTGGGATTTGCCAAGTCCCTTGAACAGGATCATGATAATCTCCGAAC +ATGTTTCTTATTCTGTAGTTCCAAACTGAACGACCACCTTCATCTGAAATACACCAAACA +GTTACTTCAGCATATGGAGCTGTCACTACAAGCGGTCCAGAGAGTCCCACAAAGCTATCT +GATGGAACTATTGTCAATGGTTTAGAAACTGACACAGTGCCGTTTGAGTTAATAAATTTG +ACACCTTCTCCTCGTACTCCTTTGGTTAAAGTTACTACAGCTGGAAGATCGGTGTTATCA +ATATCAAACATAGATCCATTTTGAACTGCCGTAGCGAATTCAGAGTTTGCTAATTTCTGA +TAATAACCAGTTGCATGAATAACTTGACCAGTAGGACCTTGCCCGTCATCTAGTGGCATT +TTTCTTTGATCGCCAAATGCGTTATAGATGGCGTTAATATTAGAGTTTAATTTTTCACCG +CCGTCGAATAGGATATCACCAGTAGAGGCGTTTCCGATTTCACCGACGTCAATGAGTTGT +TTAGGTTCTTGTATGTACATATCAAATCCTCAAGTGATATCTGTCTATGCTTATATTTAT +TCACAAAAAAGGGACCCCGAAGGGTCCCTAATTAGAATTCGAAGATGATGTTAATTTCTT +CTGTTTGGTCCATTGCCATTATAATAGGAGCTCTATTTTCCATGTATATCATTTCACCTG +AATGCCTTTCAAGCTCATGTGGGCTATAATAAAGACCTTCAGCTTTAACATTAGGATCAC +TAGGAAGAGCTTTCTTTTCTAATGGATTGACAATTACCGCAATTTGACGAAATCCAGAGT +TTCCAGGTAATGCAGCATCAGGAAAATATACCGAGTCAAAATATGCTTTAAACCTAATGG +TATATGCTTTTACTCTGTAAATGATATTGAAGTCATTTTGTTGCCATGTCAAGTTGTTTT +GAAATCCCCAACGTTGAGGATCTTCTGCTACTTCTTCTGGCCAAGGAACTACAATATACT +CGTTTGTGCAACGGTTGATACTTACATCAGGTGGAATCTCATACAAATATTCCCAAATGT +ATCCATCACCAGTATCAATTTTCTTCTGATCTGAAATAGCATCTCCTCTACCTCTAGGTG +GAGCAAAACTATCTATAGATGAAGTCCATTTACCACCGAGTTTAATACATTCGTCTTTGG +TATTCATTCCTTGAATTGAGCATGTTCCTTGAGAAGGAATATCAAGTACTCTATATACCA +ACCATCCAGCACCAACTTCGGTAGCATTATATGGAGCGCTGTTAACAACTACTATTTCGC +CAATTTGGAAGTTCTTGGGATTAGGATAACGAATATCACCCCAATCCTTACGTGGAACAA +TAGCATCGAGCATTGATGGCATAACTTTAACGGATCCCATCATATTCGTCCACATATCTA +CTACACCCTGTGTATCATCAATTGGATAAGGTGGAGCGAACCCCACCTCGTTTTCATTTT +CAGACCAAGGTATAGAACGACCAAATGTAATGTAAAGAGAATTTTTGTCATCACCATCAC +CTATTGAGTCGTGAAAGTTTTGCATTTTCTCTGTGCGAAATTTGGATGTAACAATTGCGC +GATAAATTACGCTTGAATCATTCATTTATTTTAACCTGTGTTGGATTTTCAGGATCACGA +GGAATACCAACCTTATCAATAAGTCTATCTTCAACTAGATCTCTGAATTGAGAAAATGTA +ACAGCTGATTGATCAAATAATGGACTCATTGCTTTGCGGCGTTCGGTTGGTGTCTGACCT +TGAAAAATTGAATTATCGTTTTCGGAGTCATAATCTGCTGGAACCGGATATGGAATACCA +GTCATTGGCCCTGCAGTGTAAATAACTAATCCAGTAATAGGATCTTTTTCTGGATTTCCA +TTTGAATCCAATGAAGCAACTCTGTCTGGATAAAAAGTAGGAATACCAGCATCCCATTTA +TAATTCTTATATTTATTGATTATAGTCTGAACGTGCTTCATTGTCAGACCCACATTAATG +AACATAGTCAATAATGTGATACCAATAAATCCAAATCCAACTGGATGAACAAACCGAAGC +ACATCATTACGGTAACGAGAAGTTGGAAGATTAGACTTTATCTTCATAACGTAATAAGAA +CGATTTCGGTTGATATAGTCTATGTTGTTTTCAACTAATTCTTTGCCACGAACTCCTCGA +ACAATCATGCCATCAAATGACAACATACGCTCTGATTTAACTTCCTGTCCAACAATAAGA +CGACCCAAAAGGTTGTGTATTGTAACTTTCCACTGAAGCTTACCTTTGGAATAAGATCGT +TCCAAATAAGTCACGTTACATCTTCCGGTTTGAGTATAAATTGTTTGACCAACTAAATTC +TCGTTGATAGAATCAGATTCAATAATGATGTCGTATTCTGTGCCGGAATTTGACTCAATT +TCAATTTGAACATCTTCATTATAAAGAAGCTTAAACAAAAATTGATATGAAGCTTCAACA +CCTTTGGTTGAATAAAAATCTGAACGACGAGCTTCAAAGAATCTTGATACTGCATCGCGT +TTATCTTTATTCAAATAGATATTGCGTTTGTATATTTCAGACCACAGATATTCCCATGAG +TCTTTTTCACGAGGATACTGATTGCGAATAAGATTCAATAAATTGTTGTACTGAGTTCCA +TAACCGTCAGACAGATATTGAATATAAGCTTCACAGAATTTTTCAAAATTACTGTCATCA +AGCAAATATGAATCTGGCATCATTTTATTGATCAAAGGACGTAGATCTGGATCTTGCTCT +CCTGACCTTTCTTCCGGTTTCCATTCCTCATTACGTTCTTGATTTTGGAGATATGCTTTA +AGAAATATCTCAGAAGGCTTCCAAATAATTTTTACTTCATCACGAACTCGGTAATTGAAC +TCATAGTATGAAATTATTTCGCCAGAAGACTTATAAAACAATACTCCTGATGCGTACTTG +GTAAAATAGTTGAATTCTATATTAGGAGATGTTACGGTACAAACTCCTTTATCCCATATT +TCATGTAATACTCTATCTGGAGAACCAGAACCATGGGTATCAATTATTTTAGCATGAGTG +AAACCTGAATAAACTACGACTACTCTATTAGAGTTATCAATCCAGCATCGAGTTCCAGAT +TTACGAGACCAACTGAAGAATGGTTCGGCATAATACTGCATCGGTCCAGGTGTAAAACTT +TCAAATCCAGAATCTTTATTAGATCTGAAAGACATCATGTGATAATGTTTATCTGATAGC +CATTCACGAGGAAATTCGTATTTTACAGCTCCAAGCAGCTGGTACTTATCTGCTGTTTCA +GGATCATCGACAATTTGGTCTTTTTGGAATTTGAAGTTACTGGAAGAAATAAAGATCTCT +TTTCCGTTTGTAGACATGCTAGTATATCCATGCTCAATACGGCGTCTTTCTTCCTCTGTA +TTACCAAATACTCGTTTAAATGTTCCGTCATCCTGAAGAATATAAACTCCTTTGTCTAAA +GAATCTACAACGTTTTCAGGAATAGTCGGATCCATTAACTCTTCTTTAACTTCTCCAGTA +ATAAGAACAAATACTTTTCCATCAACTGAATCCATCTTATAAACAACTGCTTTACTATTA +CCGGTTATAGTCAGAACTTGGTCTTCAAACAGCTTTTCTCCAAATGTAGGAGATAAAGGA +TTTTGATCTATTGGAGCATTTTTAGTTTTAGCAAATCTAACTTTATCTCTAGCAGCTACA +TAAACGTAATTGTCATTGGCGGTAATAGCTTCTGCTTTACGAGATACGTCTCCTGGCAAT +GTCGCATACGTTCCAAATATTTCTACATCAAATCCAAGCTTTAATTGATCTCCAATTTTA +GCAAATGTGACGTCCTGTGAAGAAAAACGAATTTCATCAGAAGACCAGCGGACATCATTA +CTTTTGCGTCCGTAAAACAATTTATCATAACCAAGCAAATATGTAGTATAATCGGTTTGA +TAATATGGTGTCCGTGAAACTGGATTACCTGCCCTGTCAGAAAGAAGTTTGACGGCTTTC +CAAGTTTGTCCTTTGTCGTTTGACACTTTTACTACAGGCTGAAAGCGCTCAAATAGGTAA +AGAACACCCTCAGATTCCATAAGCATGACTCGATTTACGTCTTTACATACCGCTGTAATA +GATCCTTGAATTTCGTGATATTCTTCTTCTTTTAATACAAAATTAGAAACTGAAGAAACA +ATCTCATAGTTTGGGCTAAATTGAAATGATTCATTCATCAAAGCCGCATAAATGGTATCA +CGGTTAAAATTGACGTAGCTTTGATTATTTTTATTGAACTTTTCCTCAATAAACTTTTTA +GCTAACGTCATTTCAAGCATAGTTTCAAATGTATAAGCGTTTTCGCTAAACATTTGAAAC +TCTTCAGTTTGAACCCAATTTGAAGGATCAAATCCTTGCGCTGCGGTTTGAACGCGCATT +GTGTAATAACGATCAGGAGACACAAATGTATCTTCAAAATATTCGTTAGTAGCAACGTAT +CCTAATTTACGCCACTGGTAATTTGATGGGGGAATAATATTCCCCATCATATCTCTTGTT +TCTGCGAGTTCAACAAAATAGTAAAAGTTAGCACCAACATCATCCCATTTAATATAAACA +TGGTTGGCTGATAATTTAACAATTCTGAGACTCGTTACTGATGGTGCTTTTACTGTCATT +GCGCAATAGGCTCCAATGTTATTGTTGTATACTGTGGACGTAGATCATTCTCAAACACGA +TTAATGAACCGTCTCTAGTATAAATGTTATCTTCCACTGGATCTGCATAAAGTTCGATTG +TTTGAACTTCAAACTGATCAGATGAAAGATCTATAGCTGAAATATTCCAATACGTAGTAT +CTCCAGGATAATTAATTTCACCTATAACGAAATAAAGTGTTTGATCACCAATAATTTCAC +GATCGAAATCTGTTCCTGGATATGGTTGAATATCGGTATTTTCTTGCACATCACCCGGTT +TAAATGGCCCAATTACTACTTTACCTTTGCCATCTTCGTTTCGATCTGTACCAAGAATCC +GAACATTATAATCTCCATTTTTTCCATGAAATGCAAAAGCGTTGGATTTTAATGAACGAT +TAGTCATCTGATTATAATATTTAATTCCAGATTCTGGAGTCTGGAAAAAGTTCTGAATTT +CACGAACCATTTGAATAGTAGCTGATGAACCAATAATACTGTGATCGGCATCATCAATAT +AAGTCATCATTTTAGATTTAGCAAATGATGCGTTAAAAATTTCAACTTCTTCAATGTAGT +AACGATCGATTTGATCCAAAATTTTACCACGTAACCATTGGTCAGATTCTTGCAATTTAT +TCAAAGCGTAAGATACTTTGATATTATGACGAAGGAACAAATAGTTAGGTGAAATAACTG +ACGGAGTTATTGGTGCTAAGTTATACGGCTTCAAGTAATTTTGAATATCTTCACGCTGTA +CAGATGTTAAGTACAATCCAGATTTAGGCTTAATTGCAATAAATGCATAACCAGGTTTAT +AGTTATCAGTGAATGTTTGAACAGCTTGAACGATTGAACCAAATCTTTCTGAAACAAATG +TGTCATAATCGCTAGCTGTCACGCATCGAGCTTGAGTTTCACGCTTGATAGTTCCAAGTT +CACGGATTCGTTCAATATCTTCTGGATCACCGCCGCCATCAGCACCTACATAATCTTTTG +AATTATCTGGGTTTTCAAAAATACGTTGAACTGTGATATAAGTTAAAGTATCTGCATAAG +AAAACTCAGTTGCACCGTTAGCAGCTTCACCGTCAGTTCTGATGTATTCTATTACAATAG +TAGAATCTTGAATAGGTTTTAAACCACCGATATAGTTTGATTCAAGTACTCCACCCGCAA +CAGAAGTCGATTGTTCACCTTCACCAAAGAATATTTCGGTATGCCCATCAACAGTTTCAC +GCATATAATAAATCGTAGAAGTAGAACCAGCATGAACCATTGAACGGCGAGTCCAGTTTG +TCCATTCTGTTCCATTAACAGTTAATTTCACTTCATTACGATCTATATTCGGATCACGAA +TAAGAATCGGCTTCATTTTATCGTAACGCAATTCAGTACGAACGATTCGTCCCTGCACTA +ATTTAACTCGTGGAAAATATTGATTATTCGCATCTTTTACTGCTGTAACTTCTTCTGTCG +TAACAAAAGAATATGGATCAGCCGAAGTATCACGAGCATATGCTAAAAATCTAGTTCCAC +GCGGAATGCGAAGCATATATGGGTTTAATGCGTGAGTACACTCCAACATGATTTCTGTTT +GAGCTGCTGATCTAGATGAAGGAAAATATCCATTATCTTGAGCGCCTTGTACAACAGAAG +AACGAAGGTTTGCAGTCCGCATAAATGATTCATAAACTGAACTGTTACCAAACTGTTGAA +TATAAAGAGTGTTGTATGCTAAAAGATCGAGCAATACGTTTAAACGAGAACCCTCAAAAT +CATAATCTAAGAATTCGTTTTGTCCGCGCAACCAATCAAGTAAATCTCTCTTAATTTCAT +TGAATGTTCCTCCAACGAAAATATCTGGAATGGCATTGGCTGTGCGTGTTAATTGATAGT +TAATTGGATCTGCCATTATTTTATGAACACCTTATATGATGATTGCGAGACTGTGTCTCC +GCATGAAATTGGGTCAGCCATTTGAACAGCTTTCTTTCCAGTAACAAATACTTTGGAAGT +GCGGGGTTGAACTGCCCCGCCATGTGTATCATAAGGCTTTATTGTTTTAGTGTGAGGAGT +TATTTGATCTCCATCTACCAATACAGGAATTCCACCAGTAAAAACCTTACTTTGTGTAGA +ATTAACTTCGGTCGGTGGATAAGCATCGTGACCGGAAGTAACGCATTTATCGTATGATAG +TCCAGCCATCATCCTCTCCTATAAACATATTCTCTTAATTGATTTCCCCATTTTGACCAG +TTACCGTAAACAAGCTGTGTGTATGTTTTAGTGATTTTCTTTTCTACCGGAGGTGTTGTA +GTACCACCTCCTGATTCACCAGAACCACCAGAAGTATCTTCTTCCTGGAAGTCATATATA +CATTCAACTGTGTATGTAAAGGTCCTCTCAAGTTTAGAGGGGGCTCTCCAGAGATAAAGA +TCAACATCTTCTGCGGGAGGTAATTCATCCCAACTTGATGCAGATTTTCTTTCATCTCCT +TCTCTATAAAGAAGTGAATCACTACCAACCGAAAATACACTTTCGTAAGTTCCATGATAT +CTTGCTCCAGAAACATTTATTCCTGGAGTGGGCTGGTAATCAATTATATTTATAGACTTC +AAAGTCTCAGTTAACGGGTCAAGTTGAGCAGTAAAAACCACATCAACTTGACCTCCCTCT +TGGATATCGCCAAGGTCATTATTAGCTGGAAGTATCTGCGCCATTAGCCTAGATCTATAC +GTGAACCATCAATAGTGTATTGTCCAGAAGCTTTAGAACTCATCGTGCTCATTGTTTCTG +ACCAATTACCAGCAACAGTCATGTCTACATTACCAAGGACAGACCATTTGACGTTTCCAT +TTACAGTGTAATCGTGATTGCCTTGAACTTCAGTTTTAGCATCACCCTCGACTAAGATGT +CAGCGTTTCCTTGAACTACAACTTTTATATTTCCTTTAACTAAAAGAGTCCCATTGCCTT +CTACTGTCTTGGTTTCATCACCGCGAATAAAAATGGTATTTGACCCATCAATTTGATGAA +GTTTGTCAGCCATGTTATAGTAAATTTCATTCGCGCCGACGTTAACATTTTTGTCACCAG +AAACAAGAAAATTACCGTCACCGTTTGTTATATCGTAAAGATCATTTACAGTTTTACGAG +TGCGTCGGCCGTCAGGAGCAACTTCTTCATAAGTGCCGGTTGGGTGAATTAGGCGATAAC +GTTCATACCCTGGTGTATCATCGAATTCTTGGATATGTCCAGACTCAGTTTCCATCGTAT +GCACATAAGGATATTGTCCTTCATATGAAGATACAGGTTCTTTAAAAAGAATCCTTGAGT +CATTCGGAATTGGTGGTTCACTCGGATCAGAGCTTGTGCGAACTACCGCTGCCGCAGATA +AATCTTTTTTGCCTGATGTGCTTACTGGAATACCATATGACTCCATATTCCCTGTCAATA +TGATCATTGATACTCGAGATGCACGTCCTTTTGTTTGTTGGAACCACAATGAATCTCGTG +CAGAGTCATATGCTTTTTTCCAATCACCGACGAACATGGCTTGAAGCATATTATTGAACT +TAGCAACACCGCCTACACCCATTTGAAATGACATGTTCTCAAGAGCCATTTTTCTTGATT +TGTTCATCTTAGCATAAACTGGGCCAACTTTCGCGTTAGTTTTAATATCCCTCTGAACTG +CGTCTAAGTCTTCTTTAAATAAAGCGGATGCTTCATCCATTGAAATAGAACCAGGATTTC +CCTTTACTTCTCGTCCAACCTGCTTAGATAGAATCTTGTTGATTTGATTCATATCTCGTA +TTTGTTGCATAACAATAAGGTGACCGATACCGATTGTTGGATATCCTTCGGAATCCCAAT +AAACTTTCAAACGAAGACCTTCGTCACGACGAAGCATAGCTTCGATGGTATAGTCGGGAT +TGTTATCTTCTGGAATATCAGCTAAATCCATATCATCAGGATTTATACCAGTGTCAAGGT +TAGCATCTTGAATTATGTTACTGGTTGAATCATATCCTACTTCTCCGCCTTGGTTCAATA +CGTTAGTATCATTTCCAAGATATCGAGGATATTGGCCGGTTGGATCAGAGAATCCTTCTG +TGGTGTTAGGGCGAACTCTAGAATTAGCTCCATAGGTACCCATCACTAAACCGTTTGTTC +GATATTTGTCCAACCAATGACCATATACATGAGTGCCTTCAACCGGGCCAGTAACTGATC +CACCAATTCCAGATATAGAGGCAGAAGTTATTGGCTGAAGAACACTCATCCATGGAAGAT +CTTCAGTTGGGATACCAGAAATAGATCCTTGAGTCTTTTGAAATGGATGTAATCCAATTA +CTCGAACTCGAACTCTACCTTGCTTAAGAGGGTCCATTCGATCTTCTACTACACCAACGA +ACCATTCTACTGCATTACTAATCATATCCATTATGCTATCTCCATCTCACGAATTAAATC +GCTTAAGAATGAATCTATGTCAGCAGGAGAAATTATCTTGATTACGCGCTTTTGCTCATT +ATCTAGCACCGACGCTTCATAAGTATCAACTGCTGCTAAAGCGCCATTATATTGCGGGTA +TTTCATTTCCAAGTCACCTTTATCATACCAAGTCCCTGGATTATTGGGGTCTTCAATAAG +GTTGAAATATTTTTCGCCTCGATCATCGACATGATAAAGTACTTGGTTTCCACCAACCTT +TTCATATCTTTGATCAGCAGCTTGATATGCAGCTTCTTGTGATGTTATCCATCCATAGTA +AGGGTCGTAGTTTTCATTACAGAAAAGCAACACCCAATAAAGTTGTGTGTTTCCATAGAT +CTGATAAGCTAATTCTTCTGGACGAGGACTACCTTGAATGTAATAAGTACGAAGACGATA +TCCAGAAGCTACTCGTTTGAAGTATGCTTTATAATTCCTGAAAATATCAGTCATTTGAAT +AGTCTTAGCATTTTTGTCCACTGTTTTAGCTTTGTAATCTATGGGATCAAAAAAGGAAAA +TATCATGGTGCCTCCCATTTATAAATAATACTAATATTTATTAGAGGAGAACAATATGGC +TTACTCCGGAAAATTTATGCCAGTTAATCACCAAAAATACCGTGGTGATATAAGAAAAAT +TACATACAGGTCTTCATGGGAAAGTTGGTTCATGAAGTGGCTTGACACAAATCCTCAAGT +AGTTAAATGGAATAGTGAAGAAGTGGTAATTCCATATTTTAGCAATGCTGATGGAAAGAA +ACGTAGATATTTTATGGACTTCTGGGTTAAATTTGACACAGGTCAAGAATTTTTCTTTGA +AGTTAAACCTAAGAAAGAAACTATGCCTCCTCCAAAGCCAGCTAAATTGACTACTGCTGC +CAAGAAAAAATATATCGATGCTCTTTACACGTTTTCAGTGAATTCTGATAAATGGAAAGC +TGCCTTGGCGGTTGCAGAAAAAAATAATATCAACTTTCGTTTGATTACGGAAGATGGACT +TAAGCGTTTAGGGTGGAAAGGCTAATGGCTATTTTTGAATACATCAATGAAGGCGTTGCA +CCAGCTCCAAAGCCGGTTTCACGAAATGAAAAGAAATGGGTTGAACTTGGATTAGAGTTC +AAGAAGGCAAAAGCTAAAGGTGCTACTGCTAAATCATTTGCGGAAGAAAAAGAAATTCCT +TATGCAACATTCACCAAGGCAATGTCCCGGTACGCATCTAAAATCAAGTTCGCGGAAAAG +ATCGCGAAGCTTGAGGGTAAACCAACTCATAAGCTTTCTAAACAAGAACGCCAACTGATA +ATGATCAATAGCTTTAGATCATCGATCAGAGATAAAATTAAAAACGAAGGCGCAGCGGTG +AACAACAAGTCAGCTAAATGGTTCGCTGACACTATTAAAAAGAACATCCGTGGTCATTCA +GTATCTAAACCTACTCCAGGTAGATTATATGCTTATATGTATGATGCTAAGCATAAAGAT +ACACTTCCATATTGGGATAGATTTCCACTGATTGTTTATCTTGGACTTGGTAAGCAGGGA +TCAACTACTTTAATGTATGGCCTGAACTTACACTACATTCCGCCTAAAGCACGTCAGCAG +TTTTTAGAAGAACTGCTAAAGCAGTATGCAAATACACCTACAATTACTAATAAAACAAAA +TTGAAAATTAACTGGAGTCAAGTGAAAGGATTTGCTGGTGCTGACAAGATGATCAAGGCG +TATTTGCCTGGTCATATAAAGGGGAGTTTGATAGAGATCAAGCCCAGTGACTGGGCAAAC +GTAGTTTTACTTCCTCTTCAACAGTTCATATCGAAAGGCAAACGTTTCTCTTCAAATACT +GTCTGGAAATCTTAATTCTATTTCCATCTTCCGGTTGATTAGATGTTATTGATTGACCGG +AAGGACAAATTCATTATAACACTCCCTAGAGATAAGCATATGAATACACAACAGATTTTT +AACCAGACCAATATCACCAACTTCTTGGTGGACATCCCTGATGTCGGTCTGACCAAAGGT +TTTACGCTTAATGCGCAATCAGCTAATATACCAGGCATACGGATTCCCATCACTGATGTG +CCATCGGGCACAATGGGGTTAGGACGAGCAAATCTTCCAGGCTCAACGTTTGAGTTTGAC +CCATTGATGATACGCTTTTTGGTTGATGAAGAACTTGAGTCTTGGCTTCAAATGTATAGA +TGGATGATAGGCATCAATAACTATCAAACTGGTGATAACTTTGCTTGGAGAGATGGTTCA +AGTCCTGAACACGTTTCAGTTCACATCTTAGATAACTCTAAGACTCGCATCGTGCTGTCT +ATCCATTATTACGGATGCTGGATTTCTGATCTTGGCGAAGTGGAGTTCAACACAACAGAA +GATACTGACCCGGCTATCACATGCCAAGCTATTCTTCCTTACAAGTATTTGCAAATAGAA +AAAGATGGTAAAATAATTACTACAAGACAAAACATGACTGAAGCAGCAAACAGTCGTATC +GGAATGCATCCTTCTATGAGGAAATAATGAAACTATTCTTTTTGATTGGCAAAAAACGTA +GTGGTAAAGATACAACAGCCGACTACATCATGGATAACTATAACGCGTTTAAGCATCAGC +TTGCGGGTCCAATTAAAGATGCTTTGACTTTCGGATATCAGTCTGCGGTAATGGCTTATG +ATTGCAACCGAGTTCATCCCATTCTCACTCGTAAAGAATGGGAAGGTGAAGGTTACGACC +GTGAAACGAAACTCAATTTAACTACACAACAAGTATACACAATCATGGAGCATTCTATGT +GGTACTTGAACAATGAATTGAAAATTAAAGGTGTACGTTTCAATTCAGAAGGTGAAATGA +GTGCTGGAGCATTCGGAACTATTAAGCGGGTCATAAATAGTATTGAAGAAGACTGGTCAG +TACGACGTCTCATGCAGACCCTTGGGACTGATATCATGGTCAATCACTTCGATCGCATGT +ACTGGGTTAAATGGTTCTCTGTTGTCTATATGGATTCATTCGATAAAACGTTTGAATACT +TCATAGTTCCAGACACTCGTCAAGACCATGAACTTGATGCTGCTCGGGCGATGGGTGCTA +CAGTAATTCATGTAGTTCGTCCGAATAACGAAAGTTCGAAGGTTGACACACATATCACAG +AAGCTGGATTGCCTATTCGTGAAGGCGATACAGTAATCATTAATGACGGTTCTCTTGAAG +AACTTTATGCTAAAATTGAAAAGGCTATCAAATGACAGACAAAATTAAACAACTCGAAAC +CGAAATCGTTTACCTAAAAGCTCGAGCTTTTGAATTGACCGAAGGTAAAGCACAGCTCGA +ATCTCATATCCAGCAATTGTCTGGAGTTCTGTCTAAAGTAACAGAGCTAGTTGGTATTGT +AAGCGAAGATGGTTCTGTTAAGGTTGAAGAATTGTACGCAGCTATCGAAGCTATGCTCCC +TAAGCAGGGCGAAGCTGAAGTCTAATGAAATTCCAGGACTTTAGTTCTGGACTCTACGTT +GCAGCAAAATTCAGTGAACAAACACTAGATGAAATTGAGAACCTTCAGAGAGATTTGAAG +GTTCCTAATCCAGTTCCTCGGCATAAAATCCATTCAACTATTTGTTACTCCAGGGTAAAT +GTTCCTTATGTAGTTTCAACTGGAAGTTTTGAAGTTGCCACTAAAGGACACCTGGAAATA +TGGGATACACAAGATGGTAGAACTCTGGTTCTTGTTTTAGATTCAGAGTACTTAAAGTTC +CGTCATCAATACGCCAGGGCATTAGGAGCTACTCATGATTTTGATGATTACACTCCACAT +ATTACACTGAGCTACAATGTAGGACCTGCTCATTTTGAGGGTGAGGTCCAAGTCCCTGTT +GTTTTAGACAGGGAATATAAAGAACCACTTAAAACCAATTGGGCGGAAGAGTTGAAATGA +AGTTTACCGATTTCTTAAATGAAGCGATGGAACCTGCAACCTTTCGTCATGTTGCAGTCA +CATGCACCGAAGAAGAATTTAAGCAATCTGTTGCTAAGATCAACTGGTTATCAGGCGAGC +TTGTAGATGGAATCGCTAAGTTTTTCGGACCAGAATGTGACATCGATGATTGGATAAAAT +CAAACAGGAATTTTGTAAAATGAAAACATATCAGGAATTTATTGCTGAAGCCAAGGGGCA +TGATGAACTGCCTATAGTTACTAAAACTATAGACGGAACATTAGCGGATTTCAAATCTCT +GCCATCCGACAAACTTCAAAAGCTAACTGCGTCAGTTTCTGAGACTAAAGGTAAAGTGAC +CTTCTCTGCACGAGGTTCTGCGAATCTCAAGAAACTTCTGAAAGCAGTAGGAGCTTAACC +GTTTACTTTCCTTGAGGGCTATGATACTATAGCCCTATCAACAACAAGGAGAATAAAATG +AAACGCTGTGAAATCATTGGAAACATCACTACCGTAGTAACTCTAGGACTTCTCGGAACT +GCCATCGTCGGATGGCCATTCCTTAAAGCTCCTGAACTCATCACCATTATGCTATCAGCA +ATTAGCACTGGTGCTATCTCATTCGTCATGGATAAAATTGCAAATGAAAAATCTCGATAA +ATTCAAGCAGTACATGAAAGATCACTACGACTTCGATAAAGAAGAGTTGACTGTTTGGGA +CTATGCTGCTGGAGGCGTAGCAGTTGGTCTGGTACTCGCAATGATTAAAAGTATCACGCT +AGTTGTTGGTGTTGCACTGTTTATTGGGCACATGACCTTTAAGAAGTAATTTGCTTAAAG +AGATCAGTGTTATTATTAATCTATCAACTAAACATAAGGATTTACATGAAACGCATTGTA +CTGAATATTGATCACGACGCTACCTTTGAACACTGCCATGGCAAAGTTCCTCCTGTTCTG +CGTTTTGTAGTCCGCACTGATAAAATTTGCGGTTGCCAAGAAATGCTAGATGGTTCAGTT +AAGGTAACAATTGATCAAGGCGAAACTCGTCCGATTATGTCAGTAGTTGTACTGGAAAGT +TTTGAACAAGTTTCATCTGCGATGATGGAGTGAATCCATGAGTGAGAAAAATAAAATGAC +AATAACTCAAGCTTTGAATGAACTGCTTTCGAAAATCGGAAGCATTAAAGCTCATGATTA +TTACTCCGCTTCAAGCGGTATAAATATTCCTATGATCCTGGTAACTCCAAAGTTTACCAA +GACTGATTCAATGGGAAATGAAATTTTTCCAGGAGATATCGTAGCGTTCACTACAACTGG +TAAATCTCCTGGTTCGCAGGTTGGTATTCTGCTTGGTTTCACTGAACAGGGTTATCGAGT +ACTTCCATTCAATACTTCGAGAAATAATCCTGAATGGCGAACTTTAACTCGCGGTATTAA +TACACCATATGATGTATGGTTAGTGAAATCTAAAGGATCTGTCATAATTTAAGAATTTGG +GGAGTTATTTCCGTAGAGGTAGCGGGGCAGACTGTAAATCTGTTGCTCATAGAGCTCGGG +TGGTTCGACTCCATCACTCCCCACCAATTCAGGGTTGCTAGCTCAGTTGGTTAGAGCACC +GGACTTTTAATCCGGGTGTCCGAAGTTCGAATCTTCGGCAACCCACCAAATAAGGTCAGT +TGGCTGAGAGGGTAAGCGGCGGACTGTTAATCCGCGTCGGTAACGACAAGGCAGGTTCGA +TACCTGCACTGACCGCCAAATTAATATTCTTTCTAGACGATGCTGCTTGCAGCCTCCGGA +TTGGGAACGCGAACCATTCTGGACCAGTCGTGACAACGCATCTTATTGCAAGAGAGAATA +TTGATATGGTGAATAAATAATAAAAATAACAAAGAGGATTACCATGAAAACTTACAAAGA +ATTTATTGCTGAAAGCAAATGGTCTGGATTTAATTTCTCTTATGAGATTAAAGGTGACAT +GAAAGCTTTCAAGAAAGTATCAAATGCTGACCTTCAGCGATATCAAGCTGGCATCGAAAA +AACTAAAGATGGACGAGTTATTGTTCATTCTGTTGAAAATAATGGTGCCGAAATGGTCGT +TTATAAAGCGTTTGGTACAGACTCTGAGCAAATGAGAGATCTAAAAGCTCAGCAACGCAA +GCAATACTAATAGAAATCCATGAGACGTCATGGAAAGTCGTCCGAAACTTGTTCCCTGCT +ATGTGGTCCCTCACTGAAACCTACATATTACAGCAAGAAGGGCGCACCTAATTTGGACCT +ATAGTTTCAGCGGTTAAAATACTCGCCTGTCACGTGAGAGTCACGGGTTCGAATCCCGTT +AGGTCCGCCAAATACGAGGCAGTTCTTGAAGATGAGTTAGAGTCCTGTAAGTAAATGCCG +AGGACGAAGTAAGTTGTTCCCACGGGATAAGCTCTATATTCGGAAGATTATACCTTTAGC +GTGTTCTACATCGAGCTACTTGTTTGTACTTCAAGAATCCGGATAGATGCGGGTTAACTT +CAGTTGGTAGAATGACGGGTTCATATCCCGTTACGCGATGGTTCGAGTCCATCACCCGCC +TCCAAACAATTTGGGGTATAGCCAAGTTGGTACGGCAGTAGATTTTGATTCTACGATTCC +CTGGTTCGAGTCCAGGTACCCCAGCCAAATTAATATTCTTTCTAGACGATAAACGGAAGG +CGCGCTCTGTGAAATCGTGACAAACGTTAGAGGCACCTGGAACTGACCGGGGTCCAGTGA +GAGAATATTGATGTGGCCGTAGTTCAGTTGGTAGAACTCGAGATTGTGATTCTCGTAGTC +ATGGGTTCAACTCCCATCGGTCACCCCAATTCGGAAGCGTGGTAGAGTTGGTTTATTACA +CCGGTCTTGAAAACCGGAGGCCGTAGTGATACGGTCCGTGGGTTCGAATCCCACCGCTTC +CTCCATTTCATAAGCTATTTAACTTCAGTTCAAGTATTCTATACAGTTTACATGGTATAA +ATATCAGTATATAATAACCTCAATTCAATAAAAGGATATACCATGAAAAGTTTAAAAGAA +TTTTTAGCAGAAAACACTGCTCAGACTTTAAACGAAGATGCAGACGATGCACTCTTTAAT +CAGATTTCTAAAGCTATGGACATTATTCGTGTCGGCAGTAAGCTCCGTAAAGCTATGGAA +GCTTATGAAAAATCTGGTGATAAAGACGCTTTAGCCGCTATCAAGAAAGCTAAATCAGCA +CTTGATGCTGCAAGTGAAGCTCTGGTAAATGTTTCTTCTTCACTGCCTAAATAATTCGGT +TAGTTGAACCCCTCTGCATCCATCGTATAGCGGCTATTATGACTGGCTTCCACCCAGTAG +ATGAGAGTTCGATTCTCTCTGGATGCTCCAAATTAATCAGTACGTAGCGCAGTCTGGTAG +CGTAGGAGCTTTGGATGCTTCGGGTCGTAGGTTCGAATCCTACCGTACTGACCAATTTAG +TTCTTGTAGCTCAGTGGATAGAGCAACGGTCTTCTAAACCGTGGGTCGTTGGTTCAAATC +CAACCAGGAACACCATTTCGGCCCCTTAGCTCAGTTGGTTAGAGCAGTCGACTCATAATC +GATTGGTCGCTGGTTCGAGTCCAGCAGGGGCCACCAAACAAATGAGGAAAATATTATGCT +TTACTACGAAACCACCTATGACTTAGATAAGTCCCCTCCAAAGCGTATGTCACGCAAATC +ATTCCAAGAACGTTATGGTAGACCATTTAGCTTTAAAGAATTTAATGTAAAATTTGAACA +TGCTCATGTAACAGACGATCCTTACTATAGTGATGTTACGGGTGAAACAAAACCTTTAGT +AGAAGGTTCCCTTGAAGATTTAATTATTGCAGATTATGACGCAGGACAAGAAATATGACA +ACTTTTTACGGCCGTGGTGCTTCAATCATTAAGATCGACAAAGATTATCAAGGCGACTAT +TATAAATTTCGCCCACACCGAGATTTCATGCTAAATTCAGATTACGTATTCACATACGAT +GAAAAGTCTCGCACTTGGAAGTTTTTGAAATATCGTTTTACTATGGAAGGTTTCAACTCT +GAGTTTTTTAACGTAGAGCAACTTCTTAGTTATGTGTATAAAGATATTTGTCTTTGCTCT +AGATTCCATCCAGCTAAAGATACATTCGGAATTATCAATGTAATTCGTAGCGAAAAAGCT +GCTCTTAAAATGATGAAAATGTGGAGAAATTATAATGCTTAATTTTGGTCAAGTAATTCC +CGCTGGTTACGCGATTCAAATTGAAAGTTGGGAAAACGATGGTGATGATTATAACCGTCA +GTATTTCTATGGTCTAACAAAAGCCGATATTGAGCAATTCGCTCATGTTCTGCCTTTATT +TAAAAGCTGTCATGGCTGGAAAGAATCTGGCTTAGGGAACAAAGAATTCTCTGAAGTTGC +CGAAGAACTTGGATATTCCTATGCTGAGCTTCTCCGTGATGGTAAGATCAATCTAGAATT +CGCCAATGATTATCTTGGGTTTAATCCTGATAAAGTTGACTTGCTTTTCCAAGATTGGGA +AGAAGAATTTGAAGAAATGATTGCCGAATGGGCAGAAAAACGCATCGGTCTTCCTCGCAT +TCAAGAAATTCTTGGGTATTCGGATTCTTATGATGATTTTGTCCGAGTATTTGAAAGCGC +TAAAGTTATGTTCTTTGAAAAAGAACTCCGTATTCCTGCTGTTAAATTTGAGAAATTGCT +ATGAACATTAAACAAGACTTTAAGAAAGCTGTCCGTCACTGGATTCGCACTCGTTTCCAT +TCAATTAAAGCGTCAACCAACGGCACGACATACGATCGTTATGATACAAAATATGGATTG +CATAAATGCTATACACAGTCAAAATAAATTCCGGCTATGTTAGCGGTGAAATCTTTAACC +TAGCCATCACCCATAAACAAATTATTCTTGATTTGTTGGCTATTAAAGATGACGAAGACC +GTTTTATAGAACTTCGCATTTATTTTAATAGCATTAACCCCGGGCTTCAAGCTTACTTTA +TACGTAAGATTGGCGCCGTTCAAATAGACGAATACTCGTATGAGGCCTAATGTGGATATT +TCATCTGGCTGCGGATACCCTGCTTCTTCGTTAAGCAATTTCGCTCCTCATGGATTTGAA +ATTGATGGTGTACAATGTGCTTCAATGGAGGGGTTCCTGCAATCCCTCAAATTCTCATCA +ATTGAGATGCAAGAACATGTATGCACATTAGTTGGAAAATCTGCGAAGTTCAAGGGTAAG +AAAAAACGTTGGTGGCCTACTCAAACTCTTTATTGGAAAGGCGTACCAATTCATCGTTCA +TCAGAAGCTTATCAAAATCTTTTGACAAAAGCATATGATGCATTAGCTTTAAATGAAGGA +TTCCGAAGAGCATTATTAGCAACTCGTAATGCTACCTTAACCCACTCCATGGGCAAGAAT +AAAGAATCTGAAACAGTGTTGACAGAACGAGAATTCTGTGGACAACTACATCGTGTACGT +GAATTGATGAAATGATTTTTGCCCTGGGTATCTTAGTGATGTCCAGGGCATTTTTGTTTG +TTCCTAATATGATTTATCATCCTTCCCAAGAAAGTTCCTCTCAACCGTTCTGGTGAATTC +ATTTAGCCGTTTACATTCATTAAAGACTATGGTATAGTATAAACTCAATCAACAACAGAG +ATCATTATGAAACGTACAAAAGTGATTAACAAGTCTCGTTTTCGAAAGGGATTTGTTTTA +GCATCAGTGGTAGCTTCGACATTTGCTTTATCTGGGTGTGAAGTTGCTGATCAAACAGTT +AAGATGTATCAAACTGTAGATGAGTGCTCTACTATTGAAGCAACGGAGTATCAGTCCGCT +GAACAATGTAAAGCGTCGTTTGAAGCTGCTAAGGCTGAGCATGAGAAATCCGCCCCAAAG +TTCCAGGGCTACAACGACTGTTCAGCGGAGTTTGGTAATTGCAATTATGATTCTAGTACT +GGATCATTTATGCCTGCTATGATGGGCTTTATGGTTGGTCAGATGATGGGCAACATGCAA +GCGAACTCAAACTTCCGAGCATCTCAGCCTATGTACAATAATCCGAATGGTGGTTACAAA +GACATCTCGGGTAAAAGTTACTCAAACATTAAGCCAGGTAAACCTTTCCAGGTTACTCGT +TCCGCAATGAGTTCAAAACCTGCTTCGACAATACAAGCCCGTTCTACTACTTCATCTCGC +GGTGGATTTGGTGGATCGGCACGTTCTGGAAGTTTTGGCGGATAATTTTTCTTCAGCCGT +TTACATTGGTATGAGATGATGATACTATTACCTCATACCAAACAAACGGTAAAACTTAAA +TCGGAGAATAAAATCATGGCTAAATTCAATACTGTTACTATCGTTGAAATTACTGATAAC +TTTGGTGAATTTGATCGTTTCCGCGCAGTTCTTAACAGAGGTGAACATGAGTATCATGTA +ACAGCTATAGTTGAATCTAAGAAAATGGCTTCAATCGTAGAATATGTTAATTCTCATTGG +CCTACAGCTGAAGTTATTTTTGGCGAAAAGATTTAATGCTTTAAAACAAGAGTATAAAAT +AATCTCTCTTACACTGAAAGGAAATACTATGTCTATTCTGAAAAAACTGGTTGAATTCAT +TCGTTCTAAACTGGGTACCTTCGTTGCTCGTAACACTACAATCGAAGATCAGTACACTCG +TGCGGCGAACTCGATCATTGATGAAATTCACAAACTGCGCACTCGCTACGTAACTGCTGA +ACGTGAAATCAAAGCAAAACGCGATCTGGCGACTGAAAATGATGCTAAGGCTGAATCTAA +AGAAAAAGAAATTCGTCACATCATGGCCAATAACCCGGCACAAGATGTAACAACCCTGGC +TAAACTCGGTCTTCTGTATCGTCGAACCGCCGCAGCTCTTCGTGGTAAAGCTCAAGAGCT +GGAAGAAATGAAACTGGAAATTACAAAAACAGTAGTTGCTCTGGACGATCAGCGCCAAGA +TCTGGCTGTGAAACTTGAGTACATCCGCGAAACTCAGAAAGCTAACTCTATGGGTCTGGA +CACTGGCGCCGACATCATCGAATCTGCTGAACTGGCTAAAGTAGATGTTCAAACGATCAT +CTCTCGTATCGATACCTTCAATACTACACCTGCTGGCGTTGAGACAACCTCTGCTGATGT +AGCGGAATATCTGGAATCTCTGAAGTAATATAAACGGGGCCTTCGGGCCCCAATCGGATA +ATAGAGGAGAATATCATGTTTTATGAAATCGGTGCAAAAAATAAAGCTGATCCCAAAAAA +TTAGAAGAAAAAGACGTTGTTCCAGTAATACATGAACGTATTCGTAGACAACTTCTGAAG +GGCGGAGTTCCAGGGCATTTAATTGATAAGCTAGCGCCTCATACATTTCCAATGAGCCTC +GATTATGAGATTCATTCAGATCAATTTAAACGAATTAAATGCGGTCATCTTTTAGTTCGT +ACATGGTCTGATTTGCGAAACTTTCTTAGCACTCTTCAATCTGAATGCTATTCTATTCGT +TATACTGGAGTGTATGGTTTTTATTATAGCGACATTTCAATCAATAACTTGGACCGTGAA +ATCTCCATCCCGGGAATGACTTTATTTGAAGCAGCAGGTCACTATGCATTTAACTTAGAA +TGTAAAACTTCTACATTCGGTAAACCATTTCGTATTTCGGTTCTTATTGACCGTGAACGC +GCAACTAAACGCAACGGTTTTGAAAAGAAATATGACCTTGAATGCTACTCATCTGAACGA +GCAGAACGTGTCAACGCTATAGCTAAATTTATTTCTAACTATAATAAAACAGATGCGGTT +GATACGAATCTTGACGATTTCATTAATTTATGTCGTGATGAATTAAAGGTGAAAGAATGA +AATTAGCAAATTACTGTTTTGGACATGGTCTTGCTCGTTATGATGTATGGCCAAAATCTC +CGAGTTATTCTTTAGGTTGGTGGTTTCATTCTTTTATGATTGGATTGATGCTAATTTTTA +TCTCTCTCCCAGCTTCAATGATGTATGTAAAAGAAACTGACCGAGTTATATCTGATATAA +ATGTTGTGTTAATTGCTCTTGTAACCGCTGTTGTAACGGTATTTGTTCCGCATATTACTT +ATTTGACTTATTTTTACTTAAAACGATTGAATTATAATGTCCAAGTATTCGTCCATAACT +TGGACTATAAGAAAGAAAAGAAACGAGAGGCTCTTGAAGCTGAACTACAAGCAGCTAGAG +TAGCTCAGAATAAAAAGACTCGTGAGGCTATGGAATTCGTTATGGAGATGCGGAAATGAA +ATACTACCGTCCTGGACCATCTTATTTGTACCAAGATTCTGAAGATGGAATCGCATTGGT +ATTAATAGCATGTGCTATAGTTTCTATGATATCATCTGTTGTGGTAATGTTTATTTGGGG +CGCTATGCATAACATGGATTCTCCGAATTCAGAAACCGTTGAATGGATGGTAAAGGGATT +TGTAATATCATTCATTGCTTCATATATTTTTGGTAATGGTGAACGTCATTTGAATAATAT +TATGGCTGCAAGGAAGTGGCGCAAAGAAGAAAGGGAAAGACTCGCTGATATCACAGCGAA +AAATAAAGTAAACGAGCATAAACAACTGTTGAAATTTATTGAGAATTGTAAAAATGAAAA +ATGATGCTATGAAGAAAATCCATGACATTCTGGATGAAATGCGTGCCGCCGAAGATGAAA +TTTTAGGTCGTGCTGAAGCTGTAGCAGATGAATTCTGTGAATCATTCTCTTATGGAGAAT +ATGGCTCAGGTCGTACATATTTCCCTAAAGGTACTGATGCTGATAGTATCCCTTGGGATT +TTGAAGACGAAGCTAATATTGAAGATGGAAAATCTACTGTTGGCATTTGGGTTTCTTCGA +GTGAGATGTGCTAATGGATAATGTAGATAAGAAAGAAATTAAAGTAGCTCTTGAAAAACA +CGTCCAGGATGCTATTGATTATGCTAAGAAAATAGCAGATAAGTATGATTTGACTTTTAA +TATGTATCCGGCTTATGGTATGGGTGGAAGCTATTATTCTCCAGGCTACTTAAAACAAGA +CTTGGAACATCATCAGTCTAATGGTTATCCACAATTTGCAATTGTTAACCAATATGAATA +CTACACCAGCCTCGAAAATGGTGGTTGGGTCTCTTCTTCTATGGAATGCTAAGGAAAAAT +TATGTCTCGCTCTAATGAATTGATGGAAAAAGCTGCTGCACTGTCTAAACTGTTCAACGA +AGTTGCTGAACTTGCAAAAGACAATGATTACGGTCTGAAATTTGATACTTCCGATGGTAC +AATGGAATTCAATGACTGGTTGTCTTCATCTTGTTTTGGTGAAGGTGATGATGGCTTCGG +TGTCAATGCAGATGGTAGCATCTGGCAAAGTAGTAGTTGCTAATGAAAGCCCTTCAGGGC +TTTTGTTGTTTTGTACTCTGCCGTTTACATTCATTGGTAGATGTGTTATGATAGACTCGT +AATCAACTAGGAGGAAAAATGAATATATTTGTTAAAGACGGATATCTAAGTTGGGAAAAT +GATTATGGCACTTATGATTGTTGCTCGGTAGTTTACTTGCAAAGAGTGTACAATGACCCA +AATTATAAGCCGTCTTTAAGATCCGCTGCTTGGATTTTAGACCAAATGAAGATTGATGTA +AAAGTCAATGCTTGCGGAGTGTGGGACCATGATAATCGAGATTGGAAAGACGTTAAAGAA +AACTCTCCAATAGATATCGAACTTTTTATTATGCAGTGCAGAATGAGGAAAACGGAATGT +TGCTAGTTATTGGTTCTCGTGCTTTACACCATCATGGTTTAATTGAATCTCGTGATATCA +AAAATTCTGATTGGGACTTCATTGCTGATGCTGGTGAATGGGAAGCATTTAAAGGCCAAA +TGTTTGGAGCCAAAGTCGAAGTATCAAATCCAAACGTCAGCGCGTTTAAATGTATGCACA +ATGGTCGTGAGACTCATTTTGAAGCTTATATTGTTCATCGGTCGCCGAATGACCAACCAA +AAGATTCGAGTGAGTTACTTCTCGAATATGCGGAAGGTAACTGCAAATTCGACCGTTTGA +CTGGGTTTCGTTGGGCCAATCCGAACATGTGCTTGGCTATTAAGCTGTCTCATCGTTACA +AGAAGAACAATCCACACTTCCGTAAAACGATGCAACACATCCGATTCCTGCGTAATAAAG +ACGTGCGTCTTACTGAATATTTGATGGACATCAGCAAGCTGCGTGAAAAGGAAACTTTGA +GTTATGCTCATCCGGTATTGGATACAACAAAAGATAAATTCTTCAAAGATGATATCTACA +CCTACGACCATGATACAATTCATGAAGCGGTTGCATTGATGGATCGTCCAGCTTATACTT +TCTACATGAAAGATGGGTCTCAGGTAATGACTGATAAGAAGAAATTCTTTGAATTGCCTA +AAGAAATCCAATTGGCTGGTGTGTACGAAGAAACTTGCGTATTAGCGTTGGAACGTTCTC +AAATTCCAAACGATTTCAAAAATGTTTCATCTGAACATTCCTTTATGATGGCTTTGGAGA +AGGTTTGTACTTCAATCACGAGCGGTTGGTTCCGCGAATATGCTTGGGAAAACTACCACA +CAATCGTTGCGATGTACAAGAAGCTTGGTGTTAATGATTACATAAAACGCTTTAAAGAAA +ACCAAGATCTCCTGAAACCTTTCACTCGAGGCGAAAATGCATAAGCCATTTGAAGAAAAA +GTATGTCCAGGCTGTGGTAAAGTTTTTAGAATAAGAGCTGGTGTAGGACAACATATTTTT +GCTGAACATATTCGTTATTGTAATGCATATAAAATTTCATGGCAATATCCGCGTTAATTG +AGGAAAATATTATGAATAGCCCAAAAACTTTTGATTCACAAGTACGATTCCGCGGCGAGT +TTGCTGAAGATATCACGAATGATCAAATCAAGAACGAAACGATGTTCTTTAACTCTGATT +TGAATTTTGCTTGGGATAAAGGCGGCCCGATTACTCGTAGCTTTATTGATAATCTTCCTT +TTGATTGGACTAATAGAGATGTGGTATTCGATTCTCGAGTGCACATGTTGATGCCAGGTT +GGTATCCTGCTATTCCTGGTTATCACCACGATGATGTTCCACGTCCTGATATTCCAGTAG +GACAACATTTTATTACTGCTGGACAACCTGATTACGATAATCCTCGTTACCATTCTGAGC +ATATTCTTGGATTGGTTAATGCTGATATTTGTCCTACACATTTTGCTTATGGCGAAGACG +TTACATTTAGCCAAATCCCTGAAGGCGAATTGATTTACCGTCAATGGCACAAAGAAGTTC +TTCAGAAAATTGAATCTGGAGAAATGGTAAAACTTGAAGCCCCGGATCGTACTTTATGTG +AATTCAACTGGCAGTCTTTCCACACTGGGTCCATGGCTGTCGGTAACGGATGGCGTTGGT +TTGGTCGAGTATCTCGTAACACCGACCGTGTTAAAAAGATCACTAACGAAATTCGAGTAA +ACGCTCAGGTTTATCTGGAATTCCCAATGGAGGGATGGTAATGGGATACTCAGCTATGAA +TGTTAGTGAATATACAAAAGAGAATTTCCGTAAACTCTGGAACGAAAAGTACGCTGATAG +AATTTGCTGGATTGTACTAGTTTTAACTGTTATTGTGATTAATGGATTAGTCATTTATTT +CAATGGAATAATTATTGAGCTTATTTTTAGTGTTCCTTTGAGCGGTTTCGCCGGTATGAT +TGTAGCTTGTATTAATGACGAAATACTTAAGGCGATTTATATTAGAATTAAAATCAGACG +TAAACTTAAGGCTGATGCTAAACAAAAAGAAGCTGAAAGCTTTAGCAATTTCATAAATTC +ATGTAGGATTAAACGATGAACTATTTAGAAGAACTTCGTGCTCGTTTAGATGAAAATGAC +ATACCATGGGAAATAGTAAAAGATCCTTATGTTCGAATTTTAACAAACAACGGATATTTT +ACTCTGACATTATGTTCAGCTGTTGGTGAAAACGTAGATGAGCTTGTTCAAACATGTCGT +GCATATGCTAAGGTCTGGATTTACTCTGTTGACCCGTTAGCTGTTTCTGGTAAATTAATG +ATGCGTTTTGCGTGGGTGAAATAATGAGTGTAGCAATTTATGTAGAATCAGAATCTGGAG +ACGAGTATCTTTACTCGTTTGGTGATGGCGAAAGCGAAGAAGCTATTAAAGACGAATTGG +AACGTCAAATGGAAATGTTTTCTCCGATGTGTAATTACATGATCTCTATTTCATCTGGAA +CTTCGCCTTCTGTTGACACCCGATTGGAAGAGTTTATGTCTGAACTTTTTGATAAATCGT +GGAAATTTGAGAGGGAAAATGTCTAAGAAAAAAGAACTATCAGCTGGAATTTTATTCTTC +ACCAAAGATTCTCGTCTTTTCATGGGTCGAGTGACTAACTCTGGTTTAGGTGGTGGCCCG +TCTCGTTGGGATATTCCAAAGGGTCATGTAGAAGAAGGTGAAACTCCTAAGCAAGCAGCT +ATCCGTGAATGCCAAGAAGAAACTGGATTCACTGATTACGACCAGGGTTTACTCTTTGAC +CTAGGCCAACATGACTACGCAAGCAATAAAGATATACATCTGTTCGGATATCCTGTCCCT +ATGGATCACTCGCAATTCAAGAATTGTATTTGCACAGCATATCATACTGCAGAAGACGGA +ACGACATTTCCTGAAATCGATGCATTTGCTCTGATTAAGCCAGAACAGTGGAGTTATGTT +ATGGGTCCATCGCTGTTCAATGTAATGCAGAAACTTTACCCGGCACTAGCCAAACGATAA +ATACTCCTATCAAACGATAGGAGACGACATGAACATTTTTGAAATGCTTCGTATCGATGA +AGGATACGATTCTAAAATCTATAAAGATACACGCGGATATTACACTATTGGGATTGGTCA +CCTTTTGACTAAAGACCCGTCTTTGGCTGTTGCTAAAGCTGCCTTAGATAAATTGGTTGG +TCGTAAGTGTGACGGTGTAATCACTAAGGCCGAAGCTGAAAAAATCTTCGCTAAAGACGT +CGATGATGTTGTAGCTGGTATTCAACGTAACGCTTTACTGAAATCAGTTTATGATTCTCT +TAATGGAGATGATCCGCGTCAAGCAGCATTGATGAACATGGTTTTTCAGATGGGTGTAAC +CGGAGTTGCTGGATTCACTAACTCAATGGCTCTGATTAAATCTAAACAGTGGGATAAAGC +CGCTATCAATTTAGCCCAGTCTAAATGGTACAAACAAACCACCAATCGTGCTAAACGCGT +TATTGCAACATTTAAAACAGGAACATGGGCTGCATATGAAAACCTATAAAGAATTTTTGA +CAGAATCTCAATCTAAGATTGAAGAAAACACTCCGATTGTAGAAGCAACAATCAAAGATA +AAGAAGGTTCAACAAACTTCTCTTTGGTATCAGGAAAAGACGGCACTTTCTTCCAGATCG +GTTCTGAACGCTTCCAGACTTCTAAATTGCAGGATGCTGCAGTAGCTAAAGTTCTGCGTG +GCGGCGGTAAATGGAAAGGAACTGAAGGATCTACACAAATTGGTATTGCTGTAGATAATA +ATAGCGCATTTTTCCGTATCGGCGGTGAGTCTTTTACTCTGAGTTCTAAAGCATTCAAAG +AACTTAAGGCTGCGTTTAAGTAATGTTGTACATCTCCTGTGGTTGTGTTACTATACCTAT +ACTGACACAGGAGAACTAAATGACTCGTATTAACTTAACTCTAGTATCTGAACTGGCTGA +CCAGCATCTTATGGCTGAGTATCGCGAACTACCTCGTATCTTTGGTGCAGTTCGCCATCG +TATTGCTAAAGGCCAAGGCTTCAAGGATATTCCTAAAGATTTCTGTCTGGGCGCTGGACA +TGTTAAATTCTTCTATGACAAAATTGGCTTCTTGTTCTTACGCCAAAATGCAATCATCGA +GGAACTTCTAAAACGCGGATTTAAAATTGCAAATACTCATGTCAACGTAGCTGACATTCC +TCTGTACTTAATGAATGACTTTGTTCCTTCTGCCGAGGATATTGCTTTAAGCCAAGCTCG +ACTGGATGAAAAAATTGCTCAAAGGCCTTTGTGGTATAAACATTATGGTAAGGCTATATA +CAAATAATAAAGGCACGCCGACCCTCTCCTATGAACAATGTTCCTCTAGTGAAGGACCTT +TTCCCACCTGTAATAAGGTCGAGCCCGAGTGCGGTAAGGGGTTTACATTCGGTGAAGGCA +AGGAGCCCAATTCGATTCTAGGAAGGAAAATGGACTACCACGTGCCATGGAATGGCCCTC +AACTAATCAGGAAATAAAATGCAATATTTAACATACCCGTATCTAACCTTGATGCATGCT +TTCAAAGACCGTGCATTCGAACGCTTAGATCCACATAATGATTATTGGAAGTGCTTAACC +CCGATGTCTCGAGTTTCTGAATTTGGAACTCTTCGCCTAGACGGTGGACGACAAACCGGT +AAATCTGAAGCTGCTGCTTTATTCGCTGCCGATTGGCTTCATGATGGTAACGACGTAATT +GTTATTTCAACCAAAGCCGCTCAATCCAGAGAGCTCAAAGAACGAATTGAACGTAAAGCA +AAAGGCATTCAGCGAATAGATTCTAATCTCCGTGGATTTTGTGTACATGATACCATCAGA +AGTTTTCTGGATGAAGACTTTAATAAGTACAGAGGTCTTTCGCTTACACGAGCATTGATT +ATAATTGACGAACCAATGAAGATGCCTGATGTTAAGAAGTTCTATGAATCATACTTCTAT +CTGGCTAATCACTGTTTATGCCAAGGCGATAAACCTTTACCTCTTTTCTTTGTGATGGGA +ATGCAATGATGAAATTTATGTTTATGGATGGTCCCTTTCGGGGAATGGTAGTTCGTACTA +AAGCTACTAAAGCTGAATTGAATTCTGTTCCAGATATTCCTATCGAATTTGTAACCGGGC +CTTTTGAAGGCCTGATTACTCGTAGCTTGATTTGCTATGATCGAACAATGATCGAAGCAA +GAGAGCTTAAAATGCGCCCAGGTTCTCAGGGCTCCTATAATTGTACTTTGGACATATCTT +ATAATGGCTAAAATTGTAATTGAATGTGCTGACCATCTGGTTAAAACTTTCTGTGGATGG +TTCAGTAACCAAGGCGAGCAGGATCTTTTTGAAGCTCACTCAAATGGTAAGTGGAATGAA +GAAATCCAAAAGTGGGAAGAGCAAACGACGTATTTGGCTACTGAGGGTTATGGTATTAAT +GAGCCTATTCGCTTAGTTGAATATGATAAAGAGACCGACGAGAGAGTCCCTTATTTCGAC +GGTGAGAAGCTTAGTGCCATACAGGCGATGGTCCCTAACACAGGTCAAATCTTTGAATTG +AGGATGCCGAAATGATTGAAGATATTAAAGGCTATAAGCCACACACCGATGATAAAATCG +GTAAAGTGAATTGTATCAAAGATGCTGAAGTTCGTCTTGGTTTGATCTTTAAAGCACTAG +AAGAAGAACATGTAGCTGCGTACATGGCTCTTGATGTAGACACTATGAGCGATGAAGAAT +TCGATTTAGCTCATTCTAGAATCACTCAGATTCGTAATGCAATTGATCGTCTGAAAGAAG +CTAGTATGTGGGCATGCCGTTCTGTTTTCCAACCTGAAGAGAAATACTAATGAATGATTT +GATCCAAGCTTTACTCCGTGTAGAAGATGAGTGTGCTGGCATTTTATTAATGGCTAAGTT +TGATCCATTTGGTCAAACTGGAATTGATGAAATCAGAGAAATTCATCGTAACACTGTAAA +ATCTTTAAAAGCATTAGTTCAGCTTAAAACTGAAGAAATCTAACCGTTTACATCTCCTGC +AAGATGTGTTACTATGATCTTACACTTACAGGAGAAACAAAATGAACACAACTGAAGTAT +ACATCCGCAGAAACAAACTCCGTCGTCTCTTTGAAACGGAGTTTCATAAAATCAATGCTA +AAATCAAAGACGCATCGAAAGCAGCTGGGGTTCCTGGCTTCCATTTGAAGTACTCTCAGC +ATTTGCTTGACCGTGCTATTCAACGTGAAATCGATGAAAATTACGTCTTTGAACTTTTCC +ACAAGCTTTCGAATCATGTGGTTGAAGTAAACGCTTTCCTGGAACTCCCTGAGCGCCCGG +ACGTTGAAGAAGATCTGGATCCGAACATCGAGTATCGCCCTCTCCGTCTTGAAATCACTG +ACCAAAAGTTGTGGTTAGGATTTACAGTGTCAAAACCAGTTCCAGGTAAAACGTTTTCTA +CTCCATACACACTGAATTGTCGTATGGCTTTCATCAACACAAATCGTCATGAAGGAAAAA +TTAGTAAAACTGTAATCAATCTATGAGGTAAACATGAAAAAAGCTCTATGCGCAGGTCTC +TTGGCCTTCTGTTCAATGGCCTATGGGTCCGAGCACAACTTCAGTAATGTCCAACTCGAA +AATCTGAATTATGCGTATCAGTTTGGAGAGCAATTTGCAAAGGATGGCAAATACAAGACA +CAAGAAAAGCTATATGACAACAAAGGCCTAGGCTATGTTATGGCTGCTTTACTTTGGCAA +GAATCTTCCGCTGGGTTAAAAACCAAAGGAAAGTCAGGTCATCAGGCGTATGGAATGTTC +CAGAATTATTTGCCGACAATGAGAAATCGAGTCGCCGAAATAGGATGGAAAATGACTGAT +GCTGAAATTATTAGAATGTTGAATAAACGATCCAATTCCGCTTCATGGGCGTATATTGAA +CTTTCTTATTGGTTAAATAGACATAATGGTGATATGCGAAAAGCAATTGCCAGTTACAAT +GCCGGGAACAACTGGAAATCTGGAAACAAATATGCCAGTCAAGTCCTAGAAAAAGCATAT +TACCTGAAGTCAAATAAACTTTTACATATTGAGGTAGAATAATGCAGAAACTAGCTCTGG +TACTTGGTCTACTGATTTCATCTGGTGCTTATGCATCAGGTGGATCACTGGAGGAATCTC +TTAAAATTGCTAAATCATTTTGTGCGACAAACACAGAATGTATTGACATCTTGTCATTGC +AATTAGATGGAGCATATGAAGACGGAGTCAGAGCATCTAAGTCTAAGGTAGAGTGGAATG +TACTCATGAACCGAAAGACTAAGCAATTGAATAACCTTTGCGATAAAGCACCTAACGTTG +AAATCTGTCTGGATTATAGAAATCGTTTGATGGAGCAATATATGAAAGGGTTAACGGAGT +GAAAAAATATTTATGTTTGTTGATGGTTCCGTTTGTGTTGAACGCCTGGGATATTCTCCC +AGGCTATCCTGAAAAGATACTTGCGGTCCAAGGAAAACAAATTGAAACAAGCGGCTCATT +TAAACGAAACGTTGAGTTAATCTTCGTTCCAAGTAAAGAGCTTTTAGGGATATCGTTTTA +TAATTATAAAGATAAGGGTGACCAAGTAACAATTCCTTATGGCACCTACAATATCAGAGG +TTGTGAAATGAAAGCTTCTGGTGAAATTGAAGGACCTTACTTTGTCTCTTCGCTGAACAA +TTACAACATCAGCAAAAAGATTATTCGATCATGCTCTACGTTCTTCATTAGAGTATACGA +TCAGACCGATAACTACTCAACTTACGTGGTGGAAAATGATTAAGCAATATATCAAAGGTG +ATATTGTAAAGATCTTCTTGGAAGGTCATAATGTGGCTCACGGTTGTAACTGCTTCCATA +CAATGGGTGGAGTGGCTGGACAATTAGCGAAGGCTTATCCTCCGATTCTAGCAATTGATG +TTCATGAAACTGAGCATGCAGATCCGGATAAATTAGGGAATTATACCCAAGCCACAGGTG +AACGCGGACAAATTTGCTTTAATCTTTATACTCAATATATGCCCGGTAATAATTTAGATT +ATGGCGCATTGCTATATGCTTTCCAAAGTCTAAATTATTGGGCTAGTAAACGCCAGGAAA +CTCCTACGGTTTATATTCCACGTATTGGAGCTGGTATTGCCGGTGGTGATTGGGAAAAGA +TTAAGACTATTATCGATTGGTTCACTCCTGATGTTGATATTATCGTTGTCGATTGGGATG +GTGAATAATGACACATTATATCCATCCGTTTGACCCGAAGAATAAAGCTAATATTCATCG +GCGATGGATTGAAACCAGAAAAACTAAATGTCCAATCGATAGCCCGCATAACGTAGACCG +TTGGTATATTGGTGAATATGTCGAGTACACTTTTATTGATAAAAAGAAACGTGTACAATA +TGTTGAAGAATATTGTCTAAGGATTAAATGGTTATGATGTCCAAAGAAGAACGCGAGCGT +ATTATTGATGATATTGATGAATTGATTCGTTTAGCTAAACACGCTGGTGTTATGGCTGAA +CTTGGTACAGATGATGAATACGCTATGGCCGCTTCAGCTTTATGTAAACAACGATATAAT +GTATTAAGCAAGGACGGAATCGAATGATTACCAGAGAACAAGCTCGTGTAATTTATGATT +TAGTCAAAGACATCGAAGATGATTCTGCTTTCTACACCGGCCAATCATGCGAAGGGACTT +ATTTTGGATTAGAACAAGCTAAACTTGATTTAAGTAATTCCAAAATAAAATTTGAACAAT +TTATTGAGAGTTTAATTGAATGAGCGGCCAATACAAAATTTTGATAACTAGCAAATGTTA +CGCTTATGGTCAAGGCGAAGCAATCTCTGTTCATACTGTTATTGCTGAATTTGATACTAA +AGAGCAGGCTGATTTAGCATTTTATAATATGAAAAATAATTCTGGACCGAGTGATATCGG +CGTTCGCCAAGCTTATATAAAGTTGTATTAAAATGATTACTAAAGAACAGAAAGAGACTA +TTGTTGATTTAGCTCGGTGTTGGGCCCAAGCTGAATCGGCAGTAGCGTGGGAATACTCCT +CATGGAGTTCGAGCCGATCTGCTAAAAAGCAAGCTGAAGAAGAATCCGCCGAAGCAGAAG +CAGATCTTGAACAGTATCTTGATGGGATTATGGCATAATGCAAACTTATATTAAAGAAAA +GCACCGTTGCAAAGATTGTAAATGGCCTATCGTGTTTTCTTTATGTAATGATGGTTTGAT +GGATACTCCACCATATAAGATGTGGGATTGGTGGTTATATTGCTCAAACAAAACATGTAA +GAATCATGCAGGTGAAGGATTTTTCCAATATACTCCAGAGTGGATTGAATCAGGGGAACC +AAAATAATGGCACAACTTTACTTCAACTATGCGAGTATGAATGCCGGGAAATCGGCTAAC +CTTTTGACAGCTGCTCATAACTATAAAGAACGTGGAATGGGCACTCTGATTCTTAAACCA +GCAGTTGATGACCGTGATTCTGCTTCAGAAGTAGTTTCACGAATTGGTTTAAGACAAGAT +GCTAATATAGTTACTCCTGATATGGACATCCTGGAGTTCTTTAAATGGGCTCAGACTCAG +CGGGACATTCACTGTGTATTCGTTGATGAAGCACAATTTTTAAGTGCAAGACATGTTGGA +GAATTAGCTCGGATTGTAGATTTGTACAATGTCCCAGTTATGTGTTACGGGCTCCGTACA +GACTTCCGGGGTGAATTGTTCGAAGGTTCTAAAACGCTTTTAGCGATAAGTGATAAATTA +GTAGAACTTAAAGGTGTTTGTCATTGCGGACGAAAAGCCACAATGGTAGCCCGCATTGAT +GAAAATGGTAACGCAGTTCGTGACGGTGAAGTGGTTGAAATCGGCGGAAACGATAAGTAC +GTTTCCTTATGCAGAAAACATTGGTTTGAGATGCTCGATATATGATTGAACTGTTAACAA +TTCTGGCTGGAGTAGCTGGGATATTCGCTATAGGCTTTATCCTTTACGTTATTCTGATTT +ATTTGGTGTGCTTATGAAATCAACAACTTATAATAGTTTTTTGCTATTAATGACAGTGTT +ATTATTCATTTGGATTGCTGTTGCAGCTTCTACTCAATCAGAACGAAATAAACAACTAAA +AGATCAAAACACTATTTTATGTGAATCCAAAACTGATGGCGCATTCATTGCCAATAGTCT +CGGATGTTTTATTAAGGTAGAACAATGACAGAACAAGAAGCATGGCAAGAACTTCGGGCT +TTACTTAAAAAGCATAAAGTTGAATTACATTTGTCTGAACCTCCAGTATCTCTTGAAGAG +ATTATTAACATCCGTGAAGGCCATGAAGTTTTGCAGAAAATTGACGAGGTATTAAAATGA +AAACGAGTTTCTTTAAATCTGGATTTTACTATCGATTGTGTAACTACAACCGAGGTAAAT +CAAAATGTCAAGAACTATCCGTCGTAAAGGCTGGCATGTAACAACTTCTTCTAAATGGCA +CAATCAGAAGAATAACGAATTCGCTTATATCAAGCGTTATACCGAATACGTTAAAACCAG +CAAAGATAAAGCAAATCAAGCTAAATATGTCGAAAGATATATCGCTGAAAATAAGAAAGA +ACCAGTTCGTCTTGAGAAGTTGATGAAAGAACGTCATCGTGATTCATTCTGGAAGACTCT +ACGTTGGAGTCGCTATGCTTCACCTATTCCTAGAGTGTTTCACAAGATGGAAATTAAAAA +CTCGTTGAGAAACGACACTGATTATAACTGGGACGAGAAAGCCGCTCGTAAGTGTGAGAA +AGGCATCGCTCAAATGAATTGGGATTAAAATTTCGGAGTACAGATGTACTCCGTTATAAA +TACGTTTACTAACTAATGAGGTGTATATGCAGCATTTAGACGTTAAAAAGCTTCGTAATC +TTACTGTAGAACAACTCGATGAAATCAAACGTGAAATTGGGCATGCTGTTGCAAGTCTAA +ATGAAGAAATTCGTCAAAGTGGCTCACGGGCAGATTATATGCGTAAGCGAAATCTGGAAA +AATACCTCGACAATGTTAAGGCTGTACTTCAGCACAAACGTAACACTGGTCAACGATAGG +AGGCCTTATGGCCTTAAAAGCACTGGCATTATCCGCGCTTATTGGAATCATGATGATTCC +TGCTTCCTACGCAGAGGTCGACTTCAATCCGAAGTTTGATGAATATTTTGAGGGTGCATT +GAAGGTTTACTCTCAATATAAGATATACAATAAGCAGGAAAGTGAGCAGTTCTTCACATT +TGTTAAATCAAAATGGGAAAGGCAACCATGCACTAATAACTGTGAAGCTGATGGAGCTTT +GGTTGCGCAAGAGTATTACACCAACCGATTGGTAGAAGGCAAACATGAAATTTGAAGACT +TTGCTAAAGGCAAAGCATCTGAAGCGGATGCTTATCTTGGGTTGTTGATGGCTTCTCGCT +CTTACTTTCATTCAGCCCACTTTGAAACAGAAAGCTATGCTCGTCATAAAGCATACAACT +TCATATTCGACGAGCTTCCGGATTTGATTGATAAGTTCGGTGAACAATGGCTTGGCTTTT +CCGGAAAGAAATATGCTCCTCAAATTCCAGAGCAAAAATCTCTTCCTACTGACACTATAA +AAATGATCGATTTGATCTTGGCTGAATCCGACAAGATCTACTCCAAAGTCCCTCGTGCTA +TTCAAAGCACCTTAGATGATATCGTTGGAACTTTCTACCAACTCAAGTATCTTCTCTCCC +TGAAGTAACACTCTGCCCTGGCTTCGGTCAGGGCATTTTTGTTTATGCTGTTTACATCCT +CAAAAGACTATGATACTATAGACTAGTAATCAACTAGGAGAACAAAATGAAAAGTTTGGT +AGTCGTAGCTTATCTCTACGTTCAGTACAATAATCCGCTTTTCACTCGTAATGTTATCGA +TTTTATCTGGAGCCAATTATGAACGAAGAGAACAAAATCAAATTGCTCGATTTGATTGAA +AAATTGCGTCAGGCCGATTTAGCATATGTTGCCCGGTATGAAGGTTCCGGCACGGCAATT +CCTCAATACAAAGCTATGCAAGCTGCTCAAAAAGAAATGTTTGATTTTATTCAATCTCTG +TGAGGTTTTATGGAAATCCAAGAAAAAGTTTTAGATTATGGAAGTCGTTTCCAATCTATC +AAACGCACTATTGAATACAACAACGGTCATGATGAAAACATGCTGATGGTTGATTTTGAC +AATGGCGAAGCTGTCGGAGCTTCTTTCAAATTCAATGGAACTTTATCATGTGGAGGCGGT +TCTTATAAGATTGAAGAACTTCGTCGCTTTAAAGCACTTTTGAATAGTTTTGAGGAATTG +TAATGGCTGTAGGATTTGCAAAAGACGGAGCAGAACAACTTGAAGTTGAAGCAGTAGTTC +AAGCTGCAATAGTTCATGCACGGTCTCAATTTAACACTACACGAGAGTCTCTTTATCGTT +GCTTTGATTGCGAAGAGCTAATTCCAGAATCTCGTAGACAAGCAGTTAAAGGTTGTTTGT +ATTGTGTAAAATGTCAAGAAATGCATGACGAAACTTTTAAACGTGAACCCCGTAATTGTT +GGCACAGGAGCATGAGATGAGCTTTCCAAAACTTGAAGTTGGTGATCTAGTTTTAACTCG +TACTTACACTGGTGGACAATCAGTAGAAATTTGTCAATATCGCGCACAGACCGGTAATTT +GATGTACATGGCTTATCATCCAGAAGCTATCTTAAAATGCCAGCTGGAGCGCTTCATTAA +AGATACAGATTCAATGCCTTATAGTGTAGATATTGTACGCAAAAGTGATTCTGAAAAATG +GGCAAAGGTAATGATGAGCATCCAAAAGAGGCCGGAGTGATTATGAATTTTGTTTCGCTT +TACGGGTATGAGATTATAATTTCATTACTGATTTTAATCATCGCAATTTTAATGACGAGA +AAATAATGGCTAAATTAATTTGGGAAGGTGTCGGATACGGCGCAAAAATTGAAGAAAACA +TTCCAGGTTCTAATCAAAAGTGGTACACAGAACTTGATGTTATTTCCAACCAGTCGCATG +TAAGCATTTACGATGTTGATAATGGTGATGAAGTTTCACTCACTAAATCCGAAGCGGAAG +CTTTGGTGAAATATTTAAACTCTGTAATTCCAACTATGAAGGAGCATCATAATGAATATT +AATGAAAAATCTTGGCACTGCCGTTTACATGACTTTGCTTTTGACAAATACTCTCGTCCT +CGCTCTCTTTGCCCGTACTTCTGGAAAGTAGTATTTGCTTTATTTGGTATGACGTCATTG +ATTGTATTATTGTCCATCGCATTTACTTTAGTTGGTTGGGAATTGGCTGCAGGTTGGTTA +GCTAAAATTGGTATTACTTCGGTCTGGGCTATTGGAGCTTCTGGATTTACTATCGGGGCA +GTTGGTATTTTGAGTTTAGTTGGTGTAGTATTTGGCACTCTGTTCGGTTTAGCCAAATTA +AAAGATTTGATTGAAGATAAAATCAAAGAACGTAATTATGAAAAATATATTCAAGAATTA +GAAGCTCGTAAAGACCCAAATTACGTTCCACCTAAAAAGAGTATTCTGATGGAATTCATC +CGAGCTCGTAAAGAAAAATTCTGTCCATCTCTGACTTTCACTGAGGAATAAAAATGATCG +GTATACATAAGTTTGAATCATTCGAAGAGACTGTTCGTTTGGAAATTGGTGATCGTCTGA +AGGTGACCCTCAAGGGAACTACAAAATCTCTGCTAATTAAAGTAATTGGTATCACGAACT +ATGGTAGATGGACTGACGGTGATCGTCTAAACGTTATTATTGGCAAAATTGATGATGTTG +CCGGTCATTCGGTAGTTTACATTAATAACGTAAACGGACAGGTTATTCATTATCTTCCTA +ATGCAATTCACACATACAATGTTCACGATATAACTCACAATGATACTTCGATCGCGTATG +AGGATGAAACGCTTTATCCATCACGAGTTAAAATTGCTCGTAAACAAGTACTGGTTCCTA +TTAAAGTCGGTGATGAATTGACTAAGCCAAATCGTTCTGGTACTTACACAGTAGTTTATG +TAAACAACACTGTAAGTAATATTGTTGTTGAACGAAATACTGATAAGAAAATCGAAGTTA +TCAATTTTAAAGATACCGTCGCGCTAAAAGCCTTCGGCCTTAAATGGAGAAGTTGATGAA +GACAGTTGTAAAAAGTTATTTTGGGTCCCAGCTTTATGGGACCTCCACTCCAGAATCCGA +CACCGATTACAAAGAAATCTTTATCCCACATGCAAAAGATATTCTGATGTGCCGGGCAAT +GAATCACACTAACCTGAATACCAACAACTCTGCCACCAAAAACACTCATGATGATGTAGA +TCATGAGTTGTATTCCTTGAAATATTTCCTGGAATTGGCACAGAATGGTGAAACTGTGGC +ACTGGATATGCTTCATACTCCTCCAGAATTGGTCGTTGCTTCTGACCTTCCTGAAGTGTG +GAAATTTATCCAAGACAATCGTAGTAAGTTCTATACCACCGACATGAAAGCTTATCTTGG +TTATGTGCGTAAGCAAGCAGCTAAGTATGGTGTTAAAGGTTCTCGTTTAGCTGAACTTCG +TCGTGTATTGGAAGTTATCAATAAATTTCCTGAATGGAAATACGAAAATCGTCCGAAAGA +TAAAGCTAACAACAGTCGTTGGAAAGTAGCTGATATTGCAAGTAAACTTCCATTGAGTGA +ATTTTTGTTCTGGGAAGATTTTGTTGATGCTAAATGCGGTAAACAACGTTTCTATCATGT +GCTTGGTCGTAAATTCCAGACAACAATCACTGTAGCTGAAATGAAGTACTCCTTAACTAA +ACTTGAAGCTGAATATGGTGAGCGTGCTCGTAAGGCAGAAGCTAACGAAGGCGTAGACTG +GAAAGCATTGAGTCATGCATTACGTGGTGGACTTCAACTTCAAGAAATCTACTCTACCGG +TGACTTAAAGTACCCACTGAAAAACGCACAAGACATTCTAGACGTTAAACTTGGTAAACT +TCCGTTTGTTCAAGTCCAGCAGATGCTTGAAGATACAGTAGATGAAGTTGAGCGTTTAAG +TATTCAAGCTCATAAGAATGGTATGCCTTCGAAAGTTGATATGACATTTTGGAATGATTT +CCTAGAAAAAGTTTACTTGGAAAACCATGGAGCTTACTACAAATGATATGGTGGTACATA +GTGCCTGTGATAATTGCAGTGATTTACCTCGTAGCTGGTTGGTATATCGTAAACGCTCTC +GTTAAACGAGGGGCAATAGAGACACCTCAAGGCTATATCTTTATATTACTATTATGGTTA +CCTGTCGCGGTCGTCTCGATCATCTGGCGAACCCTAGCATGGTTACTACTGTGGCCAAAG +CGCTTTGCTGAATCCCAGATAAACAAACACTCTTCTTAACCTCCTTCGGGAGGTTTTGTT +GTTTTTGAAAAAATGTTGTACATCTTAACTCAATGTGTTATTATAGACTTATCAAATAAA +TGGTAACCCGGAGAAACAAAATGACAGCAGAACAAATTAAAGAGATGATCGCAGCAGAAG +TAAAACGCGTCATCCGTGAAGAACTTAAAATTGAGTACAAATCATCAGAAGATGCTTTGG +ATATCGATTTATCTCTTGATGGTGAAATTGTATCAACGATTCAACTGTCTAAGTATGATT +TACCGATTTAATTGCAAATATTTTTGCTGAACCGTTTACATCAGTTCAGCAATTTGATAT +TATTACCTCATACCAAACAAATAGTAACTCGGAGAATAAAATGACAACCATCACTATCAA +CAAAGGTATTAACTTCGGTAAAGAAATTTCTGGCACTTTCGAATTAGTCGGAGAATGGTT +CCCAGAAACTCTGAAACCCGAAGATGCTGCTCAAGGTGATGGTAAAGTTTTCGTTATCAT +CGACGGTAAGAAAAAAGGTGTTTGGGTTTACAAATCAGACATTTCTTATAACGGAGTAGC +TAAAAAGATTGAACTGATTGAAAGTGTTGATGATATGAAAGCTCGTATCAATAAACGCTT +TAACGTTATGGGGATGATGACTGCCGGAATCATTAACGGAAACATTCGTTCACTGATTAT +CTCGGGAGCCGCTGGTATCGGAAAAACTTACTCCTTAGATAAAGCATTGAATAAAGCAAA +TGATGAGGATAAAATTGAATACAAATCAGTGAATGGTAAAATCTCGGGTATCGGGTTGTA +CTGTCGCTTATGGGAATCACGCTTCGATAATTCAGTTCTGCTTATTGATGATGTAGATGT +ATTCTCTGATATGGATATTCTGAACCTTCTGAAAGCTGCTTTAGATTCTGGAGAAAAACG +TAAAGTTTGCTGGAGTACTGCTTCATCTTACTTAGATGAAAAAGGTATTCCAAATGAATT +TGAATTTGAAGGAACAGTCGTTTTCATCACTAACGTTGATATTGATAAAGAATTAGAACG +CGGTAGCAAATTAGCTCCACATCTCGCTGCTTTGGTATCTCGTTCGGTTTATTTGGACCT +TGGTGTTCACTCAAACGAAGAAATCATGGTCCGAGTTGAAGAAGTAATTATGAATACTCG +GATGTTGCAAAGCCGCGGTTTACGTAATTCCCAGGTTGTTGAAGTATTAGATTTTATGCA +AGAAAATGTATCTCGTCTTCGTAATGTATCTTTACGTACTGCTCTTTATCTCGCTGATTT +CGTCGCCACTGACGAGAAAAATTGGAAAGATATCGCTGAAGTTACGATGCTTAAATAATA +CTCCGGGAGGAGAAATCCTCCCTAAATTTTTGAGGAAAATATCATGGCACATTTAATCTC +TTATCAAACTAAAATTGTTCTGTTTCGTAATGGTAGCTTTGTATGTGATTCTAAAAGTCG +CGAGTCTCTGAAATATATGTCAGATGCTACAGCAATTTCTTATATTGACTTAAACGGGAG +CTGGGTACAATGAGCTACAAATACTATGTGAGAACCCACGCTTGTATTTTCAAAAGTGTA +TGTGAAAAAGACACTGCTGAATATATTCTTAGTCATACTAGAAATTTAACTGCAATTCTT +TTCACTGATTTACAGAATCCAGCTTCAAATCACATTATGGAAAGAATTCGTTTTAATATT +GAAAATCGTGATGTGCAAGCTTTAGAACGTCGCCTCAAAGAAGGCTATGAATATGCTGAA +GAAAATAAGTGGAGATATTAATGAGTATTTTAATGGGAAATTGGGTGAACAACACCGCTT +ATTATCCACCTGCTCATATCTATGCTGGAATGGTCCAGAGTAAGGCTGAGAAGAACGCAA +TCCGCATCTGTGAAGAACTATACAGATTCAACTTCGGAGATTCCCCTAATGTATTGGGTG +AATTGAGAACGGCCTTTCGTGAATTAGATGTAATGCTCCATATGAAGAATTCTTACCCAT +CTCATATGGAACTTCGTCACGAACACGTTGCTGAAGTATTTGGCACGTTTCTTTATTGGG +CTATTCGTGCTAATACTGAAATGGAGCGAATCTATAAGCAACACCAAGACCTTTGGAAAT +GGTATAACACATCCAAATTAACTAATCGTGAAATTAAAGATTGGTGCAAACAACAACTTG +ATTATAATTTGAATTGCATGATGATTGATGTTTACGATAATTTAGTCAGGAGCAAAGGCT +AATGGGCTACGGGTTGGATGAAGATTGGGAATACGAAGACGAGGAAGAATTAGGCACTCG +TTACAGCATAATGAAAATTGTTTATGAACGAAATGCTTCAGCGAAAGTAGGGGCAGAAAT +GTATTGTCCTTATTGTCGAAAGGTTATTGTAAAGCGTAGTTGGCAACATAAGTTTTGTAG +CACTCCATGCAAAGACAAGTATTGGAACTGCGAACCTAAGCGTGCTCATCGAGCAGAATT +CTTTAAGGGCAAATTATGCAGGTAGAACAATTAAAAGAACTTATTCAATTAGTTTCTAAA +GAACAAATCAAGGAACTTATTCGTAATGAGCTGAGAATTGAAGTTCAGCCAGCGGATTAC +ATGGACCCATGCCGAATTCAACTATGGTGGGATAACGAAGTTATTTCCGAAGAATGTATT +TACTTGAGTGATATTCAACGATGATATCGAAAAAAATTATATTATCTCGTATTGAAATGA +TGAGAAGCAATTATGAAATGGCTATGAAACTTTCCGTGTCAATTGCTTTACGTAATATTG +GTGACGAGCGAAGAAGCAATTTAAAGTTTTGTGCACCTGATGACCGCAGAGCGCAATTAG +TTGAAATTACGAAAGCACTTATTCAAATGGACCATTACCAAATCAAAGATGCTAAGATGC +TAGCTACCGAAAAAGAAATTTGTGCTAAAGCTTTAAGGGAGCATCAAAAACAAACTCCAG +TTTCATCTTGGTTCCATGGTGGAGCAGACAAACCAGCCTATTTTTAACTCTAAGTTCGCC +CCGGCTTCGCTGGGGCATTTTTGTATGTTATCCATATAATCAATCATTCCCTTCTAGAAA +GTTCCTCTCAACCGTTCTGGTGAATCCAAAAATTTTTAAATCAACCGTTTACATCCTCCA +AAGATTGTGGTATGATAGTCTCGTAATCAACTAACGGAGAATAAAATGTTAACTGAAATC +ATCACCTCGCTTATCGAAGAAAATCGTAAAGCTCATCAAGATCGCCGAGCGAAAGTTGAA +AAACGCGCTATGGAATTAAATGCTGGATGGACTAAGACCCGCTACGGTCGTGAAGGATTT +GATAAGGTAGTAGCCCCAACTTGGGGAGTAGATGATCGTCCTCATGCACCTTTTGATGGG +TACCTCTGGGAAAATGAATTAGGAGAAGTTGAGTCTTATCATGGTGGTAGTTATCTTCCA +TACGTTACTGAACTCGACTATCTTGATAAGCCTGAATACACAGGAGATCATGGTTGGTGG +AAGCTGCGTCTCACTTCAGACATGCTTTCTGAACTCATGATTTTAAGGCATGAAACTCAG +TGTATTGAGATTCGTACACCTTACAAAAAGTGGACACTCGAAGATAACACTATTGTGGTA +ATGAGTGAAGTACGTGCTCATAAGACGATTCTTAAGGCAATTCAATCTGCTTCAGAAGAA +TGGTTCAATAACTACTACAGTTCACTTAAAGTTAACAAAGGTGAAGCACCGGTTGGTAAG +CAAGTAGTTAAAGGTAAAGTCGTTTCAACTAAAGTATACCAAGACTATTGGGGTGTATCT +GCTAAAATGATGGTCCGTCTTGAAAACGGAGCTACAGTTTATGGTTCTTTACCTGGAATC +GTAGATATCAACTATCGCGGTACTATCGAATTTAAAGCAACGTTTGAACAAGCAAAAGAT +GACTCAACTCACGCTTTCTTCAAACGTCCATCTTCTGTAAAAATTGAAGAATAAACGCTT +TAAGAGAGTCCGTGTTATAATGGTTTCACGGATTCTCAATCAAATCATACGCCTGACAAT +GAAAGAAGAGGAAAAGATTATGAACTTCAAAACCAAAGATGATTTCTATGTAGAAGTATT +TGAATTGATGGAAGTGGTGAACAAACATTCAAGTACAGTGTTTGCTAATCAGAAGAATAA +GATGCTAATTAGTCTACTCCGTGATCGTCTCGTATCAAAGCATAATATCATTGCAGGTAC +TGAATTGAACTCTGTTTTGGCTAAATACGACCAGTACACTCCATGGACAAAAATCGCTGT +AGTAAAATCTTTAAGCAAATCTAAAATCACAACTTATGTTATGTCTCACATGCGCTTGCG +CAATTATCTGAGCATTGATATTGAAGATGAGCGTAACCGTTTGAGCCAAAACCGTGTTGT +CGTTAATACACATCTCAGTATTATAGCTGAAGTTGTACGGACAATGTCCAGTGATATCAA +AGAAATTATTAACATTGGTCGTAAACTATGTCATGCTATTGATAGTCAGAAATTTGAATT +CATCAATGATTTCTTGGGCTGTGATAAAACTGTTTATCCGAAAGTTCGTGTTGGTGTATC +TGGCCGCCCTGAATTTGATATGGCGATCAGAGTAAGCGCTGGTTATCGAGTCAAAAATAC +TCCAGCCCAACGTAATGTAGTTGCTCGTTTGACAACTCAGCTTAAGAAAGCACTGGAACA +AATTCCATTCATCAATACAATTTCTTTGGTTGAACGTGAAAATGATAAGGTGGTTCACTT +CTGTGTCGACCAAGAATTCTTTAAGCCAAAAGAAGTTGCATTGAGTTCTAAAGAACTTCA +TAATTTTGTTCATGATACTGATGTTCAACATATGTACTTGACACCAATCAAACCATTGGT +TATCGAATCGGTTATGACTCAGCAGCTTAATGAGTTGATTGCTAAAATCGATATTGAAAT +TGAAAAAATCGATGCGGATATTGAATCATGGCAGGAACAAATTGCAACTAAACGCGCCGA +AGCAATTAAGCTTCGTAATCGTCGTCAGAAATTGGCATCTGCTGTAGAGGCTTTAAATGA +ATAATCAATTAAAAGAAGATATTGATTTTGGCACGTGGTTTGAACCCTGGACTGATGTGG +ACCTTGAGAAAGGTCCAGAATGGGAAGCTCCTGCTGGATTCGATAAAGGTCTTATAGATT +GGAAAGCAGTTTTAGAAATGGCTGATCGTCGAGAAGCTGCAGCAAAACAAGTTTCGCCTT +GCCCTAAATGTGGTACAATTCAGGTTCAATTGATAGATTGGCGAACTGATACTTTGAAAA +TGAAGTGTCGTCATTGCAAACATAAATTTGAGAAGAAATTAAAATGACTCGTATTAAAGC +AGCTATTATCGCACTGATTCTTATTGTTATTCCATTAACAATGAACCATTTCAACGATTA +TATGACATACCAAAATTATGATGTTAAAGTTGTTAGTGTAGTATCTGGTATGTCACCCGG +AAAGTACTCATCATTAGAGTTCATTGCCATTTACGAACTTGAAGATGGATATCGGTTTGA +CCGACGGATTTCCGCCGCGTCATCAACGCAACTCAGTCCTGGTCAAAATATTACATTAGA +ACTTCGACCGTTTGATGTTAAGCAAACCCCAATGGAAAATACTATCTGGTTCATTGGTGG +TGTATTAGTCAACTGTGCCGGATTTGTTTTTGGCGCAGCATTTGCTTTAATCGCTATTTC +TCGTCGTGTTAATAATTGGATGAACTCATAATGATTGATTTAAAACTTGATACCAACGCA +GTAATGAAGCTGTTTGATACCGAAGAAGCTCGCGTTAATCTTCAGCAAGCAGTTATTAAT +AATGTGGTCAAAGAACTTGTGCTGAAGAATAGTAAGAACAAAGTACGAGAAACTATTCAG +AAAGAAATTTCTTTGGTTGGGGCTCGTCTTCCTGATGTACAGCCGATGGTTAAAGAACAA +CTCAAATACTTCTTTGAATCTAAAGGCTGGAACAAGGTTCAAGGTACTTTTGAATTAGAA +CGTATCATGCGCGAAGAAGCAAACCGTATTGCTTCTACTCAAGTACTTGAAGCAGTCAAT +GCTCAGGTTGATAAAGCAATGAAAGATCTTGAATATAAAATTGATCAAGTGCTTCGAATG +TCAGAAGTACGCATGGAAGAAATGGTCAGTAAACGTCTTATTGATTCATTCGGCTCTGTA +ATTGATAAAGTTATTGCTGAGCGTCTTAAATCTGTATTTCCAGAGGTGGCAAAATGATTG +ATCACAACCCATTTAAAACCACTGGAATCGCTGAATCTGATGAAATGAAAGCTCTTTTCA +AAGAGCTTCGTGAAATTAATGCTAGAATTTGTTTTCAGTATGCAGAAGAAAAAGGAATTG +AGTTTAACGTAGATACAGTTCTTCGCAATATAAATGCTTTAACTGAATTCGATATCGTAA +TGTTCAGGATGTTTGCATATACCGCATTAGCCAATCAGCCTGAAAATACTCTACCAATTG +ATGAACGAATTATCATTGCAGCTAATGAAGCATACAACAAGGTTATTGAAATTGGCTAAA +CGAAAACAATATATGCTTACAGCCGAAGAAGCATTGATGTCAGTTTATCGTGCTTATTTT +GCTGAACATGGCGATATCCCATCAAGCCCGGCGGTTATTAAAGCAGCAATGACTAAAGCG +CATAATGCATTCCATGCTCGAGTCTCTGAAGCAGCTAGGAAAAAATTTGGAAAAAGGTAT +TATAATAGTCCTAATTACTTCGATGAACTAGACCAAATAAAAAGAGAAATGTTATGCTAA +CAATTTACGGATATGATTCTTCAATTCACCGCTGTGTTCACTGTGACAATGCTAAACGAT +TAGCTGAAGTTAAACGTGAAATGTATGAATTTAGGAATGTAATGCCAGAAAAAGGCGTAT +TCGACGATGAAGTTATTGCTGAACTTCTGACTCGTTTAGGTCGTGACACTCAAATCGGTT +TGACAATGCCTCAGATTTTTGATGGCAATGGCGCTCACATCGGTGGTTTTACTGAACTCA +GAGAATATTTCAAATGAAAGAAGGCGTAGACTACATTCATGATTACAGAGGCACAGCTAT +TGGAGTTGGTGATGTAGTTGCGCTTTATTACGGATATGGCGGCCTGGAAACAGGCGAAAT +TATTCAAGTTAAAAATAATCGTGTTAAAGTTGAAGTAACTTATAGCAATGGCTCAAAAGT +TATTTCTAAATGGAAATACGGCGAATGCATGGTGAAATTATGAGTGATATGAAAGAAGTG +GATTTAGTATTCTCAGCCGGCGATCAAATTGATTTAGAGCATCTGCTTGCAGTTGAAATG +ATTCGTCGTGCTTCTGAAGATATTCAGTACGCAATTGATAATCCTTGGGGTGAATTCCGA +ATTCGTCAGGGTAAAGAAATTCACGGTGTTCAATGGACTTATGTTGGTCTGGAACCTGAA +GATTACGAAGAAGTAATGACTGAAGACGGGCGGATTGACTATAAACCTATCGGTCCTTGG +CACTGGGAGTATGGCGGCCCAGATTTTGAAGTTTCATGCTCGTGGTTGGAAAGTAAAGAT +GAAGACTGATTATATCCAAGTATCTGTCAAAGAGTTAGATCGTTTACGTCGGTGCGAAGA +GCTGCTCTGGGAAGTGGAAAGTTCTTTACCATCGGGTTTAGAGAGCTGGATTGATTATGA +AGAAGAACGTGAATTAAGAGGTGAAGAATGACTCCTGAATTAAAAGCAATTTATAGTGAA +ATTATGGAAGACCATGATGGATACTCCGAGAACTACGACTTCGAAAATTCTGATTACTTA +GAAATAGTTGACGAAGAAGAATGGACTCAAAATCATAAGTATCAATATCGTCAAGTGGTT +TATTATTCCAAGAAGCATGATGTTTATGTTGCTGTAAACGAATCTCGTTCAGGTTCTTAT +CACAGTGATTGGTACTACAGCGATCCTGAAGTTTCATTAGTTGAAAAGCAAGAGCGAGTT +GTTACTCGTACAATCACGGAATGGATTACGCTTTAAAGCCTTGGTACACGGCTCGATGGA +AAACCGTTGAGCCAGAGGAAGAAGAACGCTTTCCTGAAGATGATTATAATGAACCTACTA +CAAATGATCTAATTGATATGGAGTTTGGCTATGAGTTTAGTGAATAAGTGCTTCAAAATT +GTTAAAGAAGATAATGACGGCGGCGTTTTCGATATCTATCCAGAACTTACCATTGGAACT +GAATTCAAAGTTCTTTCTGTGGATAAAGAAAATCCAGATGGTATCACTTCTATCTTGATT +AAGAACGGTCCTTACCTTCATATTGGTTCTCGTGAATCTTGGTATTGGTGCTTCTGGGAA +CAAGACACGATGGGTGAAATTGAAGAAATTGAAGAGCTTTCTTCTGATCAGTACAAGATC +CCAGACACAGCTCATTTGTTCAAAGGACGTGATATCGCATCTCAGCTGTTTAAAGTTGCT +GGTGCTGAAAATTGCGATGCCGAAGAACATGATTTAATGCAGGCGGCAGGCGAATATATC +CGTCAGCTTGAAGCTCAATTGAAATTTTCCGATAAGGCTTTCTAATGCAAATTGAATTAA +AATACGTATCATGTCAAGAATCTGGTTGGCATCTGTCGTTTGAATTTGATGATGGATTTG +GAGTCGCTAAATGGTTCCCTTCTAAGCCGACCAAAGCTCAAATCCGATATTATAAGAAAT +GGGCTCGTATATATTGGTTGTATGATTAACAATAAATAGGTTCATCTGATTAAGAGGTGA +ACCTATGTTATTGACTGGCAAATTATACAAAGAACAAAAACAAAAATTTTATGATGCACA +ACATGGCAAGTGCTTAATTTGTAATCGCGAATTAAATCCTGATGTTCAAAGTAATCACCT +TGATCACGACCATGAATTGAATGGACCAAAAGCCGGTAAAGTTCGTGGGTTGCTATGTAA +TCTGTGCAATGCTGCAGAAGGACAAATGAAGCATAAGTTCAACCGCTCTGGTTTAAAAGG +TCAAAACGTTGACTACCTCGAATGGCTTGAGAATTTGCTTGTCTATCTGAAAAATGATTA +TACTAAAAATGACATTCATCCTAATTTCATTGGCGATAAGTCAAAAGAATTCAGTCGACT +TGGTAAACCCGAAATGATAGCTGAAATGAATGCTTACGGGTTTACCTATTCTGAAGATGA +TTCCAAACCAAAGCTTGTTGCTTCATTCAAAAAGCAACTTCGTAAGAGTTTAAAATGACA +ATTGAATCAGAAATCCAGGGGTTAATTAACCGCACCAATAAAGATCTACTTAACGAGAAT +GCTAATAAAGATTCTCGTGTTTTTCCAACTCAACGAGACCTGATGGCGGGGATTGTTTCA +AAACATATTGCTCGTCAAGTTATCTCTCCTACTGTTCTAAATGCTCATGATAAAGGGCTT +ATTCATTTTCATGACTTAGACTATTCTCCAGCTCTTCCATTCACTAACTGCTGTTTAGTT +GATTTGAAGGGAATGCTTAATAACGGATTTAAACTTGGTAATGCTCAAATTGAGACTCCA +AAGTCAATTGGAGTAGCAACCGCTATCATGGCTCAAATTACGGCTCAAGTGGCTTCTCAT +CAATACGGCGGAACTACATTTGCAAACGTAGATGTTGTGCTTGCTCCTTTTGTAGAGAAG +ACTTTCTTTAAGCATTTACGTGATGCAGAAAGATATGGCATTGAGCATGTTAATGACTAT +GTATACGCAATTGAGAAAACAGAAAAAGACGTATACGATGCATTCCAAGCTTATGAATAT +GAAGTCAATACTTTGTTCAGCTCAAATGGCCAAACACCGTTTGTAACAATTACCTTTGGT +ACTGGCACAAACGAATACGAGCGGATGATTCAAAAAGCTATTCTCAATAACAGAATTAAA +GGTCTTGGACGAGACGGAATCACTCCAATCTTTCCTAAACTTGTTATGTTTGTTGAGGAA +GGAATTAACCTTCATCCTACTGACGTTAACTATGATATCAAACAGCTTGCGTTAGAATGC +GCAAGTAAGAGAATGTATCCAGACATTATTAGTTCAAAGAATAACCGTCTGATTACTGGC +TCTTCTGTTCCAGTTTCTCCGATGGGATGCCGTTCATTTCTTAGTGTATGGAAGAACAAG +CACAACGAAGAAATTCTAGATGGACGCAATAATCTCGGTGTAGTGACTATTAACCTTCCA +CGAGTAGCACTTGATTGTATGGTTGATGGCCGTCCAGATTTAACTAAATTCTTTCATATT +CTTGATGATCGTTTACTTATTTGTAAAGAAGCTCTTTTAGCTCGTATCGAATCACTTCGT +GGTGTAACAGCTTCGGTTGCTCCTATTCTTTATCAAGAAGGCGCTTTCGGTGTTCGTCTT +AAGCCAAATGACGAGATTATTGATATCTTCCGAAACGGTCGTTCTTCAATTTCATTAGGG +TACATCGGAATTCATGAAGTTCAAACTATTCTTGGATTTGAAATCGGTTTACTTTTGCTT +AAATGCATGAATGATTATTTGAAAGAATGGACCAAAGAAACTGGATTTGCTTTTAGTCTT +TATTCAACTCCGGCAGAGAATTTGTGCTATCGCTTCTGTAAGATTGATGCTGAAGTTCAT +GGAGATATCAAAGGAGTTACTGACAAAGGTTGGTATACTAATAGTTTCCATGTTTCAGTC +GAAGAAAAGATTTCTCCATTTGAGAAAATAGATCGAGAAGCTCGTTACCATTATATCGCC +AAGGGCGGTCATATCAGTTATGTAGAACTTCCTGATATGAAAAGCAATCTAAAAGGTCTT +GAAGCTGTGTGGGACTATGCTGTTGAGCATCTTGATTACTTTGGTGTAAACATGCCAGTT +GATAAGTGCTTTACTTGTGGCTCAACTCATGAAATGACTCCAACCGAAGATGGTTTCATC +TGTCATGAGTGCGGCGAATCAGACCCTAAAAAGATGAACACAATAAGACGCACATGCGGT +TATCTTGGCAATCCTTCTGAGCGCGGATTTAATCTTGGTAAGAACAAAGAAATAATGCAT +AGGACTAAACACTGTGAGATATGACAGAATTTATCCTTGTGATTTTGTAAATGGCCCTGG +ATGCAGGGTCGTTCTTTTCGTTACAGGATGCTTGCATAAATGTGAAGGATGTTACAATAA +ATCTACTTGGAACCCAAGCAACGGCCAATTGTTCAATGCAAACACCGTAAAAGAATTAGC +TGATTACATTTCGAAGCCTTATATTCAAGGACTTACTCTCACCGGTGGAGATCCTTTATA +CAGATCTAACCGCGAAGATATTGAAGCTCTTGTAAAATGGGTCAAATCGCGGTTTCCAGA +AAAAGACATATGGATGTGGACTGGTTATAAGTTCGAAGATATCAAGGACTTAGAACTGCT +TAATTACGTAGATGTTATTATTGACGGTAAGTATGAAAAAGACTTACCGACTAAAAAACT +ATGGCGTGGTTCTGATAACCAACGTCTGTGGCAAAAACAAAATGAGGTTTGGACACACGA +TGCAATTACATTACCCTTGGATTCATGATGTACAAGTTCACATGAACCGTTATGTTGAAA +AGATGGGCGAAGAATTTCCATCACTTTATTTTCTAGTTTTCTTTGGAATGTACTAATGAA +AGTGGAAATTTATGGAATACCAGAAGAGGTTCATAGATGTCCTGGGTGTGTAAGCGCTCG +TCATCTTCTTGATTCTCTTGGTATTGAATATACTTTCTATTCTGTCATTAATAAAAGCCA +AAATTCTCTCGGTTTTGATTATGACCGAGAGCGTATAACTGAATGTGCAAAAAGAATAGG +ATGCTTTCCTAATCTTATGCTTCGTTATCCAGTTATCTTTATTGATGACAAAAAAGTTCC +TCGTTTAAAACAACACCTTGAAGATCTAGGTTACGATACTGATCTCTAACACGGTTCTAA +GACACTCTCTCCTCTCTTCCATATGTTTATATGGTCATCATTAAGGGAATCTCTCAGGTT +CCCTCATTTCTTTCAAACAACCGTTTACATCCTGTACTCTCTGTGTTATTATACTTCTAT +CAACTACGGAGAAACAAAATGATTATTAAATCTAAAGTATCACACATTGTTATCGATTTC +AACGTTTCAACTGAACGCGGTCGTACTGATCTCATGGTTGAAATTAAGGGACAGGAAGTT +ATCTTCCGAGCTCGTTCAATTCGCTGTGAAATGTCCTTAAATATCGCTAAACATCATCCA +AACGCAATTAATGATTGTGTTAAAACCTTGATCTCTGATATCTACCAAAGCGAAGCTGAT +CTCGTCGTTCGCGAAGTATTTCATACAGTAGGATATGCATAATGTTCAATATGACTTGGG +AAGAAGCCAAACAGGCTATGCGGGAAGGTAAATCTGTTCATCATCGTTACTTCTGTGATG +AATGGTTCCAAATGACAAACGGTCGTATCGTTGATGAAGCTGGCTACTTCATGGATAAAT +GGTACACAGGCGAAGAGTGGCAAAACACTGGATGGGCAATTTATGACTAATTTAGATCTA +TTTCACAAATCAGCACGGTTTTCATACAACGTTCCCGAAGGACGTCTGTTTGTAGATATC +ACATCTGATATGTTTTTGAAATTTATTCATAACAGGCCCGGAAATAATAACTGTATGGAA +GTTCTGGACGTTAAGAATGGATTCAATACTCTTGATGTTCAACACATCATTGCTAAAATT +GGGGTTGAATTATCAGTAGCAGATGCTTATCTGATAAAAGAACAAGTTACTAAATTCCTG +GCATAAGTTTGCTATAGAGATTTTGATATAAGATCTCTATGTCAAAATAACACACTGAGG +AAAATACTATGTCACAGGCTATCAAAAACGTTCTGAATTCTTTCGCATACCCGAAAGTAG +AAGCTATCATGGCAGCAGGTGCTTACGTAACTCCAGAAATTCTGGACAAGTGGGAAGTTG +AACTTCATGGTACGATGAAAGAGAATGGCCAGAAGATCGGTAAAGCTCGCATTCGTGAAT +TGGTGGTAGCTTACATCATCTCTGAATTTGATATCGATGCTTTCGGTATTCCTACTCCGA +AGAAGAAAGAAATCTCTGATACTGCGATTCGCAAGATGAAGAATCAACGTAAGAAAGGGT +TCTCTGATCTCAAAATTGTTAAGGTCGCGAAATGAGTCTGAATATTCAAAACTGTCCGGC +TGATGTGCGGTTTGTAGTACTCAAAATGGAACGATTAGATTTTTGGCACTCAAAAGTTCA +AATAGTGCATTCTTACATCGGTTCCATTGAGCTTAAAACTTATTATGACGCTGGAGTTCT +TCATAATTGTCGAGTGCTCCCACGACCAAAAGGCGGATTTGATACTCATTATCATTGGTC +TGAACCACCGCTCAGCGAAATGTGGGAAGAAGGCCTGAACATGAAAGAACTTGAGGAATA +TCTTGATGCTTAATTTGCCTGAAGACGTTTCAGTTATAATGACCTTTAAAGAATTTGATG +GTAAAATTCATCGAGTTCGTAAAATGACCCAAGGCTTCATCATCGCAAAGGCTTGTGTTG +CATTCAGAGAGTCAAGACGGGATTTTAGAATATTCCCAATGAACTCTAAAACGAAGTACA +CTAAAGTGTCTACTGAATTAGCTTGGAATGAAGGAATGACTCTTTCCGAATTTGAGGAAT +ATCTGAATGACTAAAACAGAAATTGTTGATGATCTTCAATTAGCCGGATATTTTGCTTGT +GTTAAAGATGACCGCATCTGGATTGAAGGAACAAGTGAAAATGGGATTGATTGGGTTATC +GAAGAAGACTTTGATGCTTGGTGGCTATATGAGTTCACCGGAAAAGATTATCATTCAGTA +GATGCTTTTGGTAATATGGATCACGCTTTGAATGGAGCAAACAAATTATGATCAGAGAAA +TTATTTTATCATTTGAATTTGGCAATGTGACGGTAGAAAAATCCGGTAAGGTTGAATTTT +TCGATCCATGGGCAGAAATCTATTGGACTTCAACCGTCAATGAATTCCAAATGGAGTTCG +CTGAAGTTCAATTAAAATCGTTGTATAAAGAATACGATGACTATTCTCTGATGCCGCCGG +ATATTCAGATGACTGATATGCTTTACATCCGTCGTATGGTTACCAAAACATGGGAAATGC +TTAAATGATTTCTGCACTCAAATTTGATAGTCTAAAACTTGAAGTTGCCAATTATGGAAC +TTTCACAGTAACTCCTTTGATGGGAATTACTTTAGACATTGAGTGGTTTGATGAATTTCA +GTGGGTGTCTCATTGCTCTTTGTTGAATGTTAATGGGTATAAGATTGCGTACGAAAGCCT +TGATAAGTTCTGGAAAGAAAATGAGCTTCATCATGCTGCTGATAATATCTCTTTCGATGA +GTTTTGTCGTATCGGTGAAGCCCTGTTCCAGATGTATTTGATTCTTCGTAACAATTAAGT +GCTTTAAACCTATCGCTCGCAACTATGTTATAATTGATATATAAACTTGAAGCGATGAGG +TATTAATGGCGAACTATGTAAACAACAAAGAATTGTATCAAGCAATATGCGATTGGAAGG +AAAAATGTCGCAATGCCCCCGAAGGAGTAATCGTCCGTCAGAATGATATAATTGGTAAAG +CAATTATGCTTATATCTGAGGGTCTGTCAAAACGTTTTAACTTTTCAGGATACACCCAAT +CTTGGAAAGATGAAATGATTGCAGATGGAATTGAGGCCGCAATTAAAGGTCTCAAGAACT +TCGATGAAGAAAAATACAAAAACCCACATGCATACATCACTATGGCTTGTTTCAATGCCT +TCGTTCAGCGTATCAAGAAAGAACGTAAAGAAGTTGCAAAGAAATATAGCTATTTCGTTC +ACAACGTTTATGACGCCCGTGACGATGATATGGTTGCGTTAGTAGATGAAACGTTCATTC +AGGATATCTACGACAAAATGACGCATTATGAAACCTCCACCTACAAACAGCCAGGGTCTG +ATAAAAAGAGCGATATTGTAGATGAAGGACCGACTTTGGATTTTTTATATGAGGCTAAAG +ATTAACCTCTCCGGATTCTTGGAAGAAGTGCCAGACGCAGATGCTATCCCTTATTTGCTT +AAAATGTATATGAGGGAAGTTCTCGAAATGGACATTCACATTGACCCCAAAGATCCACAT +GATACAGAGTTCAAGTGTGATGGCAAGGACTTGAACTACAACTACCACATATCTGATGAT +GACTTTTATATCACATTAGAATACTTTCCAGAATGAGGAATTATGCTACAACCCGGCGAA +GCATTCCAAGCAGAACTTGAATATCAAGATAAGCTGATTGAAGATCCAGACCACCAGAAA +TTAATGGAAGAAGATCGTCTTGCAGCTATCGAAGAAGCACAAGCTCGAGTAGCTGCAACT +GCTAAATCTCAAGCTGATAAAATTATCAAGAAAAATAGTCGCGAACTTGAACGTTTGAAT +AAACATGCTCAACAATCAGTTCTCGATAACAACTTCGCGGCGTATAAGTATGCGATTGAA +AAATCTCGTAAAATCTTACGTCAACCATTCAATGACGAGCTTATCAAAGTCCAATGGGAA +ACCACTCGTCGTCAGATCTGGGAAATTGTAAATGGCTATAAAGCAGGTTAAATTCAAACG +CCTTAAGGTAAATTCAGGTTTCACTCTTTCTGTTGCTGATGGTGTAATGGCTATTAAAGT +ATCTGAAACCCATTACAAAGTCTTGGGTGAAACAGGTCCTATCAATCCAGTAGTTAAAGC +TACTAAAAAGGAATTAGTCTGGGCTGATACAATTATGGTGAAGCCATGGTGGAAGCTGTA +ATATCAAAAGCTGCTGTAGTATCCCGCAACGGAACTGTTTATTCGGCTGAAGCTTTAGAA +AGAGCTATTGATTATGCAAAGATTCATAACGGCAAGACTGAAATGATGAGGCAGTTCAAG +ATGTCATATGATAAAGCTAAGGCTGAATGTACAATTACATACAAGAAAATTTAAGGGCTT +CGGCCCTTGCTCTTTAAGGTAAAATATGGAACAAATTCATGTAGGTGGAACCGACTTTCT +TGTCGCAGTGGTTATTCATCCAGTTGATAATCAAAACGAATTCAAATATGATGTGACAGT +TCGTCATTATCAGTTTGACCGAATTAAGCACGTCGATATCATTGCATTACGCAAAGAATA +CGATAAAGTTGGATATACGGGTGAGCTTAAACTTGTATTAAAACAAGGCTATGAAGAAGA +TTATCCTTGTAGTTCATTTATTAATAATCCGGCTTTCTTTAGTTCAATGACCGAAGAAGA +ACGAGACGAATTTATTGATAGAGTAAATAAGTCTAAAATCCCAGAAATATTACGTAAGAA +ATAAAGGACCTTCGGGTCCTTTTCTGCTTTTTGGAGCATAGAATACAATATCCTTGAGGT +AAAATATGATTACTTACTTAGGTGTACTCTGTTTAATCGTAGGGTTGTACTTGTTTGGCC +GAGCTTGTTGGGTTGGATTCTTTTCTACACCAGATGGGTTCATTTCTATGATTTTAATTC +TTTCAGCTATGACGGCACTTGAAATATGAAAATTTTGCATACAGGTGATTGGCACCTAGG +AGTAAAGGGTGATGACCCTTGGATTCAAAACATTCAGCGAGATGGAATTCGTCAAAAGAT +TGAATATTCTAAAAAGCATGGAATAAAAACTTGGATTCAATATGGAGACATCTTTGATGT +TCGTAAGGCGATTACTCACAAGACAATGGAATTTGCTCGTGAAATAGCTACAATGTTAGA +AGAAGTAGATATCCACATGATTACCGTCGTGGGAAATCACGACATGCACTATAAGCATAA +GATCACTCCCAACGCTTCAATGGAAGTTCTCGGTAAGTATAAGAACATCACAGTCGTTGA +AAAACCAGTTACAATGGATTTCGATGGTGCTTTGATTGACTTAATTCCATGGCTTTGCGA +AGAGAACGTTGCTCAAATAATGAAGCACGTAAAAGAATCTTCTGCTGAGTATTGTGTAGG +CCACTGGGAGCTTAATGGCTTCTATTTCTATAAAGGGTTAAAATCTCATGGTCTCGAACC +AGACTTCCTCAAATCATATAAGCAAGTGTGGTCAGGACATTTCCACACAATCTCCGAAGC +AGCTAATGTCAAATACATTGGAACCCCGTGGACGCTTACAGCGGGTGACGAGAACGACCC +GCGAGGATTCTGGGTTCAAGACACTCGATTACGCACCTTTGATTTCATCCCTAATGAAAC +AACATGGCACAGAAAAATCTTCTACCCAGTAACTGGGCCAATTGATTTCAACGACTATAA +AGACTTATCAGTTCGTGTCGTTATCACAGAAGTTGATAAAGATCTACCGAAGTTTGAAAG +CGAACTTGAAAAAGTAGTCCATGAACTTCGAACTGTTTCAAAAATCGACAACTCTCTTGA +AGTTGAAGATAGTGAAGAAGTTGAAGTAAAAGGCTTATTAGAAATTATGGAAGAATATAT +CAATGCTCTCCCTGATTTATCTGATGACGACAGAACTGCTGTAATTCTGTACGCCAATCA +ACTCTACACTGAGGTTACTAACTCGTGAAACTCCATGAATTTAATTTAGGTGATGGATGG +TTCGGTAATATCGAATACTGGCCAGAAGATGGCGGGTTTAAAGGCATTATGTTTGTTACA +TCTGAATATTCATTGGGCGTATCATGGCAAGAACATTTTGATGTAATGTATGTGTCTGAA +GATTTTATGTTAGAATGTTGCCGCAACTATATTCGTGAGAATAACACATGAAGACGTTTA +AACTTAACCGAGTCAAGTATCAAAATATTATGTCAGTGGGCGGTCAGCCCATTGATATTC +AACTTGACAAGGTTCAAAAAACTCTAATCACCGGTAAGAATGGTGGTGGTAAGAGTACAA +TGCTTGAAGCAATCACGTTTGCTTTATTTGGTAAACCTTTCCGAGATATCAAGAAAGGAC +AATTAGTTAACTCAGTTAACAAGAAGAACTTACTCGTCGAGCTGTGGATGGAATATGATG +GTAAGTCTTTTTATATCAAACGAGGACAGAAACCAAATGTCTTTGAAATTTCAAGAGATG +GTGTCCGACTTGATGAGTCCGCGAGTGTCAAAGACTTTCAGCTCTACTTTGAAGAACTCA +TCCACATGTCATATTCATCATTTAAGCAAATTGTCGTACTTGGAACGGCGGGATATACTC +CGTTCATGGGCTTATCAACACCAGCACGACGAAAACTCGTTGAAGATTTGCTCGAAGTGT +CTACATTGGCTGAAATGGACAAATTGAATAAGTCTCATATCAGAGAGATTAACTCTCAGG +TATCAGTGATTGACGCAAAGAAAGATGGAATCATTCAGCAGATTAAAATCTATGAAGATA +ACGTTGAACGCCAAAGAAAACTTTCAGGTGAAAACGTTGCACGATTCCAGAGTATGTATG +ATGACTTGGTTCGTGAAGCTAAGTCAATAAAGGCTGAAATTGAAGATGCTACGACTAGAT +TGACTTCAATAGTACTAGATGAAGACCCTCGTGAGTCTTTAACGAAGATTGGTCAAGAAT +CTTTCTTGATTAAGTCCAAGATTGACTCATACAACAAAGTGATTTCTCTGTACTCTTCTG +GCGGTGATTGTCCAACGTGTTTCCAACATTTAGACCAGGGTTCCTCTCTGATCACCAAGA +TCACTGATAAGGTCTCTGAATGTAATCATACAGCGGAGCATATTAACAGTCAGAGAGCCG +TTCTGGAGTCACTAGTGCATGAATATGAAGCCAACCTCAACACCCAGCGTTCACTGGCTC +AAGATATTCGTGCTAAGAAGCAAGTGCTGATTGGAACTGTAGATAAAGCCAAAAAAGTTA +AAGCTGCATTAGAAAAAGCTTCACAAGAATTCATTGACCACGCAGATGAAATTAATTCGC +TTAATGAAGAATTGAATAAAATAATTGATACCAAATCCAATATGGTGATGGAAAAATATC +ATCGTGGCATTTTAACTGAAATGCTCAAGGATTCTGGAATAAAAGGCGCAATCATCAACA +AATACATTCCATTGTTCAATAAGCAGATCAATCACTACTTAAAGATAATGGAAGCTGATT +ATGTCTTTACATTGAATGAAGAGTTCGCTGAAACTATCAAGTCCAGAGGACGAGAAGAAT +TCAGTTATGCTTCATTTAGTCAAGGTGAAAAAGCACGTATTGATATCGCTTTGTTATTCA +CATGGCGAGATATTGCTGAGAAAGTTTCTAACGTTAAGATTAACTGTCTTTTCTTAGATG +AAGTTTTCGATTCTGCAACCGATGTGGAAGGTGTAAAATCAATTACATCAATTCTTAATG +GTATGCTAAACTCTAACGTGTTTATTATATCACACCGCGATCATGACCCTCAATCATATG +GACAACATCTTCAAATGAAGAAAGTTGGACGATTTACGGTGATGGAATGAGTAACTTTGT +AAACGGTCAGAATCTTCTGACCGCACCAGAAATAAAGCGGTATGTATTGAAAAATAATTT +TTCAGGACAAGAGCATCTTGCAACTGAAGAACAACTTCGTGCTGCTTTTAAAAATAAGTA +TGATAAAATAACATCCAATCGCGATTCCGCGTGGACAGTATACGAATATTTTGAATAGGA +ATTATTATGAACCTGAATTATGCAATCGAAGTTAAAGACATCCAACCTAAAAACGTACGT +TGTGACTCTAACCCGAATAATCAAAACAAAATCCGTCGAGCATGGGTAACTATTCTAGGT +GAAGAAGGTGCCGAAGCTATTCGCAAACGTTTCCCTGTTGCTGAAGTACGTCATGCTTAT +TATGCGGCGATTGATAATTCAGTCAATGAAAAGTGGATCTCTATTATGCAGAAACATTAC +CAAGACTCTATCAAAGCCGGCGCTAAAATTGTTCTTGATCGTTGTGGTGGTGAGCGTCTG +GAAGATCAATACTGTCTGGATGCTGATGAACAATTAATTTCAGCTGCTCTGATTGTAGCT +GAAGAAGTAGCTATTGAAATCTCTAAATAAGACTTGAAAGGAAAAATAATGAAATTCACT +AAAGAAACTCTCGCAATTCTGAAAAACTTCTCTACCATCAACTCCGGTGTTATGCTTAAG +CCTGGTAAGTTTATTATGACTCGTGCGGTCAATGGTACAACTTACGCAGAAGCTAATATC +GCTGATGAGATTGATTTTGAAGTTGCGATCTACGATCTTCCGAGTTTCCTGGGTATTCTG +GGGCTGGTAAGTGAAGATGCAGAGATCTCTATGGCAGATGACGGTAATATCAAAATTGCC +GATGCTCGTTCAAAAATCTTCTGGCCGGCAGCTGATGCGTCTACAATCGTATTCCCGAGC +AAGCCAATTCCATTCCCAACCGCTTCTGTTATCGTTGATTTCAAAGGCGAAGATCTTCAG +CAGCTGATGCGTGTATCTCGTGGTCTTCAGATCGATACAATTGCTATCGCAAATAAAGAA +GATAAAATCGTTCTGAGCGGTTATAACAAGGTAGAAGATTCTGCTTTGGTTCGTCCGAAA +TATTCTCTGACTCTGGGCGATTATGACGGAACCAATAACTTCAACTTCGTTATCAATATG +GCGAACATGAAGATGCAACCAGCAAGTTACAAACTTCTGCTGTGGGCAGATGGTAAGAAA +ACTGCCGCTAAGTTTGAAGGTGAAGCTGCAAGTTATGTAGTAGCTATGGAAGCAGATTCT +ACTCACGACTTCTAAGTACCATGGGCCTTCGGGCCCAATCGTTTTGAATAAAAATTTATG +AGGAAATTATGTTAAGCATTAATGAAAAAGAGCACATCCTAGAACAAAAATATCGCCCTT +CAACTATTGAAGAGTGTATCCTTCCAGCTTTCGATCGAGAAGTATTCAATACTATCGTTA +AGAAAGGAAAAATTCCTCATCTTATTCTTCACTCTCCATCACCAGGCACCGGTAAGACAA +CAGTAGCAAAAGCATTATGTAACGATGTCAATGCTGATATGATGTTTGTCAACGGTTCAG +ACTGTAAGATTGACTTTGTCCGTGGGCCATTAACTAACTTTGCGTCTGCTGCTTCAATTG +AAGGCCGTCAGAAAGTTATTGTAATTGACGAATTTGACCGTTCAGGTCTTGCAGAATCAC +AACGTCATATGCGTTCGTTTATGGAAGCATATAGTTCAAACTGCTCAATCATTATCACTG +CGAACAACCTCGATGGAATTATCAAACCTCTTCAAGACCGCTGCCGAGTAATTGAATTTG +GTAAGCCTACTCCTGAAGATGAAGCACCGATGATGAAAGAAATGATTCGTCGTCTGATTG +CGATTTGTAAAAATGAAAATATCGAAATCGCTGATCTTAAAGTTGTAGCAGCTCTCGTTA +AGAAGAACTTTCCACGTTTCCGCAATACAATCGGTCAACTGGATATGTACTCTTCGAAAG +GGGTACTTGACGCTGGTATTCTGAGCGTAGTGACAAAAGAATCTGGTTCAATCACCGATG +TTTTAGATGCTTTAAAAAATCGTGATGTGAAACAACTTCGTGCATTGGCTCCAAAATATT +GCACCGATTATTCTTGGTTCGTTGGCAAACTTACATCAGAACTTTATACTATGCTCAAAG +GCCCTGGTATCATGTCGATGTATGAAATCGTCGGTGAAAATAACCAGTACAAAGGTGTAG +CATCTAACGCAGAACTTCACGTTATGTACATGTTCTTACGTTTGACATCTGAACTTAAAG +ATGAGTGGAAATAATGAGCTTATTCGATGATGACGTTCAACTAAATGAGCACCAAGTAGC +TTGGTATTCAAAAGACTGGACTGAAGTCCAGAAAGTATCTGATCAATTCAAGCAGACTGC +TGAGAACGAATTCTTCGAAATCATTGGGGCAATTAATGAGAAGAAACCTTGCTCCATAGC +TCAAAAGAATTATTCAAGGCATATGGTTGAAAATGCTCTGTCTCAACATCCAGAGTGCAT +GCCGGCAGTTTACGTTATGAACCTCGTTGGTTCCGAGCTTTCAGATGAAGACCACTTCAA +TTATATGATGGCTGCTATTCCTCAAGGTCGTCGTTATGGTAAGTGGGCTAAGTTAATCGA +GGATACCGGAGAGTTACTTGTACTCCGGGTATTAATGAAATATTATACGATTAACTTGAA +TGACGCTCAGGTTTATAGAGATACCCTGGTGTCAAAAGGGAAACTATCCTTGGTACTGAA +AGAAGCTAAGGCTTTGGTTACTGACGAGTTCCTGAAGGAATTGACGAAAAACGTCAAAGA +ACAAAAACAATTCAAAAAACAAGCATTGGAATGGTAAACATGATTGAAATTACTTTGAAA +CAACCTGAAGACTTCCTGAAAGTAAAAGAAACCTTAACTCGTATGGGAATTGCTAACAAC +AAAGATAAGATACTATATCAAAGTTGTCATATTCTTCAGAAACAAGGTCGTTACTACATC +GTACACTTCAAGGAAATGCTTAAACTTGATGGTCGTCCGGTAGTGATTGACGAGGAAGAT +GAAGTACGTCGTGATTCAATTGCTCAACTGCTTGAAGATTGGGGTTTAGTTGATATTGCT +CCAGGACAACGTTCTTATATGTTTGAGATGGCCAATAATTTCCGTGTTATCTCTTTCAAA +CAGAAAGACGAATGGACTCTTAAATCCAAGTACACAATAGGTAATTAATATGGACGATAT +CAATTACAGAAAACTTCGAATCGAGTATGGTCTGAGACAATGGGAGACTATATTCGATCT +ATGCGAAGTCGCTCAAGAAGAATTCCAACGTGAACTCGCCATTCGCAATGGCGCTCAACC +GCGTGATGTTCTCCAAGTCTTTATCAGAACTGAATGCGAAGATGATGACACAGTAGATTA +CAAAATCACTCGTAAAACTATTGAAATTTAAGTAAGGGCCTTCGGGCCCTTCATGCTATT +CTCTCGGATGATAAAATATCTACAACAAAGAGACTAATAACTCGGTCTATAAACTAAGGA +AACTCATGCAATTCTATATTTCAATTGAAACAATCGGTAATGACATTGTTGAACGTTATA +TTGACAATGGTGTTGAAAAAACTCGTCGTGTTGAATACGCTCCGACAATGTTCCGTCACT +GCACTCATAAGACTAAGTTCGTTGACATCTATGGCAAAAACTGTGAACCTCAAAAATTCG +CAAATATGAAAGATGCTCGCGACTGGATTAAACGTATGGAAGACGTCGGTCTTGAAGCAA +TGGGTATGGATGATTTCAAACTGGCTTATTTGTCAGACACTTATGGTTCTGAAATTGTTT +ATGATCGCAAATTCGTTCGTGTTGCGAACTGCGACATCGAAGTAACAGGTGATAAATTCC +CAGACCCAATGAAGGCTGAATATGAAATCGATGCCATTACTCACTATGACTCAATTGACG +ATAAATTCTATGTGTTTGACTTGTTGAATTCATTGTATGGGTCAGTTTCTGAATGGGACA +TTAAGTTAGCTGCTAAGTTAGATTCTAAAGGCGGTGATGAAGTTCCACAGGATATTCTTG +ATCGTGTAGTTTATATGCCGTTTGACACCGAAGCTGAACTACTGATGGAATACATCAATC +TTTGGGAACAGAAACGTCCAGCTATTTTCACAGGTTGGAACATCGAAGGCTTTGATATTC +CATACATCATGAATAGAGTCAAGAATGTTTTGGGTGAACGTTCAATGAAACGATTCTCTC +CAATCAACCGAGTCAAATCGAAAGTTATCACTAACATGTACGGCGATAAAGAAGTATTCT +CGATTGATGGCGTAACAATTCTCGATTATTTAGATTTGTATAAAAAGTACTCATTCACTA +ACCAGCCGTCTTATACTCTGGATTATGTCGCGAAGTATGAGACTAAAAAAGGCAAGCTTC +CATATGACGGACCGATTAATAAACTTCGTGAAACTAACCATCAACGTTATATTAGCTATA +ACATTATGGACGTTGAGTCTGTCGGTGGTATTGACCGCGTTCGTGGTTTCATTGATCTGG +CACTTAGTATGTCTTATTATGCTAAAATGCCGTTCGGTGGGGTTATGTCTCCTATCAAGA +CTTGGGATGCGATCATCTTCAACAGTCTTAAAGAGCAAAATAAAGTAATTCCGCAAGGTC +GTTCTCATGTTAAGCAATCTTTCCCAGGTGCTTATGTATTTGAGCCATTAGCATGCGCTC +GTAAGTACATTATGAGTTTTGACTTAACATCTCTGTATCCAAGTATTATTCGTCAGGTGA +ATATTTCTCCTGAAACGATTGTTGGTCAATTTAAACTTCATCCAATTCATGAGTACATCG +CCGGAACAGCACCGCGTCCATCTGATGAATACTCATGTTCACCTAATGGTTGGATGTATG +ATAAGAACAAAGAAGGCGTAATCCCAACCGAAATCGCGAAGGTATTCTTCCAACGTAAAG +ATTGGAAAAAGAAAATGTTCGCGGAAGAAATGAACGCAGAAGATATCAAGAAAGCTATCG +CTGCTGGGGTGTTTGGTTCAGGAAGCTGTGAAGAAAAACGATATGTTCGTTTCACTGACG +AGGAACGTGCTGCACTGAGTAGTTATTCAAAACTTGTTCTTGAAGCAATGCTTGCTCGTT +GTGAAGCCGCCGCGATTTTGGCTGATACGAACCAGTTGAACCGTAAGATTTTAATCAACA +GTCTTTATGGTGCTTTGGGGAATATCTACTTCCGTTATTACGATCTTCGTAACGCAACTG +CAATCACTCTGTTTGGTCAGGTTGGTATTCAATGGATTGCTCGTAAAGTTAATGAATATT +TGAACAGGGTTTGTGGTACTACTGGTCATGATTTCATTGCAGCCGGCGATACAGACTCAA +TTTATGTTTCTGTCGATAAAGTTATAGAGAAGGTTGGTTTAGATCGTTTCAAAACTACCG +ATGAAGTGGTTGAATTTATGAACCAATTCGGTAAGAAGAAAATGGAACCGATGATCGATA +AAGCTTATCGTGAACTTTGTGAATATATGAACAACAAAGAACACCTTATGCATATGGACC +GTGAAGCAATCTCTTGTCCTCCATTAGGTTCTAAAGGTTGTGGTGGATTCTGGAAAGCTA +AGAAGCGTTATGCATTGAACGTATATGACATGGAAGATAAGCGATTCGCTGAACCACACC +TCAAAATTATGGGTATGGAAACACAGCAATCAAGTACTCCAAAGGCGGTTCAGGCTGCAT +TGGAAGAATCAATTCGTCGTATGCTTCAGGAAGGCGAAGAATCCGTACAGGAATATTTCA +AAACATTTGAAAAAGAATATCGTCAACTTGACTATAAAGTGATTGCCGAAGTTAAGACTT +GTAACGATATTTCTAAATATGACGATAACGGTTGGCCAGGTTTCAAATGCCCGTTCCACG +TTCGTGGTGCTCTGACTTACAATCGAGCAACTGCCGGGTTCAGTGCTACTCCGATTCTCG +AGGGTAACAAGGTGATGGTAGTTCCATTGCGTGAAGGTAACCCATTCGGTGATAAATGTA +TCGCGTGGCCGTCAGGTACTGAACTGCCGAAAGAAATTCGTCAAGATGTTTTGGCATGGC +TTGACTACAGTGCTCTGTTCCAGAAATCTTTTGTTAAACCTCTTACGGGTATGTGTGAGT +CCGCAGGTATGGACTACGAAGAGAAAGCGTCATTAGAAGATATGTTTGACTTCTAACTGT +TTACATCCACATGGAAGTGGATTATAATGTTCTCACATTAACCAAACGGATAACAAAAAT +GACTCATCGCGAAATTCATGCTCTTCGAGCTAAACCCGGAAAAGCTGCCGAAAAGAAAAT +CCTGATGAAGGATTATGAGTTGATGAAATCTGTATTATGGAACTTAGTAATTCTATCATG +CGGGAATGAAAATTCCACTTATAACGGTCTTTACCCTAACGGTGTAGGTGCTGCTTTAAA +AGCTCATCGTGAAAACATTAAAACTCTTGAAGATAAAATAAAAGATATCTGTCATTAATG +AATTGGGCCTTCGGGCCCTAAACGGAGAAACAACATGAAATTGAAAATTGCTTTAATCGC +TGCTGCGCTGGCACTAACTGGTTGTCAGGCTTACCATGGACCTATCGTTGGTGAACATCA +AGTTGGCCAAATTTCTTATAAAGGCGGAACTGGACTTGTCTATACTCGAGCAACTCAACA +AGTTTCGCAAGAATCTTTGAGCGCAGGTGACGAAATGGAAGAACGTCGTCGCAACAGTCC +ATTAAGTAAAGCTATCAATGAATCAGTAGCACGAGGTGATGCGTTTCAAAAAGAGCAAGA +TCGCCGTGAATCTGCGCAAAATAAGTGTGAATTCATTGTTGAAGCTCATGAAGCTGTATT +GACCGAAAACGCTATCAAAACTATGAGTGACAAAGACCGCCTGGCTTTGATTCACTATCG +TTCTTCCGGTAAAGTTCGTGCATTCAATAAGTGCATGCAAAACGCTAACAAATAATTTGA +TATAATAAATCAACTGAGGATATTGTAATGGAAATCATCGCAGGTATTATTTCACTGGTA +GTTTACATGATTCCGGCTATTATCGCGTTTATTCGTGGTCACGGTTCAAAATGGGCTATC +ACCGTAGTTAACTTTCTGTTTGGCTGGACATTCATTGGTTGGATTTGGGCATTTATCTGG +TCTCTGACTGGAAATAAGCCTGCTCAGCAACAGGTTATCATTATTAAAGAGGCAAAATGA +TTGTAACACCTTTGACAGTAGAAGATATTCGTGATGAACTTTGCTATGCGCTGGAAAGTG +AACAGTTTGTAATTGACAAAACTGGTGCAAAGACAATTGAAATTATTGGCGCATCATTTA +TTGCAGATGAAGAATTAATCTTTGGCGCAGTGAATAATGAATATGTTGAACGCGAACTTG +AGTGGTACAAATCTCAATCTTTGTTCGTGAAAGACATTCCTGGCGGTACTCCATCTATTT +GGGAACAAGTTTCATCCAAGAACGGTGAGATTAACTCAAACTACGGCTGGGCAATTTGGT +CCGACGAAAACTGTTCGCAATATAATATGTGTCTTGGCGAGCTTGGAAATAATCCAGATA +CTCGTCGTGCTATTATGATTTACACTCGTCCATCAATGCAGTTTGATTATAACCGTGATG +GTATGAGCGATTTTATGTGCACTAACACTGTGCAATATTTGATTCGTAATAAGCGAGTTC +ATGCTATTGTTTCAATGAGAAGCAATGATGTAGTCTTTGGATTCCGCAATGATTATGCAT +GGCAAAAATATGTTCTTGATAAATTGGTGTCTGATTTAAACGAGGGTGATTCTTCTCGTG +AATATAAAGCTGGTGATATTATCTGGAACGCTGGGTCATTACACGTATACGAGCGTCACT +TCTATTTGGTTGATCATTACCTGAAAACTGGCAAGTCTCACGTGTTGAAGAAAGATTATA +AAGGTGAATGGAAATGATTCAGTTTGTAATTCCAAGTTATAATCGTGCTGGGGCAGTTAC +TGCCCTGGACATGTTCCCTACTGGTTATGTTGCTCATTTAGTAGTTCGTGAGTCTCAGAA +AGAAGAATATGAGACTCACTATGGTGCAATTGCTAAAATTGTAACTATTCCTGATGATGT +TAATGGAATCGCTGGTACTCGACGGTTGATCACCGAAATGTATCAAGGCATGCGTATTTG +GATGCTGGACGATGATACAACAATTCATACAACAGAAACTCGTGAACGAGACAATCGCCG +AATTCTTCATGACGTCGGTATGACTTGGGACGAATTTAATAAGCTTTGCCAGTATGTTGA +AGCTGCGATGGATTGTGGATTTTATCATGGTCATTCTCGTCTTCCAATCTTCAAAATCTC +TGGTGATGATGCAAACTTTCGTGAGAACTCTTATGGATTCACGAACACGTTCTACGACTT +AAGCAAACTTTCTGCTGATGACATTGGATATGGTATAGTAGACCTATCCGAAGATACATA +CGCATTCCTTAAACTCATTAATATGGGTTATCCTCATCTGGCGATTTTCAAATATCTCGT +CAAATCGGGTAAAGGTCAAGCTCCAGGTGGTGTATCGTCTATGCGTAATGCCGCTAAACA +AAACCGAGCATTAGAAAAAATCCATGCAGACTTCCCTACGCAAGCTCGTTGGAAATCAGA +AGGTGACCCAACCAAAACTATGTTTGGTACTGATGAACCTTTGAAAGTACTTCGTATGTG +TGTTGCTAAAAAGCAGAAGTCTGACGCATTCCATAAATTTAGTGAGATTGAACCTAATCT +ATGAAAATTGCTATCATCAACATGGGCAACAACATTCAGGGGTTTAAAACAACCCCTGCT +TCTGAAACCATTTATCTGTCTGAGTGCTTGAAAGATATGGGTCTTGATGTAGACCTAATT +TCAATGAAGAACACTCAATATGGAATTTCTTTTGACTCTGTAGAAGACCCGAACGTATAT +GACCGTCTGTTGGTTGTTAACGCTGCTTTGAACTTTTATGGTGGCGAAGAAAACGCAATG +AACAAAGCGGCTTATATGTTCATGAACAAATATAAGTCAAAGATCTATTATCTCTTCACA +GATATTCGTTTGCCATTTGAACAAGCATGGCGTCGTATGTCAAAGAAAAAATGGTCCAGC +AAGTACAAAGAAGAACAATTCATTGTAACTGCTCCTATGCGTATTGTATCGCAAGGTCGA +GATCTTGAACAAGCAAAACGTATTCACTCTGAACGTCTGGTGGGATGTCAATTCGGTAAA +CTAGAGTTCACTCACTTCGCTTTAGACCGTCATAAGATGTATCACAGCGTCTTTAAAATT +GCACCAGATGGAATTAAAATGCGTGACCTGATTTACGGCGGAACATTCCGTTCTGGCAAC +CGTGAAGCTAAGATGGTTGAATATCTGTTTGATACTGGACTTGATGTAGAATTCTTTGGT +TCAGTTAAAGCTGAACAATTTAAGAATCCAGAATTCCCATGGACTATTCCTCCAGTATTT +CCTGGTAAGGTAGATTCTCGTGAAATGGTTCAACGTAACTCTACTGCTTATGCGACTATC +GTATTAGGCGATAAGACTTACGATAATAACCAGATCACTCCTCGTGTATGGGAAGCACTA +GCATCAACTGCAATTGCATTCTTTGACCATACATTTGACCCTGACATGAATATCATGGAT +GGGAACGAGTTCTTTTACGTTAAAAACCGTCAAGAACTAGTTGCTAAAATTAATCGCATC +AAAGAAGACGAAGATTTCCGAGTTCAAATGCTCGCATATCAGCACTCTATTCTCCAGAAA +TATCTGGATGAAAAGCCACAATGGCAAGCTGAATTTAAGAAAGCTATCGATCTGTAATAC +AAAGAGGGTTTAAAATTTTAATTAGCTTTAAACCCTCGGTTATATAATTAATCATCCTTT +AAACCAGTGAGAAAAATATAATGGAGATCAATGGAAAATATTGAATGTCTGATTTAAAAT +CTCGTCTGATTAAAGCATCCACTTCTAAAATGACCGCGGAACTGACTAAGTCTAAATTCT +TCAATGAAAAAGACGTAATCCGTACTAAAATCCCGATGCTGAATATCGCAATCAGTGGGG +CATTAGATGGTGGTATGCAGTCTGGTTTGACAATCTTCGCTGGTCCTTCAAAACACTTCA +AATCAAATATGTCTCTGACTATGGTCAGTGCTTATATGACGAAGCACCCAGATGCAATTT +GTCTGTTCTACGATTCTGAATTCGGTATCACTCCGGCTTATCTGAAGTCTATGGGTGTAG +ATCCTGACCGTGTAATTCATACACCAGTTCAGTCTGTTGAACAACTTAAAATTGACATGG +TGAACCAGCTCGAAGCTATTGAACGTGGTGAGAAAGTTATTGTCTTTATCGACTCTATCG +GGAACTTGGCTTCCAAGAAAGAGACCGAAGATGCTTTGAACGAAAAATCCGTCGCGGATA +TGACTCGTGCTAAAGCATTGAAATCTCTGTTCCGTATCGTTACTCCATATTTCAGTATCA +AAAATATCCCATGCGTAGCGGTTAACCACACAATCGAAACTATTGAGATGTTTAGTAAAA +CTGTAATGACTGGTGGTACTGGTCCAATGTATTCAGCAGATACCGTGTTCATCATTGGTA +AGCGTCAAATCAAAGATGGTACAGATCTTCAAGGTTATCAGTTCGTTCTGAACGCTGAGA +AATCTCGTACTGTCAAAGAGAAGAGTAAGTTCTTCATTGATGTTAAATTTGATGGTGGTA +TTGATCCATACTCTGGTCTGTTGGATATGGCTCTGGAACTTGGATTTGTAGTTAAACCTA +AGAATGGTTGGTATGCTCGTGAGTATCTTGATATCGAAACCGGTGAAATGGTTCGTGAAG +AGAAATCATGGCGCGCTGCCGCTACATCTTGTGTAGATTTTTGGGGTCCGCTGTTTAAGC +ATCAACCGTTCCGTGACGCAATCAAGCGTAAGTATCAACTCGGTGCTATTGATAGTAACG +CAGTAGTTGATGCTGAAGTTGATGAACTGATCAATTCGAAGACTGAAGTCTTTAAAGCAC +CAGAAGGCTCCTCTGCTCCTTCAGCTGCTCAGTTGGAAGATGATCTGGACAATTTTGATG +ATGTAATGGGGCATCCAACAGAAGGTTTATAATGAGTGATTACGATTTAAGTGATCTTGA +CCTTGAAATCGTAGAAGATACCCCCTCTCAGGAGGGGGAATTCGAAAGGATGGAAAGGAT +ATACCAGCGTTCCGCTGAGATTGTTAAGAAGGCTATGGAGAATGTCATCCAGGAAATCCT +GATAACACTAGAGGATGGTTCAAACCATATCGTATATGTTACCTCATTAACTGTTGTTGA +AGGCGGTGGAGTGTCACTAGAGTTCTCTACGTTATCAGAAGATCGTAAAGCCGAACTAAC +ACCACACGTTGAAAAATGTATTAAAATGCAGATAGAAAACTCTTTTAAAGAGAAGAAGAA +AAACCGTTTCAAATTATTTTAATGAGGCTTCAAGTGGTAGAAACAATATTATCGCATTTG +ATTTTTAACCAAGGCTACTTCGCAAAGGTGTGGCCTTATATGGACTCTGAGTATTTCGAG +CATGGTCCAGCTAAAAACGTATTCACCTTACTACAAAAACATATCAATGAATATTCAAGT +GTTCCATCGTTGAATGCTTTGAATATTGCATTAAGTAATTCTTCGCTGGGTGAATCGGAA +GCTGAAGGCGCACAAAAGCTTTTAGACAAATTAGCCGATACTCCTGAAGACTTGTCATGG +TTAGTTAAAGAGACTGAAAAATATGTCCAGTCTCACGCGATGTACAATGCCACATCAAAA +ATAATTGAAATTCAAACTAACGCTGAATTACCTCCAGAGAAACGTAACAAGAAGCTTCCT +GATATCGGTGCTATTCCAGATATCATGAGACAAGCTCTTTCCATCAGCTTTGACTCTTAT +ATTGGTCATGATTGGATGGATGATTATGAAGCTCGTTGGTTAGCATATCAAAATAAAGCT +CGTAAAGTTCCATTCTTGATGAATATCCTGAACCGAATCACGAAGGGCGGCGCAGAAACA +GGCACACTGAATATTTTGATGGCTGGTGTAAACGTCGGTAAGTCGTTAGGATTGTGTTCA +TTAGCAGCCGATTATCTTCAGACTGGTAAGAACGTTCTTTATATCTCTATGGAGATGGCT +GAAGAAGTATGTGCTAAGCGTATTGATGCTAACTTGCTTGATGTGTCTTTGGATGACATC +GATGATGGTAATGTATCTTATGCTGAATACAAGGGTAAGATGGAAAAATGGCGACAAAAG +AATACTCTCGGTCGTCTGGTCATCAAACAATATCCTACTGGTGGTGCACACGCAAATACA +TTCCGCGCACTTCTGAATGAATTGAAACTCAAGAAGAATTTTGTGCCAGATGTCATTATG +ATTGACTACCTCGGTATCTGTGCTTCATGTCGTATTCGTCAATACACTGAAAATAGTTAC +ACATTAGTTAAAGCGATCGCAGAAGAACTTCGTGCACTTGCGGTTGAAACTGAAACTGTA +GTTTGGTCTGCTGCTCAGGTTGGCCGTGGTGCTTGGGATGCTTCTGATATGAACATGAGT +GATATTGCAGAATCGGCGGGTCTACCAGCAACAGCAGACTTTATGCTAGCAGTGATAGAG +ACAGAAGAACTTGCACAGATGGAACAACAACTCATCAAGCAAATTAAGTCTCGTTATGGT +GACAAAAATAAGAACAATAAGTTTTCTGTTGGTGTTAAGAAAGGTAATCAACGCTGGGTT +GAAATCGCACAAGAAGGCGGTGATAAACCTACACCAGTAAGCGAAACATCTGGTGGTCAG +CAGCGCGTAGCAGAGCAAAATCGTATAGCTAAGGTTGAAGTATCTCGAGCCAAACTCGAC +GCATTAGCCGAAGATATGAAATTCTAACCGTTTACATACACATGGAAGTGTGTTACTATG +ATCTTATACAAACAAGAGGAAAACAGCATGAAAAAGATTATCTTAGCAGCAATTTTATCT +CTTTCAGCTTGCGCTGGAACCCCAGCAATGGCAGCAGATGGGTATTCAAGCATTCCATGT +ATTAAGTTCATTGAAGGCGACTGGAAAGATCAAAAGCCTCGCGTCATTAAAGACTTACTA +GCTGTTGCAGATAAAAATCAGGCAATGCTTGTAGAAGATCTCGATGACAATGACCTGGTA +GTTGCTGGTACTAATCTGTACTGCGAAAATATTCCGGCTAAAGATGTTCTGACTTGGGTG +GGACTGTAATGAATATCATGTTAATGTATCAACCAGCATCAGAAATTGTGCGAGGTATGA +AAGTTGAACACCGAGTTCCTGCATTATGGGAAACATTCCTAGATACTGGTTCTAAATTAA +ACTTACCATTTGGCGAAGTAACAATTTTCCAGACCGGTACTAAACCAACTAAACGCCAGC +TCCGTAAGTTCAAACGTATTCATCGTGTCAATATGGTTAAAAGCATAGCTGAGCATGAAT +TTAATAATTCTTGGGAAGGCATCCATTGTGATGTTATGGGGCTGTAATGCATATTTTTAT +TCTGATTCTGGCTCTGACAACCGGCGATTCCGGTGGCGCTGCAATTGATAAGGTTGAAAT +AAAATCTCAAGATTATGCTGAAGCCAGTAAGATGTGCGACCGAGCGGGTGAAAGTTATCG +AAAAGACGTTAAGTCATTCAACGTTTATCCGGAATATACTTGCATCTACGCTGGTGTTAA +ATAGACCAGGAGGTGTTTATGAGCACTATTAAAGGGGCGATGGACGCAGTATATGCGTAC +AAATTTATTCGCCTGATGTCTAAGCCTTTCACTGAGTGGAAAGCATATGAAGCAAAAATA +ATTGACGAAAAGGGAACTGTGCTAAAGCGTCCTAGCACTCCAGAAGAGAAAGTGGCTTAC +TCTGCCTTTCATGCGAGTGTTCGGTCAATTAAGCGTATGATGTCTACAGTTCCAGGATTA +AACGGCGTCGCGTCGATGATGTCGGCTTGGAGTACAGTAGCATCTCGGTACAATATAACA +GAATCCGAACAAAAAGAGATATTTGAGGCTCTTCCATTGTTCGAGGACATGGTAGCTGGT +GATTCCGGTGGAAGTGTCCAGAATATCGCCTCTGGTACCACGACCGGAGCAATCACAAAT +AAAGGTCCTGAGCAAATCCCCGCAAAACGTAAGCGAATCAAAATCAATCCTAACAAGTTG +TGATAAAATGGCCTTAGAAATAAGGCCAAGGAGAATAATATGTCATGGGTTCACAATGAG +TTCGCATTCCGCGCACTATCTCATCTTCCAAAATTCACTCAAGTAAATAACGCAGCACAA +TTTAAACTTCGATGTCGTTGTCCGGTGTGTGGAGACTCACAAAAGGACGAAAACAAAGCA +CGATTCTGGGCGTACGGTCTACCTGATGATGTGCTATTAAAGTGCTATAACTGTGACTAT +GTAAAGCCGATTGGGATTTATTTGAAAGAGTATGAACCAGATCTTTATCGTGAATTCATT +CTTGAGTTACGAAAAGATAAAATGGTTCAACGTGAAAAACCTGTTGAAAAACCTAAACCT +GTTGTGGAAGAGACTAAAGGAATTAAGCTCATTCACTGTGAACGTCTCGATAAAATGGAC +CCTAATCATCCAATAGTTCGATACATCGCCGGACGTAAAATTCCAAAAGATAAATGGAAT +AGACTTTATTTTACGTTGAAATGGCCAGCATTGGTGAACTCAGTAAATCCAGACACTTAT +AAGACGGAGCGAGATGAGCCTCGATTGGTTATTCCAATATTCAATTCAGAAGGAATAATT +GAATCATTTCAAGGCCGTGCTTTAAGAAAAGATGCTCCTCAAAAGTACATCACTATTAAG +TCCAACGAACATGCGACCAAAATATATGGGACAGACACTGCAAAGCCAGGTAAAAATGTT +TATGTACTTGAAGGCCCAATAGACTCATTGTTCTTAGATAATGCTATTGCGATTACTGGC +GGTGCAATGGATTTGAGTTTAGTTCCATTCAAAGAAGATAGAGTCTGGATAATGGACCAT +GAACCACGAAAAGATGACACAATTAAGCGAATGAAACGTTTGATTGATGCAGGTGAAAGG +GTTGTCTTTTGGGATAAAGCGCCATGGGAAAAGAAAGATATAAATGATATGATTCAAAAG +GAAGGGGCTCGTATTGAAGATATCCAGAGCTATATCGAAAACAATATAGCGTCTGGTTTA +ATGGCACAACTCAGACTCAAGAAGTATAGTAAGATCGGTGTTTAAATTCCAACCATTATA +TGAGAAATAACTTGTTCCAAAGGAACAGGTGGAAGTGTTATCCCGTATGCTAACGCAAAA +GGTATGATAATATAATTCCAAGTTGCTACAGCAGCAGAAATTGCTCCGACCAGAATAATC +TTACCTTTCTGGTCTTTTATTTGAGTCTTTAGAGATTTCTTTTCAGTTTCTTCAGACATA +TATCCTCCTAAGGCTATTTAATATGAATCTGCAACAACACACGTTCTTAAAGCTTGGAGA +AGAGTGCAACGAAGTCGCGATGCTCTGTTCCAAGATAATGCAATTCGGTTTAGACTCCGA +GTATCAAGGAGTTACTAATCGGCAACGATTGCAAAATGAATTAAATGATATAATGGCTTC +TATAGAATATATCAGACAATACTCTGATTTCAAATTTGAATCTTCTGAATATGAAATTCA +CAAAAAGATTGATAAAATGAACCACTTCCGAGATATCTCAGAAGAACTCGGTCTTGTAAC +TAATTAAGAATTGAAAGGAAAAATAATGGCACACTTTAACGAATGTAGTCAACTGATTGA +AGGCGCTGATAAAGCACAAGCAGCTTATTACGATACCCTGGTATCTCAGCACAAAGACCC +ACTACAAGTAATGCTCGATATGCAGAAATCTCTGCAAGTTCGTCTGGCAAATGATAAGCC +TGAGCATAATCGTCATCCTGATTCGCTGGAAACAGCTGGCGAAGTCTTGGCTTGGTTACG +TGCAAACGATGATTACATCACTGACGAAACGCGTGAACTGTATACGGCTCTTGGCGGTAT +GTCCAATGGTGAAAAAGCAGCATCCGCAGTATGGAAACCGTGGAAGGCTCAACATGCTGA +AATGCAGGCTCGTAAGATTTCTGAACTGTCTCCTGAAGACCAGCTCGAAATCAAATTCGA +ACTCATCGACCAGCTTCATTTCTTCCTGAATAAGTTCATGGCTCTGGGTATGGATGCTGA +AGAAATCTTCAAGCTGTATTATCTGAAAAATGCTGAGAACTTTGCTCGTCAAGACCGAGG +CTATTAATGAATCATACAATATATGAAAAAGATGGGGTGTTTTTGCTCCATTCCAGATAG +CGAAGATATTTCACTGAACAATATTCTTATTTCTCATGGATTTGTTCCAACGTCTGATCG +TCAAATTATTGCATGCGAAAACAAAGAAGAACTGAACGAATTCTTAGCCTATTTTTATGG +CGAATATTAAAAGGTGAATTTGATGCAATCGAACTTTGATGTCTATCAATGGAACAAAGA +TGGTCGCCCAACTGAGCCGGCCTATGGTTCTGATGCATATGAACTTCGTCGTCACTGGTT +GTTTGCGGTAAATAATTACTTCATGCTCAATGGCAATCCAACTCGACTACATATCAGAGG +CGGTGGGTATGTAACCGTTGATTCAAAATATTATCGCGGTAAAGACTGGGAGTGGTATCA +ATGATTTCATACGCTGATATTGAAAATGCTTTAACCAATTATTGGGGATTGACCCACCTA +ACTCTCGAAGACCGATGTGAGTGTCTTCGAGAACTGATTGCTGATTCTGAAGACATTATT +GCAGATATAGCAGAAGCTCTTAACAACTATTGATATAAATACTCCTGTAATCAACAAAGG +AGAGTTTATGAGCTATGTAAATATCAAAACCTTCGAGCATACCAATGCCGATGGAGTAGT +TGCCGCGATGGAAGTTTCTGTAGCATTTAAGTTGTACAGTGACGTTCATCGTATTGCTCG +TTCTCATTATCAAATTTTCCCTTCAGAGAAAGCTGCTTACTCTACTGTATTTGAAGAGAA +TCAACGAGACGCATGGATTGCTAAAAACGCCGATATGTTTAAGGGCGTTCCAGCATCTGG +TGGTTGATTTTAGGGACTCCTTCGGGAGTCCCTTTTTTGCTTTTAAATGATGTGATATAA +TTCTTTTATCAAATGAGGATAACACAATGAGAACACCGTTTCAAAATCCACTACCATGGA +TTAAAGCTTGGATCAAAAGTCGACAAGAACCTAATGATTGGGTAGAAGAATTTCATTCTG +AACTACGAAAAAATACTAACGCTGAGTTCAAAGAAAAAGAAATTAAGCATCAGTATGAAG +AAGCTGAAGCACTTGCTGACCAATATCTTGGAGATAAGATAAAATGAAAACATCTGCTTG +GATGAAACCAGTTGAATCAGTCGGTGGTATTACAAAACTTATTACTGATCGTTTAATTCA +CGATCATCTTTTTATCATGAATAGCCCAGATCTTTATGATCTAGTAGATATCTTTATTCA +TTGCTATCGTGAAGAAGGTACTACATTACGTGTTGTATATGAAGCTCATTTTCACTTCGT +TGGTGAGCAAGCTGTTATTCGCTTTGGGACATCTTGGTTATGATTAAATGGCTTAAAACT +TTATTCACTCCGGCTCAACCCGATGATCGTCTGGTTCCTTTATCAGTTAATGATGTTATT +GTTCCTATGCAAGAACCAAAAGAGTATGTTTATATTGGTGATGGAAAGATGGAAGAAGTT +ATTCGTCCAAAGGAAACCGATATGCAATACCTGATTCGTCGTAATCATGAAATTCAGGCA +GAACGATTTAAATCTAGGTCTCAGCCAAAAGCTAATCCAGGCCCAAGTGCTAAGCCGTTG +AATGCAAAAGAACTTAAAACTCGAGTACAGGTTGTTAAAAGTCGTCAACAAACTTCATCG +AGTGCTCAATATAATTACAGTCCAAGCAACCCGGCTCCATTTATTTCAGGAGATTCATAT +GACTCAGGTTACTCATCCTGTGATTCAAGTAGTTCAGGTGGATGCGACTAAGTATAACAG +ACCTACATATAAAAGTCCACTAAAGAAATCTAATTTTGATATGTGGTATCGTTCTATGAA +GGCTGCAGCATTTTTGATAATTGCTGCACCTGCTATGATTAAAGCAAACGATAAGTGGTT +TGAAGAAAATAATATTGAAGAAGGTGCTATCTGTGGAAAAATGCGTAAACATCAGTAAGA +AATACTCAATTGAACTATCCAAAAAAGTAAATGGAAGAACTATAATTCAGCAAAATGATG +TGTTTACAGTTATCATTTCAGCTTTTGCTTCAAACTCTTCAACGAAGCATGAAGACTATT +TCAATGAGCAAATCGATAAACTAATTAATGGATTGAGTTTTCCTGAATCTGCAGTATGCT +TTATTAGACATGAAGCTGACGTTACTCAAAAGCCTGGGACTCCATTTGGTCATATAGAAG +CATTAAATCGTCTTGGATATGATGTACCTCGATATCAGCCCGGTGATAAGTTGTTTATTA +ACACTGAACAAAGAACGATATGGAAAAAGTTCCTCATCATTGATAACAATGATTTTGATG +AGCTCCAAAAATTCATCTGGAACCACTATGAAGATCGTGGATTGATCTTCACTGAATCTG +AATCGGCAAAACTCGCTCGCGAAAGCCTATATGAGCAAATGCGTCTTGATAACCTATCAC +TTCGGTACGGTCGATAATGGATCTGTTTGATATGCTAGAGCCGGCTGAAAAGCCGGTTGT +CGATTTACATAAGGTTGATATTTCAAAAGAAATTTTTGAAGTGCTTAAATCACACGGTAT +CGAATCTACTAAAGCTGCAGAAGATCTAGCTGATCTATTCTGCTTTCCTCCTCCCTGGGC +TCCTTGGGCCTAACCGTTTACATCCTCCTCTCACTATGATATGATAGCTTTCGTAAACAA +ACGGAGGCTATCGTGTTATATCAAAAAGAACACCTCGAAGAAATCCGCGAAAGCGCGGAA +CACAACTCGTCTTACTACGAACAAGCTATTTCACAGTTCAGTGACTACGAACAATCTGTT +TTATGGCAATGCTTCAACGATAAAGCGGACCCTAAATTGCATCTAGATTTAGACCCAATC +GTTCGTCGCAACATCACTTCAGACGTGCCAGTAGAACTATATCGTGGAGTATCAAAGAAA +ACCGCCGCTTGGCTTAGTCATATGGAAGTTGGTCGTATTATTGCTGATAATCGTGTCACT +TCATTCTCGTCTGATTTTGCCACTGCAAGACAGTTCGCTGGCGCTTATTGCTATAACACA +AAAGTCATTCTTTCACTTCGTAATTGTCCATTTGCTTTTAACTTTCAAGAGCATGCAATG +AACTTGGTTCTGGCTGCGCCAGATTCTGAGTTTAGATGTAATGCAATTAATGGTGATGAG +AGAATGGAAAAGTTGGAGATGATTAACGCGGAAGATGAGTGGATGTTCCCTATCGGGACT +CAGTTTGAGATAGTCAGTATTGAAGATTATCAGTTAGACCCGTTATCTCCGGTCTACAAA +ATCTATCATTTGAACTTCTATTCTTTCTGACCGTTTACATTCAACGGAAAGTGTAATAGA +ATAATCCTTGAAACCATTATACCACCTTCGCAAATAAAGCAAATAAGGATTCGTCATGGC +TATGCCTCGTGAAGTTGTTATCGCTCAACGTTTAGTTCAAACTTACAAAAGCGCATCATC +ACGCAGCAAAGAGTTCAATCTGAGTATGGACTATCTTTTGAACATCATGGCACAAGACAC +TTGTGCATACTCAGGTGAAAAATTTCATAAAGAGCCCGGTGATCATCAAATGACACTTGA +GCGTTTTAACAACAAAATCGGATACGTAGAAGGAAATGTGATCCCGGTCAAGTTGAAGTA +CAATCGTCTTCGCGCTAATCATGAAATTGAAGATCTTATTCGTCTCCAAGAAACAACTGC +AGCACGTATTGTTGCTCGTGTAGATGCCAAGAAAGATGTCGCTCCAGTAGTGAAAGAAAA +GCCTATTCAAGACGTTCATCGCATTGATTTACCTGGATTTGAAGACATTAACTTGATCTA +TGTCCCTAAGACTCAACGCGAAGAAATCCGCCGAATTGTTCAGAACATCAAATCTCGTCA +AGCTCATATATTACAAAAAGGTGTAACTAAAGAACATAAAAAATCACTGGAAGTTCGTAT +TCACGGCGGAATCACTCGTATAAAAGCGATAATCAAACAAAAATATAAAGCTCCACAGGT +TGTGACTTCCCGCGCTGCGTCGAAGAAGACTTCTAAAGCAGAAAATACTTCTTATGATTA +TGGTATAATTATCCAGGGTTTGAATCGTTTTCAGAATCTCTCTCGTCTTGATAAAGCTAA +ACTGAAGAAAGGTTTGCCACTTTCCGCCACCTTCTTCCAACTGTTAAGAGGTAAAATGTG +ATGCACTATGGTTACATGTTGGTCTACAAAGACAAATCCGGGTATGAAATCCCGGTATAT +GAATTCTACCGAAATAATCCAATCGGCGGAGCTATGATTTACACTAATAAGAATGACGCT +CGTCATGCTTTAGCTCAAGAAGTTGCTGAGTTACAAGAACGTCTTGATCGCGGAATGAAA +GTTGTTACTCAGAAGAAAAAATGGCTTTTCTTCAAACGTGATATTATTACTTACATTCCA +GTTAAAGATGAAGAAACTCGTCGTCATCTGCAATTGCTCATTAACACCATAAAAGTAAAA +CGAGTTTCAGTAGCCTAGGAGTCATTTTGAGAATTACATTTGAACAATTAACTCGAAGCC +AAAAAAGTACGTTTGATACGACTATCACGGCTATTAAAGAGAAGAAAACTCACGTAACAA +TTAATGGTCCAGCAGGTACTGGTAAAACTACTCTTACTCGCTTTATTGTAGACCATTTAA +TTTCTACTGGAGAAACGGGTATTATCTTAGCTGCACCTACTCATGCGGCTAAAAAGGTGT +TGTCTAAATTGTCTGGTATGGCTGCTGCTACTATTCATAGCATCCTCAAAATTAACCCGA +CGACTTATGAAGAGAATATGCTCTTCGAACAAAAAGAAGTTCCAGATTTGGCACAATGTC +GAGTGCTTATTTGTGATGAAGCTTCTATGTGGGACCGTGAGCTGTTCAAGATTTTAATGG +CCTCAATTCCTAGTTGGTGTACAATCATTGCAATTGGTGATGAAGCTCAGATTCGTCCGG +TATCTCCTGGCGATTCTTCAACTCATAAATCGCCTTTCTTTACTCATAAAGATTTCCTAC +AATTAGAACTCGACGAAGTAATGAGAAGTAACGCTCCGATTATTGAAGTTGCTACTGATA +TTCGTCAAGGCAAATGGATTTATGAGCATACCAGAGATGGTCATGGTGTTCATGGATTCC +AAAGCTCGACTGCATTAAAAGATTACATGATGCAGTATTTTAGCATCGTAAAATCTCCAG +AAGATTTATTTGAAAACCGAATGCTAGCATTCACAAATAAGTCAGTAGACAAATTGAATA +GCATTATTCGTCGTAGGTTGTATCAAACTGAAGATGCTTTCGTTACTGGTGAAATCATCG +TTATGCAAGAACCTCTCATGAGAGAGTTGATGTATGATGGTAAGAAATTCACTGAAACAT +TATTCAACAACGGACAATATGTTCGTATATTAGATGCTCAGTACACATCAACCTTTTTAG +GTGCCAAGGGAGTCTCTGGTGAACACCTAATACGTCATTGGGTGTTAGATGTAGAAACAT +ATGGTGATGATGAAGAGTACGCCAGAGAACAAATAAGGGTCATTAGTGACGAACAAGAAA +TGAACAAATTCCAGTTCTTCTTGGCTAAAGCTGCTGATACTTATAAAAACTGGAATAAGG +GTGGTAAAGCACCTTGGTCTGAATTCTGGGAAGCTAAACGTAAGTTCCATAAAGTTAAAG +CACTTCCTTGTTCTACGTTCCATAAAGCTCAAGGTATTTCTGTAGATACAAGTTTTATCT +ATACTCCGTGTATTCACGTTAGCAATGATAACAAATTTAAGTTAGAATTGCTTTATGTGG +GTGCTACTCGTGGTCGTCATGATGTTTTCTTTGTGTGAGGATTTATGTACAGTTTGAATA +TTGATGATTTTGAAAAATTAATTGATGCTGTTAAGATTAACAAGCCTGATGATAAATGGT +GGCAGTGCCGTCAAACTGAATTAGTGTCAGAGCTAAATGAAATCCGTGACAAAGCTTTAG +CTATTGCATGGTTCCAAGGCGAATGTCCGCTTATCGGAATTAGTGATAATATTGCTCAAC +AAATTTATGATTTGAAGGTAGAACTATGTCGTTAAGAGATTTTATTATTGACTATGAAAC +TTTTGGAAACGTTTCAAATACTGCTGTCATCGATTTGGCAGCGGTAGTATTCGATCCTAA +TCCAGAAGTTATCGAAACATTCGATGAGCTAGTTTCTCGTGGAATGAAATTAAAATTCAA +TTTGAAAGCACAAAAAGGTGTTCGTCTGTTCGGTGCTTCTACAATCGAATGGTGGAAAAA +GCAATCCGCTGAAGCTCGCGCTAACTTAGCCCCATCTCCGGAAGATATTGACCACGTTGA +AGGCTTGTATAAACTTCTGGCATTCTTGAAAGAAAATGGAGTTAATGCTTGGGATTCATT +CGGCTGGTGTCGTGGTCAATCTTTTGACTTCCCTATTCTGGTTGATATTCTCCGCGAAGG +AGAACGCCGTAAAGGTATCGCAGATAAAGATATCGATACGTTTGGTTTAGAACCATGCAA +ATTCTGGAACCAACGAGATGTTCGTACCGCAATTGAAAGTCTTTTGATGACTCGTGGTCT +TACAACTACTCCGCTGCATAAGGGTGTTCTTGACGGATTTATTGCACATGATTCTATTCA +TGACTGTGCTAAGGACATCTTGATGTTAAAATATGCTCAACGTTACGCACTAGGCTTGGA +CGAAGCTCCTGTCGGTGATGAAATTGATCCACTCTCTTTACCTAAAGGTCGAGGTTAATA +TGTTTAAGAAAAATGATAAAGTTAAAGTTATTTCTGGTAAGAATGCAGGCGTAGTAGGTG +TAGTTCTCGGCCATTCAGTTCGTGATGGTTATCGCATTCGTAGCAATAAAGATAAAGTGA +TTTACGCTAAACCTTACTATGTAGTTGAAGATCCGATGGCTGAACGCACTGAAATCCGCG +AAGGCGATGTGGTTATCGTGATGGAACCTTTCGCTGCTCATTATAGCACGGTTTCTAAAA +GTCCGTATGAATGCTTATGTAAAAACAAAAACGCGATGGTATCTCTGATTTACACCGACG +AAGAGTATGGTGAAGTCGCGAACATCGTTTACCAAGGCCAGTATGCAATCATCCCTCTGA +GTGCTCTCCGACGTAATCCTACCCGCATCGCAGGAAAACACATCACAGTATAACTCTGCC +GTTTACTTTCCTTGAGGGCTATGATACTATAGCCCTATCAACAAACATGGAGAAACAAAA +TGAAACAGCAACTCACTCAAGACCAATTCGAAGATATTCTCTTCAACCCTGATCTTACTG +TGGTTCAGAAAGATGTATCTGGTCATCTTGAGCACACTACATACGCTTACGTGTATCAAG +GAGCTTTGGCGGTTTACGCTGCAGTTCGTCATATCACTGAAGCCGGCACAACTTACTGGA +AGGAAGCTATATAATGAAATTTGAACAGAAATTTGAACAAGGTAAATTCTACGCTTTTCG +CGGTGAAAATTCTCGCGATAAATTTGAGAGTAGCCATCACACAAATAAAGCTATCGTAGA +TGCTATTATTGCAAATGGCGGGGTATTTGAAGCAATTCAGGTTAACGGTTGGGGTGCATT +AGAAGTTGCCCGCTTTACATCTACTGGCAAAGTCTATCATGAAAATGATGATTGCCAATT +TTATCTGTCAAGCGATGAAGCTGAATTCTTCATCGAAGTCGAGCCTAATGGAAATCCAGC +GAACCCTAAGCCTTCATTGATTCAAATTGATCAGCAGATGACTGCTGCTAATGATGAAAA +ATTTGAAGATTCTGAAATGATCACCGATGATATTCCAGTGCCTCATACTACTCTGGTGAT +CACTAATCTTTCTGAAGCGATTTCTGCTTACAAAATGTTGAAAGGAATTATTCCAGATGC +CAGTCTATAACTATAAATGTCCAGCATGCGCTCAAGAAATTGAAGTCATTCGTAAAATTT +CTGAGCGTGATAATGAAATGATTTGTCCAGTACTTTGTTGCTCGAATCAAATGGAACGTA +CAGTGGCCGCTCCGAAATCAGTTCATGGCGGCTTTTACGACAACTTAAAATCAGGCGGTA +GCAATCTATGAAATGGGAATTAGGAAAGACTTACGCATTTGCTGATGCTAGTGCATTTCA +GATCGGTGTTAATCGTGAGATTCGCGAAATCATTGAAAGTAATATAGATGGATTGTTTCA +GGTTTCGCGTTTGAGCAAAAATCCACTCACTGATGTTGATTATCATGTATATGAAATTGT +ATTATCTGATGGTCGCATAATTGATGGTGAAGTAGCTCGTGAATTACATGGTTTTGGCAA +ACATGATATTTTCGCAATTTTTGCTAGTGAGCGCAAACATTTTAAAGAAGTAGAATGTAA +TCGCGATTTCAAAATTTCTATAGATGATTGTCAATATGACGAATTTGACGAATTTGACAA +TGAATCAGAAGAAAATGTGATGCCCAAAATCGAAGTTGAAGGTCGCATTGCTATTGGTTC +TATTTCTTCCGAAGAAGAACGCCTTTGGTTGATTGATTCACTAAACAGGATTAAATTCAA +ATGAATTCAAAACCTCGTAACATCATCAAACCAGGTGAAACTAAAATAATCAAACTCGCT +GATGGGCGAGTTTTTAAAATCAAAAAGGCTACGAAATGAAAAAGATTCTGATTACTCTAG +CTGTTGTATTTGCAATGGTAGGTTGCACCGATGCCGATAACGCTGTGAAAGTACTGCAAG +CTAATGGTTTCACTAATATTCAAACCACTGGATATAGCTTCTTCTCTTGTGGTAATGATG +ATTCTCAAGCTACCGGATTTACTGCAATAGGTCCTACTGGTGTTCCGGTTAAAGGTGCCG +TTTGTTCAGGCATTTTTATTAAGAACTCAACTATCAGGTTTGAATGATGGAAATTATTAA +AAGCTTTGGCAAGCACAAGTATGAAGATCGCTTGTGCTTTATTACAACATTAAAGGTTTG +TACTCGTACATTGACAAGTTACACCACTGCTCCAATCACTCCAAAGCATTTGCGTGGTAT +GAAGAAAAATATCCGACTACTAGCTTCAGGAAGACGTTTTCCTCATGAAGTTTTAGGTTG +CAAAAACAAAAATTGTAAGCATTGTAAATAGGGCTTCGGCCCTATGGAGGATATATGATT +TTCGGATTAACAACTGCTCAAAAACAAGCCAAGGCCCATCTTGAAGTTGTTGAACGTGCT +ATTGGTCGTTATCGATTTGCTTGGTGGCCTACTCGAATCACAACAGGTCAGACGATTTGG +CTTCAAAAATATTATGAAGTTGAAATCAGAGATATTGTAATACAACTCGATAAAACTTAT +GGTGTAGATACTAATATTACGTATGCAGTATATGCTTATTCTGATATTTCAAAAGCCGAT +TGGAAAATATTCGAAGCTTATAAACAAAACTACGGTCTGTATTACGCAAACAAATGTCAT +AAGGAAATAAAAGGTAATGAGGCCTTTGATCATTTGATTCGTTACAAAGCTGAATTAAAT +GCTTTACTAAATCCGTGATAATATTTCACCACTAAACGAGGAAATGAAATGATCAATAAC +GAAATTAAAATTCTTAGTGACCGTGAGCATATTATCAAACGTAGTGGTATGTACATCGGT +TCTTCTGCTCATGAATCTCATGACCGTTTTATGTTCGGTAAGTTCGGTGCAGTGAAATAT +GTTCCAGGAGTAATTAAACTGATTGATGAAATCATCGATAACTCCGTAGATGAAGCAATT +CGTACTAACTTCAAGTTTGCGAACAAAATTTCTGTAGACCTTAAAGGTAACAAAATTATT +GTTACCGATAATGGTCGTGGACTTCCTCAGGCTGATGTAGTTACTCCTGAAGGTGATACT +ATTCCAGGTCCAGTAGCTGCATGGACTCGTCCTCGCGCAGGTGGTAACTTCGGTGATGAT +GCTGAACGAAAAACTGGCGGTATGAATGGCGTGGGCAGTGCTCTGACCAACATTTTCTCA +GTCACTTTCGCTGGTGCAACATGTGATGGTAAAAATGAAATTGTAGTTCGTTGTTCTAAC +GGTGCTGAAAATATTTCATGGGAAACTGTTCCAGCAGAAAAGAAAGAGCACATTCAAACT +AAGACTGGCACAATCGTTTCATTTATTCCAGATTTTAGTCATTTTGAAAGTACCGGACTG +ACTCAAATTGATGAAGATATTATTCATGATCGTCTTCAAACTCTGTCAGTAGTATTCCCT +GATATTGAATTCAAATTCATGGGTAAGAAAGTACAGGGAAACTTCAAGAAATACGCAAAG +CAATTTGATGAAGAAGCAATTGTATTTGACGAAGAAAATTGTTCAATGGCGATTGGTCGT +TCTGATGATGGTTTTCGTCATCTGAGTTATGTGAACAATATTCATACAAGCAAGGGTGGT +TCTCACGTTGATTTGATTATCGATGAACTGAGTAATGAACTCATCCCGATGCTTAAACGC +AAGTATAAGCTAGATGTTAATAAAGCTCGTATCAAAGAGTGCTTAACTCTGGTTGTATTT +GTTCGTGATATGAGCAATATGCGTTTTGATTCTCAGACTAAAGAGCGTTTAACCTCTCCA +TGGGGTGAAGTAAAAGCTCATATGAATCTGGACTATAAGAAACTTGCGCAGCAAGTTATG +AAAGCTGAAGCAATTCATATGCCGATTATCGAAGCTATGTTGGCTCGTAAATTGGCGGCA +GAGAAAGCAGCTGAAACTAAAGCTGCTAAGAAAGCCCAGAAAGCTAAAGTAGCAAAACAT +ATCAAACCAGCTAAATATGGTGATGATTCTGTTGAGACTACTTTGTTCTTGACAGAAGGT +GATTCGGCAATCGGTTATCTGATTAATACTCGTGACCGTGATCTTCATGGCGGATATCCG +TTGCGTGGTAAAGTAATGAACACGTGGGGAATGTCGGCTGCTGAAGCGATGAAGAACAAA +GAAATCTTCGATATTTGTGCAATCACGGGATTGGTAATCGGTGAAGATTTTGATACTTTG +AACTATAAGAATATCGCTATCATGACCGATGCCGACGTCGATGGTGTTGGTTCAATTTAT +CCAAGTCTGTTAGGTTTCTTCAGTAATTGGCCTCGTCTATTTGAAGAAGGTCGTATTCGT +TTCGTTAAAACTCCAGTCATCATTATGTCAAAGGGTTCTGAACAGAAATGGTACTATTCT +GCTGCTGAATATGAAGCTGAAAAAGAAAAATTATCTGGTTGGAAACTTCGTTACATCAAA +GGTCTTGGTTCTTTAGAAGAAGACGAGTATGAACGAGTTATTCAACAACCAGTTTATGAT +GTAGTTTCTTTGCCTGATAACTGGAAAGAATTATTTGAAATGGTTATGGGTAATGATGCA +GCTCCTCGCAAAGTCTGGATGAGCGAATAAATATACATGAGCAATTCTGCTCTATAATAA +GGAGAGTTTATGTCTCAAGCTTGGATTACACTCGTAGACGGAAGTTATGGTTACATGTGG +GCCGACGCATTGCCACTTCCTGGTGATTGGGTAACAATTCGTGTAAGGCAGATTGATAAC +TCTTTCAAGAAAGTATATGGACAGGTATCGAGAGCTACCTGGTAATTTTAGGGACTCCTT +CGGGAGTCCCATTTTTGTATGGTGACTTATGAAAATGCTTAATGGAAGTTATGTTAATTT +AGACAACGTAGCAGCAACAATCCGAGACAGTAAGAAGATGTACTTGGATAAACTCAAGAA +TACACCAGATGATCTCTGGTTAAATCAATTCTCGAGGATGATAGAACATATTGCTGTACT +GGTAGAAAGTAACCAAGCCATTCCAGTTAACCTACAACAAACAGCACTGAAGCTGGTCTA +TTCAGCTCGTGAAAAATATACTATAAGAGAATTCGCCGCAATTCTCAGAGAGGTTGGTAA +TGAAAAGTTATAATGTGAATTTGACACTCTTTGATGACGCGGTATTCCGTGAGTACAGAA +TCATTCAGCGGTTCTTCGACATCAATGAAGCTGAAATCTTTAAAGACCGCTTTAAAGAAA +TTCGTATTAAAATTAAAAACGACACTGCAACTAAAGATGAACTCTTGGAAGTTGCAGATT +TAATTAAACGACATAACTGATAGGAACAATATGATTATTTCCCAAGAACAGGAAGTGGTA +TTTGGTTCTGCGAATCAACAAGCAACTGCTTTCGGAATTGAAAACAACGCAAAGGCATTT +ATTCTTCTTTCTGATAAGCTGTACACTAACAAGCCTTATGCGATTGTACGTGAATTGTCG +ACAAACTGTCTTGATGCTCATAAGTTGAATGGTCAGACTCGTCCATTTGAAGTTAAAGTT +CCTACACGTTTAGACCCACGATTCGTTATCCGTGATTTTGGTCCAGGTCTGTCTGATGTT +CAGATTCGCGGAGCTAATGGTAAGCCCGGTCTGTATAACACTTATTTCGCTTCGACAAAA +TCTGATTCAAACGACTTTATCGGTGCAATGGGTCTTGGTTCTAAATCTCCGTTTAGTTAT +ACAAAAACATTCACAATCATTTCGTGCCATGATGGCCGCAAAATGGGTTACACCGCGATC +ATGAAAAACCTCGGTCCAGAAATCATTCCTTTGTTTAATGAACCGATGGGCGAAGATGAT +GTGACTGGTATTGAGATCACCGTCCCGGTTAAGACAGACGATATCTCTAAATGGGAAACG +GAAATTAAGCGAACATTCCGTACGTTCGTTGGTGTTGAACCTAAGATTCTAGGCTCGAAA +GTTGAAATTAATTATTTCCCTGAATTCACTCCAGACAAACAATGGTTCAGTCTTAATTCA +AGCCCATTTGAAAATGATCAATCGTTGTACGCAGTTTATGGTCGAATCGTTTATCCAATT +AAGATGAGCGAAATTCCTGATATCAAAGCCGACTGGCTTCTGAACCGATATGGTCGAGTT +TATGTTCATTTTGATTTAGGTGAACTGGATATTACTCCATCTCGCGAAGAACTTTCCTAT +AATGAAGAGACTATCGAGAACATCCGAAATAAAGTTAACAATCTTGAAAATATAACGTTA +GCTGCTGATTTAGAACGTTTCCAAACAATTGAAAATAAACGTCAACTGTCTCGTGAATTA +CAAGCATTAAATCATCGTCAGCGGACGATTTTAGGCACTCGTTCTATTCTTATCCAAGAT +AAGCCATATCAAGATTGGGTATCGATGTTCCACCATAGCAAAGTAGAGAATTTGGTCTAC +AATGCGAATATGGTAGCTTATTATGTCGGTACAGATGCTGAACGGCGTCGAATTTCTAGT +TCATGGAATGTTCGTAATCGTCTGTCAGCACAAAGTTTGTTCTCGATTGATCACAAGAAA +ATAGTATTCATGATTGATGATAAACCATCTCGTCGTGCTTCTACTATTCGTGGTATGTAT +GCTCTTGATATTCACAAGTACTGCTATGTTATCTTGGTTAATCCAGATAATGAGAAAGAA +GTTCATGTGATGAACGAAATCACTAAGTTGTTTGAAGGTGATGAAGTTATATGCTTTAAA +TGTTCTGAAATGGAACAGGCAAGAGTTAAAGACGCTGAACTGAATGCAGCCAATTCAGAT +AAAGAAGGCGCTAAGCGTCCTAAATCTCCTAACATCCAGAAATGGTCTAAGACTGATGGA +AAATGGGAAGTTGATTCGTTCTGCATGAGTGCTAATGAAGTTCGTGAACTTGAAGGTTAC +GCAATTGGAATATCTCGTGATTCTATCGTCGAGTTTCCAAGCGGTAATGAAACATCATTC +GACCAGACAAATATTAAAGGCGCTTGTGACTATGTGAATGTTTCAGAGTTCTGGATGATT +CGTCCAGCTGCAATGAAATACGCGCAAGATGCTGCATTGGACCCTCTGATGGATGAGTTC +GTCAATAAGTTCATTGAATTGATTGATAAAGTTGATGCTGATGTTATTCCACCTTCGACT +ACATCTCGACGTCAAATTAATAATATTCTGTCAATTAAAGCGTTGACTCCGTTAATTAAG +AATTTTTACGACGTTAAAGATTGGCAATCTTCTGTAGAATTGAACCAGTTTGTCAAGACA +TTTAATGGGTCAATTCATGGTGAAGGTGAAAACGCTGAGAAATTAGCTTTATGTCAGAAG +ATTTATAATAGACTCGTAGAAACCGCTAAGTCAGATTTTGAAATAAAAGCGGAAGAGTTT +GAAGAAAAATATCCGGTCATTTGGTATATGCTTGATGAATACTACATCCATGAAGCTAAA +AATCACAATGATCTTGCTAAAATTGCGGCGCTGTTAGGCGCCATCTAAGAGGTTTATATG +GCCGTACAAATGTTTTCTGATCGTCAAAAGATGTCCATCGTTGATTATGCCGAAGCCGGT +TATACTCGCACATGGATTGCTAATCGTTTCAATTGCTCGACTGATACTGTTCGTCGAGTA +ATTAAAGAAATGAAACCTGTTGAAGTCGAAGAGACTGAAGCTCCGGCTCCAGAAGCTCAA +TACATCTGGAACGCATCAAATAAATTCATCAGCATCACCGATCTGTCTACTCATAAGACT +TATCCAGCTGATCATAAAACGAAGGGCTTTAAAATTGCTCTGCAGCGTCTGATTGATGGT +GACATCCCAGGTGCACTGGAAATTATCAACGCAGAAAAAGGTCTGACTACTTTCGTTAAA +GGCAACGTTAAAATCGATAACGGTGCTTTGTTCTTCAAAGACATCGAAATCAAATCCGGT +TTGACCGAGCGTATTCTGGACTCTATGGAAAAAGGTGAAAACTTCGAGCGTTATCTGCCT +TTCCTGGAAAACCTGATGTTGAACCCAAGCCGTAAAGCTGTTTATCGTCTGTTTGATTTC +CTGGTAGCGAACGATATTGAAATCACTGATGACGGTCATTTCATCGCATGGAAAGTAGTT +ACTAAAGATTATAAAGACTGCCGTACTAATAGTTTTGATAACTCTCCTGGTGTAGTGGTT +AAAATGGAACGCAACCAGGTCGATGAAGATGATGAACGTACTTGTTCTTCTGGTCTTCAT +GTTTGCTCCAAGAGCTACATCTCTTACTTCCAGGGTGGTTCTGATCGCGTTGTTTCCGTT +AAGGTGCATCCACGTGATGTAGTAAGTATCCCGGTTGATTATGGTGATGCTAAAATGCGT +ACTTGTGAATATCTCGTACTTGAAGATGTTACTGCACAATGGGGAGTTCGCTAATGTTAC +CGCATCAACACCGCGTAGTGCAAGAACGTGATGATCTTGCTGTAAAAATCGAAGCTTTAG +GCACGTTTATCGATAATCAAAACCCGGTATTCAAGAATCTTGATATTGAAGACCAATTTC +TGCTTAAACAGCAATTAACTATTATGGTTGAATATCATCGAGTACTCGATGCTCGTATCA +ATCGATTCTAATTAAAGCCCTTCGGGGCTTTTTCTTTGAGGTTACTATGATCAATCCAAT +GAACGTGGGCGATTCAAGCATTAAAGAAATTACTTTGCATGGAAACCATTATGCGGAAAT +TTTATACGCATTAGATGTTATCTTAGACCCAAATGCAGATGGTGTATTGGTATGCGAAGA +TTCATTTCTTGGAAAGGAAAATGTTGATAATGCTTTAATGAATCTTGAATGTTTGAATTA +TAACGATCGAGTCTACCAAGGAGTAGTTCGAGTACGTGACTTTTACATTGGTGGTAAAAA +TGAACAAGCAGAACGTGGGGTTAATGAGACTGAAGAGCCTGCCGACATTTACCCGGTCGA +AGAATGATCTTACAGGTGAAGACAAAGTCAAAATAAAAGGCACTGTCCAGTACTCGATGG +AAAAAGATCCAGACCAGGATTTAGAACAAGTTAAACGTCGATGCATGATAGCTCAAATGG +CTGAGCGAGCAGTTGCAACTTGGGTTGATGGATATGTTGCTAACATGAAAGCTGATTACG +AAGATCCATTGACGTTCGCTTGGGATGTGTTAGCTCATCCAAAGTATTCTGGATTGAGAA +TTGAAGTTAAGACTCATCAGTCTGACGCTAAGTGGATTTCTGTCACTACTGGTTATGGCG +GAGACTATCCGTATGGATCTGGGGTTAACCTAGGACCTTTCTTGACGCATCAAGTAGCGG +ACTGTATTATCATATTAGATGTGGAAGAAAGTTCACCGGGGCTGTTCTCGTTTACCCTAA +AGTTCGTTGGGGACCAAGAAGACCTAAAGAAAGTTGTTCGGCGTAGTAACTACACTGGAT +GGTACTTGAACATTTAATCTTCAGCCGTTTACATCCACATGGAAGTGGTATACTATAGAC +TAGAAATCAACTAACGGAGAACAAAATGAAACGTTACTACTTGAAAAACCGAGCTTCAAA +AGAAATTGTTACTGCTACTTTCGATGCTAATGAAGAAGGTGATTGGACAATTCTTGATTT +TGATGGCGAGCAGCCTTATTTTGGTTCTAAAGAAGAACTGGAAAGAATTCGTTCAGGGTT +ATGTAATGATTCATGGTCTTACGAAATAAGCAAATTCGTTAAACTTGCTATTAAATTAGA +ACTTCTTGACATCATTGAGGTTGAATTATGATCCGTGTAACTGTGATGGTCTGGTTTGAA +CTAGAAAACGGTGAGCCTCGTTTCAAAACCTGGGACGATTATATGCATGGCGGAGACGCT +CAGTATGTTGCTAAAAGACTTGCCGAAACATATCGTTATGGACAGTGTAAGATCTTTGAT +AATCTGGATCGTGTCATTGGAAGTGCAGGATTTGAATAAAAGCGGAGGGTAATGCCCTCC +TGGAGCATAAAACTTTAACTAATGAGAGAAATAATATGAAAACTTTACTGACTGTATCTG +TTCTGGAAAAAGCCGGAGCGACTGTTCTGGGTAAAATCAAAAATGCTGATTGGTTTAACA +GTGAACCCGCTCGTGAAGTTCTGAGTGAACCTGGGTTTTATTTTCTGGTAAATCCGGGTT +CTTACACTACAGCTCGTTTTTATGTAGGTCGTCAACGCTCTAAATGCGGATTCAGCAATG +TTCTATCTCAATTAAGTCGCGGTCGTTCTCAGCTGGGTCGTACTCTTCGTTGTAACGATG +TAATCTACGATGTATTCTTTGTTCCAGTTTCAAAAATGAAAGCTTTGACAACTGGTTACA +ATAAAGGCCAACTTTCTCTGATGTTTACTAAATCTCATAAAGAAGCGTTTCAGAACCTGG +AAGAAATGAATCGTATGCTGAATGATAACTTCCTCTTCGGTCTACAGAGTTACTAATGAA +ACAGATCTGGACTTTGGTGTTTCTGATTATGATTTTGCTTCTGGTGTATTCATCTGGGAG +GGATTAATCCCTCCGCCTCGAGTACTTGGAAGTATGTGCTTTGGAATTGCGATATTAGCT +TTAGAACGAATTTTTCTTTTATGTGGGATTGATTAAATGAATAAATTAGTGGGTGCATTG +GGTTGCGGTATTTGGGCTGGGTTATTTGTAAGTTTTGCAACTGGAGTTGCTACTCCTACG +GTATTTTCTTCTAGCATAATGGCGTTGACTCTGTTCATTGTGACTTTGATTAATTTAGTA +AAATGAAACGTGTAACTGCTTCTATTGTTATTTTGGCCATCATTATGATGGCCGTTTTCT +ATGGTGTAGCTTACGGCATTACTGAGATTTTGCTTTTCTTAGTTAATGTTATGATTGACA +TCGGTTCAATAATTTGGTAATCATATGCAATTAAATCAAAGAAGTCTTCAAAGTATTATT +GATAATGAAGCGAAAGAATTCGCAATTTACACTGTCGAGAACCGTGCTATTCCAAACATG +ATTGACGGGTTTAAGCCGGTTCAACGTTTTGTAATGAAGCGAGCTCTTGACTTAAGCCGT +GGAAATAAAGAAAAATTTCATAAACTTGCTTCTGTTGCAGGTGGAGTAGCGGATCTTGGT +TATCACCATGGTGAAGGCTCAGCTCAAGATGCCGGTGCATTGATGGCTAATACATGGAAC +AACAACTTTCCACTATTAGATGGTCAAGGTAACTTTGGTTCTCGTTTGGTTCAAAAGGCT +GCAGCATCTCGTTATATTTTCTGTCGTGTATCTGACAATTTCCGTAAGGTTTATAAAGAC +TTAGAGATTGCTCCGGCTCACAAAGATAAAGAACATGTTCCACCAGCTTTCTATCTTCCT +GTAATTCCTACAGTTCTTTTGAATGGTGTTCGTGGTATTGCAACTGGTTATGCAACAAAT +ATTCTTCCACATAGTTTTGAGTCTGTTGTTGAATGTACTCGATTAGCTTTGGAAGGAAAA +CTTGATAAAGAACCTGAAGTTAAATTCCCTAAATTCAACGGGAAAGTAATTCCAACTGAA +GACGGTGGAGTTGAATTGCACGGTGTGTATAAATTCACTTCAGCAACTCAGATGTATATC +AGTGAAATTCCATATAAGTTTGACCGTGACACTTACGTGGAAAAAGTACTCGATCCTTTA +GAAGAAAAAGGTCTGATCACTTATACTGATGATTGTTCAAAGGCTGGATTTGGTTTTAAA +GTTAAATTCCGCGGTGTGTATAATCTTCCGGTGTCAACTGAAATGCGTCATGACATGATT +ATGCGTGATTTCAAATTGATTGAGAAATTGTCGCAGTTCATTGTTGTCATTGACGAAAAT +GGTAAGTTGAACGATAAGTTCACTAAAGCGTCTGATTTGATTAAGCACTTCGTTGAAGTT +CGTAAGACTTTCGTCGAAAAACGAATTGAATACAAAACAGCTGAAGTCAAAGAGCAATTA +ACTCTGGCTGTTGCTAAAGCTCAATTCATCAAAGACGTTGTTGACGGAAAAATTGTCATT +CAAGGCAAAACTCGTAAAGCGTTGGTATCTGAACTTGAGAAAGTAGATTTATTCAAAGCT +CATGTTGAAAAACTTGTGTCAATGAACATCTATCACATCACAAGTGACGAAGCCAAGAAA +CTGGTAGAAATTGCAAAAGATCTCAAGAAAGAATACAAGTACTGGCAAGAAACTACACCA +GAAGCTGAGTTCATTAAAGATTTGGAGGAGCTATGCGAGTAGCTATAGTTCTTCTTTTGT +TAACTGTTATTCTTTGGTTCATGCCTGCGTTTATTATAGCGCTTGTTCTTGGAGCTCTCG +TAGTTATTGGATTTATGGGCTTCTTGTTGTCACTTCTTCTGATTTTTCTTTAACTCTACA +CGGCTCTAGTGAATTCTAGAGCCCAGTCCATATAATTAATCATCCTTCCTTGTTGTATCC +TCTCAACCGTTCTGGTTAACTTAAAAATATTTTCATTCAACCGTTTACTTCTGCTTTAAG +ATTTGATATTATTACCTCATACCAAACAAACTGATAGAATCTTGGAGAATAAAATGAAAG +TTACCCTGAAAATCGAAGTTACCAAAATGAAAGCTAAAGACGCTCTGACTTCTAACAAAC +TGATTGTCGATAACGTAGAATACGATATCTGCGGAGTACGTGAAGTTGAACCTGGTACTT +TGACTTTCTTCACAATGATTTTTAGCCCAAAAGCTGAAACAGTGTTTAAGCAGTTCGTTT +TCAACCCAGAAGATGAAGTTACTGTTAAAAACGCAAACTTCAAATAATAACCGTTTACAA +GTGCTATAGTATATGATATTATAGCACTATCAAAACTTAAGGAGAATAAAATGAAAAACG +TAATTATTGCCGCTCTTGAAAACGAAGGTTTAATCATCTGCCATAACGATATCAAAGCTG +TTAAGTTTGTACGTAAAACTTGCAACGATAATGTTTTAGGAGCTATCTATCATGCTATCG +TTTATGACGATAACGAAGATATCTTCAACGTGGTGAGTATTTTTATGAACTGTGAAGACC +TTACTGCTGACTTCGGTGGCTCAGCACACTTTGAAGGTTCTTATGATGAATGTGTTGAAT +ACTTCAACGAATGAGGAAATTATGGAAACTTTAGTTAAAAACTTGAAAATGCTGCTGGAC +AACACTGGTGGTGACTTTGAAGATAAGTTAATGCTCGCTCGTCTTCACTCGTCAAATACT +GATACCAACAGCTATCTGACTGTCTGGCATAACGAGCTATGTGGAGGCTACTATCTAGCT +TGGGTTTACGTCAACAACTATGATATGGTTGTGGTCCTGGATGACGAAATCGAAGATGTC +GCAGAAACTTTGAATCAGGCCAAAGAGTTGTTCAAAGAGTTTTTCCGCTAAGCTGTTTGA +TAGCATCTTTTTCGGAAGGTGCTAAACTAACCGTTTACATCCGCTTTAAACTATGGTATT +ATAGCTACATCAAAACAAATATGGAATTCGGAGAAACAAAATGTCAAAAGTAACTTACAT +CATCAAAGCTTCTGAAAACGTTCTGAATGAAAAAACTGCGGCAATTTTGGTTAAGGTAGC +TAAAGGTAACTTCATCACTTCAGCTGAACTTCGTGAAGAACTCGTTGAAACGATGAATGC +TTCTTCAGTTAACAGCAACATCGGTGTTCTGATTAAGAAAGGACTGATCGAAAAATCAGG +TGATGGATTAGTTATCACAGGTGAAGCACAAGACATCATCTCAAATGCTGCGGTACTTTA +CGCACAAGAAAATGCTCCTGAACTTCTGGAAAAACGAAATACTCGTAAAGCTCGTGCAAT +TACTGGCGAGATGGAAAGCGATAAAGATTTCATGATGGAACTTCTGGCAACTAAAGAAGA +ACTTTTCAAAATCAAAAAGCTGGATGTTTATCGTAGCAACTTTATTGCAGTCCTGGAAAA +ACGTACTTTCGGTATTCGTTCGTTCGAGGTTAGCAATAAAGGTAACTTCCGCATCTCCGG +TTACAAAATGACAGATTCTCAAGTGAAACACTTTGAAGATCTTGGTATGACAGCTAAACA +TTCTAAGAACGGTAACATCTACTTAGACATCGCTCGTACTGAAGAAAATATCGAAAACAT +CATCAACTCCGTTGACACTCTGTAAGGAAACTGAAATGACTATCCAACTGAATAAACTGG +TAGAAGATATTAAAAACACCATGAACCGCTCAGAGATTTTGAATGAACTTCAACGCTGCG +TACAACGAGTTGATGATGAATACCACTTACCAACCAACGCATGGGAAGTCTGGTTCCGAG +GTTCTCATCTCGGCTCGATTGAACTGAAAGCCAAAGGTTGTTATGCAGTTTATAGTTCTC +TTGGTCGTCATTGCGGTGATTGTCAGAACTTTATGCAAGCACTGGCTCGCTTCATTAATT +CATGCGCAGTCATTATCGCCAAGCAGCAAATCGAAGAAACCGAAAAATGGATTGACGAAG +TAACTAAAGAGCCAGAAATTCGTCGTTGGGGCGTTACTCGTAAATCTCGTTGGATTGATA +AAGTCAAAGGGTGGTTCAAATGATGGAAACGATGAATCAAAATAATGAATTGGCGGTTCC +AGATATCTGTTTCAAAATCGCCGATTGGTGGGATGGCCGCAAACTTCAACGTCGTATCGT +CTGTGCAGCTAATCGTTTTGAATTAAAAGCGGGTGGGTATCTGGTTATTCCAGGCTCCAG +ACATTATTCAAAAGATATGGCAGAAGTACTGGACCAAGTGAAAGATAAATTAGTGACTGA +TCACGTTCACGATGAAGACCAAGGATTTATTGATCAATGGGGTGAATATCATAATCGTAA +AGATGCACTGATTATTGCTACACATTCTGGTCAAATTAACACAGTCCGTAAAAAGGGCGC +ACCATACGACACATTATTTTCTGAGGATCTTTATTAATGATTAAGTCAACTCGTCAAGTA +GAAATGGTTCGCTATGAAAGTGCGGCTCTTAAAGCGTTTTACGATAAATGTAAAGAAACT +GGCCTAGACTATTATAGTTCCATTGCTGATGAAATCTCAAATGATTTAATGTGGCAATGT +CAAGACGATGTTTTGAAATTAGTAGATAAAGGCGATTTTGACATTATTTCTATTGGTCGT +CCGATTGAAGAACTTATTAAAGAAATTGAAGAAGTTGTTGAAAACTACGAGCTTGAGGAT +TATTTCTAATGAACGCTAAAGAATTACAAATCGATGCAATCAATAACCGAATTCATGCTT +TAACCCGTGCTAATGAAATGATGCATGAAAATTGGGGCACGTACACCAATGAATCTGGGT +TTAAATTCTGCGAGTCAGAATTGGCTAAGAAACTCACCGGAAAAGATTATGTTTGCCCAT +TCGCATCACCAATTAATGGAATGATTAAACCATTGCTGATGGAACTTTACATTCAAATGA +ATGAGTCCATGATTGAAAGTCTGAAGTATCAACTTAAGGTATTGGGTAATGTACAGACAA +AGAGCGACCAAAGCTGAACAAGAAAATGCTAAGCTTCGAGCTGAATTAGCTAAACGTCCT +GATTACGAATGGTTCGTTGAATTAATCAGACGTCATCTTAAGCAAGATGCTACTGTTCCA +TTACAACACTTGGCTGTGCAAGTTAAACAACTTAAAAATGCAAGAGGACATTTAATTGAA +CAATCCAGTAGCGAAACATGATTTCAATAAAGGTGGAGCTCATAAAGACAAAAAGCGCGC +CTCAAATGATTCCAAGCGCAAACAAAAACATAAGGGAAAAGATCATGAATAGAACTGAAT +TCGAACGTCTTGCATATGACCGGTACCTCGGAGTCGTCACACAAGTTAAAGTAAAGCACT +CCATTGATTTGGTTATTCGCATGATTGGAGAAGATTCTGTCCGGCGCGGAATCTTTGTTT +CAAAAATGCTTGCTTATATTAATGTTATGGCGAAGAATAATCACCATGAATTCACTGATT +GCGATGTCACAGTATCCGAAGATAATCGCGGAATTTATATCGAATCTTGGGATAAAGGTC +ATGTGATTAATATGGCTTGGGCGCTTATGTCTTTTGCCGAATCGCTTGACATGACAATCA +CTACGCACTGAGGATAATATGAGTTTACCAACCAAAGCGTTATTTTACAAAAACGGTAAA +GAAATTAAGCAAGCATTTGTCAACTGTGGCTGGGCTTACGATGAAATATCTGCTATCGAT +CAAGTCATCCGAGCAATAAAGGAACATGATATTGATTATGACGAATTTATTGTGTATGGA +AAGACTTATAATCACCGGGTAGAAGATTTACCAGAATATATTGAGCACCTGCGAGCTGAG +ACGAGAAGAATTAGAACCGAAATGCTTGAAAAAGCAAGAAAAGTTTCTAAATCTTCTGAT +CAAATTATGCAAATGGCTCGACGCGCCATTCCAGAATTGTTAGCGAAAGATATTCTTAGT +GTTCAACCAATGAACGTTGATATTAGAGGTTTGCATGAGTCATAATCTTGAAAATGTAAT +TGAGTTCCAGCGCTCTCTTGAAGGTATCATGAACAAGTTGGCTCTCGGAGATATGGTAGA +CTATAGCTTTGACGAGGCAATCAAAATTTGTCACTGGATGGGGCGTAGGGTTCGTCCGAT +AGGTGCTGAATGGTATATTATTGCAGAGAAGAAAGAAACTCGCTACGCGCTCTGGATTGA +CTCTGGTGACAGAGAATACATTACTCAACCAGAACATACCACTCAGCGTTGGGAAGTATT +GAACTAACCGTTTACATTTTCAGAGTACTGTGATACTATACTCTTATCCTTTAAGAAGTA +GGAAATAAAATGACTAATTTTGAAATTGTTCGTGAAGTTGTTACTATTGCATCTATTTTG +ATTAAATTTGGCCGGGATGATATTGTTGAAAAGCGTGATCACTTCATTGCATTCATTAAT +GAAACTCACGTCGATGATAAAGATTGGAGACGGTTAAATCAAGGAAGCTTCCGCAAGCTG +ATCTATGAATTAACCGTTGATGAGAAAAAATTGCTCGTCGAAGAATTCAACGAAGGATAT +GAAGATATTTACCGCCATCTGGCAATGTACACGAATAATTAACGAGCTCTCCCTAGTGTT +CGCGCGGCTTGGTCGCATATAGATTCAAGTCGTGCGGTATTGATATTTTTATTCTTTTCA +TACCAATACATTGTAACAGTTCCTGCGTATACATTATCTAAATTGAAGAATGGACAACTA +AACATATACTTCAATTCTTGAGTTTTGACAGTTGATGGTAAAAATACAAATTCATTTTCA +GAGTAAAAAACCCTTCCTCCTAAGTGAGTTGAATACTCCTGAGACGTTTTGTCAACAGGA +AATCCACCTAAACTTTTTTCTGAAACAGTAGAAGGAAGTTTCCCTTCATATGCTATCAAG +TCCACGAAGTAGTTCAAGTTTTTTGGTCTAAATGAATATACAGCACTGAATTCAGCACCG +CTTGACACATGTACTATTTGAAGTTGTTCTAGAGCAGTGGTTTCAAAGCGAGCTTCTCGA +TCCTTTTGCTGAATATTAGCGTAGGTCTCATAACTCGAATCTTTGTAAGCATTTAGTATT +GCGTCACTCTTAACCCAAGTCATTCCCAAAATAAAAAACACAATTAACACAAAAACCCGG +GAGAAAAGAACTCTCCCGGTTGCATTATCTTTGAATACTCTATCCCAAACTCCAAAAACG +ATGTCTGTTATTGGCAGACTAATTTTTGAAGCCATAAGTTTTCTCCTTTGGAATATTTAT +ACTCGAGATCCATATATAGTGCCTACGTTTTGCCATGTTGGAGCAGTACCAGCAACAGCT +GCTCCACCACCTGCAGGACCCCATCGCCAGCCGGATTCAAAGTTACCGAATGCCGGGTTA +CCACCAGCAGCTACATCACCACCTGTACCGCCAGTAACCGAACGTCTCGATGATTCGTAT +CTAGAACCTGTCCCTGGAGACGAAATAGAAGCATCTGTACCATAAGCGGTGACGCCTGCG +CCAGACGCTGAGCGTCCGTATATACCAAACGGCCGTCCACCGCCACCACCTGCTGATACG +CCTGAAGATTCCCAGCCTTGACCGCCACCGCCGCCGCCACCGCCGGCTATAGCACCGCCA +TTGTTAATTCTTAAGCGTCCACCAATCCAGTTATGGATACACGGGCCACCTTGTTGAGCT +GCAAATACCCAACCAGTAGTTCCACCAACGCCATATCCACCGCGACCAAAAATTGTTACC +CCGTGAATATTCAACTGGACATATTCATTAGCCAAATCTCCAGGGAATTCAAATAATGGA +ACTGTTGCATCATATGAAACCATGTCTCCACGAACATTAATTACAACTGGAGCATTGCCT +TGTTGACGCATCCAACCAATTAGCCAATCTTTATTATAATTATGGTTAGCTGCCAAGTCA +ATAACAACTTCTCTCGATCGACCTATCAAGTGTGACATCCAAAATGGAGTCCCTAATCTT +ACTTGTGATGCAGCAGTTGACATCCAACGTTGTCCAGTTTCGGCTACTGCACTACTTCCA +ACCCATGGTCCTGTTATAGCCATAAAAACTCCTAGGGCCCGAAGGCCCTTTATTAAATTA +ATGCAGAAATGATTTCTTCAAGCTTCTGAATGCGAGCTTTTAATTCAGAAATCTCATCAG +TGTGTTCGTTAATTGTTGCAGTGTTTAGCGCAATAATACCATTGTAGTTTAGACGAAGTA +ATCCTTCTGGATTATCTTTATCGACTGAAATCAACTCAGGCAATACAGCTTGAACTTCTT +GAGCAATTAAACCGGATGACTGTTCCCAATTAATAGAGCCGTCTTCTTTGAACCCTTTCT +TCTGAAGATATAGATATCCATTCATCTTCTTAAGAGTTTCAGAAGGAGATTCAAATTTAC +GAAGTTCACTCTTAACACGAATATCCGAACGAACATAAAGATCGCGAACATAAGTGGAGT +GTGAATCGCTAGCTTGGTTCAAATCGCCATAAGAAACTAATGATTTACCAGCTTCGGTTC +TAAATCCACCTGTATCTAATACGGCAATTTGCTGAGCATTTGCCCAGTGGGTAACAACAC +CATCACTGGCCCAATATATTCCGGTATCTGTATCACCGATAGTTATACCAGGTTTATTTG +GATATGCTGATGATCCTACTCCGGTTAAGAAATGAGGAGCAGTAACATACCGTTGAGATA +TAAAATCACCTTCAATTGTAAAGGTAAAAACACCTTGACGGTTTCTGTCAGGTTCGTATC +CAAATGTACCGACACGGATAATTCCTCGGCCCCATTGAGCTCCACCGTTACGAAGAGTGC +CGAGTTCAATTTTTGTGTTATACCCATGGGTCGTTGCTATAGTTTTCTGAGCAATACACG +GATAATAATCAGAAACCCCAGAGACATTACCCATATCAACTGTAACAGGAGCGTCATAAG +CCCATTGCTCGGCATAGGCATGACCATATGTCAATGGCCATTGTGTTGCATCAATTAATA +CATGATCTTTATTCAGCAGAAGCTTATTAGTTCTTCCTTGAGATGTTACAGTAACAGAAC +CGTCTTTGCGCAAATTTAGACTATTATAACTATTTGCGTCAGCTTTATTTAAATGAGATG +CAAACGAGACAAGACTATCGTCACCTCTAGATTTACCAACCAACCATGCTGATGTATTAT +CTGCTATATTACCTTGCAAATATCCAGAACTAGCAACTGAGCTGTTTATTTGAAAAACGC +CACCAGCACCATTTGCTTTTACACCATTCTTAAGAATTAATGTTCCATCATTCTTAAGAG +TTGAAATCCAGGTATCAATACCTCCACCGGTCCAATAAGTCAAACCATAAGAATAATTAT +CATTTGTGGCTTCCTGAATCATTGCTTCATGCATGATAGTTCCGCCTTCACGAGCACGGA +AGCGACGCAAGCGGTTTAACGATAATGTTTCACCAGCTGGCTGATTAGTCATGCTTGAAA +CATCATAAGATGTTAAAGGCTTATTATAAGTGTCCACACCTGCAGCAGCATATAATACTG +TACTTGTACTTAATACTAAACCGTTTTTAGTATCTACATAAGCTGCACCGCTACCTCTTA +AATAGTGATTATATTTTCCACCTTGGTTAAGTGCAATATAAGTATTACCATCTCCAGCAG +AATTACCATCTAAGTGAGTATCAAATCTTGCCAATGAAGCGCCATCCGGCGCAATAAGAT +TATTATTACCATAATCAGCATTACTATAGCGAAACACCAAAGAACGAACTGATTCTGTAT +ATCCATCTTGAAATCTTAATGTCGCGGCACCATTTGATACCATAGCATATTTGCCATCAG +CTAACCATTTAATGCCAGTGTCAGAATCACCTATTGCAATGGAGTTATCACCTAACACAG +TAGTTCCATTCCAGACAGTGCTAATAACTAGTTTTTTAGCTACGGGATAACTTAGGAACA +CATCTCCATCTAATACAGACTTTACAGCTCCTGTGACCGGGTTAATACTAAATGTTGCTA +TATCTTTGAAATTAGCTTCTGTATCGCCATAAAAACCGCGAAGAATCAAATCAGTGATAT +TGGCGGTTCCAGCTAGAATTTGCTCAAAACGCGTAACTGAGCCTCCGTAGTTAGTGATAA +CCAAAGGCTTTCCTGCGGCGCTGCCGTTAGGACGTATGATAATTGCTTTAGTGGAAGTAT +CGCCAGAGAACGCAACATTTATTGCGGTCATGTCGCCATTTAAATCATAGCGCCCGGTTT +GAATGTAATTGCCAGTTTGAGTTACGTTACCATCGATATTACCGCCTTTAGCAAATCCTA +AATCGATAATAGCACCATTCTGATCTTTAGTGAACAGCATACGATCGGTTAAGTTAATAG +CCAGTTCACCTTCGGCAAGCTGGACGGCTGTAGGTTTCACTCCAGCCTTTGTAGTTCTTT +TGAACTGAATTTGTTTGATAGTTGCCATATATCCTCAGCTATAATATCCGAAGTCTTGAA +TTGAATCTTTTATAACGATTTGATCGAAGCGTGGAACGTGGCCATTTTCTGTTGCAGGCA +AATTACTAAAAAAATTAGGAGCAGCTAAAGGACCACTCATTGTTTGAAGAGTAGAATTTT +GTAATTGTACTTGCTTAGCATTATCTACCAGTGGTAAACCTACTTGTGTTTTAGTTGGTG +GATTACCAGGAGAAAATGTACGTCCGCTTGAATCATACACCTCATTACCAAGTACATTAC +CCATAGTAATAAGACGATATACTCCATTAGTTTTATCTGGGTCAAAGATAACGAATGGTT +GACCGTCTGGTGTTTCCAGAGCGAATGGACCAGTAACTTGAAGTGAAGCTTTATATGTTA +TATCAGGGCTAGTGCTCTTGGGAACCCCAAGTGTCACTGGATTACCCTGCTTGTCATTAA +AGGTTAGGCCTTTACTGAATCTAACGGATTCTGCATAAGTTCCGCCTTGTGCTTTAGAAA +CGAAATCGTTGTCTATAGCCTGAGGTTTATCTTTTTCGGTATAAACCTTGTATGTTTTGA +AGAGCAGCGTGTCACCGGTTGGAAATAGAGGGAAATTGCCCTTATGCCAAATAGGTGAAC +CACCGACTGTACTGTTTGATTTTAAATCGGCCATAAGTCCTCTCTATTGTTATAAGACTA +TTTATACGACAAAGGGCCTTTCGGCCCTATATATCTTTTTCAAACTCCCGCCAATCTGCG +CTATAAACATGCCCAGAAAGATCTTCTCCTGGTAATGGATTACCATTTGAATCAACTTCA +GGTACAGCCATTTTAAGCAGAGTTGGATCATCTCCAGATACTGCTGAGTTCATTCTTATA +CCATTAACAGAAATTGATGTTGAAGTAGAAGATTCTAATGATCTTGATACTTTAGTCAAG +ATCATCGAACGAACTCCTCCTTCTCCTTCAATATTGGTTCCATTAGTTTTAGATACAACT +ATTGTGTATCGTTCTGCGCCTACTGGAACTTCTATGTTCTTTTCGCTTTTTATCCAACTA +CCTGCGTTTGCTGGATTAGAATCTATATTTTCGCTTTTAATTAAACTTCCAGTATTTGAA +AGCCATCTTAAGCTAACTCGTATATTTCTACCGCCAGGAACCATTTCTGTGCTCTGTAAG +AATTCAAATGACCAAAGCATTTTATCAGAAACTTTTATTCCAACTGAAGCTATAGGAACT +CCCAAAACGTCTGATATTGGATAACGCTTTATCGTATCATTTGAAGAAACATACTCATTA +AAATCTTCTAATGCTCTATAAGAAAATCCGGTTGCGCCTATATCATTTGCCTTATCATAC +ACATACTCAATAGCCGGACGCAGGTCTTCTTGTCTAGATACGCCATCTGAGTAAGTAACG +TTTTCAGCAATCATTCTATTAGCTTCAATAGAATAAAGACCAGCATAAATGGCAAAATAA +TTATCTGTGTGCCATTTACTTGGCCATAAAGTACTCCAAAAGGATTTAAATTTAGCTTCA +ATAGATGGAGATTGATTAAAATCACCAAATGTAGTAAAGACTAAAAGATTTTTGCTTGTT +GAAGGAAATGTAGCAAGATATTCTAAAAATGCTTTATTTGGAGCTCCAGAATCTTCTTGT +AAAAAGTTGAAAGTTTTCTTTGGTGATATAGTATTCATCGCTGGATTGAATTCGCGGACA +TTTATTCCAACACCATCTTGTTGGTCACCAATGCGTTCATCTTGAAAGGTGAAATATCCT +AAAGAAGGGGTCGATAATTCCGACCCCGGAGCGAATGAAAACTTATATTTGATGGAATTA +TTTTCTGAAATAACTTGAGTTTTGGTATATCCTTCACCAAAACTCGCCATCATTTTTTCC +ATTAAGGAATCCAAGTGAAATCTACAGATTGAGTTACCGGGTTCGGAGTTATGCGAACAT +TTCCGATCTGCAACCAATCACGAATGGTTAAGTTATCAAATGCAGAACCACCTGCCGCAA +CAGCACCAATTTCCTTAGAAGTAGGTGGGTTATTGCTGGTGTACATTCTGCCCCATTTAT +CCCATTTTCCAGTCAGGGCGTTGAAGTTACGTATCCAGAAAGTTTGAGCAAAATAAGTAG +CATCGACTGTTGCTGGGCGAGGTGCCCAAATTTGCCAAATGCTATTTTTGTCAATACCAT +GCTGAGTTAACGTACCTGGACCTTTAACTTCGGTGTAATCAATTGCAACTGGATTACCTT +CTTCATTTGTTCCATATACCGGAACAACAAAGCCCGGAAGCTTATTGTACACAGCAGAAG +TTTTAATTGAAGAAGACCATGCGTTCGGATTAGAATCAGTTGGCGGCTGATCTGGAGTTA +CAACACCTTCAGGAATCTGTACGCGAAGCGTAGCATCAATCAAAAGGTTTCCGGTCATTG +AATCGCCAATTTTCTTGACGAAGTTCAATCCAATTTGATTTACCATGTTCTTGGTAGTGA +TTACCGTCGAAGTACCGGTTGAATCTGTCACTGTCAAATTACTAGCATCAGTTGTCTGAA +TATTAGTAGCTTTTGACTGTGAACCAAATTGAATATTTGAAGGGGTAGGATTAATAACCA +TTGTACCACCTTCTACACTAAAGCCTTTCATAGCATCGATTCGATTGCCAGCTTTAAGCG +TTTGTCCAACTTCAACATCACCGTCACGCTTCAGACGAAGCACATCAGCATCTGCAGTGA +AATCCAAGTAAGTTGATTGAGACTGAGCATCGATCAACCAATGGTTCCCTTTGGCATTCA +AATTAATCACAGAGTGACTTGAGTCGTCACCAACGGTCAGGTCTATAGTAGCCATTATGT +TGGTGAACTTCGATGTACTAGAGGACACCAGAGGTGCACTAGTGTTTAACTGCTTGGTCA +AAGTTAATGAACCATTGACAGTCTGATCGATATCACGGCGAATAAACTGCAGTGAATCTA +ATCCATCGAGCTTCTGTGAATCTACAGCAATTGCGTTGATCGGCAAAAAATTCTGAAGTG +TTTTATTCATTTCAAATGGAGATACTGCATATCCAGTTTTGAAATAATTTCCAGCATTCA +GTTTAACATCATCATTCTCATAAAGACCAGTAACAGCATTGAACTTCACGCCAGAACCAG +TTACTTTATCACCAACGAAGGTTAATGCTAATTCAGTCAATTTAACTGGACCACGACGAG +TAGGCGTAGCTTCCCAATCAGCTTGTTCTTGAATAGCATACTTCAGATGACTAGGAGGAA +CTGCTTTATTACCCACAGTACCAGTCGTCGTTTCAACACGAGTCGCTATCTGAATAATAC +CTTCTGTCGTTTCTGAAGTCTTCTTATCTTGAAGCTTTTTAGGAGTAATGATAGTTTGGT +CATCTACACCAGCATCTACCAGAATCTTAGTAGCAACCGCCAAAGTACCACGCTGCGTCT +CAGATGCTTTCTTAATATCAAGTTTATAATGGTCCCAGGAGTTACCTGATTCAACCAATC +CAGACAACGGTTCAACAGAGTGTCTAGCAGGATCAGAGAAATAAGTTTTAATTTTAAACG +GAGTCGAGATGACATCGTCCAATGCGCCAGCATTGAATTCTGGTTGAGTAGCGATACGAG +ACAATCCAGTTAAATCTTGGGTTGCTTTACGTCCAGATAATTTCTTAGGCGTAATGTATC +GGAAATCGTCAGTACCAGCATCAGTTTCTGGCTGAGTAGCTACTTCAGTAAAACCAATTC +TTCCTTCCGTTGCGGTCTTCTTATGTAACTCAACTGGGGTTACAACAGTTGGAACTTTAG +GATCATGCACAGTTCCATTAATTACTTCGTTCTCAGTTGCTAAATAAGTACCACCTTGTG +AGGTATAAGTTGCTTTATTTTCAAACAGAGAAGCTGGAGTTACAGCTTTTGTATTTTCAG +TATTATCGTATACGTTAGTTCCTTTAGTATCACGATCTACACCAGCGACAGTGGTTATTC +CGGTTTTAACTAGAGCTAAAATACCAGTCAATATTTCTGAAGCTTTACGATTATGCAACT +TTTTAGGAGTCACAATCGTAGTATCATCAGTTGAACCATCAGTTTCTGACTGAGTAGCAA +TTTCAGCTACACCACGACGAGTTTCAGTAGCTGTTTTCTCATTCAGCATTGCTGGAGTAA +CAATTACATCATCAAGATGAGGACCAGTGGTTGGAGCTTGAATTTCAGTTAAAGTAACTA +AACGAGCAATACCACGGCGAAGTTTAGTAGCAACTCTTTTAGCTAATGTTTCTGGAGTAA +TTGCTAATTCTTTTTCTGGGTTATTTTCAAGATCTACTTGAGCTTGAGCTTCAGTAGCCA +AAGCAATAACACCTAAACGAGCACGAGTAGCATCGGTCTTTGAATCGACACGTTCTACTG +TCGGCGTGTTGTCAGAAACAACCCAGTACTTCAATCCAGCATCTTCAATATAAGATAAGT +CTAATACTGGAACGTAAGAAGTATCACCATTGAAGCTCAGAGTTTTATTTTGAACCCATG +TAGCATCTGGTGGATATTCTGAACGTTTAGGGAATTGAAGCAATGAAACTGAAGCTGCTA +TAGTATCTGTCCCAGTACAAGTGATATCAACTGTTTGACCTTTGCGCATATAGTTCAAAG +AAATCTTAACAGTATCACCAATAGCTGGTGCCGGAGGCAAAGTTATTACAACTTTTTTAA +TAGTACTATTATTTGTTCCAAATACCATCACATGATCATTCGGACGTACTTCAGTATCTT +CTTTTATGATGCGAAGACGAGTGCGGAGATCACCATCCCAAATTCTCCACAGCTTTTCAA +CCGCGTCAAACACGATAAATCCATCACCAGAAGAACGCACTTCTAAAGAAGTTGTTCCTA +CTGATCCAATAGAAGTATTAGCATCGTAAGTGCTTACAAACATATGGAACAGAGGGTTCA +TTCCATCCATGTCTGTGAAATTAATAATATCTCCATCATTCGCAAACTTAGGAAGAGTAA +CACGAATAGGAGCCCCAGTCGTATAACGACGAACGATGAATTCATTAGCTTGTGCTTCAT +GAATACTCGCCGGGGTAATGGTTGTTGCCGTGCGCTCATTGTCGCTGACATAAAGTTGCC +ACAAACGATTACTGAAAACTAAAACCATTTGACTATACGGATGAGTCATTCGTACACTGC +GAACTTGAGATCCAAGCCAAACGATAGATTGAATAGAAGCATCAATTGTTACATCAGCAT +AACCTGGCTGACCACCTATATCTTTTAAGAAAATAGTATCGCCATCTTGTGGATTATTAG +GAAGAACAAATTTAACAGTTCCACGATTTTCTGTATCAACAGAAATAAAGTCCCCTGATT +TAAGGACTACTGTTCCAGCAGATTCGGTTTTCCATTTTGCGTCAGTACGCAAAGCTGTCC +AATAGAGTTCGTTGAAAGCGCCAGAAGGTTTAATGATTTCGCGATTTGCAACCCAAATGC +GGTTATCATAAATCACTGCGAAATTCTTTGGATACCATCTGGTTTCGTCATATTGTTGTA +ATGTATTTTCTTGGACGAGAAATTCAACGTTAACACCATCGCTCGGAATAGTACGATCAG +CGGTTTGTACGTTAATTACTTTCTCACCCGCCGCATCCAGACCTTCTTGAGCTCTGAATT +TTCTTTTTAAATCGGCCATGATGACTCCTGTTTGCTATTATAGAATGTATTTATAATTGA +TCTGTCACTAATTGAACGAGGTTCAAATGAATTTAAACGAAATGTTCGGGTCTGAAGAAG +ACCAAAAAGAAGGTATTGCTTTTGTCGACTTGTCACAACTTGCACTTGCTGTAGCGTTGA +ACACATTTGCGGATGGTGAAAAGATTCCAGTTCCAATGGTTCGGCATTTATTCTTAACTA +CGTTAAAGAAAAACGTATTACAATTCCGTAAACAAGGTTACACTAAAGTTGTTATTTGTG +TAGATAACGCAAAATCTGGCTACTGGCGTCGTGATTTAGCTTATTACTACAAAAAGAACC +GTGCTAAAGGTCGTGAAGAATCTAAATGGGATTGGGAAGGTTACTTCACCGGTATTCGTA +CTGCAGTTGAAGAATTTGAAAAGTACATGCCTTATGTCGTCATGAATATTGACAAATATG +AGGCGGATGATCATATCGGTGTTCTTGTTCCTTATCTTTCTTTGAAAGGGCACAAAATTA +TGATCGTTTCTTCGGATGGTGACTTTAAACAGCTGCACAAATATCCGAATGTTAAGCAAT +GGTCTCCAATGCATAAGAAACTTGTTAAGATTAAACCTGGTGAAGCTGATCTTGAGTGCT +TAACTAAAGTTCTTAAAGGCGACCGTAAAGATAACGTTGCTTCTGTTAAAGTTCGTTCCG +ATTTCTGGTTCACTAAACTAGATGGTGAAAGAACTCCTCCATTTGCAACTAAACTTTTGG +AACAATGTCTAGATGCTGGTCCAGAAGGAATGAAAGAGCTTTTAACAGAAACAGAATACA +ATCGATACCTCGAAAACAGAGTACTTATCGATTTTGAATATATCCCAGAGGATATTGCTA +AAAAGATCATAGATTATTATGAATCATACAAAATTCCTCCTCGCGGTAAGATCTACACGT +ACTTCGTGAAATCAGGTCTTTCTAAATTAACTTCAAAAATTAATGAGTTTTAAAATGGCT +AAAGAAAAGAAAGTTGCAGTTGAATTTGATGAAGCAATTCATGGTGAAGATCTGCGTAAG +AAAATCAAAGAAGCTTCAGATAATATGCTGAAGATCTCTGGTTATAAAGTACTGATCGCT +GATCTTCGTAATTCAGCCAAAGATGATCTTGGTGTTGAAGGCAAAGTATTTAATCAGTTG +TTAGCAATGTATCATAAAGATACTCGCGATCAATTTGAAGAAGAAAAAGATAAGGTGGTA +GAACTGTATGACTCTGTTTTCACTAAATGACGAATCTGTTCAAGAAAAAGCCAGTGTTGA +TGAATTGCTTGACAAACAACAAAATGGCTTTACTATTGAAGCGTTAGTCAATGAACAGGG +TCTTGGTTATCTAGAAGCCACTACAGCTTGGATGGAGGAAAACTCCATCCCTGAGACTCA +GTTCTCAAAGTATATTCCTTCTGGAATTATTGAAAAAATTCGTTCTGAAGCCATTGATGA +ACACATGCTTCGTCCTAGTGTTTCACGTGGTGAAAAGACTAATACATTAGACTTTCTGCT +ATGATTAAAATCCGCATGCCTCCAAATAATAATCGTTACATCAACGGTAAATCAGTTTAT +CTACTTTACTTGATGTTGAAACAACATTTTGCTGGCAAATATGACGTTATAAAATACAAC +TGGTGCATGCGGGTTTCTGATAAGGCGTATCAAAAACGCCGTGATCGTTATTTCTTCGAG +AAACTCGCAGAGAAACACACCCTTAAAGAACTCTCACTCATTTTCATGAGTAACCTGGTG +GCTAACCAAGATGCATGGATCGGAGATATTTCTGACGCAGATGCTCTGGTATTCTATCGA +GAATACATCGGTAAATTGAAAATGATTAAGAGTCAATTTGAAGATGATGTGAAAAACATC +TATTACTTCTCCAAAAAAGTTGAAGTCAAAACGCTTAATGAGATTTTTGAATATAATAAT +AAGGTAAATACATCTTATATCTTCAAACTTCTTCAAAGCAACATTATCTCATTCGAGACA +TTCATAATTCTGGATTCATTTCTGGACATTATAAATAAACATGACCAAGCAACTGACAAT +CTAGTCTGGTCAAACTATTCAACGAAACTAACTGCATATCGCAAGATATTGCAAGTGGAT +TCGTATGAAGCTAAACAACTGTTCATAAAAACTGTAAAAAACTGTAAATACTAAGGTAAA +AATATGTCTATGTTCAAACGTCGTAACCCTGCTGCTCTGCGTACTCAACTGGATTCTCTG +TCTGGTGGTAATAAATCTTTCGCTGACGCTGATAAAGGTGAATGGAAACTGAAACTCGAT +AACGCAGGCAATGGTCAAGCGGTAATTCGTTTTCTGCCTTCTAAGAACGAAGAAACTGCT +CCGTTTGCAATTCTGATTAACCACGGCTTTAAGAAAAACAATCAGTGGTATATCGAAAAC +TGTACTTCTACTCACGGTGATTACGATTCTTGCCCGGTATGTCAATATCTGTCCAAGAAT +GATTCTTATAACACCAACAACGAAGAATACAAACTTCTGAAACGTAAAACTTCTTACTGG +GCGAATATCTTGGTTGTTAAAGATCCTGCTGCTCCAGAAAATGAAGGCAAAGTATTTAAG +TATCGTTTCGGTAAGAAAATTTGGGACAAAATCAACGCAATGATCGCTGTTGATGAAGAA +ATGGGTGAAACTCCAGTTGATGTAACCTGCCCATTTGAAGGTGCAAACTTCGTCCTGAAA +GTTAAGAAGGTATCTGGTTTCAGCAACTACGACGAATCTAAATTCCTCGGCCAGTCTGAA +ATTCCAAATATTGAAGATGAAGCTTATCAGAAAGTTCTGTTTGATAGCTCTGTTGACCTG +TCAGAAATGACAGCAAAAGACAAATTCAAATCTTTTGATGACAACCTGAAGAAATTCGAG +AAAGTAATGGGTACTGCTGCTATGGGTGGTAATGCGGCTCGTGCCGAACGCCAAGCAGAT +AAAGTAGCTGATGATCTTGAAAACTTCGACGAAGATCTGGCGAACTTCAGTGCCGGTTCT +ACAACTCCAGTAGATATCCCAGAAACTTCATCTAGTTCCGATGATGACCTGGACGATATT +CTGAACGGTTTATAATAAGAAGGAGCCTTCGGGCTCCTTTTGTTGTTTCTGGACTCTGCC +GTTTACATTCGTTGAAAGTAGTGTTATGATAGTCTCGTAATCTACTAGGAGTAATAAAAT +GAAAGCATCAGTAATTCTTTTCGTAACTTGGATCGGTGATTTTGAAGTAGACATTGAAAA +GTATAACTTGTTCATGGATGACGTATCACACGAGTGTGGCGGTTACGAAGTTACTTTGAT +GGGTGAACATGAAAAACTCGTTGATTTTCTGACTGATTGCTATATTCCAGGTATGGAAGA +ACAAGACGTTGAAGAGTTAATGAATTCAATAACTGTTTACAACGAAGAAGAACTGTGATA +CTATAATCTAGTCAACAACTGAGGGAAATATTATGGCACGTTTAGAACTTGATATTGTAG +CTGAAGTTCATCGCAATGAATACGGTTATGCAACTGACTTGATCTTCGATGATGGCTCTC +GTTTTTACGATGTTGATCACGGTCTCGACTTTGATCTTATCGAAGAACACGGCCCAGGTG +GCGGTTGGCCAGTGATTTATCTTCGCGGTTCAGAAGCAAATATTCGTAAGTGGCTAGAAG +ATAACCAGTGGGAAGACATCGATTGGATGCTTGAAGAATTTCTTGAAAAAGGTGAATAAA +TGAGTTTAGTTAAAGTACGTTTATTGAATGACGGCGGGTTCAACGGTTTTGTTGATACCA +AATTTCCCGTAGTAGTTATGGGTCAATTGGAAGAAGACTATGGCGCTGTTATTATTAAGC +CAGACGAGTTAAGACGTGTTGGATATGATGTTGATAACTATATGAATAACACAGTGAATG +GCACAAGAACATTTTTTCTTGATGCAGAAGCTGAATTAATTTAACTTCAGCCGTTTACAA +CGTTGATAGGGTATGATACTATTACACTATCAACTAAATGGTAAACAAAACCTTGGAGAA +CAAAATGAAAACTTTAGAAATCGTAGTTAAAAATGTTGAATTAGCTCGTGAAATCGCTGC +ACAGGTTAAAGCGGAAATCGTTTCAGAAAAATTGATTTCAAAATGCACTCTGATCGTTCT +TAAGGGTTCTTTCGATCAGCTGATGGATTTCAATGACGAAATGTTCTTCGAAACTAACCC +AGGTGCTCATAAAGAATACCTGAAAGAAATCATGGCTTAATGAGGAAAATATAATGGAAA +CTGGTAAACTCTACACTTTTAAACCGTCTATGACTCATCTTTTTGTCGCTGAAGCATGTT +CAAACAAATCTATGGCTGAAGCTATTATGTTTAACGGCGGCTATTTTGAAGTTGAAGCTA +TGATTGTTTTGAATAATGAAAGATATGTCACTGCCGTGAAGTTCCCTAAAACTGGCAAGT +GTTTAAACGATGATGGTAGCGGAGACGAGTATTTTGAAATCTATGAAAATGAGTTCAAGT +ACTTCACTGAGTACAATGAAGTCGAACTTAATGACGGTGTTCGTTCGATGACACTTGATG +TCAACAAAGCTAACGCTGTTGAAATGATTCAACTCATTCAACAAATTTTCTTAAAATAAT +GTTTACATCGGTGTAAGATTGTGTTACTATGATCTTACACCAACAAGGAGAATAAAATGA +AACTTCAACGTCAAAGCATTAAATTAGGTTCTGGTTATCGTGGTAAGTGGAACTTCTGCA +TCCTGGACAACAATCCAGAAGAAATCGAACGTGTAGAAGAAATCCTTTGTGGAATGGACA +CTGGCTTCTCTGTTGGCGGTGAAGCTAAAACCTGGGGTGATTATTGCGACCAATGCCCAT +GCTATGAAGACGGTTATAGTTCTGGCTTTTGGATTGACGTTGAAGATGTCCCGGCTTTCA +AAGCTGCATTCAAACTTGCTAAGGCGAAGAAATAATGGCTGATATTTGGTGTTCTGCTGC +TCCTGTAGTTAATATTCGTTGTCAGTTTGATCATATTCCCGGTGTAACGCATATTTCTAT +GCAATACGAAGATGGACGCGGACAAAAAGTGTTTTGTAAAATTAATTTTTCAGGTGGCTT +CGGTCCAGAAGTAGCTTTAAGCGAAAATGACCTTAATGCGGTATTAACTAATGATACCAA +GTTCGGAACTTTGGGACTATTTAATGAAAATGTTTCAGTTGAGCTATGTGAAGCTATCAA +TAAAGGATTTGTAATGCTTCGTAAAATGGTAATGGCGGCTAAGAAGGTAACATCATAATG +CAAATCACTATGGATAAAGATGAATTTGATAAAGCTATCAAAGAAGCTACTATAAACGGA +AAGGCACTTGCACTCGACGAATTAAATGAAGTTATGACAGCGTCATATGAAGATATGACG +AAGGGGTTTTTCAGTCGAGTTGGACGAGAAGCTAACATCGAAATGACTCGGCGCTTTCTG +AACATCATCAGAGAAAGGATTAGAGCTCTATGAAGATTCTTTCTGATTGGGAATGCAAGT +ATTGCGGAAGCGGACTTTTGTTTGCGGGTGGTATTTGTCCTAACTGTAAAATGAGGCAAG +GATAATGGAACTTAAACGTAAAACCATTGAGTTGATTGAAGTTTCGCCTGAATTCTGCTT +AGATACACTTTACGCTAATACTGCTTATGACACCCCGTTTGTTATTGAAGACAAATACGG +GATGCTTCATACCGTATCATTAGCATGGTTCCAAGGTCGTTATAATAATTATCGCTTTGT +TATTGGTGATGATGTGTTCAATGACTTATCTGATTTTGTTCATCCAAAAGATGTTGCTTT +TATTGAGGTGCGATAATGCTACAATTAGTTTATGCAGTATCTCCGACACGTTCGGTTGAA +GGTCAAAATGAATTAGCTTTTGGCCTTGATGATGGTCTACCATGGGGTCATATTAAGCAA +GACCTCCAGAACTTTAAAGCTCGCACCAAAGACACTATATTGATTATGGGTGCCAAAACA +TTCATGGGATTTGATGAGCCGTTGCCTGGACGCAAGTCAATTGTTGTCCAAGATATGTCT +CGTCCATTAGCTACTGCAAAGAATGGTTTCTTCGCCGATGCTTATGTAAGTGAACTTGAA +TTCACTGGGTTCTTGGGCGGCGATATCATGACAGCCAAAACTTCATATAGTCAATACCTG +ATGTTTGACCGTGATAAAGACTATTCTGTTATTGGTGGAATTGAGCTTATTAAGAAAGCC +ACTCCACACGCTGACCGAATTATTCAGACAACTATTCGTAAAAAGCATCGGGTTAATTCA +ACAGTGCAATTTCCTTACGCTACTTTCTGGTATCCACAAGAAGAAGCCACTGGATTTAAG +TTGACTGAAACTCACTGGTGGGCTATCGACGAATTAACTAATATCTCTGAATCGGTGTAT +GTAAAATGAGCCAAGTAAGAATCGCTTTAATCAAAGAAGATAAAGTTCAAAAGAAATGGA +CCGGTGCCCAGAAGAACATGGTTCATGGAATTTATGAGCTGACTTTTCCTACAAGTTATC +TATGGGTCTGGCAGGGTGAATCTAATTTGACTGTAGTTCCTGGATTTGGGCAAGTTGAAT +TAGGCCGTGATATGAAAGATGTTCTTAATGCTATCGAGACTGGAAACATCCAAGTTAAAA +ATGGCATTACAACTATCATTGGACGCTTTGCTAAGAAAGGCGGAATTTTATTCTTCAATC +CGGAGACAACTCGTGGCGAAATTAGTTTTAGTTAAAGCAGATGACACTCAAAAGAAATGG +GCTTTAGTCCTTCAAGGAAATCCACCTACTATTGAGCAGTTCAAACGCGACCCATCCAAG +TATACTGCTTTACCACATGGATTTTATAATGTACAAGCTCCACTAGAAGATATTTGGGAA +GCTGATAACGGAGTTCGAGTTAAGAGCAATGAAGGCGTATTCAAGATTCTTTACTCTAAT +CGCAAATTCATTGACTTTATCGACCAAGGGCTAGTCAAAGTCTCAGGTGGAGTTCTTAGT +ACTACTGGGCGTTTCGATAAGCGTGGTAGTGAAATTCTCTTTATTGTAGGTAAAGAATGA +AACAATACCAAGAACTAATTCAACATATTTTTGACAACGGATATGAGACTGATGATCGCA +CTGGGACAGGTACTATTGCAGTCTTTGGTACTCAATTGCGTTTTGATTTGCAAGAAGGAT +TCCCGGCAGTTACTACGAAGAAACTAGCATGGAACGCTTGTCGTTCTGAATTACTTTGGT +TCCTTCGTGGGTCAACTAATGTCAATGAACTTCGTCAAATTCAACATGGCTCTCTTATTG +AAGGGAATACCGTATGGGACGATAACTACAATAATCAAGCAATTGATATGGGTTATTCCG +GCGGAGAACTTGGTCCAGTTTATGGTAAGCAATGGCGTGATTTTATGGGTGTTGACCAAT +TGAAAATGGTCATTGATCGTATTAAGCAAATGCCTAACGACCGCCGTCAAATTGTTACAG +CCTGGAACCCAGTTGATATTCCAAAGATGGCTCTTCCTCCATGTCACATGATGTATCAAT +TCAACGTTCGTAATGGATTCTTGGACCTTCAATGGTATCAACGATCTGTCGATGTTTTCT +TAGGACTTCCATTTAATATCGCATCTTATGCCGCTCTGATTCATATTATTGCTAAATGCA +CCGGGTTAAAACCAGGACATCTGGTGTTCACTGGTGGTAACACTCATATCTATGTGGATC +ATATCAGTCAATGCAAAGAAGTTCTCCAGCGTGACCCTCTGGAACTCTGTGATATACAGA +TTAACGGTCTGCCGTATAAATTTAGACATCTTTCTACAGAAGAACAAATCAATCGAATCA +CTAATCTTCGAGCTAAAGATTTAATGCTCATTGATTACAAATCTCACCCAGCTATTAAAG +GTAAAATGGCTATATGAAAACTGTATTTGTGAACGAAAACCGTACTAAAGAATTTGGCGC +AACATTAGAACAAATTAATCCAATTCACCTTGTGGTTGGCTCAAAAGTAATGGTCGATGG +TTGGTTCTACATCGTAGATGATAGTTTTGTTTCTGTTGAACATAATAAAACTCCAGAAAT +GGTAGTGGTGGTCCACAAGGCATGAAACTTTGTCGAGTCGTGAATAAATATAAATCCGAT +TTCGACGTAAATATCCAACGTGGTACCATGTGGGGCAATGACGTTGGTAAAAATGCTGGC +AGCCGTGAGGCTGCCATTGAAGCCTTTAAAGAGGACTTTATCCGTCGCATTCGGTCAGGA +GAAATAAAACGTGAGCACCTAGAAACTCTCAGAGGAATGAGACTAGGTTGTACATGTCAC +CCGCTTAATTGCCATGGTGATATAATAGCTCATATAGTTAACAGACTTTTTAAAGACGAC +TTCAGAGTAGAGGATTTATGCAATTAATTAAGTCATCAGGAATTGGACAAGATTTTATCC +CAGAGAAATTCATCAAGGTTTTGTCTTGGGCAGCTAAAGATACAAACGTAGACCCATATG +AATTGTATGAGCAAGTTAAGCCTCATATAGTTGACCGTATGACTACAAAAGAATTACAAC +GAGCAGCTATTAAAGTTGCGGCCAACCTCATCACTGTAGACGAACCGGACTATCAATATG +TTGCTTCTAATCTGGCTATGTTTGCGCTACGCAAAGAAGTGTACGGACAATTCGAGCCAC +CATCATTTATCGACCATATTTCTTATTGTGTTAATGAACGCAAATATGATCCGGAATTAT +TGTCCAAATACAGCGCAGAAGAAATTACTTATCTGGAATCGCGAATTGTACATGACCGAG +ATTTCGAATTAACTTATGCTGGTGCGATGCAGCTAAAAGAAAAATATCTGGTCAAAGATC +GTTCAACCGGAAAGATCTACGAGACTCCTCAATTTGCTTTTATGCTGATCGGTATGGCAC +TGCACCAAGAAGAAAAAGAAAATCGTTTAGCTCATGTCATTCGTTTTTATGATGCTGTGT +CTACTCGACAAGTTTCTTTGCCTACTCCAATTATGGCTGGTGCTCGTACACCTACTCGTC +AATTCAGCTCTTGTGTTGTTATTGAGGCAGGTGACTCACTTAAGTCAATTAACAAAGCCA +GTGCAAGCATTATTGAGTATATCAGTAAGCGTGCAGGTATTGGCATCAATGCGGGGATGT +TACGTGCAGAAGGTTCAAAAATCGGAAATGGTGAAGTCAAGCATACTGGAGTTATTCCTT +TCTGGAAACACTTCCAAACTGCAGTTAAATCCTGTTCCCAAGGCGGAGTTCGTGGTGGTG +CCGCGACATTGTACTATCCAATTTGGCATCTTGAAGTCGAGAATCTACTCGTACTTAAAA +ACAACAAAGGCGTAGATGAAAACCGTATTCGTCACCTGGATTATGGTCTTCAAATCAATG +ACCTGATGATGGAACGTCTTGGTAAAGATGAGTACATCACTCTGTTCAGTCCAGATGTTT +CTGGCGGTGAACTGTATGATGCTTATTTCCGTGACGAAGATTTGTTCCGCGAGTTGTATG +AAGAATTAGAAAAAGATCCTCTGATTCGTAAGAAGAGAATTAAAGCCACTGAACTGTTTG +AATTATTCATGACAGAACGCTCAGGTACTGCTCGAGTTTATCCAGCATTCACTGACAACA +TGAATAACTACACTCCATTTATTCGTGAAACTTCTCCTATTAAGCAAAGTAACCTCTGTT +TAGAGATTGCTCTTCCTACCACGGATGTTGGTTCTAAAGATGCTGAAATTGCTCTGTGTA +CTTTATCTGCTTTCGTCTTAGGAAACTTTGATTGGCAAGACCAAGACAAGATCAACGAAT +TAGCTGAAGTTCAGGTTCGTGCACTTGACAACCTCTTGGACTATCAGAGCTATCCAGTAG +CTGAAGCATTGAAAGCTAAAGAACGGCGAGCATTGGGTGTTGGTGTAACGAACTATGCTG +CTTGGTTAGCTGATAACTTTGCTACATACGAAGACGCTAACGATTTGACTCATGAATTGT +TTGAGAGATTACAATATGCACTTATCAGAGCCTCAATTAAACTCGCAAAAGAAAAAGGAC +ATTGCGGTTATTATTCAGAAACTCGTTGGTCTCGAGGCGAGTTACCTATTGACTGGTACA +ATAAAAAAATTGACCAACTCGCAGCGCCAAACTATGTCTGCGATTGGGAAGCATTACGTG +CAGACCTTAGGACTTACGGAATTCGTAACTCAACTCTGTCCGCGCTCATGCCGTGTGAAT +CATCTAGCCAAGTTTCGAACTCCACGAATGGTATTGAGCCACCACGCGGGCCGGTAAGTA +TTAAAGAATCTAAAGAAGGTTCTTTCCGCCAGGTAGTTCCGAATATTGAGCATAATGCTG +AACTTTACGATTATGCTTGGCTGATGGCTAAACGTGGTAATAAAGGTTACTTGACTCAAG +TTGCTATTATGACAAAGTGGCTTTGCCAATCAGCTTCAGCGAATACTTATTACGATCCGC +AGAACTTTGCTAAAGGTAAAGTTTCGATGGCTCAGATGCTTGAAGACTTGATTTACTTCT +GGTTCTTCGGTGGTAAGACTTTGTATTACCATAACACCCGCGATGGTTCAGGAACAGACG +ATTACGAAATCGAAACTCCAAAAGCCGATGACTGTGCCGCTTGTAAACTATGATATAATT +GGACCACGGATGGTCCTAGGAGTATATTATGGGATGTAAAGAATTCACTTTACCGCCTCT +TCCAATTCAACCACGGCCAAAGCCGCCAAGGGTTCCTAAATGAGTACAGTTTTTAACACT +AAACCAGTTGACGTATTGAATGAGCCAATGTTTTTTGGTTCAGGTCTAGGTATTGCTCGA +TACGATATTCAACGCCATAAACAATTTGAAGACCTAATTGAAAAGCAGTTGAGTTTCTTC +TGGCGCCCAGAAGAAGTTAACTTAATGACTGACCGAGCTCAATACGAGAAGCTTCCAGTT +CATCAGCAAAATATTTTCATCAACAACTTGAAGTATCAAAGTCTGCTGGATTCAATTCAA +GGTCGTGCACCTGCTGCTGTCTTGTCTGCTCTGATTAGCGATCCTTCTTTGGATACTTGG +AACCAGACTTGGACGTTCTCTGAAACGATTCACTCTCGTTCCTACACTCACATCATGCGT +AACTTGTTCAATGATCCAGCAAAGATCTTTGATGAAATTGTTCTTGATGAAGCAATCATG +AAACGTGCCGAATCAATCGGTGTATATTATGATGACGTTCTGAAGAAAACTCGTGAATGG +GAAAATGCTAAAGAACGGTGCTTTAACCCTGACAACTATGAAATCGTTGATGCAAAACGA +GACTTAATGAAGAGTCTTTATCTCTGTCTTCATGTAATCAATGCTCTTGAAGCTATTAGA +TTTTATGTATCTTTCGCTTGTACTTTCAACTTCCATAAGAACATGGAAATCATGGAAGGT +AACGCAAAGATCATGAAGTTCATTGCTCGTGATGAACAACTTCATCTGAAAGGCACTCAG +TACATTATTCGTCAGCTTCAACAAGGTACCGACGGTGAAGAATGGGCTGAAATTGCTCGT +GAATGCGAACAAGAAGCAGTTAACATCTTCATGGAAGTTAACCGCCAAGAAAAAGAATGG +GCTGTTCACCTCTTTAAAGATGGCGGATTACCTGGGCTGAACGTTGAAATATTATGCAAC +TTTATTGATTACTTGACCATGTCTCGTATGAATAGCTGCGGATTACCATGCCCAATCAAA +GATGCTCCTACTCGTCATCCAATTCCTTGGATCAGGGAATATCTGAACTCCGATGCAGTT +CAATCAGCACCCCAAGAAGTTGAAATCTCTTCTTACTTGGTAGCTCAAATCGATAATGAC +GTCGATTCAGAAGTAATCAAATCTTGGAAAAAATACTTTTAAGGTGAGGGCTTCGGCCCT +CTTTTCTCATGAAAGATATTGCTAACGAATTTTCATTTATAAAATATGCTCAACTGGAGC +TTCTACCAGATGCGACTATCGCTTTAGTAGAAGTGCCCAACAAGAAGAATGTAGTATATG +CTATTTCTGTTGATGATATATTAGTCTACATCGGAAAGACGAAAGACCTTCGGAAACGTA +TCAACTACTACCGGACTGCTATCAACAGAAAGGACCAGACATCTGATTCAGTGAAGTCTG +CTAAAATCCTTGAAGCTTTGATGGAAGGCAAGAAAGTAGAGTTCTATGCTCGGCAGTGCT +TTAATTTGCTTATTAACAATGAACTTGGGCAGATGTCAATTTCCACAATGGACCTTGAAG +AGCCGATGTTTATCAAGAAATTTAATCCATGCTGGAATACTCAGCATAAGAGGAAAACAA +AATGAAACTTGAATTAGACCCAGATGTTCGCCCAGAGTTTTTGTCATACACAAACACATT +CAAATCCAAGTATGGCGAAGTCGAAGTGTGGGCTTGTAAAACTTCTGAATCATTTGGTAT +TAATCAGACCAATGATAAAGCAGACGAAGACATTATCGTCATGGATAAGTATGATTTGCT +TAATCTACAGAAACTCGTCAATCATGCAGTAGAAATTATGGAAGGTGAATAATGAAACAA +CTTTATGAAAATCTGATGGCTCTATGTGATGCGAAGGATGAATCCAAATTCTACTTCACT +GACGATGTGTCTCCATCTGGCAAAGAATATCGAATCTTTAGCTATAACTACGCATCTTAT +TCTGATTGGCTACTGCCTGATGCTTTAGAATGCCGTGGCATTATGTTTGAAATGATGAAT +GGTAAGCCAGTTCGAATTGCTGCTCGTCCAATGGAAAAATTCTTTAACTTGAATGAAACT +CCATTCACAATGAATCTTGACCTCTCCAAAGCGCAGTACATGCTTACAAAGGCTGATGGT +TCTTTAGTTTCTTCATTTATGGACGGTATGCTGCTTCGTTTCAAATCTAAAAGCTCCATT +AAATCTGAGCAAGCTCATGCAGCAACTGCGATTTTGACGATGCCTCAACATGAAGCCTTA +TTGGCTCGTTTGATTGATCTTTCTAATGAAGGTTTCACTGCTAACTTTGAATACGTAGCA +CCAGACAACCGAATCATTCTTCCTTATCAAGAACGCGAACTTATTCTTCTGAACATTCGT +GATAATGATACTGGCGAGTACGTAGATTACGATGATATCTATGCAGATGGTGTTCTTCGT +CGGTACTTGGTAGAAGCTCAACCAGTTCCAGAAGGTGATTTTGTTTCTGATATTCGCAAG +ATGGAAGGCATTGAAGGTTTTGTCTTTGTGATGGAAGATGGTTTACGTTTTAAACTCAAA +ACTGAATGGTACTGTGCGCTGCATCATACCAAAGATTCTATCACTAAGAATGATCGTCTT +TTTGAAGTTATCGTAGCGAATGCTGTTGATGATCTGAAAGGTATGTTCGTTGATGATCCT +TATTCTCTGAACAAGATCAACGTTTTCGAGCAGATCTATGTCAATTATCTTGGTGCTGCT +TTTGAGCTGTGTCATGAAACGTATGCTGCTCTTCGTGGTAAAGATCGCCGCGAGTATGCG +ATTGAAGCACAGACCATCGTGAACAAAGCTCAAATGCCAATGTTGTTCAGCGTTATCATG +GCGATGTACACTGGCGGTTGTGACAACGAGAAGCTTCTGGCAGGTGTCAACAAAATGTTC +CTGAAAAATCTCAAGCAGTTCATTCCAACAGAATACATCTAACCGTTTACATCTCCATTT +GGTTGTGTTACTATGATTCTACACCAACCAAATGGAGAAACAAAATGATCAATCAAATCA +CCGCAAACGAACTGGTAGAAATCTACGAAGGTACTCACCACGATGATATCCGCGTATTCA +AAGGACATCGTCCTATCGGATACATCACTGATCTTCGGGTTGCATACTCTCGTGATCAGA +AACGTCAGAAAGCTCGTAAAGAGTACACTAATCGTATCAACGAAGAACGAGCTGAGAAGA +TGCCAGAAGCTGTAAATGAAATGGTTGACTTTTTGAAAAACAACCTTCGTATGAATGCTG +ATGTAATGATTAACATCTCTCAGCCTAACGTTCATGTAAATGGTTGTAAGTGTTATGTAA +TTGTTGACCCGATTCGTGGTAAGCATCGTCTGGGTGTATCTAACCCGAACAGAACCGCAA +GTGAAATGGCTTTAGATGTTGATCCATCTTTCAAAATCCAAGAATCTCCTGCGGAACATC +ACATCTTAATTAATGGTCTTTCGCAAGATGACATCGTGGAGCATATCCGCAAATTATGTT +CAAAATAACAAATCTTCATGTAGCTGCAATAGCAATATTACTTTTTGGTTACGGAGTAAT +TAAGGTTCAATCTGCTAGAATTGATGGGTTGAAATCTGATCTCCAGACTATTCAGCAAGT +TGCAACTCAACAGGGAGAAGCAATTAAGCAGCTTAAAACTGATTATTCTATCATCATGAA +ATATGATGAACAACGTAAAGCTAATCGCGTTGAAGCTGATACTTCGAACGCTAAAATGAC +TAAAGACTCTAAACGAGAAAATGTGGTCAAAGCTAAGCCGAAGTTAGTTGAAAAGCAAAT +AAATGAGTCGTTCAATAAGTTCGCTCTGGATTTACAGGAGACTACTAGATGAAATGTTTA +GTGGCAGCTGTAATAACTATTGGACTACTATCTGGATGTTCCCAGAGTGTACCAGAGGTT +CCCAGAGATAACACTCTACACCCATCATGGCCTGATCCAATAAAGCCATACACTGGTAAG +TGGCAAGTCAAGATAATTGACGATCGCCCATGGGTTGGAATGCCTTTTGATGAATCTCAG +GAATTCAGAATTTGGATGAATGACGTCAATCGTTGGGCTAAAGATGCTAATGGAATGATA +TGTTACTATCGTAAAGACTTAAACGAAACAAGGTGCAAGCAATGGACCCGTTAGTAGTTC +TAATCTTATGTGTAACATTCTACAAAATTTGTAAACTCGCAATGCGATAAGGGCTTCGGC +CCTTTGGATAATAAAATTTTAATGAGGATATCATAATGACTCCACGTTCTAATGTATTCG +TTAGCAATCCGGAATTATTGACAAAAATTAAAAATGAAGATCTCCGTAACTGTATGAAGG +TCGGTTCCTGCATCCGTACTCCGCTGGAAAAGAAGACCACGTTCAATTATTCTTGGACTG +CCACTGAATACGGAGATCGTGTTATCAGTGTAACATGTTATCAACCTAATTCTAGTATTC +CAGGAAAAACGTTCATGGTTGATATTGCTCTATCAGACTGGTATCGTATTACAAGCTCTG +TATCCAATTTTGACGAATGGCAGGAAGAAGTAATTGAGAAAGACCGCATTCGCTCAGTTA +TTAAGACCTTTGAAGAAGCAGCTAAAATCCATGGTGCTCGTCAGGCTACTGTAGCAAATG +GATTTATCTCTGACGCAGAACCATTCTTGCGTGAATCTGGTAAGATTCTGAAAGATGCTC +GAGCAGCTCTTTATGAGGAATTCGATGTATGACACATTCAATTCTTCGTGCATTACTGAA +AGAATACTTTTCTCGTAGTACTGCATATTGTTACGCTTTACACGAAGATGATAAAGTAAG +AACTGGTTCGTCAGCTGATACAGTTAACTACATTGCGAATGATATGGAGCTTCAAGCCGA +TCGGGTTATTGCAGCTTTTAAATCATTTGAAAACGGCGAACTCATGTATAAAGCATTTAA +TTCAATAATGATTCGTCGAATATGGGCTCATGGTGCTTATATTAGTAGCGTAATGCGAAA +CCGTTCTTCACAATGGTCTAAAGATGCTAAGAAAAGACTTGATGACATTGACGAACAAAT +TGATATTATGATTGGGGTATTGAAATGACTCAAACTGAACATACAAGATTAATTCTGTTG +TTTAACAACTATGAGCAACTTAATAATCAGATGAATTTAGCTTATGCAACAAATTCAGAC +CGTGCTGCGTATTATTATACCGAAGCTGCAAGGGCTCGTGAAGCTTTTATCAAGAAACTA +GCGGAACATATCGAATGAAAAAGTTAATCTTAACTCAGGGATGCCCAGGCTCTGGTAAAT +CTACTTGGGCAAATGAATATGTTCAGAAAAATCCTGGATTCTTCATCCTGACCCGTGATG +ATTTCCGTGAAAAGCTTTTTGGGCTAGAAGCTCGTAATCAATATCGTTATAGTAAAGCAA +AAGAAAAAGCAGTATCAGCTGCTCAATTTGCTGCCGCTGAAGCCTTGCTTAAAATGGAAA +CGACTAAAGGCATTATCATTGCGGATACGAACTTGAATCCCTCGACTGTCAAAGCTTGGA +ACGATTTTGCTTACAATGCATTCAATGACGTGTTGGTGGAAATTGAACGTTTCGACGCTC +CGTGGACTGAACTTCTAAAACGCAACCAATATCGTGGTGATAAAGCAGTTCCGATTGATG +TTCTGCGTAGCATGTATAAACTATCAAGACCACATGATGTTTATGTCCCAGATGAGTCTT +TGCCAAAGGCAGTTATTTTTGACCTTGATGGAACATTAGCCGACAATGATCATCGTTCTC +CTTACGACCTTGAGAAATGTGGTGAAGATGCTCCAAAGAAAATGGTTATTGCTCTTTTAG +AAATGCTTCGTGCTCAAGGGTATAAAATCATTACTGTATCTGGTCGAGAATCTGGTACAA +AAGAAGATGCTACAAAATATCGTCGCATTACTACAGAATGGCTTGATAAGCATACTAATG +GCTCAGATGAGCATTACCAACGAGCACAGGGTGATTCTCGTAAAGACGATGTAGTGAAAG +AAGAAATCTTCTGGAATTGTATCGCTGATCGTTTTAACGTGCAATTAGCAGTAGATGACC +GAGCACAAGTCGTAGAAATGTGGCGTCGTATTGGGGTTGAATGTTGGCAAGTTAACCATG +GAGATTTTTGATGGCTGCTTATCATGAAGGTTGGGCTTTAGTCAGTCCTAAAACTAAATG +CATCGCTCATATGGATGACGGCCAATATGCTGTCTTCACTGAAGAAAAATATGCAATTTT +AAAAGCTGCAGAAGTTCTTAAGCAATATGGTAAAACCCTTACTATCCGCCGTGTCAAAAT +TCCTTTGCCTTGGAGCATGTGATGTATAATAAACACCATGAAATTGAAGAAGAGGCTTAT +GGGCTTCTTCGTAAACTCGTCGGTGCAAAGCTCGATCCTGCATTGATTAATGCTCTTGCT +GAAATTCGAACTGATATCAATACTCGATACAAAAACGAATATCATGTAGAGTTCAAGCCC +GTTGGTGAAGTAGTAACCAACTTCGTTGTTAACGTTAAAGTACATACGGTGCACTAATGA +TTACAGTTCAACCTCACGCAAATACTTTGCAGAACGTCGCAAAAATGGTAGTAGCTCAAG +TAGTGGATAACTTTATGTTCACTCCTAATACTGCCGCGAATCAAATTCTTATTCATTCTG +ATATTGTTGCTGTGATGAATATTCTATGGAAAGATACTGATTTCAGAGTAGTTCCTCATT +TCGATAGCTTCGGCTTTACATTTGACTTCACTATTGATCCTGGAACTCCAAATGCTTTCG +GTTTTTCCGTGAAATATTCTTGGGATAATTCAAATGATTTCCGACAAGAAATTTAAGCAA +GAAGAATTTGTGGGTCTGTGTAAAGATTTTGCACAAGCCTGTGCTAATAAGAAACCTGGG +TCTCAAGTAACGTTAAAACAAGACCCTTTAGTGAATGGAGTGATCGTCACAATTAGTTAT +AATGGTAAACAAAGCCATGTGTCTTTGACATTAACTCGTGATGGCTCTGTTAAAATGGAA +ACAATTCTGGGGTATGTATGATCACATTAGTAAGTGCAATTGAAATGATTCGTGAAGCTG +AGTATCAGCATGTTGGTGACAAGTCTTACTTCAGTGAAGAAGGTGTTTTGGATGTTAAAG +CACTACTAGAGTTTGATCGTTGTTTCCAATCCGTTCCAAGTGACACGTATGATGCGGTTA +TCTTGTCGTGCAAAGATTTAATCAACGTAAATGCTCGTGGTTTAGGTGATGTCTTCTTTG +ATAAAAATAAACGCTTTCCAGATGGTTGTTTTATAATTACTTCACAGGTACTGGCAGTTG +AACAACTGTTCAGTGAAATTTATCGAGTTAAAACTAAAAACTCAACTTATCTGGTGATCA +TGTAAATGAAAGCATCCACGGTCTTACAAATTGCTTATCTGGTTTCTCAAGAATCCAAAT +GCTGTTCCTGGAAAGTGGGTGCAGTGATTGAAAAGAATGGACGTATTATCTCTACCGGTT +ATAATGGTTCACCTGCTGGTGGAGTTAACTGTTGTGATCATGCCGATGAAAAAGGTTGGT +TAGTTAAAAAGCCTTCATCTGGTTTACGTCAAGATGGTCCTATTCCAAAATATGGGTTAT +CTACTAAATTTAGAGCAGAGCATAGTGCTTGGTCTTCTGTGAATGAAATTCACGCTGAAC +TAAATGCTATTTTATTTGCTGCTCGTAATGGCTCTTCAATTGAAGGTGCTACGATGTATG +TGACATTATCACCATGTCCAGACTGTGCTAAAGCAATTGCTCAATCCGGTATTAAGAAGT +TGGTGTACTGTGAAACATACGATAAAAACAGAGAAGGCTGGGATGATATCCTTCGTTCGG +CTGGAATTGAGGTATTCAACGTTCCTAAGAAAAATCTTTCGAAGTTGAATTGGTACAATA +TTGATGAATTTTGCGGAATAGAAGAATGATTACTTTTGAAAAAACTCCTGAAGTCGTAGT +GTCAGATATGACTGAAGAATTTATCTTTACGATGGAAGCAAATAATATCCGCTGCATTAA +AGTTCAGCCAACGTTTGTTATTGAGCATCTTGAAAAACAATTTGGCTGTGAAATCCTGTC +AAAATCTACTACTGATTATGACTATGTAATTAAACAGTTCGTTGAGCTTAAGCCTGAGCT +GGTGTTGGTTCGCGAAGTTAAAGAAGAATGTATTGGTGATGATGTCCGGTATATCTTCCG +TGTTGACTATATCAAGGTTAAAGCATGAAAGCTCGATTAGTGCAAACTACTGGATTGAGC +TCTTACGGTGATATCAACATTTCTTATGCGGTCGAATACAAAAAGGGATTTTTCTCTAAA +TGGAAAACTCTTTATCAAACCGATTATGTTGATAGCACAGATGAAGTTCGCACAACCGAC +CGTCGTTCAAAATGTGAAAAACTTCTGAAAGCATTAAAAGAACGTGGTGCACATAAAATT +AAAACTGTTATAGGTGAATAAGATGAAATTAACTAAAGATCAGAAAATCCAAGTTCGTGA +AACATTAAAAGCAATCCTGTCTAATGGTGAATCTCAGGTAGTATTCGAGAAAGCTGATGG +CACTATTCGTTCCATGCGTTGTACTCGTGATAGTGATTCAATTCCGAGTGATCTGGTAGA +AAGCACTGTTAAACCAGCTCGTGCCGAATCTATTGATATGCTGCCAGTGTATGACACTGA +AAAAGAACAGTGGCGCGGATTTAGCTTTGAAAAACTGATCTCTGTGAATGGTGTTAAAGT +TGAGCATTTGATTCAACTGATCACTCATTAATTGCTTAAAGTAGAGCATGATAATATTAA +TTCATGCTCTTAAACATAAAGGTTAAACATGCAGTCTTTACCAATTAAAGCTTTAGGTGA +ATATGTAATCCTCGTTTCTGAACCAGCTCAAGCAGGTGATGAAAAAGTTTCTTCTTCTGG +AATTTTTCTAGGAAAAGAACACCAAGGTCAATTACCAGAAATGTGTGAAATCTACGCAAT +CGGTGATGATGTACCAAAAGGATTTGTTGAAATTGGAGATTTTACTCCTATTCCAACAGG +AAGCATTCGAAATGTCGTTCATCCTTTGGTTGCAGCAGGGCTTAAGCAACCGAAGGAAAT +CAAACAAAAATTCGTAACTTGTCACTATAAATCTCTTTCTTGCGTTTATAAGTGATATAA +ATATCATTATGAAGAACTGGAAAACTACCTCCAAAAGGACATGACCAGTTATTCTATGTG +GTGGATGCGCAGCTTAAACGCTGGTACCGTCCACCAAATTTTCACCTCATTTGAGGAACG +ACATATGTCAATGCACAAACAACTCGAACACGCTCTTACCCTGCAACGTACCGCTTGGAA +TGCAGGCCACGAAAACTATGGCGCATCTATTGATGTTTACGCCGAAGCATTGGAAGTTCT +TAAAGGTTTCAAACACCTGAATCCAGTTCAAGCCGATCTTCGTGATGCGTTGGTCGAAAA +AGACGAACTGAAATTTGCAAAATCTCTTTGCAGCTCAGCTCGTAAAGCTGTTCGTCATTT +CGTAGTAACTCTGAAGTAATATTTTATAAGCGGCAAGTGCATGCTACCCCGAGGCGATGG +CCAATCGGGAGTACGCCTCAAGGCCTATACATCCATCGGTGTATATCTTATCCTCGAGAA +ATCGGACCCGGACCCTTTAAGCTAACGGTGTGCAACAGATAAGAGCGTATGAAGTTAAGG +TTGTGAGAGCCGGAACGTTAAATATAAAAAACTCACAAAGTACCCTTTGAGGGCTTGCGG +GAGCTACAACTGAAAGAACTGTCGAAAGAAGTTGAAACTCAGAAGAACGTGCTCCCATGT +ATTTCTCCAAAATGGAAGATCATAATGTCAAAAGCTAAAAAAGCAGTTAAATCAGTAAAA +GAAGTTGTTGGTACCTCTAAGCGTGCTGGTTACAAACGTAGCACTAACGCTCGTATTGAT +AAACTAGGCGATCAGCTGGCTTCTCGTGCCCGTAAGGTTCTGGCACATGATGCGGCTTTT +GGTAATCCACGTAAGAAAGCGTAAAGCATAAGTTAGGGACTCCTTCGGGAGTCCCTTTTT +TGTTTGTACTCTGCCGTTTACATCCGTTGAAGAACGTGTTATAGTATAAACTCAATAACC +AATACGGTATAATGGAGAATAAAATGAAAATCAATCTGAATGCTACAGTAAAATGTAAAG +ATCACGATGGATATAAAGCTCAGACAGTAAAAGAACAACAATGGATGCTTACTAAGCAAC +AATTTGAATTTGTGAACTGTATGACTCCTGAAGGTCCATCAGATGATTTCTCATGGAAAA +TTATTCTGATCAACTTCTTCACTGGTGAAGAATATGAGTTGAACACTTTGATTCTCGGTA +AGATTCGTTGTGAAACTTATGTCGATGAAGAAGATGGTTACTCGGAAGATGTTACGTGGT +ATCAAAATGGTCGAATTACTGCTGATAATCTGATAGAAGCAATTAAAGCTAAAGGCGTAG +TTGATCTCACTTACTGGACTAAAACTAAATGAGGCCTTCGGGCCTATTGAGGAAAGCATG +AAACTTGATTTAAGCAAAGAATATGTTCTGGATAATGTTAGCGGCTATCTTTATGATAAT +GGAAGCAATACTCACATCAACAATGAAGTAGTTAAGTTCATTGGTGACCGTAAGTTCACG +ATTAAGACAACTGGTTACAACCGAATTGATGGGATTTCTTTTGATAAAGGCGAAACTTGG +GTATCGTTGAAAGATATCTCAGAACAAGCATCAGTGTATGGCTACATTTTCTCTGCTGAA +GAGATCGACCGTGGAGCTATCAAAGTTGCTCCAGAAGATAAAAGTGTTCGTGAATACATG +GTGATCTACACTGACGAAGATGACATTCCGAAGGTTGCTTACAGTGGTTCAGGGAATATG +TTCACTGAAGAAGAGGCTAAGACTGCTTCACTTGAGCTGTTCACCGAAGGCTACAAAATT +AAGAACGTATTAGTCGTTAAGAAAGCGTTTGAAGCTTTGTCAAAAATTGAAGTAAGTTTC +GTCTAACCGTTTACATCCTCCGTTGTATGTGTTATAGTATAAACTCAATCAACAAACATA +CAACGGAGAATAAAATGAACTTTACTAACTTTAATCGCAAATATGTTCAAGGCGCTTTTG +ATAGCACGGTTTGTCTTTGGGAGCATAAAAACGGCACAGTTTGCGAAATCGATATGTACT +GGACAGACAACTACGTCTACATCAACTTTGAAAATGGAATCACGTTAGATATTTCTTTTA +AAGGTTCAGTCATTAAGGTTGGTTTTCATGATGATGTTCGCACTCGCGATTTAGGAACTC +ATCCGTCTTGGAACGGAGATAATCGTAGAACTCTGGTTAAACTTTATCTTCGCCATATCT +TAGGGCAGAAGACTACTGAAGAACAGCGTGAAGCAATTTGGGACATCGTTTCAAACGAAT +TAAAATTTTAATCTTAAACCGGGGCTTTGGCCCCTCTGAGGAAAATATGCAAATTTATCA +ATTCAACGTTGGCGGTTATAAGGAGTTCATTGATGTACATAGTCATGAATTTGTAGCCGC +ATGGGAAAAGAATATGGAAGTAGCCCAAGACTTCATAGTTCTAGCCTATTCAGAAGAAGG +GCATATCACCAAAGTCAAGAATATGAAAACCGGCGAAGTCTTTACCGCTCTTGAAGGTGA +CTTTAAGAATTATTTCGCGTTCTTCTTGGTTCATGAAATTGGTAATGGCAAACAGGTTTC +AGTAGTTCGAGAAATTGTGTCAGAAGAAATGAGTGCGTATGGAATTAATGAAAATGAATC +TGGCGAATTCATTGTAGTCTCCGAGTACAATCAAAATAAAAGTATTTTAGGTCCTTATAA +CTATGAAGAAGCTCTGAAAAAGGCTAAAAGCCAGATTATGCATGGTGCTATTGGCGTGAC +TGTAAAGATCTACAAAGCAGTAAATGAAGTTGAACTCGCTGTCAATGTAAAATCACTCTG +AGGGAAATAACATGATCGTATCTATCGCTAAATCTGTCGCTGCTAAATTTGAACGCATCA +TCGATTGCCCAATGATTGATATCATTGAAGTTCGCGTTCGTAATCATTCGGTTGAGTATG +AAATTGATGCTCCTGATTTCTTTGAATTTCCAGATTGGGCTGTTGTATTATGAAATTATT +CATTGATCTAATGAAGCATCTCTATCCAACATACAGGTTAACATTTGATGTAATTGATGT +TTCACCATCGGGTTGGATAAAAGATCCGTTTCAAGTAACCAGAGAAATCAAACTCTGGTT +TTGGGAAGATAAAACAACAGAATTCCATAAAGTTGAAGAAGCACTTCCTCCGTTTGGTGT +ATGGAGTGATGTTATTCTGAACAAGGTTGAAAAAGTATGAAACCAAAATACATGATATTC +CAAACGGTTCAACTGAAAGGCTCAGGAATTCCCGGAGTAATAAGCGATGTAGCTAATGGA +ATTCCTCGCTATAAAACTCAGCCCGCTTATGAAGTTGATTGGGTTGATGGAACTCGTTCG +GTTCATATGGAAGAAGAAATTTCTCCGATATCTCAATTAAAGGTGATGTAATGCTTTACT +TAGATTTAGATATTATTGCTCAAATGCCAACTAAATCTGGGTATTTGAATCAGCTAGTTA +CAAAGACACTTATTGAAGGTGGAACAGTAGCTTTCACCTCATTTGAGGCTGAATTATCAG +ATCACACAATTAAAATGATTGAGGAAAAATTATGCTTTTACAAGAACCAAAACCAGTCGT +TGCCACCGACGTCGATGGGATTCTCATCAAGTGGCAATCAGGTCTTCCTTACTTCGCGCA +AAAATATGATTTGCCGTTAGATGAAATCCTGAAAACTATCGCAAGTGACTCTTTTGTTAC +TCCAGCAAAATTGTTCAACTGCTCAGAAGAATTCGCTTCTAAATTACTTTTGAAGTACAA +CAACTCCGACTTTATTCGTTATCTGTCGGCTTATGATGATGCTCTTAAGGTAGTTAATGA +GCTCAAGAAGCATTACGATTTTGTTGCAGTAACTGCATTAGGTAACTCAGTAGATGCTCA +CCTGAATCGCCAGTTTAACTTGAGTGCATTGTTTCCGGGAGCCTTTAAAGATATATACGT +CTGTGATTACAATGAATCTAAGGATCACCTGTTAACCCGAGTGTTGGAAAAGTACGGTGA +TCGTGTAGTCTGTTACGTTGATGATCTTGGAAAACATATTGACTCTGCAATTGAAGTTAT +GTCTCATTTAAAAGACTTCAAAACATTCTATTTGCCTCGTGGTGAACGGGATCATTTGCC +ATCTCATTCTGGAACAGCTCATCATACTGTAAAGAACTGGTACGAAATCAAAGATATTTT +GGTGAGTGATTCTTCATCAAAAATGGTTGAACAATTCAAAAAGATGGTCGATGAACTTAA +TAAGCCGGATCGCCCATCCATTTATGATTTTTGGAAACGCCAAGTTCCAATTTTCGAACC +AACCCAGCCATGGCAACGTCCATATCCGAATTATGGTATTGGAACCGGGATTGAATATCT +CATGAATCAACCTAATGCAGTGGTGAATTGTAAAGTATGATTAATGTAATTTTCTGGAAT +CCGCACACTGACAAGACTCATCGAGTAGAGTCTTTCGCTACTAAGCAAGAGTTGTTTGAA +AAGCGAATTACTTCAAAAGCTAATCAGGAACAGCGTGAAATGCAAGATGCGGTATTCAAT +GGAGCTTTGTACATCACTACTCTTCCAAACGGTGGAGATATCTATCACCGAACTCTGGCA +GAATTGATTCGCGAATATAAGTCAAAGATCTAGCTTATCGAGGGGTGTGATATAATAGTC +ATATCCCTAAACACAAAGAGAAAATTATGATTCTTGAAATTATCAATGAAATCGCGTCTA +TTGGTTCAACAAAAGAAAAAGAAGCAATCATTCGTCGTCATAAAGATAATGAACTTCTGA +AACGTGTTTTCAAATTAACGTATGATGGTAAGTTCCAATACTACATCAAGAAATGGAATA +ATCCAGATTGTTATTTACCTAATGCAACACAATTTTCATTGAACTCTGCCTTAGATGTTT +TAGAGAATTTGTTTGCTACTCGTAAAATAACTGGCAATGCTGCTTTAGATAAACTTTCTG +CTACGCTTCAACGAATGCATGAGTCTGATCGTGAAGTACTTAAGAAAGTATTGCTTCGTG +ATCTTCGTTGTGGTGCATCTCGTTCTATTGCAAATAAAGTATGGAAGGGATTAATCCCAG +AACAGCCACAGATGCTTGCTTCATCGTATGATGAAAAGGGTATTGAAAAGAACATTAAGT +TCCCTGCATTTGCTCAGCTCAAAGCCGATGGTGCAAGGGCATTTGCCGAAGTTCGCGGTG +ATGAATTAGATGATGTAAAAATTCTATCTCGTGCCGGGAATGAGTATCTTGGTTTAGATT +TGCTGAAGCAGCAGCTAATCGATATGACTAAAGAAGCTCGTAAACGACACCCTGGTGGTG +TAATGATCGACGGCGAATTAGTTTATCATACTGTTGTTGCTTCGTCTGGTCCATTAGATG +ATATGTTCGGCGATTTGCCTGAACTTAGTAAAGCTAAAGAATTAAAAGAAGAATCTCGTA +CAATGTCCAACGGATTGGCAAATAAATCTCTGAAAGGCACTATCTCTAAGAAAGAAGCTG +ATGGTATGAAATTCCAAGTTTGGGATTATGTTCCATTGGATGTGGTTTATTCTGAAGGTA +AAGAATCTGGATTTGCGTATGATGTTCGGTTCCGTGCTCTAGAGTTAATGGTTCAAGGCT +TCTCTCAGATGATTCTAATCGAGAACCATGTTGTCCATAACCTCGAAGAAGCTAAGGTAA +TTTATCGCAAATACGTTGATCAAGGCCTTGAAGGTATTATTCTGAAGAACATCGGGGCTT +TCTGGGAAAACACCCGTTCTAAGAATCTTTATAAGTTCAAAGAAGTTATCACTATCGATA +TGCGTATTGTAGGGATTTATCCTCATAGTAAACACCCTGGTAAAGCAGGTGGATTCTATC +TAGAATCAGAATGCGGATTAATCAAAACTAAATCAGGTTCTGGATTAAAAGATAAACCGG +GTCCAGATTCGCATGAGTTAGACCGTACTCGTATTTGGGAAAACCAAAATGATTATATTG +GCGGAATTCTTGAATCAGAATGTAATGGTTGGTCAGCAGCTGAAGGTCGTACAGAATATG +TTAAGCTGTTCCTTCCTATTGCTGTTCGTATGCGTCGTGATAAAAATGTAGCAAATACAT +TCGCTGATATCTGGGGCGATTTCCATGAGGTTACTGGGTTATGAGTTATAAAATTCTTTT +AGAAGTTACCGTGATGTCTTCGACTGGACATGTGGCGGTTAGTACTGAACAGCTGGATTT +TTATAGCTGGGATAATGCTAATATGTATTATGAAGCAGTAGAAGTTTATGAAGAAACGCC +AGATATTAAAGTATGGCGTCAAGTAACAAAACTTTATTAAAGCCCTTCGGGGCTTTTGTT +GTCTATAAATATAGTAAACTATAGAGGACTTTTTATGATCGAATTAAATGAAGTCTTCGA +TGAAGGGAAAGAACGTCTAGCAGTTACGAACCTTTATCCGAAGCTCAAGATTCCACAAAT +TTTTGCAATAGACAACACTAAAGTAGCTTATCGTATGTGCTCATATACTGGTGGTGGAGA +TGCAAATAAAAACATCAAACCCGGTGATAAAATGATGCATGTCATTGCATTAGGAGTTAC +TGATAAAGGCCTTGGTCAACTTAAGACCTTAGGTGATAATCCAATTGCTGTTATTGATAC +AATCTTTAACCACGTAATGGGTATCATGAAGTTTTATCGTTTTGACGCTGCTTTATTTCG +TGTTAAAAAGAATAAAACTGGTGGAGCAGGTCGCCAGATGCAAGTTATTGTTGATCGTCT +AATCAAGAAGAAAGGCGGTGGCAAATTCGTTATGCTTAAAGAGTTGTATGATTTTGATAA +GAAATACAACTACATTTTAGTATACAAGAAGAATGCTGATCTTGTCAATATCCCTGGAAT +GACTGAGATCATGGACTCAATTTATAAGAAAGTAGACACTGATGTAGGTGATGCTTATAT +CAACGTTGAGACCGGCAAACAAGTATCTAAGCTTGAAGCTATCGCGGGTTCAATCGCAGC +AGAAAATGATAAACGCTCAGACCAGGCGGTTGCGTCTCGAGCTAAAATATCTCGTCGTGC +TTTAATGGCTTCTCAATATTCAATCCAAGTGGGATTTGATACTCGTAAAGATGCGGTAGA +ACATGATAAGCGATTAGATGTAATTAACTCTAAACCTCCGGTTTATTTGACAGATAAGTC +TTCTGACCAAGTATCGAATATTCAAATGGCTATTGATAATTTCAGAAATGATTCTCAATC +AATTGCTAAAACCGGCGAAGCGTTTAAGACATTTGACCCGTCATGGAAAATGGATGATGA +TCGTCATTCTACTGGTACAATGAAAGCCCAAGAACTTGTTCTAAGGCTCACTAATATATT +AACCAGTGGAACAGTAGACGATTTCAGTCAACATCCTACTGATAGAAGAGAAGCATTTAA +AACATTAGCGGTCAGAGACATTTATCGTATTGGTGAAGCCTGGTCTAAATTAGAGCCTAA +TGACTATTATGGTGCTATTAAAGAACTTACTCGAGTCGCAATGGAAGACAAAGAATGGTC +TTCTGATGCAAATCGTGAATACGCAGTAAAAGAGATTGTAGAATTAATTTCTAAACAGTT +CTCTGATTTAGCAGCTAGCATGTACAAAAATACATCAGATGTGGATCGTTATACTCCGGT +ACAATTGTCAGGTTTACATGCTTACGTCGGTTCATCTTATAAGTACATCAACGACTATCT +TTTAGGCCTTGATGATTATGGCAAAGAAACTGTTGAAAAATGGATTGAGTCTATCGATTC +TGCGTTTGAAAATGGTGTTCGTCTTCCGAAGGGAACTAAGCTATTTCGAGGTCAACATAC +TAAGCGCGAAGCTATTGAAGTTAGTTTAGAAAACAAGCACTTCTATTTCAAGAATTATGT +GTCAACTTCAATGGCTCCTATTATCTTTGGTGGATATGGACGAGCATATGATGCAATGGA +CCCCGCTGCATTGAACACAGATACATCGACTCCTAAAGAAGTGCTTGACTCTGTTTCAAC +TGTTCGGCCTGATAGTATTACTAACTCTGAAATGGGTGAATTGCGTTTAGCGTTCGTTAT +TTCTGGCGCAGAGAAAATAAAGACTATCGTAACCAATGCTGGAATCTCAGGATTGTCATT +TGAAGCTGAAGTTATTCTTCCTCGTGGTACTGTTCTTAGAATTGATAAAATGTATGGAAC +AGCTCAGAAACTTCAAGCTAATGACTACACAGCATCAAAGAGTGTTCTTATGGAATGCAC +TGTAGTATCTCCAGAACAATTATCTGAAACTACAATTTATGATGGCGATAAATTGTTAGA +AGGTGAATTGGTTGAATCTGATTATTCGTTCAGTTCTTTTATTGGTCAATTAAATGAAGC +TAAAGTTGAAACACCAGATTGGTTAGGTGAAGCTCTAGCATCATTTGTTGACATAAATAA +TTTACCAGAACGATTCATAAATTAATATTTTCACATGGACGTGAATTCAGAGAGGGCTTT +ATGGAAATTTTAAACGAAGTACTAGACGAAAGTAAACTGGATTTACCAGTTACGAACCTT +TATCCAAAGACGAAAATTCCACAAATTTTTGCTATTCAAACTAACTCCGAGGGTTCACTG +CCAGCATTCAGGATGTGTTCATATACATCTGGCGGTGATACCAATAAGAACGTTAAACCT +GGCGACAAAATGATTCATGTTGTTATGCTATCATTGAGCGAAAAAGGATCATTAGTTAAG +CTTAAAAACTTAGGCGGCGATCCAATTGGTGTTATCTCTACTACGTTCAATATCGTTTAT +TCAACGATGAAGCAGTATAAAATGGACGCATGCTTGTTCCGAATGGCCAAAAGCAAAATC +GGTGGACAAGCTCGTCAGATGCAGGTTATTATGGACCGACTCGTACGTTCTCGTACTGGT +GGTAAATTTGTTATCCTGAAAGAACTCTGGGATTATGATAAGAAGTACGCATATATTCTT +ATTCATCGTAAAAATGTTGATCTCTCAACCATCCCTGGCGTCCCAGAGATTGATACTGGA +CTGTTCACTGCAGTTGAAACTAAAGTTGGTGAAGTTTATGTTGAAAAGAAATCAGGTCAA +CAAGTAACTAAAGCCCAAGCCGTTGCTGCTTCTATTGCAGTCGAAAACGATAAGCGTTCA +GATCAAAACGTTATTTCTCGTGCTAAGATAAATCGTCGTCAAGCTATTGCTGCTCAGTAT +TCTGTTGATGCATCTAGCATCCAAGGCGATGATCGTGCTGCTGAAGAATTTAAACGCTTA +GAAGCTAAAGTTCCAGTTAAAAGCTCTAAAGGCGCTGAGTCATCAGACATGGTAGCAAAA +GTTAATACCATCGCTGACCGTCAAGGAAATGAGTATATCGGCAAAGTACTAAACTTCATC +ACTAATCCTGAAACATCTCAGGACACAGATGGTAAAGCATTGACTGCACGAATAGGTCAA +TTGCGCCAGTTATCTAAAATGCCTAAAGGTGCCATGTTATCAGGTGGATTTGAAACTGGT +GGTATGAAGTACTACATGGAAAACCAAAAAGAAATGTACAATGAAGTTCGTTCATTTGCT +CGATTGATAGCTGGGGTGAATACAACTAACTCCTTTCAGACGATGAAAGATTTAGTTAAA +ATGGCTTCAGCTGGAACTAGACCTGAAGATCGTGAACAGTTAATTGCAAATTTAATTGGA +TTAGCTTATAAAGAAATAAGTGCAATCATCAGAGATTCATACCAAACTGCAGCAAGTTTA +TCTAAAGAGAATGATCATTATTCTAAAGATGAAAAACAAGCTATCAGTGAATACTGCGCA +AACGCTTTCGAATACGTGAATATGTTCTTAATCGGTAAGCCGGAAGAAGGGTATTCAACT +TCTGATTCTCTCGAGATCATCGATAATATGGACTCTGCGTTTGAAAAAGGAACTCGTTTA +GACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACA diff --git a/test-data/exported.vcf b/test-data/exported.vcf new file mode 100644 index 00000000..bc93f16d --- /dev/null +++ b/test-data/exported.vcf @@ -0,0 +1,5 @@ +##fileformat=VCFv4.2 +##fileDate=20200608 +##source=. +##reference=/data/temporary/apollo_data/25-test_organism/seq/genome.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO diff --git a/test-data/exported_cdna.fa b/test-data/exported_cdna.fa new file mode 100644 index 00000000..490df09d --- /dev/null +++ b/test-data/exported_cdna.fa @@ -0,0 +1,103 @@ +>c7ce0a38-beee-4aa3-8f34-5f35f549f287 (mRNA) 690 residues [Merlin:2-691 + strand] [cdna] name=Unknown +CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG +CATAACGCAGAGAATAAGTTGTTCTATTTCAGAAACTACGTTTCAACTTCATTAAAGCCT +CTGATCTTTGGTGAATTTGGTCGTATGTTTATGGCACTAGATGACGATACTACAATTTAT +ACTGCTGAGACGCCTGATGATTATAATCGTTTCGCAAACCCAGAAGATATAATTGATATT +GGCGCTACTCAAAAAGACTCATTTGACGATAACAATAATGATGGAACATCTATTAATATC +GGCAAACAAGTTAATTTAGGATTCGTTATTTCCGGTGCTGAAAATGTTCGAGTTATTGTT +CCAGGTTCTTTAACTGAATATCCAGAAGAAGCGGAAGTTATTCTGCCTCGTGGTACTCTT +TTGAAGATCAATAAAATCACTACTCAAGTAGATAAACGCTCGAATAAGTTCATGGTTGAA +GGTTCAATCGTTCCGCCTTCTGAGCAAATTGATGAATCTGTTGAGATTTATGACGGTGAT +CTGTTCATGGAAACAGGTGAAGTAGTAAAACTGTCCGGATTCATGCAGTTCGTCAACGAA +TCTGCATACGATGAAGAGCAAAACCAGATGGCTGCTGAGATTCTGTCTGGATTCTTGGAC +ATTGATGACATGCCACGTAAGTTCCGCTAG +>74f8e03d-f003-490c-9eeb-15b3b68763c0 (mRNA) 288 residues [Merlin:752-1039 + strand] [cdna] name=Unknown +ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG +TCTTATGAATTCAATGAAGTTGTTTTCAAAGAGCTTAAGAAAATTTTAGGCGATAAGAAG +CTTCAAAGTACTCCAATTGGACGTTTTGGAATGAAAGAAAACGTTGATACTTATATTGAA +AGTGTAGTGACAGGGCAGTTAGAAGGTGAATTTTCTGTAGCAGTTCAAACTGTAGAAAAT +GATGAAGTTATTTTAACTTTACCAGCTTTCGTAATTTTCCGCAAATAA +>5280a04b-53f0-4ae6-ae5c-2c358e5c5a93 (mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna] name=Unknown +ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT +ATGTTTAGTTGTGTATTTGCAACTACTCCGTCAGCAAAGTCTCAACAATTACTCGATCAA +TTTGGCGGTATGCTCTTTAATAACCTTCCGTTGAATAATGACTGGCTTGGATTAACACAA +GGTGAGTTCACATCAGGACTCACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGA +AAGTCTGGTGTATCGAAATATCTTATTGGAGCAATGAGCAATCGTGTTGTTCAGTCTTTA +TTAGGTGAATTTGAAGTCGGAACTTATTTGTTAGACTTCTTTAACATGGCTTATCCGCAA +TCTGGATTGATGATTTATTCGGTCAAAATTCCAGAGAACAGATTGTCTCATGAAATGGAT +TTCAACCATAACTCACCGAATATTAGAATAACTGGACGTGAACTCGATCCGTTAACTATA +TCATTCAGAATGGATCCCGAAGCAAGTAACTATCGTGCAATGCAAGATTGGGTGAACTCC +GTTCAAGACCCGGTTACTGGATTGCGAGCATTACCAACTGACGTCGAAGCTGACATTCAG +GTTAACCTTCATGCTCGAAATGGATTACCTCATACTGTGATAATGTTCACAGGTTGTGTT +CCTGTTGCGTGTGGAGCTCCTGAGCTTACATATGAAGGAGATAACCAAATTGCGGTTTTC +GATGTTACATTTGCTTACAGAGTAATGCAAACGGGTGCTGTTGGACGTCAAGCTGCTCTT +GATTGGATTGAAGATAGAGCTGTTAATTCTATAACTGGAATTAATAGTGAAATGTCTCTT +AATGGAAGTTTAAGTAGATTATCTAGACTTGGAGGAGCTGCTGGAGGGTTGTCTCACGTC +ATTAATTCGACCCGAAACTCTACTTCGAAAATACTTGGATTGTAA +>8d6e6288-a8d4-4b81-b7fe-766119917628 (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cdna] name=Unknown +ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA +ACCTCCGCTGGTCAAAGTTCACAATCAGCAAAAATAAAATCCACTATAACTGCGCAATAT +CCGTCTGAACGTTCAGCTGGTAATGACACATCTGGTTCTTTACGAGTTCATGATCTTTAT +AAGAACGGGTTGTTGTTCACTGCGTATGATATGAATTCTCGTACAACCGGTGATATGCGT +AGCATGCGTTTAGGTGAAATGAAACGTACTGCAAATAGTGTAGTGAAATCAATCACTGGA +ACAAATACTAATAAAGTTGATAAAATTCCAGTAGTGAATATTTTACTTCCACGCTCGAAA +TCAGATGTTGAATCAGTTTCTCATAAATTTAATGACGTTGGAGATTCACTTATTTCTCGT +GGCGGCGGTACTGCTACAGGGGTATTAAGTAACGTTGCATCTACTGCTGTCTTTGGCGGA +TTAGAGTCATTGACTCAAGGATTAATGGCTGACCATAACGAGCAGATCTATAACACTGCT +CGATCAATGTATGGCGGCGCAGATAACCGTACGAAGGTATTCACGTGGGATTTAACTCCT +CGATCAGTACAAGATCTTATTGCTATTATCGAGATCTATGAATACTTTAACTACTATAGT +TATGGCGAAACGGGAACGTCTACTTATGCAAAAGAAGTTAAGTCTCAATTAGATGAATGG +TATAAATCAACTTTCCTTGATACATTAACTCCAGATGAAGCTAATAAAAATGACACTGTT +TTTGAGAAAATAACTTCATTCTTAAGTAATGTTATTGTTGTAAGTAACCCTACTGTGTGG +TTCGTCAGAAACTTTGGAACCACAAGTAAATTCGATGGACGTGCTGAAGTATTCGGTCCA +TGTCAAATTCAGAGTATCCGTTTTGATAAAACTCCAAATGGAAACTTTAACGGTTTAGCT +ATAGCTCCAAACCTGCCAAGTACATTCACATTAGAAATTACTATGCGTGAAATCTTGACA +TTGAACCGAGCTTCAGTATATGCGGAAGGATTCTGA +>154a6d4e-dc94-4de2-9403-63aa47a01d82 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna] name=multiexongene +ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT +GAAAGAGATGCAGCCTCGACTGCATCTGATCAAGTAGACTCTTTAGAATTAATCGGCCTT +AAACTTGATGATGTACAAAGCGCTAATGAACTAGTTGCTGAAGTAATTGAAGAAAAGGGC +AATAACTTAATTGATTCAGTTGATAACGTCGCTGAAGGTACTGAATTAGCTGCTGAAGCA +TCTGAACGAACTACTGAGTCTATCAAGACTCTTACTGGCGTAGCGTCAACAATCAGCGAC +AAATTAAGTAAACTCGCTTCGATGCTCGAGTCGAAGGTTCAGGCTGTGGAGCAAAAAGTA +CAAGAATCTGGTGCCTCAGCTTCAACTGGGCTGTCAGTGATAGAAGATAAGCTTCCAGAT +CCTGATGAGCCTTTCTTTCCACCTGTCCCTCAGGAACCCGAGAACAACAAGAAAGATCAA +AAGAAAGATGATAAGAAACCTACCGATATGTTAGGTGACTTGCTGAAGACTACGAAGGGC +GGATTTAAAGCTACGATATCAATCACTGATAAAATATCGTCTATGCTTTTCAAATACACC +GTAACAGCATTAGCTGAAGCTGCTAAAATGGCTGCTATGCTATTTGCATTAGTATTAGGC +ATAGATTTACTTCGTATTCATTTTAAGTATTGGACTGATAAATTCATGAGTAACTTCGAT +GAATTCAGTGCTGAAGCTGGTGAATGGGGTGGACTGCTTCAATCAATTTTTGGAATGTTA +GGAGATATTAAAAAGTTCTGGGAAGCTGGAGACTGGAGTGGATTAGCAGTAGCTATTGTC +AAAGGATTAGCTGATGTGATTTACAACCTGAGCGAAATAATGTCTTTGGGAATTTCAAAG +ATATCTGCTTCTATACTCGATGCACTTGGCTTTGAAAATGCAGCAACTACTATTCGTGGT +TCAGCACTAGAAGGATTCCAGGAACGCACTGGTAATTCATTGTCTGAAGATGATCAAAAA +GCTTTGGCTAAATATCAGAGTAAGCGTATTGAAGAAGGTCCTGGAATTATTGATAAAGCT +GGCGAATTTAAAACTCGTGCATTTGATTGGGTACTAGGAAGAGAGAATAAAATCGATTCT +ACACAAGCATCTGACCGTGATCAAGAGACTCAGAATCTTAAAGCAATGGCTCCTGAAAAA +CGCGAAGAAACACTGATCAAACAAAACGAAGCTCGTGCAGCTGTTCAGCGTTTAGAAAAA +TATATTGGTGATGTTGATCCAGAGAATCCAACTAATATGCAATCTTTAGAGAAGGCATAT +AACAGTGCCAAAAAGTCTATTAGTGATTCTGCTATAAGTGATCAACCAGCTACTAAAAAG +GAACTCGATAAAAGATTCCAACGAGTAGAATCCAAGTATCAGAAGCTCAAAGAAGATAAC +ACTCCGAAGCCTGCGGCTCCAGCTACTTCGGAAGATAATCAACGAGTTCAAAATATTCAA +AAAGCTGAAAATGCTAAAGAGCAATCTAAAAAATCAACCGGTGATATGAATGTTGCTAAC +ACTCAGGTTAATAACGTAAATAATAGTAAGACTATTCACCAGGTTCAAACAGTCACGGCT +ACTCCAGCTCCTGGAGTATTCGGGGCAACAGGAGTTAATTAA +>ce047673-3c00-425c-862b-20fd004eca42 (mRNA) 1056 residues [Merlin:5011-6066 - strand] [cdna] name=cds-not-under-exon +CTTTAATGACGCTGGTGAATCAATAAAAGAGATGATCGGTGCAATTTATGAATCAAAACC +TCTTATAGCACCTGCGATGAACACAATCAACACATATGTTCCTCGAGTTCCATGGACGAG +TAACATAACTGAATACAAGAAATATGTTCGAGATGTTGCATTAGCAGTAGATAATGACCA +ATTCGTTTTTGTATGGGAAGATATCTATGGCTTGAACATGATGGATTATGACGCAATGAT +TAACCAAGAATCAATCAAGGTTATTGTCGGTGAACCACGCACAATAGGTCAATTTGTCGG +TGAGCTGGAATATAATCTCGCTTATGACTTCCAGTGGTTAACGAAGGCTAATGCCCATAC +ACGCGATCCTATTTTTAACGCTACAATCTATTCACACTCATTCTTGGATAATAACCTTCC +TAGAATAGTAACAGGTGATGGACAGAATAGCATCTTCGTTTCTCGCTCGGGTGCATATTC +TGAAATGACTTATCGAAATGGATATGAAGAAGCTATCAGGCTTCAGACTATGGCACAATA +CGACGGTTATGCAACTTGTAAAATGGTTGGAGACTTTGAAATGACTCCTGGAGATAAGAT +TAATTTCTTTGATCCAAAGAAACAATTCAAAGCTGATTTTTACATTGATGAAGTAATTCA +TGAAGTAAGTAATAACCAAAGCATAACTACACTTTATATGTTTACTAACTCTCGTAAGTT +GGAAACAGTAGAACCAATAAAGGTTAAAAATGAACTTAAATCTGATACTACCACTTAAGA +AAATACAAGTCAATGGCAAAACCATTTCTATTCCTAAGCTTGGCTTGAAGCATCACACGT +TGATTAAAGATGTTCGTGCAATGGATGAGAACATGGGAATTCTTTTGGACTCAATTCATC +CTGGGCTAAACGCTGCTGAATCAGATTTAGTGTCTATTCATTTGCTAGAGTTCAATGGCA +AGTTAAAATCAAGTGTCGTTAAAGATGGATACACTTACAATATCAATGACATTTATATTT +GCCAACGCCTTGAATTCCAGTTCCAAGGAAAGACAT diff --git a/test-data/exported_cds.fa b/test-data/exported_cds.fa new file mode 100644 index 00000000..b2b24c17 --- /dev/null +++ b/test-data/exported_cds.fa @@ -0,0 +1,68 @@ +>c7ce0a38-beee-4aa3-8f34-5f35f549f287 (mRNA) 690 residues [Merlin:2-691 + strand] [cds] name=Unknown +CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG +CATAACGCAGAGAATAAGTTGTTCTATTTCAGAAACTACGTTTCAACTTCATTAAAGCCT +CTGATCTTTGGTGAATTTGGTCGTATGTTTATGGCACTAGATGACGATACTACAATTTAT +ACTGCTGAGACGCCTGATGATTATAATCGTTTCGCAAACCCAGAAGATATAATTGATATT +GGCGCTACTCAAAAAGACTCATTTGACGATAACAATAATGATGGAACATCTATTAATATC +GGCAAACAAGTTAATTTAGGATTCGTTATTTCCGGTGCTGAAAATGTTCGAGTTATTGTT +CCAGGTTCTTTAACTGAATATCCAGAAGAAGCGGAAGTTATTCTGCCTCGTGGTACTCTT +TTGAAGATCAATAAAATCACTACTCAAGTAGATAAACGCTCGAATAAGTTCATGGTTGAA +GGTTCAATCGTTCCGCCTTCTGAGCAAATTGATGAATCTGTTGAGATTTATGACGGTGAT +CTGTTCATGGAAACAGGTGAAGTAGTAAAACTGTCCGGATTCATGCAGTTCGTCAACGAA +TCTGCATACGATGAAGAGCAAAACCAGATGGCTGCTGAGATTCTGTCTGGATTCTTGGAC +ATTGATGACATGCCACGTAAGTTCCGCTAG +>74f8e03d-f003-490c-9eeb-15b3b68763c0 (mRNA) 9 residues [Merlin:752-1039 + strand] [cds] name=Unknown +AAATTTTAG +>5280a04b-53f0-4ae6-ae5c-2c358e5c5a93 (mRNA) 108 residues [Merlin:1067-2011 - strand] [cds] name=Unknown +CACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGAAAGTCTGGTGTATCGAAATA +TCTTATTGGAGCAATGAGCAATCGTGTTGTTCAGTCTTTATTAGGTGA +>8d6e6288-a8d4-4b81-b7fe-766119917628 (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cds] name=Unknown +ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA +ACCTCCGCTGGTCAAAGTTCACAATCAGCAAAAATAAAATCCACTATAACTGCGCAATAT +CCGTCTGAACGTTCAGCTGGTAATGACACATCTGGTTCTTTACGAGTTCATGATCTTTAT +AAGAACGGGTTGTTGTTCACTGCGTATGATATGAATTCTCGTACAACCGGTGATATGCGT +AGCATGCGTTTAGGTGAAATGAAACGTACTGCAAATAGTGTAGTGAAATCAATCACTGGA +ACAAATACTAATAAAGTTGATAAAATTCCAGTAGTGAATATTTTACTTCCACGCTCGAAA +TCAGATGTTGAATCAGTTTCTCATAAATTTAATGACGTTGGAGATTCACTTATTTCTCGT +GGCGGCGGTACTGCTACAGGGGTATTAAGTAACGTTGCATCTACTGCTGTCTTTGGCGGA +TTAGAGTCATTGACTCAAGGATTAATGGCTGACCATAACGAGCAGATCTATAACACTGCT +CGATCAATGTATGGCGGCGCAGATAACCGTACGAAGGTATTCACGTGGGATTTAACTCCT +CGATCAGTACAAGATCTTATTGCTATTATCGAGATCTATGAATACTTTAACTACTATAGT +TATGGCGAAACGGGAACGTCTACTTATGCAAAAGAAGTTAAGTCTCAATTAGATGAATGG +TATAAATCAACTTTCCTTGATACATTAACTCCAGATGAAGCTAATAAAAATGACACTGTT +TTTGAGAAAATAACTTCATTCTTAAGTAATGTTATTGTTGTAAGTAACCCTACTGTGTGG +TTCGTCAGAAACTTTGGAACCACAAGTAAATTCGATGGACGTGCTGAAGTATTCGGTCCA +TGTCAAATTCAGAGTATCCGTTTTGATAAAACTCCAAATGGAAACTTTAACGGTTTAGCT +ATAGCTCCAAACCTGCCAAGTACATTCACATTAGAAATTACTATGCGTGAAATCTTGACA +TTGAACCGAGCTTCAGTATATGCGGAAGGATTCTGA +>154a6d4e-dc94-4de2-9403-63aa47a01d82 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds] name=multiexongene +ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT +GAAAGAGATGCAGCCTCGACTGCATCTGATCAAGTAGACTCTTTAGAATTAATCGGCCTT +AAACTTGATGATGTACAAAGCGCTAATGAACTAGTTGCTGAAGTAATTGAAGAAAAGGGC +AATAACTTAATTGATTCAGTTGATAACGTCGCTGAAGGTACTGAATTAGCTGCTGAAGCA +TCTGAACGAACTACTGAGTCTATCAAGACTCTTACTGGCGTAGCGTCAACAATCAGCGAC +AAATTAAGTAAACTCGCTTCGATGCTCGAGTCGAAGGTTCAGGCTGTGGAGCAAAAAGTA +CAAGAATCTGGTGCCTCAGCTTCAACTGGGCTGTCAGTGATAGAAGATAAGCTTCCAGAT +CCTGATGAGCCTTTCTTTCCACCTGTCCCTCAGGAACCCGAGAACAACAAGAAAGATCAA +AAGAAAGATGATAAGAAACCTACCGATATGTTAGGTGACTTGCTGAAGACTACGAAGGGC +GGATTTAAAGCTACGATATCAATCACTGATAAAATATCGTCTATGCTTTTCAAATACACC +GTAACAGCATTAGCTGAAGCTGCTAAAATGGCTGCTATGCTATTTGCATTAGTATTAGGC +ATAGATTTACTTCGTATTCATTTTAAGTATTGGACTGATAAATTCATGAGTAACTTCGAT +GAATTCAGTGCTGAAGCTGGTGAATGGGGTGGACTGCTTCAATCAATTTTTGGAATGTTA +GGAGATATTAAAAAGTTCTGGGAAGCTGGAGACTGGAGTGGATTAGCAGTAGCTATTGTC +AAAGGATTAGCTGATGTGATTTACAACCTGAGCGAAATAATGTCTTTGGGAATTTCAAAG +ATATCTGCTTCTATACTCGATGCACTTGGCTTTGAAAATGCAGCAACTACTATTCGTGGT +TCAGCACTAGAAGGATTCCAGGAACGCACTGGTAATTCATTGTCTGAAGATGATCAAAAA +GCTTTGGCTAAATATCAGAGTAAGCGTATTGAAGAAGGTCCTGGAATTATTGATAAAGCT +GGCGAATTTAAAACTCGTGCATTTGATTGGGTACTAGGAAGAGAGAATAAAATCGATTCT +ACACAAGCATCTGACCGTGATCAAGAGACTCAGAATCTTAAAGCAATGGCTCCTGAAAAA +CGCGAAGAAACACTGATCAAACAAAACGAAGCTCGTGCAGCTGTTCAGCGTTTAGAAAAA +TATATTGGTGATGTTGATCCAGAGAATCCAACTAATATGCAATCTTTAGAGAAGGCATAT +AACAGTGCCAAAAAGTCTATTAGTGATTCTGCTATAAGTGATCAACCAGCTACTAAAAAG +GAACTCGATAAAAGATTCCAACGAGTAGAATCCAAGTATCAGAAGCTCAAAGAAGATAAC +ACTCCGAAGCCTGCGGCTCCAGCTACTTCGGAAGATAATCAACGAGTTCAAAATATTCAA +AAAGCTGAAAATGCTAAAGAGCAATCTAAAAAATCAACCGGTGATATGAATGTTGCTAAC +ACTCAGGTTAATAACGTAAATAATAGTAAGACTATTCACCAGGTTCAAACAGTCACGGCT +ACTCCAGCTCCTGGAGTATTCGGGGCAACAGGAGTTAATTAA +>ce047673-3c00-425c-862b-20fd004eca42 (mRNA) 6 residues [Merlin:5011-6066 - strand] [cds] name=cds-not-under-exon +CTTTAA diff --git a/test-data/exported_peptide.fa b/test-data/exported_peptide.fa new file mode 100644 index 00000000..5f97b8ec --- /dev/null +++ b/test-data/exported_peptide.fa @@ -0,0 +1,29 @@ +>c7ce0a38-beee-4aa3-8f34-5f35f549f287 (mRNA) 229 residues [Merlin:2-691 + strand] [peptide] name=Unknown +RLDKGTLLYRGQKLDLPTFEHNAENKLFYFRNYVSTSLKPLIFGEFGRMFMALDDDTTIY +TAETPDDYNRFANPEDIIDIGATQKDSFDDNNNDGTSINIGKQVNLGFVISGAENVRVIV +PGSLTEYPEEAEVILPRGTLLKINKITTQVDKRSNKFMVEGSIVPPSEQIDESVEIYDGD +LFMETGEVVKLSGFMQFVNESAYDEEQNQMAAEILSGFLDIDDMPRKFR +>74f8e03d-f003-490c-9eeb-15b3b68763c0 (mRNA) 2 residues [Merlin:752-1039 + strand] [peptide] name=Unknown +KF +>5280a04b-53f0-4ae6-ae5c-2c358e5c5a93 (mRNA) 35 residues [Merlin:1067-2011 - strand] [peptide] name=Unknown +HLNYHCRYSTAGKKVWCIEISYWSNEQSCCSVFIR +>8d6e6288-a8d4-4b81-b7fe-766119917628 (mRNA) 351 residues [Merlin:2011-3066 - strand] [peptide] name=Unknown +MSIKVRELDDKTDALISGVKTSAGQSSQSAKIKSTITAQYPSERSAGNDTSGSLRVHDLY +KNGLLFTAYDMNSRTTGDMRSMRLGEMKRTANSVVKSITGTNTNKVDKIPVVNILLPRSK +SDVESVSHKFNDVGDSLISRGGGTATGVLSNVASTAVFGGLESLTQGLMADHNEQIYNTA +RSMYGGADNRTKVFTWDLTPRSVQDLIAIIEIYEYFNYYSYGETGTSTYAKEVKSQLDEW +YKSTFLDTLTPDEANKNDTVFEKITSFLSNVIVVSNPTVWFVRNFGTTSKFDGRAEVFGP +CQIQSIRFDKTPNGNFNGLAIAPNLPSTFTLEITMREILTLNRASVYAEGF +>154a6d4e-dc94-4de2-9403-63aa47a01d82 (mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide] name=multiexongene +MKSENMSTMRRRKVIADSKGERDAASTASDQVDSLELIGLKLDDVQSANELVAEVIEEKG +NNLIDSVDNVAEGTELAAEASERTTESIKTLTGVASTISDKLSKLASMLESKVQAVEQKV +QESGASASTGLSVIEDKLPDPDEPFFPPVPQEPENNKKDQKKDDKKPTDMLGDLLKTTKG +GFKATISITDKISSMLFKYTVTALAEAAKMAAMLFALVLGIDLLRIHFKYWTDKFMSNFD +EFSAEAGEWGGLLQSIFGMLGDIKKFWEAGDWSGLAVAIVKGLADVIYNLSEIMSLGISK +ISASILDALGFENAATTIRGSALEGFQERTGNSLSEDDQKALAKYQSKRIEEGPGIIDKA +GEFKTRAFDWVLGRENKIDSTQASDRDQETQNLKAMAPEKREETLIKQNEARAAVQRLEK +YIGDVDPENPTNMQSLEKAYNSAKKSISDSAISDQPATKKELDKRFQRVESKYQKLKEDN +TPKPAAPATSEDNQRVQNIQKAENAKEQSKKSTGDMNVANTQVNNVNNSKTIHQVQTVTA +TPAPGVFGATGVN +>ce047673-3c00-425c-862b-20fd004eca42 (mRNA) 1 residues [Merlin:5011-6066 - strand] [peptide] name=cds-not-under-exon +L diff --git a/test/io_test.py b/test/io_test.py new file mode 100644 index 00000000..581aeb11 --- /dev/null +++ b/test/io_test.py @@ -0,0 +1,90 @@ +import re + +from . import ApolloTestCase, wa + + +class IoTest(ApolloTestCase): + + def test_export_gff3(self): + + org = wa.organisms.show_organism('test_organism') + + uuid_gff = wa.io.write_downloadable(org['commonName'], 'GFF3') + if 'error' in uuid_gff or 'uuid' not in uuid_gff: + raise Exception("Apollo failed to prepare the GFF3 file for download: %s" % uuid_gff) + + gff_content = wa.io.download(uuid_gff['uuid'], output_format="text") + + assert '##gff-version 3' in gff_content + assert 'Merlin\t.\tgene\t2\t691\t.\t+\t.' in gff_content + assert 'Merlin\t.\tmRNA\t2\t691\t.\t+\t.' in gff_content + assert 'Merlin\t.\texon\t2\t691\t.\t+\t.' in gff_content + assert 'Merlin\t.\tCDS\t2\t691\t.\t+\t0' in gff_content + assert 'score=["-1335.034872"]' in gff_content + assert 'Merlin\t.\tnon_canonical_three_prime_splice_site\t4297\t4297\t.\t-\t.' in gff_content + assert 'Merlin\t.\tnon_canonical_five_prime_splice_site\t4364\t4364\t.\t-\t.' in gff_content + + def test_export_vcf(self): + + org = wa.organisms.show_organism('test_organism') + + uuid_vcf = wa.io.write_downloadable(org['commonName'], 'VCF') + if 'error' in uuid_vcf or 'uuid' not in uuid_vcf: + raise Exception("Apollo failed to prepare the VCF file for download: %s" % uuid_vcf) + + vcf_content = wa.io.download(uuid_vcf['uuid'], output_format="text") + assert '##fileformat=VCFv4.2' in vcf_content + assert '##fileDate=20200608' in vcf_content + assert '##source=.' in vcf_content + assert '#CHROM POS ID REF ALT QUAL FILTER INFO' in vcf_content + + def test_export_fa_cds(self): + + org = wa.organisms.show_organism('test_organism') + + uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cds') + if 'error' in uuid_fa or 'uuid' not in uuid_fa: + raise Exception("Apollo failed to prepare the cds FASTA file for download: %s" % uuid_fa) + + fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") + assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content + assert 'CACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGAAAGTCTGGTGTATCGAAATA' in fa_content + assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content + assert 'ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT' in fa_content + assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cds]' in fa_content + assert '(mRNA) 108 residues [Merlin:1067-2011 - strand] [cds]' in fa_content + assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds]' in fa_content + + def test_export_fa_cdna(self): + + org = wa.organisms.show_organism('test_organism') + + uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cdna') + if 'error' in uuid_fa or 'uuid' not in uuid_fa: + raise Exception("Apollo failed to prepare the cdna FASTA file for download: %s" % uuid_fa) + + fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") + assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content + assert 'ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG' in fa_content + assert 'ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT' in fa_content + assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content + assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cdna]' in fa_content + assert '(mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna]' in fa_content + assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna]' in fa_content + + def test_export_fa_peptide(self): + + org = wa.organisms.show_organism('test_organism') + + uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='peptide') + if 'error' in uuid_fa or 'uuid' not in uuid_fa: + raise Exception("Apollo failed to prepare the peptide FASTA file for download: %s" % uuid_fa) + + fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") + assert 'RLDKGTLLYRGQKLDLPTFEHNAENKLFYFRNYVSTSLKPLIFGEFGRMFMALDDDTTIY' in fa_content + assert 'HLNYHCRYSTAGKKVWCIEISYWSNEQSCCSVFIR' in fa_content + assert 'MSIKVRELDDKTDALISGVKTSAGQSSQSAKIKSTITAQYPSERSAGNDTSGSLRVHDLY' in fa_content + assert 'MKSENMSTMRRRKVIADSKGERDAASTASDQVDSLELIGLKLDDVQSANELVAEVIEEKG' in fa_content + assert '(mRNA) 229 residues [Merlin:2-691 + strand] [peptide]' in fa_content + assert '(mRNA) 35 residues [Merlin:1067-2011 - strand] [peptide]' in fa_content + assert '(mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide]' in fa_content From 3583d2fb408ff3ef0811053742e286c1b950a331 Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Mon, 8 Jun 2020 20:11:15 -0700 Subject: [PATCH 02/21] begin to refactor gff3 --- apollo/annotations/__init__.py | 203 +++++++++++++++++++++++++-------- apollo/util.py | 16 +++ test-data/local-arrow.yml | 2 +- 3 files changed, 173 insertions(+), 48 deletions(-) diff --git a/apollo/annotations/__init__.py b/apollo/annotations/__init__.py index 27c06d69..d749753c 100644 --- a/apollo/annotations/__init__.py +++ b/apollo/annotations/__init__.py @@ -10,7 +10,7 @@ from apollo import util from apollo.client import Client -from apollo.util import add_property_to_feature, features_to_feature_schema, retry +from apollo.util import add_property_to_feature, features_to_feature_schema, retry, features_to_apollo_schema class FeatureType(Enum): @@ -18,6 +18,10 @@ class FeatureType(Enum): TRANSCRIPT = 2 +def get_type(rec): + pass + + class AnnotationsClient(Client): CLIENT_BASE = '/annotationEditor/' @@ -1233,13 +1237,87 @@ def _write_features(self, new_features_list=None, test=False, verbose=False, tim if verbose: print("Features returned") print(returned_features) - sys.stdout.write("success" + " " + str(len(returned_features['features'])) + " features returned\n") + # sys.stdout.write("success" + " " + str(len(returned_features['features'])) + " features returned\n") + # sys.stdout.write("success" + " " + str(len(returned_features)) + " features returned\n") del new_features_list[:] return returned_features else: if verbose: print("empty list, no more features to write") + def _get_subfeatures(self, rec): + if len(rec.features) > 1: + return rec.features[1:] + else: + return None + + def _get_type(self, rec): + return rec.features[0].type + + def _get_subfeature_type(self, rec): + return rec.features[0].type + + def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source=None, + disable_cds_recalculation=False, use_name=False, verbose=False): + type = self._get_type(rec) + subfeatures = self._get_subfeatures(rec) + if type not in util.gene_types and type not in util.coding_transcript_types: + print("AAAAAA") + if subfeatures is not None: + print("BBBBBB") + # process noncoding transcripts + for subfeature in subfeatures: + self._process_gff_entry(subfeature, new_feature_list, new_transcript_list, source, + disable_cds_recalculation, use_name) + else: + print("CCCCCC") + # if its not a gene or a transcript type then process as a simple singleton + feature_data = features_to_feature_schema([rec.features[0]], disable_cds_recalculation, use_name) + if source is not None: + add_property_to_feature(feature_data[0], "DatasetSource", source) + if verbose: + print("adding " + str(type) + " to write list: " + str(feature_data[0])) + new_feature_list.append(feature_data[0]) + else: + print("DDDDDD") + if type in util.gene_types: + print("EEEEEE") + transcript_type = self._get_subfeature_type(rec) + if transcript_type in util.coding_transcript_types: + print("FFFFFF") + feature_data = features_to_feature_schema(subfeatures, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) + if source is not None: + add_property_to_feature(feature_data[0], "DatasetSource", source) + new_transcript_list.append(feature_data) + if verbose: + print("adding gene with MRNA type " + str(type) + " to write list: " + str(feature_data)) + else: + print("GGGGGG") + feature_data = features_to_feature_schema(rec.features, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) + if verbose: + print("adding gene with noncoding transcript type " + str(type) + " to write list: " + str( + feature_data)) + if source is not None: + add_property_to_feature(feature_data[0], "DatasetSource", source) + new_feature_list.append(feature_data) + # self._process_gene(rec.features) + elif type in util.coding_transcript_types: + print("HHHHHH") + feature_data = features_to_apollo_schema(rec.features, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) + if source is not None: + add_property_to_feature(feature_data[0], "DatasetSource", source) + if verbose: + print("adding transcript type " + str(type) + " to write list: " + str(feature_data)) + new_transcript_list.append(feature_data) + else: + print("how did we get here?") + + # a gene or a transcript + + pass + def load_gff3(self, organism, gff3, source=None, batch_size=1, test=False, use_name=False, @@ -1291,12 +1369,13 @@ def load_gff3(self, organism, gff3, source=None, batch_size=1, return 1 if len(org_ids) > 1: - print("More than one organism found for [" + organism + "]. Use an organism ID instead: " + str(org_ids) + "") + print("More than one organism found for [" + organism + "]. Use an organism ID instead: " + str( + org_ids) + "") return 1 + total_features_written = 0 + start_timer = default_timer() if timing: - start_timer = default_timer() - total_features_written = 0 sys.stdout.write('Times are in seconds. If batch-size > 1 then .(total_batch_time/avg_feature_time)\n') if verbose: @@ -1310,49 +1389,79 @@ def load_gff3(self, organism, gff3, source=None, batch_size=1, for rec in GFF.parse(gff3): self.set_sequence(organism, rec.id) - for feature in rec.features: - # We can only handle genes right now + print("GFF entry") + print(str(rec)) + print("Features") + print(str(rec.features)) + # type = self._get_type(rec) + # transcript_type = self._get_subfeature_type(rec) + try: if verbose: - print("input feature: " + str(feature)) - - if feature.type not in (util.gene_types + util.coding_transcript_types + util.pseudogenes_types - + util.noncoding_transcript_types + util.single_level_feature_types): - print("\nIgnoring unknown feature type '" + str(feature.type) + "' for " + str(feature) + "\n") - continue - - # Convert the feature into a presentation that Apollo will accept - feature_data = features_to_feature_schema([feature], use_name, disable_cds_recalculation) - - if source is not None: - add_property_to_feature(feature_data[0], "DatasetSource", source) - - try: - # Create the new feature - if verbose: - print("adding " + str(feature.type) + " to write list: " + str(feature_data[0])) - - if feature.type in util.gene_types: - new_transcripts_list.append(feature_data[0]) - # TODO: note that this NEVER handles a transcript ever - if feature.type in util.coding_transcript_types: - new_transcripts_list.append(feature_data[0]) - else: - new_features_list.append(feature_data[0]) - - if timing: - total_features_written += 1 - self._check_write(batch_size, verbose, test, new_features_list, new_transcripts_list, timing) - except Exception as e: - msg = str(e) - if '\n' in msg: - msg = msg[0:msg.index('\n')] - sys.stdout.write('\t'.join([ - feature.id, - '', - 'ERROR', - msg - ])) - sys.stdout.flush() + print("processing" + str(rec) + " with features: " + str(rec.features)) + self._process_gff_entry(rec, new_features_list, new_transcripts_list, source=source, + disable_cds_recalculation=disable_cds_recalculation, + use_name=use_name + ) + total_features_written += 1 + self._check_write(batch_size, verbose, test, new_features_list, new_transcripts_list, timing) + + except Exception as e: + msg = str(e) + if '\n' in msg: + msg = msg[0:msg.index('\n')] + sys.stdout.write('\t'.join([ + rec.features.id, + '', + 'ERROR', + msg + ])) + sys.stdout.flush() + + # for feature in rec.features: + # print("---feature--") + # print(str(feature)) + # # We can only handle genes right now + # if verbose: + # print("input feature: " + str(feature)) + # + # if feature.type not in (util.gene_types + util.coding_transcript_types + util.pseudogenes_types + # + util.noncoding_transcript_types + util.single_level_feature_types): + # print("\nIgnoring unknown feature type '" + str(feature.type) + "' for " + str(feature) + "\n") + # continue + # + # # Convert the feature into a presentation that Apollo will accept + # feature_data = features_to_feature_schema([feature], use_name, disable_cds_recalculation) + # + # if source is not None: + # add_property_to_feature(feature_data[0], "DatasetSource", source) + # + # try: + # # Create the new feature + # if verbose: + # print("adding " + str(feature.type) + " to write list: " + str(feature_data[0])) + # + # if feature.type in util.gene_types: + # new_transcripts_list.append(feature_data[0]) + # # TODO: note that this NEVER handles a transcript ever + # if feature.type in util.coding_transcript_types: + # new_transcripts_list.append(feature_data[0]) + # else: + # new_features_list.append(feature_data[0]) + # + # if timing: + # total_features_written += 1 + # self._check_write(batch_size, verbose, test, new_features_list, new_transcripts_list, timing) + # except Exception as e: + # msg = str(e) + # if '\n' in msg: + # msg = msg[0:msg.index('\n')] + # sys.stdout.write('\t'.join([ + # feature.id, + # '', + # 'ERROR', + # msg + # ])) + # sys.stdout.flush() sys.stdout.flush() self._write_features(new_features_list, test, verbose, timing, FeatureType.FEATURE) diff --git a/apollo/util.py b/apollo/util.py index 2a6929c5..c4e171fa 100644 --- a/apollo/util.py +++ b/apollo/util.py @@ -96,6 +96,8 @@ def _tnType(feature): def _yieldFeatData(features, use_name=False, disable_cds_recalculation=False): + print("yeilding feature data") + print(features) for f in features: current = { 'location': { @@ -150,6 +152,20 @@ def add_property_to_feature(feature, property_key, property_value): return feature +def features_to_apollo_schema(features, use_name=False, disable_cds_recalculation=False): + """ + + :param disable_cds_recalculation: + :param use_name: + :param features: + :return: + """ + compiled = [] + for x in _yieldFeatData(features, use_name, disable_cds_recalculation): + compiled.append(x) + return compiled + + def features_to_feature_schema(features, use_name=False, disable_cds_recalculation=False): """ diff --git a/test-data/local-arrow.yml b/test-data/local-arrow.yml index 8e9b660e..beb80e61 100644 --- a/test-data/local-arrow.yml +++ b/test-data/local-arrow.yml @@ -1,5 +1,5 @@ __default: local local: - url: "http://localhost:8080/" + url: "http://localhost:8080/apollo" username: "admin@local.host" password: "password" From 44c94076f0f1030eb998a78e59ead47f5ac6b810 Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Wed, 10 Jun 2020 08:05:00 -0700 Subject: [PATCH 03/21] added notes --- README.rst | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index fb8fb6b6..51ae4331 100644 --- a/README.rst +++ b/README.rst @@ -85,6 +85,8 @@ Or with the Arrow client: History ------- +- 4.2.3 + - Fixed `load_gff3` to more accurately load transcripts including the CDS as well as handle non-coding types more accurately. - 4.2.2 - Drastically speed up load_gff3 - `load_gff3` now uses the Apollo `add_transcript` method if it is a gene or mRNA type diff --git a/setup.py b/setup.py index d13c0ab6..33bef2de 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name="apollo", - version='4.2.2', + version='4.2.3-SNAPSHOT', description="Apollo API library", long_description=readme, author="Helena Rasche;Anthony Bretaudeau;Nathan Dunn", From 8ca20bd3bb0298715149ceed155da9f049331f7c Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Wed, 10 Jun 2020 19:05:41 -0700 Subject: [PATCH 04/21] updatexd annotations --- test-data/gene-top.gff | 6 +++ test-data/mrna-top.gff | 5 +++ test/annotations_test.py | 91 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 test-data/gene-top.gff create mode 100644 test-data/mrna-top.gff create mode 100644 test/annotations_test.py diff --git a/test-data/gene-top.gff b/test-data/gene-top.gff new file mode 100644 index 00000000..44a8a50c --- /dev/null +++ b/test-data/gene-top.gff @@ -0,0 +1,6 @@ +##gff-version 3 +##sequence-region Merlin 1 172788 +Merlin GeneMark.hmm gene 2 691 -856.563659 + . ID=Merlin_1;seqid=Merlin +Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00 +Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin +Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_1_CDS;Parent=Merlin_1_exon;seqid=Merlin diff --git a/test-data/mrna-top.gff b/test-data/mrna-top.gff new file mode 100644 index 00000000..41b4081f --- /dev/null +++ b/test-data/mrna-top.gff @@ -0,0 +1,5 @@ +##gff-version 3 +##sequence-region Merlin 1 172788 +Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00 +Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin +Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_1_CDS;Parent=Merlin_1_exon;seqid=Merlin diff --git a/test/annotations_test.py b/test/annotations_test.py new file mode 100644 index 00000000..b3521764 --- /dev/null +++ b/test/annotations_test.py @@ -0,0 +1,91 @@ +from BCBio import GFF + +from . import ApolloTestCase, wa +from apollo import util + + +class AnnotationsTest(ApolloTestCase): + + def test_features_to_apollo_schema(self): + path = 'test-data/mrna-top.gff' + with open(path) as file: + print(file.read()) + file.close() + feature_list = [] + transcript_list = [] + in_handle = open(path) + for rec in GFF.parse(in_handle): + feature_data = util.features_to_apollo_schema(rec.features, feature_list, transcript_list) + + in_handle.close() + print(str(feature_data)) + print(str(len(feature_data))) + + assert(len(feature_data)==1) + + + + def test_create_mrna(self): + # org_info = self.waitOrgCreated('temp_org') + # assert org_info['commonName'] == 'temp_org' + path = 'test-data/mrna-top.gff' + + with open(path) as file: + print(file.read()) + file.close() + + # entries = GFF.parse(in_handle) + # print(entries.features.__sizeof__()) + feature_list = [] + transcript_list = [] + in_handle = open(path) + for rec in GFF.parse(in_handle): + print("A") + wa.annotations._process_gff_entry(rec, feature_list, transcript_list) + print("B") + + in_handle.close() + assert (len(feature_list) == 0) + assert (len(transcript_list) == 1) + print(transcript_list) + assert(len(transcript_list[0]) == 1) + transcript = transcript_list[0] + assert(len(transcript['children']) == 2) + # assert(len(transcript_list[0][0]) == 1) + # for feature in rec.features: + # print ("A.1") + # print(entries) + # print("B") + # print(str(entries)) + # print("C") + # # assert(len(entries)==1) + # print("D") + # assert(len(entries.features)==3) + + # def setUp(self): + # # Make sure the organism is not already there + # temp_org_info = wa.organisms.show_organism('temp_org') + # if 'directory' in temp_org_info: + # wa.organisms.delete_organism(temp_org_info['id']) + # self.waitOrgDeleted('temp_org') + # + # with tempfile.NamedTemporaryFile(suffix='.tar.gz') as archive: + # with tarfile.open(archive.name, mode="w:gz") as tar: + # for file in glob.glob('test-data/dataset_1_files/data/'): + # tar.add(file, arcname=file.replace('test-data/dataset_1_files/data/', './')) + # wa.remote.add_organism('temp_org', archive) + # self.waitOrgCreated('temp_org') + + # def tearDown(self): + # org_info = wa.organisms.show_organism('temp_org') + # + # if org_info and 'id' in org_info: + # wa.organisms.delete_organism(org_info['id']) + # + # self.waitOrgDeleted('temp_org') + # + # org_info = wa.organisms.show_organism('some_new_org_remote') + # + # if org_info and 'id' in org_info: + # wa.organisms.delete_organism(org_info['id']) + # self.waitOrgDeleted('some_new_org_remote') From 0727c83523c46bd8ab99b63ab398b290fc9c4ac5 Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Wed, 10 Jun 2020 20:02:14 -0700 Subject: [PATCH 05/21] seems to be constructing it properly --- apollo/annotations/__init__.py | 127 +++++++++++++---------- apollo/util.py | 177 ++++++++++++++++++++++++++++++++- test/annotations_test.py | 55 ++-------- 3 files changed, 255 insertions(+), 104 deletions(-) diff --git a/apollo/annotations/__init__.py b/apollo/annotations/__init__.py index d749753c..52f91f3b 100644 --- a/apollo/annotations/__init__.py +++ b/apollo/annotations/__init__.py @@ -10,7 +10,8 @@ from apollo import util from apollo.client import Client -from apollo.util import add_property_to_feature, features_to_feature_schema, retry, features_to_apollo_schema +# from apollo.util import add_property_to_feature, features_to_feature_schema, retry, features_to_apollo_schema +from apollo.util import features_to_feature_schema, retry class FeatureType(Enum): @@ -1261,62 +1262,78 @@ def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source= disable_cds_recalculation=False, use_name=False, verbose=False): type = self._get_type(rec) subfeatures = self._get_subfeatures(rec) - if type not in util.gene_types and type not in util.coding_transcript_types: - print("AAAAAA") - if subfeatures is not None: - print("BBBBBB") - # process noncoding transcripts - for subfeature in subfeatures: - self._process_gff_entry(subfeature, new_feature_list, new_transcript_list, source, - disable_cds_recalculation, use_name) - else: - print("CCCCCC") - # if its not a gene or a transcript type then process as a simple singleton - feature_data = features_to_feature_schema([rec.features[0]], disable_cds_recalculation, use_name) - if source is not None: - add_property_to_feature(feature_data[0], "DatasetSource", source) - if verbose: - print("adding " + str(type) + " to write list: " + str(feature_data[0])) - new_feature_list.append(feature_data[0]) - else: - print("DDDDDD") - if type in util.gene_types: - print("EEEEEE") - transcript_type = self._get_subfeature_type(rec) - if transcript_type in util.coding_transcript_types: - print("FFFFFF") - feature_data = features_to_feature_schema(subfeatures, use_name=use_name, - disable_cds_recalculation=disable_cds_recalculation) - if source is not None: - add_property_to_feature(feature_data[0], "DatasetSource", source) - new_transcript_list.append(feature_data) - if verbose: - print("adding gene with MRNA type " + str(type) + " to write list: " + str(feature_data)) - else: - print("GGGGGG") - feature_data = features_to_feature_schema(rec.features, use_name=use_name, - disable_cds_recalculation=disable_cds_recalculation) - if verbose: - print("adding gene with noncoding transcript type " + str(type) + " to write list: " + str( - feature_data)) - if source is not None: - add_property_to_feature(feature_data[0], "DatasetSource", source) - new_feature_list.append(feature_data) - # self._process_gene(rec.features) - elif type in util.coding_transcript_types: - print("HHHHHH") - feature_data = features_to_apollo_schema(rec.features, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) - if source is not None: - add_property_to_feature(feature_data[0], "DatasetSource", source) - if verbose: - print("adding transcript type " + str(type) + " to write list: " + str(feature_data)) + if type in util.gene_types: + if len(subfeatures) > 0: + feature_data = util._yieldApolloData(rec.features[1:]) new_transcript_list.append(feature_data) else: - print("how did we get here?") - - # a gene or a transcript - - pass + feature_data = util._yieldApolloData(rec.features) + new_feature_list.append(feature_data) + if type in util.coding_transcript_types or type in util.noncoding_transcript_types: + feature_data = util._yieldApolloData(rec.features) + new_transcript_list.append(feature_data) + if type in util.single_level_feature_types: + feature_data = util._yieldApolloData(rec.features) + new_feature_list.append(feature_data) + + # type = self._get_type(rec) + # subfeatures = self._get_subfeatures(rec) + # if type not in util.gene_types and type not in util.coding_transcript_types: + # print("AAAAAA") + # if subfeatures is not None: + # print("BBBBBB") + # # process noncoding transcripts + # for subfeature in subfeatures: + # self._process_gff_entry(subfeature, new_feature_list, new_transcript_list, source, + # disable_cds_recalculation, use_name) + # else: + # print("CCCCCC") + # # if its not a gene or a transcript type then process as a simple singleton + # feature_data = features_to_feature_schema([rec.features[0]], disable_cds_recalculation, use_name) + # if source is not None: + # add_property_to_feature(feature_data[0], "DatasetSource", source) + # if verbose: + # print("adding " + str(type) + " to write list: " + str(feature_data[0])) + # new_feature_list.append(feature_data[0]) + # else: + # print("DDDDDD") + # if type in util.gene_types: + # print("EEEEEE") + # transcript_type = self._get_subfeature_type(rec) + # if transcript_type in util.coding_transcript_types: + # print("FFFFFF") + # feature_data = features_to_feature_schema(subfeatures, use_name=use_name, + # disable_cds_recalculation=disable_cds_recalculation) + # if source is not None: + # add_property_to_feature(feature_data[0], "DatasetSource", source) + # new_transcript_list.append(feature_data) + # if verbose: + # print("adding gene with MRNA type " + str(type) + " to write list: " + str(feature_data)) + # else: + # print("GGGGGG") + # feature_data = features_to_feature_schema(rec.features, use_name=use_name, + # disable_cds_recalculation=disable_cds_recalculation) + # if verbose: + # print("adding gene with noncoding transcript type " + str(type) + " to write list: " + str( + # feature_data)) + # if source is not None: + # add_property_to_feature(feature_data[0], "DatasetSource", source) + # new_feature_list.append(feature_data) + # # self._process_gene(rec.features) + # elif type in util.coding_transcript_types: + # print("HHHHHH") + # feature_data = features_to_apollo_schema(rec.features, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) + # if source is not None: + # add_property_to_feature(feature_data[0], "DatasetSource", source) + # if verbose: + # print("adding transcript type " + str(type) + " to write list: " + str(feature_data)) + # new_transcript_list.append(feature_data) + # else: + # print("how did we get here?") + # + # # a gene or a transcript + # + return feature_data def load_gff3(self, organism, gff3, source=None, batch_size=1, test=False, diff --git a/apollo/util.py b/apollo/util.py index c4e171fa..173fad1c 100644 --- a/apollo/util.py +++ b/apollo/util.py @@ -95,6 +95,181 @@ def _tnType(feature): return 'exon' +def _yieldGeneData(features, disable_cds_recalculation=False, use_name=False): + f = features[0] + current = _yieldSubFeatureData(f, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) + sub_features = features[1:] + + if sub_features: + current['children'] = [] + for sf in sub_features: + if _tnType(sf) in coding_transcript_types + noncoding_transcript_types: + current['children'].append( + _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, + use_name=use_name)) + + # current = { + # 'location': { + # 'strand': f.strand, + # 'fmin': int(f.location.start), + # 'fmax': int(f.location.end), + # }, + # 'type': { + # 'name': _tnType(f), + # 'cv': { + # 'name': 'sequence', + # } + # }, + # } + # if disable_cds_recalculation is True: + # current['use_cds'] = 'true' + # + # if f.type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types + # + single_level_feature_types): + # current['name'] = f.qualifiers.get('Name', [f.id])[0] + # + # if use_name is True: + # current['use_name'] = True + # + # # if OGS: + # # TODO: handle comments + # # TODO: handle dbxrefs + # # TODO: handle attributes + # # TODO: handle aliases + # # TODO: handle description + # # TODO: handle GO, Gene Product, Provenance + return current + + +def _yieldSubFeatureData(f, disable_cds_recalculation=False, use_name=False): + current = { + 'location': { + 'strand': f.strand, + 'fmin': int(f.location.start), + 'fmax': int(f.location.end), + }, + 'type': { + 'name': _tnType(f), + 'cv': { + 'name': 'sequence', + } + }, + } + if disable_cds_recalculation is True: + current['use_cds'] = 'true' + + if f.type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types + + single_level_feature_types): + current['name'] = f.qualifiers.get('Name', [f.id])[0] + + if use_name is True: + current['use_name'] = True + + # if OGS: + # TODO: handle comments + # TODO: handle dbxrefs + # TODO: handle attributes + # TODO: handle aliases + # TODO: handle description + # TODO: handle GO, Gene Product, Provenance + return current + + +def _yieldCodingTranscriptData(features, disable_cds_recalculation=False, use_name=False): + f = features[0] + current = { + 'location': { + 'strand': f.strand, + 'fmin': int(f.location.start), + 'fmax': int(f.location.end), + }, + 'type': { + 'name': _tnType(f), + 'cv': { + 'name': 'sequence', + } + }, + } + subfeatures = features[1:] + if len(subfeatures) > 0: + current['children'] = [] + + for sf in subfeatures: + current['children'].append( + _yieldSubFeatureData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)) + + return current + + +# def _yieldNonCodingTranscriptData(features): +# pass + + +# def _yieldSingleLevelFeatureData(features): +# return _yieldSubFeatureData(features[0]) + + +def _yieldApolloData(features, use_name=False, disable_cds_recalculation=False): + print("yeilding apollo data") + print(features) + current_feature = features[0] + if _tnType(current_feature) in gene_types: + return _yieldGeneData(features) + elif _tnType(current_feature) in coding_transcript_types: + return _yieldCodingTranscriptData(features) + elif _tnType(current_feature) in noncoding_transcript_types: + return _yieldCodingTranscriptData(features) + # return _yieldNonCodingTranscriptData(features) + elif _tnType(current_feature) in single_level_feature_types: + # return _yieldSingleLevelFeatureData(features) + return _yieldSubFeatureData(features) + else: + print("nothing there") + return None + + # for f in features: + # + # if _tnType(f) in gene_types: + # current = { + # 'location': { + # 'strand': f.strand, + # 'fmin': int(f.location.start), + # 'fmax': int(f.location.end), + # }, + # 'type': { + # 'name': _tnType(f), + # 'cv': { + # 'name': 'sequence', + # } + # }, + # } + # elif _tnType(f) in coding_transcript_types: + # + # + # if disable_cds_recalculation is True: + # current['use_cds'] = 'true' + # + # if f.type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types + # + single_level_feature_types): + # current['name'] = f.qualifiers.get('Name', [f.id])[0] + # + # if use_name is True: + # current['use_name'] = True + # + # # if OGS: + # # TODO: handle comments + # # TODO: handle dbxrefs + # # TODO: handle attributes + # # TODO: handle aliases + # # TODO: handle description + # # TODO: handle GO, Gene Product, Provenance + # + # if hasattr(f, 'sub_features') and len(f.sub_features) > 0: + # current['children'] = [x for x in _yieldFeatData(f.sub_features)] + # + # yield current + + def _yieldFeatData(features, use_name=False, disable_cds_recalculation=False): print("yeilding feature data") print(features) @@ -161,7 +336,7 @@ def features_to_apollo_schema(features, use_name=False, disable_cds_recalculatio :return: """ compiled = [] - for x in _yieldFeatData(features, use_name, disable_cds_recalculation): + for x in _yieldApolloData(features, use_name, disable_cds_recalculation): compiled.append(x) return compiled diff --git a/test/annotations_test.py b/test/annotations_test.py index b3521764..884811bf 100644 --- a/test/annotations_test.py +++ b/test/annotations_test.py @@ -15,27 +15,26 @@ def test_features_to_apollo_schema(self): transcript_list = [] in_handle = open(path) for rec in GFF.parse(in_handle): - feature_data = util.features_to_apollo_schema(rec.features, feature_list, transcript_list) + # feature_data = util.features_to_apollo_schema(rec.features, feature_list, transcript_list) + feature_data = util._yieldApolloData(rec.features ) in_handle.close() + print("Z") print(str(feature_data)) - print(str(len(feature_data))) - - assert(len(feature_data)==1) + print("Y") + # print(str(len(feature_data))) + assert(feature_data['location']) + assert(len(feature_data['children'])==2) def test_create_mrna(self): - # org_info = self.waitOrgCreated('temp_org') - # assert org_info['commonName'] == 'temp_org' path = 'test-data/mrna-top.gff' with open(path) as file: print(file.read()) file.close() - # entries = GFF.parse(in_handle) - # print(entries.features.__sizeof__()) feature_list = [] transcript_list = [] in_handle = open(path) @@ -48,44 +47,4 @@ def test_create_mrna(self): assert (len(feature_list) == 0) assert (len(transcript_list) == 1) print(transcript_list) - assert(len(transcript_list[0]) == 1) - transcript = transcript_list[0] - assert(len(transcript['children']) == 2) - # assert(len(transcript_list[0][0]) == 1) - # for feature in rec.features: - # print ("A.1") - # print(entries) - # print("B") - # print(str(entries)) - # print("C") - # # assert(len(entries)==1) - # print("D") - # assert(len(entries.features)==3) - - # def setUp(self): - # # Make sure the organism is not already there - # temp_org_info = wa.organisms.show_organism('temp_org') - # if 'directory' in temp_org_info: - # wa.organisms.delete_organism(temp_org_info['id']) - # self.waitOrgDeleted('temp_org') - # - # with tempfile.NamedTemporaryFile(suffix='.tar.gz') as archive: - # with tarfile.open(archive.name, mode="w:gz") as tar: - # for file in glob.glob('test-data/dataset_1_files/data/'): - # tar.add(file, arcname=file.replace('test-data/dataset_1_files/data/', './')) - # wa.remote.add_organism('temp_org', archive) - # self.waitOrgCreated('temp_org') - # def tearDown(self): - # org_info = wa.organisms.show_organism('temp_org') - # - # if org_info and 'id' in org_info: - # wa.organisms.delete_organism(org_info['id']) - # - # self.waitOrgDeleted('temp_org') - # - # org_info = wa.organisms.show_organism('some_new_org_remote') - # - # if org_info and 'id' in org_info: - # wa.organisms.delete_organism(org_info['id']) - # self.waitOrgDeleted('some_new_org_remote') From 112131cef93ccc761999f7c59ad60b6b16c69395 Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Wed, 10 Jun 2020 20:07:05 -0700 Subject: [PATCH 06/21] cleaned stuff up --- apollo/util.py | 4 ---- test/annotations_test.py | 17 ++++------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/apollo/util.py b/apollo/util.py index 173fad1c..516c46e9 100644 --- a/apollo/util.py +++ b/apollo/util.py @@ -210,8 +210,6 @@ def _yieldCodingTranscriptData(features, disable_cds_recalculation=False, use_na def _yieldApolloData(features, use_name=False, disable_cds_recalculation=False): - print("yeilding apollo data") - print(features) current_feature = features[0] if _tnType(current_feature) in gene_types: return _yieldGeneData(features) @@ -271,8 +269,6 @@ def _yieldApolloData(features, use_name=False, disable_cds_recalculation=False): def _yieldFeatData(features, use_name=False, disable_cds_recalculation=False): - print("yeilding feature data") - print(features) for f in features: current = { 'location': { diff --git a/test/annotations_test.py b/test/annotations_test.py index 884811bf..0bb217f2 100644 --- a/test/annotations_test.py +++ b/test/annotations_test.py @@ -11,22 +11,16 @@ def test_features_to_apollo_schema(self): with open(path) as file: print(file.read()) file.close() - feature_list = [] - transcript_list = [] in_handle = open(path) + feature_data = None for rec in GFF.parse(in_handle): # feature_data = util.features_to_apollo_schema(rec.features, feature_list, transcript_list) - feature_data = util._yieldApolloData(rec.features ) + feature_data = util._yieldApolloData(rec.features) in_handle.close() - print("Z") - print(str(feature_data)) - print("Y") # print(str(len(feature_data))) - assert(feature_data['location']) - assert(len(feature_data['children'])==2) - - + assert (feature_data['location'] is not None) + assert (len(feature_data['children']) == 2) def test_create_mrna(self): path = 'test-data/mrna-top.gff' @@ -39,12 +33,9 @@ def test_create_mrna(self): transcript_list = [] in_handle = open(path) for rec in GFF.parse(in_handle): - print("A") wa.annotations._process_gff_entry(rec, feature_list, transcript_list) - print("B") in_handle.close() assert (len(feature_list) == 0) assert (len(transcript_list) == 1) print(transcript_list) - From c260a9eb246922a6a38b08cb2f778be7a67ab82f Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Wed, 10 Jun 2020 20:31:23 -0700 Subject: [PATCH 07/21] stubbed out tests --- apollo/annotations/__init__.py | 25 ++++++++--- apollo/util.py | 2 + test-data/ncrna-top.gff | 4 ++ test-data/pseudogene-top.gff | 5 +++ test-data/repeat-region-top.gff | 3 ++ test/annotations_test.py | 74 +++++++++++++++++++++++++++++++++ 6 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 test-data/ncrna-top.gff create mode 100644 test-data/pseudogene-top.gff create mode 100644 test-data/repeat-region-top.gff diff --git a/apollo/annotations/__init__.py b/apollo/annotations/__init__.py index 52f91f3b..559e61be 100644 --- a/apollo/annotations/__init__.py +++ b/apollo/annotations/__init__.py @@ -1263,18 +1263,33 @@ def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source= type = self._get_type(rec) subfeatures = self._get_subfeatures(rec) if type in util.gene_types: - if len(subfeatures) > 0: - feature_data = util._yieldApolloData(rec.features[1:]) + if subfeatures is not None and len(subfeatures) > 0: + feature_data = util._yieldApolloData(rec.features[1:], use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) new_transcript_list.append(feature_data) else: - feature_data = util._yieldApolloData(rec.features) + feature_data = util._yieldApolloData(rec.features, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) + new_feature_list.append(feature_data) + if type in util.pseudogenes_types: + if subfeatures is not None and len(subfeatures) > 0: + feature_data = util._yieldApolloData(rec.features[1:], use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) + new_feature_list.append(feature_data) + else: + feature_data = util._yieldApolloData(rec.features, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) new_feature_list.append(feature_data) if type in util.coding_transcript_types or type in util.noncoding_transcript_types: - feature_data = util._yieldApolloData(rec.features) + feature_data = util._yieldApolloData(rec.features, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) new_transcript_list.append(feature_data) if type in util.single_level_feature_types: - feature_data = util._yieldApolloData(rec.features) + feature_data = util._yieldApolloData(rec.features, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) new_feature_list.append(feature_data) + else: + print("unknown type " + type + " ") # type = self._get_type(rec) # subfeatures = self._get_subfeatures(rec) diff --git a/apollo/util.py b/apollo/util.py index 516c46e9..a01efe5f 100644 --- a/apollo/util.py +++ b/apollo/util.py @@ -213,6 +213,8 @@ def _yieldApolloData(features, use_name=False, disable_cds_recalculation=False): current_feature = features[0] if _tnType(current_feature) in gene_types: return _yieldGeneData(features) + if _tnType(current_feature) in pseudogenes_types: + return _yieldGeneData(features) elif _tnType(current_feature) in coding_transcript_types: return _yieldCodingTranscriptData(features) elif _tnType(current_feature) in noncoding_transcript_types: diff --git a/test-data/ncrna-top.gff b/test-data/ncrna-top.gff new file mode 100644 index 00000000..65f352d0 --- /dev/null +++ b/test-data/ncrna-top.gff @@ -0,0 +1,4 @@ +##gff-version 3 +##sequence-region Merlin 1 172788 +Merlin GeneMark.hmm ncRNA 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00 +Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin diff --git a/test-data/pseudogene-top.gff b/test-data/pseudogene-top.gff new file mode 100644 index 00000000..65c77292 --- /dev/null +++ b/test-data/pseudogene-top.gff @@ -0,0 +1,5 @@ +##gff-version 3 +##sequence-region Merlin 1 172788 +Merlin GeneMark.hmm pseudogene 2 691 -856.563659 + . ID=Merlin_1;seqid=Merlin +Merlin GeneMark.hmm transcript 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00 +Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin diff --git a/test-data/repeat-region-top.gff b/test-data/repeat-region-top.gff new file mode 100644 index 00000000..7e59d9c0 --- /dev/null +++ b/test-data/repeat-region-top.gff @@ -0,0 +1,3 @@ +##gff-version 3 +##sequence-region Merlin 1 172788 +Merlin GeneMark.hmm repeat_region 2 691 -856.563659 + . ID=Merlin_1;seqid=Merlin diff --git a/test/annotations_test.py b/test/annotations_test.py index 0bb217f2..8193d325 100644 --- a/test/annotations_test.py +++ b/test/annotations_test.py @@ -39,3 +39,77 @@ def test_create_mrna(self): assert (len(feature_list) == 0) assert (len(transcript_list) == 1) print(transcript_list) + + def test_create_gene(self): + path = 'test-data/gene-top.gff' + + with open(path) as file: + print(file.read()) + file.close() + + feature_list = [] + transcript_list = [] + in_handle = open(path) + for rec in GFF.parse(in_handle): + wa.annotations._process_gff_entry(rec, feature_list, transcript_list) + + in_handle.close() + print(feature_list) + print(transcript_list) + # assert (len(feature_list) == 0) + # assert (len(transcript_list) == 1) + + def test_create_pseudogene(self): + path = 'test-data/pseudogene-top.gff' + + with open(path) as file: + print(file.read()) + file.close() + + feature_list = [] + transcript_list = [] + in_handle = open(path) + for rec in GFF.parse(in_handle): + wa.annotations._process_gff_entry(rec, feature_list, transcript_list) + + in_handle.close() + assert (len(feature_list) == 1) + assert (len(transcript_list) == 0) + print(transcript_list) + + def test_create_ncRNA(self): + path = 'test-data/ncrna-top.gff' + + with open(path) as file: + print(file.read()) + file.close() + + feature_list = [] + transcript_list = [] + in_handle = open(path) + for rec in GFF.parse(in_handle): + wa.annotations._process_gff_entry(rec, feature_list, transcript_list) + + in_handle.close() + assert (len(feature_list) == 0) + assert (len(transcript_list) == 1) + print(transcript_list) + + def test_create_repeat_region(self): + path = 'test-data/repeat-region-top.gff' + + with open(path) as file: + print(file.read()) + file.close() + + feature_list = [] + transcript_list = [] + in_handle = open(path) + for rec in GFF.parse(in_handle): + wa.annotations._process_gff_entry(rec, feature_list, transcript_list) + + in_handle.close() + print(feature_list) + print(transcript_list) + assert (len(feature_list) == 1) + assert (len(transcript_list) == 0) From 2fcb383006d684639a0c714d0635dc14538db89f Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Wed, 10 Jun 2020 20:46:49 -0700 Subject: [PATCH 08/21] updated --- apollo/annotations/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apollo/annotations/__init__.py b/apollo/annotations/__init__.py index 559e61be..a8346812 100644 --- a/apollo/annotations/__init__.py +++ b/apollo/annotations/__init__.py @@ -1348,7 +1348,9 @@ def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source= # # # a gene or a transcript # - return feature_data + return_object = {} + return_object['features'] = feature_data + return return_object def load_gff3(self, organism, gff3, source=None, batch_size=1, test=False, From cf971b4834ac88b660dc801be7ca22b39fd07761 Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Thu, 11 Jun 2020 08:08:37 -0700 Subject: [PATCH 09/21] more changes --- apollo/annotations/__init__.py | 18 ++-- test-data/gene-top.gff | 16 ++-- test/annotations_test.py | 59 +++++++++++- test/io_test.py | 160 ++++++++++++++++++++------------- 4 files changed, 176 insertions(+), 77 deletions(-) diff --git a/apollo/annotations/__init__.py b/apollo/annotations/__init__.py index a8346812..22fdb5d4 100644 --- a/apollo/annotations/__init__.py +++ b/apollo/annotations/__init__.py @@ -1261,17 +1261,23 @@ def _get_subfeature_type(self, rec): def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source=None, disable_cds_recalculation=False, use_name=False, verbose=False): type = self._get_type(rec) + print("type " + str(type)) subfeatures = self._get_subfeatures(rec) if type in util.gene_types: + print("is gene type") if subfeatures is not None and len(subfeatures) > 0: - feature_data = util._yieldApolloData(rec.features[1:], use_name=use_name, + print("has sub features") + feature_data = util._yieldApolloData(subfeatures, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) + print("output feature data" + str(feature_data)) new_transcript_list.append(feature_data) else: + print("NO sub features, just adding directly") feature_data = util._yieldApolloData(rec.features, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) + print("output feature data" + str(feature_data)) new_feature_list.append(feature_data) - if type in util.pseudogenes_types: + elif type in util.pseudogenes_types: if subfeatures is not None and len(subfeatures) > 0: feature_data = util._yieldApolloData(rec.features[1:], use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) @@ -1280,11 +1286,11 @@ def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source= feature_data = util._yieldApolloData(rec.features, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) new_feature_list.append(feature_data) - if type in util.coding_transcript_types or type in util.noncoding_transcript_types: + elif type in util.coding_transcript_types or type in util.noncoding_transcript_types: feature_data = util._yieldApolloData(rec.features, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) new_transcript_list.append(feature_data) - if type in util.single_level_feature_types: + elif type in util.single_level_feature_types: feature_data = util._yieldApolloData(rec.features, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) new_feature_list.append(feature_data) @@ -1349,7 +1355,7 @@ def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source= # # a gene or a transcript # return_object = {} - return_object['features'] = feature_data + return_object['features'] = [feature_data] return return_object def load_gff3(self, organism, gff3, source=None, batch_size=1, @@ -1498,6 +1504,8 @@ def load_gff3(self, organism, gff3, source=None, batch_size=1, # sys.stdout.flush() sys.stdout.flush() + print("features to write" + new_features_list) + print("transcripts to write" + new_transcripts_list) self._write_features(new_features_list, test, verbose, timing, FeatureType.FEATURE) self._write_features(new_transcripts_list, test, verbose, timing, FeatureType.TRANSCRIPT) sys.stdout.write("\nfinished loading\n") diff --git a/test-data/gene-top.gff b/test-data/gene-top.gff index 44a8a50c..e991df12 100644 --- a/test-data/gene-top.gff +++ b/test-data/gene-top.gff @@ -1,6 +1,10 @@ -##gff-version 3 -##sequence-region Merlin 1 172788 -Merlin GeneMark.hmm gene 2 691 -856.563659 + . ID=Merlin_1;seqid=Merlin -Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00 -Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin -Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_1_CDS;Parent=Merlin_1_exon;seqid=Merlin +##gff-version 3 +##sequence-region Merlin 1 172788 +ctg123 example gene 1050 9000 . + . ID=EDEN;Name=EDEN;Note=protein kinase +ctg123 example mRNA 1050 9000 . + . ID=EDEN.1;Parent=EDEN;Name=EDEN.1;Index=1 +ctg123 example five_prime_UTR 1050 1200 . + . Parent=EDEN.1 +ctg123 example CDS 1201 1500 . + 0 Parent=EDEN.1 +ctg123 example CDS 3000 3902 . + 0 Parent=EDEN.1 +ctg123 example CDS 5000 5500 . + 0 Parent=EDEN.1 +ctg123 example CDS 7000 7608 . + 0 Parent=EDEN.1 +ctg123 example three_prime_UTR 7609 9000 . + . Parent=EDEN.1 diff --git a/test/annotations_test.py b/test/annotations_test.py index 8193d325..e1c7aa62 100644 --- a/test/annotations_test.py +++ b/test/annotations_test.py @@ -1,12 +1,23 @@ from BCBio import GFF +from BCBio.GFF import GFFExaminer +# from gffutils import inspect + from . import ApolloTestCase, wa from apollo import util +def parse(path): + in_handle = open(path) + for rec in GFF.parse(in_handle): + yield rec + class AnnotationsTest(ApolloTestCase): - def test_features_to_apollo_schema(self): + def test_inclusion(self): + assert ("gene" in util.gene_types) + + def test_features_to_apollo_schema_mrna(self): path = 'test-data/mrna-top.gff' with open(path) as file: print(file.read()) @@ -18,7 +29,51 @@ def test_features_to_apollo_schema(self): feature_data = util._yieldApolloData(rec.features) in_handle.close() - # print(str(len(feature_data))) + print(str(feature_data)) + assert (feature_data['location'] is not None) + assert (len(feature_data['children']) == 2) + + def test_features_to_apollo_schema_gene(self): + path = 'test-data/gene-top.gff' + print("inspecting") + output = parse(path) + print(str(output)) + for o in output: + print("AAA") + print(str(o)) + print("BBB") + print("inspected") + + with open(path) as file: + print(file.read()) + file.close() + in_handle = open(path) + feature_data = None + examiner = GFFExaminer() + print(examiner.parent_child_map(in_handle)) + in_handle.close() + in_handle = open(path) + new_feature_list = [] + new_transcript_list = [] + for rec in GFF.parse(in_handle): + print(str(rec)) + for f in rec.features: + print("feature ===== start") + print(f) + print("feature ===== end") + feature_data = wa.annotations._process_gff_entry(rec, new_feature_list=new_feature_list, + new_transcript_list=new_transcript_list) + print("feature list " + str(new_feature_list)) + print("transcript list " + str(new_transcript_list)) + print("feature data" + str(feature_data)) + # assert (subfeatures is not None and len(subfeatures) > 0) + # # feature_data = util.features_to_apollo_schema(rec.features, feature_list, transcript_list) + # feature_data = util._yieldApolloData(rec.features) + + in_handle.close() + print(str(feature_data)) + print("final feature list " + str(new_feature_list)) + print("final transcript list " + str(new_transcript_list)) assert (feature_data['location'] is not None) assert (len(feature_data['children']) == 2) diff --git a/test/io_test.py b/test/io_test.py index 581aeb11..e85a7f76 100644 --- a/test/io_test.py +++ b/test/io_test.py @@ -1,6 +1,7 @@ import re from . import ApolloTestCase, wa +import time class IoTest(ApolloTestCase): @@ -24,67 +25,98 @@ def test_export_gff3(self): assert 'Merlin\t.\tnon_canonical_three_prime_splice_site\t4297\t4297\t.\t-\t.' in gff_content assert 'Merlin\t.\tnon_canonical_five_prime_splice_site\t4364\t4364\t.\t-\t.' in gff_content - def test_export_vcf(self): - - org = wa.organisms.show_organism('test_organism') - - uuid_vcf = wa.io.write_downloadable(org['commonName'], 'VCF') - if 'error' in uuid_vcf or 'uuid' not in uuid_vcf: - raise Exception("Apollo failed to prepare the VCF file for download: %s" % uuid_vcf) - - vcf_content = wa.io.download(uuid_vcf['uuid'], output_format="text") - assert '##fileformat=VCFv4.2' in vcf_content - assert '##fileDate=20200608' in vcf_content - assert '##source=.' in vcf_content - assert '#CHROM POS ID REF ALT QUAL FILTER INFO' in vcf_content - - def test_export_fa_cds(self): - - org = wa.organisms.show_organism('test_organism') - - uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cds') - if 'error' in uuid_fa or 'uuid' not in uuid_fa: - raise Exception("Apollo failed to prepare the cds FASTA file for download: %s" % uuid_fa) - - fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") - assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content - assert 'CACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGAAAGTCTGGTGTATCGAAATA' in fa_content - assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content - assert 'ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT' in fa_content - assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cds]' in fa_content - assert '(mRNA) 108 residues [Merlin:1067-2011 - strand] [cds]' in fa_content - assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds]' in fa_content - - def test_export_fa_cdna(self): - - org = wa.organisms.show_organism('test_organism') - - uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cdna') - if 'error' in uuid_fa or 'uuid' not in uuid_fa: - raise Exception("Apollo failed to prepare the cdna FASTA file for download: %s" % uuid_fa) - - fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") - assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content - assert 'ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG' in fa_content - assert 'ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT' in fa_content - assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content - assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cdna]' in fa_content - assert '(mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna]' in fa_content - assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna]' in fa_content - - def test_export_fa_peptide(self): - - org = wa.organisms.show_organism('test_organism') - - uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='peptide') - if 'error' in uuid_fa or 'uuid' not in uuid_fa: - raise Exception("Apollo failed to prepare the peptide FASTA file for download: %s" % uuid_fa) - - fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") - assert 'RLDKGTLLYRGQKLDLPTFEHNAENKLFYFRNYVSTSLKPLIFGEFGRMFMALDDDTTIY' in fa_content - assert 'HLNYHCRYSTAGKKVWCIEISYWSNEQSCCSVFIR' in fa_content - assert 'MSIKVRELDDKTDALISGVKTSAGQSSQSAKIKSTITAQYPSERSAGNDTSGSLRVHDLY' in fa_content - assert 'MKSENMSTMRRRKVIADSKGERDAASTASDQVDSLELIGLKLDDVQSANELVAEVIEEKG' in fa_content - assert '(mRNA) 229 residues [Merlin:2-691 + strand] [peptide]' in fa_content - assert '(mRNA) 35 residues [Merlin:1067-2011 - strand] [peptide]' in fa_content - assert '(mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide]' in fa_content + # def test_export_vcf(self): + # + # org = wa.organisms.show_organism('test_organism') + # + # uuid_vcf = wa.io.write_downloadable(org['commonName'], 'VCF') + # if 'error' in uuid_vcf or 'uuid' not in uuid_vcf: + # raise Exception("Apollo failed to prepare the VCF file for download: %s" % uuid_vcf) + # + # vcf_content = wa.io.download(uuid_vcf['uuid'], output_format="text") + # assert '##fileformat=VCFv4.2' in vcf_content + # assert '##fileDate=20200608' in vcf_content + # assert '##source=.' in vcf_content + # assert '#CHROM POS ID REF ALT QUAL FILTER INFO' in vcf_content + # + # def test_export_fa_cds(self): + # + # org = wa.organisms.show_organism('test_organism') + # + # uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cds') + # if 'error' in uuid_fa or 'uuid' not in uuid_fa: + # raise Exception("Apollo failed to prepare the cds FASTA file for download: %s" % uuid_fa) + # + # fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") + # assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content + # assert 'CACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGAAAGTCTGGTGTATCGAAATA' in fa_content + # assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content + # assert 'ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT' in fa_content + # assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cds]' in fa_content + # assert '(mRNA) 108 residues [Merlin:1067-2011 - strand] [cds]' in fa_content + # assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds]' in fa_content + # + # def test_export_fa_cdna(self): + # + # org = wa.organisms.show_organism('test_organism') + # + # uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cdna') + # if 'error' in uuid_fa or 'uuid' not in uuid_fa: + # raise Exception("Apollo failed to prepare the cdna FASTA file for download: %s" % uuid_fa) + # + # fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") + # assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content + # assert 'ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG' in fa_content + # assert 'ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT' in fa_content + # assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content + # assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cdna]' in fa_content + # assert '(mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna]' in fa_content + # assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna]' in fa_content + # + # def test_export_fa_peptide(self): + # + # org = wa.organisms.show_organism('test_organism') + # + # uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='peptide') + # if 'error' in uuid_fa or 'uuid' not in uuid_fa: + # raise Exception("Apollo failed to prepare the peptide FASTA file for download: %s" % uuid_fa) + # + # fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") + # assert 'RLDKGTLLYRGQKLDLPTFEHNAENKLFYFRNYVSTSLKPLIFGEFGRMFMALDDDTTIY' in fa_content + # assert 'HLNYHCRYSTAGKKVWCIEISYWSNEQSCCSVFIR' in fa_content + # assert 'MSIKVRELDDKTDALISGVKTSAGQSSQSAKIKSTITAQYPSERSAGNDTSGSLRVHDLY' in fa_content + # assert 'MKSENMSTMRRRKVIADSKGERDAASTASDQVDSLELIGLKLDDVQSANELVAEVIEEKG' in fa_content + # assert '(mRNA) 229 residues [Merlin:2-691 + strand] [peptide]' in fa_content + # assert '(mRNA) 35 residues [Merlin:1067-2011 - strand] [peptide]' in fa_content + # assert '(mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide]' in fa_content + + def setUp(self): + # Make sure the organism is not already there + temp_org_info = wa.organisms.show_organism('temp_org') + if 'directory' in temp_org_info: + wa.organisms.delete_organism(temp_org_info['id']) + self.waitOrgDeleted('temp_org') + + org_info = wa.organisms.show_organism('alt_org') + if 'directory' not in org_info: + # Should not happen, but let's be tolerant... + # Error received when it fails: {'error': 'No row with the given identifier exists: [org.bbop.apollo.Organism#1154]'} + time.sleep(1) + org_info = wa.organisms.show_organism('alt_org') + + wa.organisms.add_organism('temp_org', org_info['directory']) + self.waitOrgCreated('temp_org') + + def tearDown(self): + org_info = wa.organisms.show_organism('temp_org') + + if org_info and 'id' in org_info: + wa.organisms.delete_organism(org_info['id']) + + self.waitOrgDeleted('temp_org') + + org_info = wa.organisms.show_organism('some_new_org') + + if org_info and 'id' in org_info: + wa.organisms.delete_organism(org_info['id']) + self.waitOrgDeleted('some_new_org') From 26cae6e5b9382bc443209d372f8b6d99209fbaf3 Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Thu, 11 Jun 2020 08:18:14 -0700 Subject: [PATCH 10/21] updated --- test-data/gene-top.gff | 11 +++-------- test/annotations_test.py | 4 +++- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/test-data/gene-top.gff b/test-data/gene-top.gff index e991df12..2e9f160a 100644 --- a/test-data/gene-top.gff +++ b/test-data/gene-top.gff @@ -1,10 +1,5 @@ ##gff-version 3 ##sequence-region Merlin 1 172788 -ctg123 example gene 1050 9000 . + . ID=EDEN;Name=EDEN;Note=protein kinase -ctg123 example mRNA 1050 9000 . + . ID=EDEN.1;Parent=EDEN;Name=EDEN.1;Index=1 -ctg123 example five_prime_UTR 1050 1200 . + . Parent=EDEN.1 -ctg123 example CDS 1201 1500 . + 0 Parent=EDEN.1 -ctg123 example CDS 3000 3902 . + 0 Parent=EDEN.1 -ctg123 example CDS 5000 5500 . + 0 Parent=EDEN.1 -ctg123 example CDS 7000 7608 . + 0 Parent=EDEN.1 -ctg123 example three_prime_UTR 7609 9000 . + . Parent=EDEN.1 +Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00 +Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin +Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_1_CDS;Parent=Merlin_1_mRNA;seqid=Merlin diff --git a/test/annotations_test.py b/test/annotations_test.py index e1c7aa62..451227d4 100644 --- a/test/annotations_test.py +++ b/test/annotations_test.py @@ -9,7 +9,9 @@ def parse(path): in_handle = open(path) for rec in GFF.parse(in_handle): - yield rec + print("rec -> "+str(rec)+"\n") + for f in rec.features: + print("feature ->" + str(f)+"\n") class AnnotationsTest(ApolloTestCase): From b6a8ae8884cbe21090c3bfeef69614008e9e85ef Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Thu, 11 Jun 2020 08:30:13 -0700 Subject: [PATCH 11/21] updated --- requirements.txt | 4 ++-- test/annotations_test.py | 16 ++-------------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/requirements.txt b/requirements.txt index cef6a774..ed86b4de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ requests -biopython +biopython==1.77 cachetools<4 click>=6.7 wrapt pyyaml decorator -bcbio-gff +bcbio-gff==0.6.6 pytest-timeit diff --git a/test/annotations_test.py b/test/annotations_test.py index 451227d4..6d364899 100644 --- a/test/annotations_test.py +++ b/test/annotations_test.py @@ -6,12 +6,6 @@ from . import ApolloTestCase, wa from apollo import util -def parse(path): - in_handle = open(path) - for rec in GFF.parse(in_handle): - print("rec -> "+str(rec)+"\n") - for f in rec.features: - print("feature ->" + str(f)+"\n") class AnnotationsTest(ApolloTestCase): @@ -37,14 +31,6 @@ def test_features_to_apollo_schema_mrna(self): def test_features_to_apollo_schema_gene(self): path = 'test-data/gene-top.gff' - print("inspecting") - output = parse(path) - print(str(output)) - for o in output: - print("AAA") - print(str(o)) - print("BBB") - print("inspected") with open(path) as file: print(file.read()) @@ -59,6 +45,8 @@ def test_features_to_apollo_schema_gene(self): new_transcript_list = [] for rec in GFF.parse(in_handle): print(str(rec)) + print(str(rec.features)) + print(str(rec.sub_features)) for f in rec.features: print("feature ===== start") print(f) From 9290612b3f0263009fcfcb6763082fd5a19c00ef Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Thu, 11 Jun 2020 10:22:25 -0700 Subject: [PATCH 12/21] added gene back to top --- test-data/gene-top.gff | 1 + test/annotations_test.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test-data/gene-top.gff b/test-data/gene-top.gff index 2e9f160a..5c004cef 100644 --- a/test-data/gene-top.gff +++ b/test-data/gene-top.gff @@ -1,5 +1,6 @@ ##gff-version 3 ##sequence-region Merlin 1 172788 +Merlin GeneMark.hmm gene 2 691 -856.563659 + . ID=Merlin_1;seqid=Merlin Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00 Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_1_CDS;Parent=Merlin_1_mRNA;seqid=Merlin diff --git a/test/annotations_test.py b/test/annotations_test.py index 6d364899..497129f9 100644 --- a/test/annotations_test.py +++ b/test/annotations_test.py @@ -46,10 +46,11 @@ def test_features_to_apollo_schema_gene(self): for rec in GFF.parse(in_handle): print(str(rec)) print(str(rec.features)) - print(str(rec.sub_features)) + # print(str(rec.sub_features)) for f in rec.features: print("feature ===== start") print(f) + print(f.sub_features) print("feature ===== end") feature_data = wa.annotations._process_gff_entry(rec, new_feature_list=new_feature_list, new_transcript_list=new_transcript_list) From 3459d90b3317a51c6a9d0b75a67b5e301bcde0ec Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Thu, 11 Jun 2020 13:17:38 -0700 Subject: [PATCH 13/21] fixed the tests so a bit closer to what is needed --- apollo/annotations/__init__.py | 139 ++++++++++----------------------- apollo/util.py | 79 +++++++++++-------- test-data/mrna-top.gff | 4 +- test-data/ncrna-top.gff | 2 +- test/annotations_test.py | 28 +++---- 5 files changed, 102 insertions(+), 150 deletions(-) diff --git a/apollo/annotations/__init__.py b/apollo/annotations/__init__.py index 22fdb5d4..7daecd7f 100644 --- a/apollo/annotations/__init__.py +++ b/apollo/annotations/__init__.py @@ -1246,12 +1246,6 @@ def _write_features(self, new_features_list=None, test=False, verbose=False, tim if verbose: print("empty list, no more features to write") - def _get_subfeatures(self, rec): - if len(rec.features) > 1: - return rec.features[1:] - else: - return None - def _get_type(self, rec): return rec.features[0].type @@ -1262,100 +1256,53 @@ def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source= disable_cds_recalculation=False, use_name=False, verbose=False): type = self._get_type(rec) print("type " + str(type)) - subfeatures = self._get_subfeatures(rec) - if type in util.gene_types: - print("is gene type") - if subfeatures is not None and len(subfeatures) > 0: - print("has sub features") - feature_data = util._yieldApolloData(subfeatures, use_name=use_name, - disable_cds_recalculation=disable_cds_recalculation) - print("output feature data" + str(feature_data)) + all_features = [] + for feature in rec.features: + sub_features = feature.sub_features + feature_data = None + if type in util.gene_types: + print("is gene type") + if sub_features is not None and len(sub_features) > 0: + print("has sub features") + feature_data = util.yieldApolloData(feature, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) + print("output feature data" + str(feature_data)) + new_transcript_list.append(feature_data) + else: + print("NO sub features, just adding directly") + feature_data = util.yieldApolloData(feature, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) + print("output feature data" + str(feature_data)) + new_feature_list.append(feature_data) + elif type in util.pseudogenes_types: + if sub_features is not None and len(sub_features) > 0: + feature_data = util.yieldApolloData(feature, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) + new_feature_list.append(feature_data) + else: + feature_data = util.yieldApolloData(feature, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) + new_feature_list.append(feature_data) + elif type in util.coding_transcript_types: + feature_data = util.yieldApolloData(feature, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) new_transcript_list.append(feature_data) - else: - print("NO sub features, just adding directly") - feature_data = util._yieldApolloData(rec.features, use_name=use_name, - disable_cds_recalculation=disable_cds_recalculation) - print("output feature data" + str(feature_data)) + elif type in util.noncoding_transcript_types: + print("a non-coding transcript\n") + feature_data = util.yieldApolloData(feature, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) new_feature_list.append(feature_data) - elif type in util.pseudogenes_types: - if subfeatures is not None and len(subfeatures) > 0: - feature_data = util._yieldApolloData(rec.features[1:], use_name=use_name, - disable_cds_recalculation=disable_cds_recalculation) + print("new feature list \n" + str(new_feature_list)) + elif type in util.single_level_feature_types: + feature_data = util.yieldApolloData(feature, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) new_feature_list.append(feature_data) else: - feature_data = util._yieldApolloData(rec.features, use_name=use_name, - disable_cds_recalculation=disable_cds_recalculation) - new_feature_list.append(feature_data) - elif type in util.coding_transcript_types or type in util.noncoding_transcript_types: - feature_data = util._yieldApolloData(rec.features, use_name=use_name, - disable_cds_recalculation=disable_cds_recalculation) - new_transcript_list.append(feature_data) - elif type in util.single_level_feature_types: - feature_data = util._yieldApolloData(rec.features, use_name=use_name, - disable_cds_recalculation=disable_cds_recalculation) - new_feature_list.append(feature_data) - else: - print("unknown type " + type + " ") - - # type = self._get_type(rec) - # subfeatures = self._get_subfeatures(rec) - # if type not in util.gene_types and type not in util.coding_transcript_types: - # print("AAAAAA") - # if subfeatures is not None: - # print("BBBBBB") - # # process noncoding transcripts - # for subfeature in subfeatures: - # self._process_gff_entry(subfeature, new_feature_list, new_transcript_list, source, - # disable_cds_recalculation, use_name) - # else: - # print("CCCCCC") - # # if its not a gene or a transcript type then process as a simple singleton - # feature_data = features_to_feature_schema([rec.features[0]], disable_cds_recalculation, use_name) - # if source is not None: - # add_property_to_feature(feature_data[0], "DatasetSource", source) - # if verbose: - # print("adding " + str(type) + " to write list: " + str(feature_data[0])) - # new_feature_list.append(feature_data[0]) - # else: - # print("DDDDDD") - # if type in util.gene_types: - # print("EEEEEE") - # transcript_type = self._get_subfeature_type(rec) - # if transcript_type in util.coding_transcript_types: - # print("FFFFFF") - # feature_data = features_to_feature_schema(subfeatures, use_name=use_name, - # disable_cds_recalculation=disable_cds_recalculation) - # if source is not None: - # add_property_to_feature(feature_data[0], "DatasetSource", source) - # new_transcript_list.append(feature_data) - # if verbose: - # print("adding gene with MRNA type " + str(type) + " to write list: " + str(feature_data)) - # else: - # print("GGGGGG") - # feature_data = features_to_feature_schema(rec.features, use_name=use_name, - # disable_cds_recalculation=disable_cds_recalculation) - # if verbose: - # print("adding gene with noncoding transcript type " + str(type) + " to write list: " + str( - # feature_data)) - # if source is not None: - # add_property_to_feature(feature_data[0], "DatasetSource", source) - # new_feature_list.append(feature_data) - # # self._process_gene(rec.features) - # elif type in util.coding_transcript_types: - # print("HHHHHH") - # feature_data = features_to_apollo_schema(rec.features, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) - # if source is not None: - # add_property_to_feature(feature_data[0], "DatasetSource", source) - # if verbose: - # print("adding transcript type " + str(type) + " to write list: " + str(feature_data)) - # new_transcript_list.append(feature_data) - # else: - # print("how did we get here?") - # - # # a gene or a transcript - # - return_object = {} - return_object['features'] = [feature_data] + print("unknown type " + type + " ") + if feature_data is not None: + all_features.append(feature_data) + + return_object = {'features': all_features} return return_object def load_gff3(self, organism, gff3, source=None, batch_size=1, diff --git a/apollo/util.py b/apollo/util.py index a01efe5f..3d0caeb3 100644 --- a/apollo/util.py +++ b/apollo/util.py @@ -95,18 +95,23 @@ def _tnType(feature): return 'exon' -def _yieldGeneData(features, disable_cds_recalculation=False, use_name=False): - f = features[0] - current = _yieldSubFeatureData(f, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) - sub_features = features[1:] +def _yieldGeneData(gene, disable_cds_recalculation=False, use_name=False): + current = _yieldSubFeatureData(gene, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) + sub_features = gene.sub_features + print("yielding gene data current " + str(current)) if sub_features: current['children'] = [] for sf in sub_features: - if _tnType(sf) in coding_transcript_types + noncoding_transcript_types: - current['children'].append( - _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, - use_name=use_name)) + if _tnType(sf) in coding_transcript_types: + child_data = _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, + use_name=use_name) + print("child data" + str(child_data)) + current['children'].append(child_data) + if _tnType(sf) in noncoding_transcript_types: + child_data = _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, + use_name=use_name) + current['children'].append(child_data) # current = { # 'location': { @@ -175,8 +180,7 @@ def _yieldSubFeatureData(f, disable_cds_recalculation=False, use_name=False): return current -def _yieldCodingTranscriptData(features, disable_cds_recalculation=False, use_name=False): - f = features[0] +def _yieldCodingTranscriptData(f, disable_cds_recalculation=False, use_name=False): current = { 'location': { 'strand': f.strand, @@ -190,17 +194,21 @@ def _yieldCodingTranscriptData(features, disable_cds_recalculation=False, use_na } }, } - subfeatures = features[1:] - if len(subfeatures) > 0: + if len(f.sub_features) > 0: current['children'] = [] - - for sf in subfeatures: - current['children'].append( - _yieldSubFeatureData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)) + for sf in f.sub_features: + current['children'].append( + _yieldSubFeatureData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)) return current +def print_file(path): + with open(path) as file: + print(file.read()) + file.close() + + # def _yieldNonCodingTranscriptData(features): # pass @@ -209,23 +217,23 @@ def _yieldCodingTranscriptData(features, disable_cds_recalculation=False, use_na # return _yieldSubFeatureData(features[0]) -def _yieldApolloData(features, use_name=False, disable_cds_recalculation=False): - current_feature = features[0] - if _tnType(current_feature) in gene_types: - return _yieldGeneData(features) - if _tnType(current_feature) in pseudogenes_types: - return _yieldGeneData(features) - elif _tnType(current_feature) in coding_transcript_types: - return _yieldCodingTranscriptData(features) - elif _tnType(current_feature) in noncoding_transcript_types: - return _yieldCodingTranscriptData(features) - # return _yieldNonCodingTranscriptData(features) - elif _tnType(current_feature) in single_level_feature_types: - # return _yieldSingleLevelFeatureData(features) - return _yieldSubFeatureData(features) +def yieldApolloData(feature, use_name=False, disable_cds_recalculation=False): + feature_type = _tnType(feature) + if feature_type in gene_types: + return _yieldGeneData(feature) + if feature_type in pseudogenes_types: + return _yieldGeneData(feature) + elif feature_type in coding_transcript_types: + return _yieldCodingTranscriptData(feature) + elif feature_type in noncoding_transcript_types: + return _yieldCodingTranscriptData(feature) + # return _yieldNonCodingTranscriptData(current_feature) + elif feature_type in single_level_feature_types: + # return _yieldSingleLevelFeatureData(current_feature) + return _yieldSubFeatureData(feature) else: - print("nothing there") - return None + print("other type: " + feature_type) + return _yieldSubFeatureData(feature) # for f in features: # @@ -334,8 +342,11 @@ def features_to_apollo_schema(features, use_name=False, disable_cds_recalculatio :return: """ compiled = [] - for x in _yieldApolloData(features, use_name, disable_cds_recalculation): - compiled.append(x) + # for x in _yieldApolloData(features, use_name, disable_cds_recalculation): + # compiled.append(x) + # return compiled + for f in features: + compiled.append(yieldApolloData(f, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation)) return compiled diff --git a/test-data/mrna-top.gff b/test-data/mrna-top.gff index 41b4081f..5933bc2a 100644 --- a/test-data/mrna-top.gff +++ b/test-data/mrna-top.gff @@ -1,5 +1,5 @@ ##gff-version 3 ##sequence-region Merlin 1 172788 -Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00 +Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_1_mRNA;seqid=Merlin;color=#00ff00 Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin -Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_1_CDS;Parent=Merlin_1_exon;seqid=Merlin +Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_1_CDS;Parent=Merlin_1_mRNA;seqid=Merlin diff --git a/test-data/ncrna-top.gff b/test-data/ncrna-top.gff index 65f352d0..b7c52e08 100644 --- a/test-data/ncrna-top.gff +++ b/test-data/ncrna-top.gff @@ -1,4 +1,4 @@ ##gff-version 3 ##sequence-region Merlin 1 172788 -Merlin GeneMark.hmm ncRNA 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00 +Merlin GeneMark.hmm ncRNA 2 691 . + . ID=Merlin_1_mRNA;seqid=Merlin;color=#00ff00 Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin diff --git a/test/annotations_test.py b/test/annotations_test.py index 497129f9..9c2afb42 100644 --- a/test/annotations_test.py +++ b/test/annotations_test.py @@ -22,10 +22,11 @@ def test_features_to_apollo_schema_mrna(self): feature_data = None for rec in GFF.parse(in_handle): # feature_data = util.features_to_apollo_schema(rec.features, feature_list, transcript_list) - feature_data = util._yieldApolloData(rec.features) + # feature_data = util.features_to_apollo_schema(rec.features) + for f in rec.features: + feature_data = util.yieldApolloData(f) in_handle.close() - print(str(feature_data)) assert (feature_data['location'] is not None) assert (len(feature_data['children']) == 2) @@ -65,8 +66,8 @@ def test_features_to_apollo_schema_gene(self): print(str(feature_data)) print("final feature list " + str(new_feature_list)) print("final transcript list " + str(new_transcript_list)) - assert (feature_data['location'] is not None) - assert (len(feature_data['children']) == 2) + # assert (feature_data['location'] is not None) + # assert (len(feature_data['children']) == 2) def test_create_mrna(self): path = 'test-data/mrna-top.gff' @@ -123,13 +124,11 @@ def test_create_pseudogene(self): assert (len(transcript_list) == 0) print(transcript_list) - def test_create_ncRNA(self): - path = 'test-data/ncrna-top.gff' - with open(path) as file: - print(file.read()) - file.close() + def test_create_ncRNA(self): + path = 'test-data/ncrna-top.gff' + util.print_file(path) feature_list = [] transcript_list = [] in_handle = open(path) @@ -137,17 +136,12 @@ def test_create_ncRNA(self): wa.annotations._process_gff_entry(rec, feature_list, transcript_list) in_handle.close() - assert (len(feature_list) == 0) - assert (len(transcript_list) == 1) - print(transcript_list) + assert (len(feature_list) == 1) + assert (len(transcript_list) == 0) def test_create_repeat_region(self): path = 'test-data/repeat-region-top.gff' - - with open(path) as file: - print(file.read()) - file.close() - + util.print_file(path) feature_list = [] transcript_list = [] in_handle = open(path) From ece880b4bc3ca5b9bd946c253511fba7d78aca3f Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Tue, 16 Jun 2020 11:49:50 -0700 Subject: [PATCH 14/21] fix two known errors --- apollo/annotations/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apollo/annotations/__init__.py b/apollo/annotations/__init__.py index 7daecd7f..d57fc002 100644 --- a/apollo/annotations/__init__.py +++ b/apollo/annotations/__init__.py @@ -1451,8 +1451,8 @@ def load_gff3(self, organism, gff3, source=None, batch_size=1, # sys.stdout.flush() sys.stdout.flush() - print("features to write" + new_features_list) - print("transcripts to write" + new_transcripts_list) + # print("features to write" + new_features_list) + # print("transcripts to write" + new_transcripts_list) self._write_features(new_features_list, test, verbose, timing, FeatureType.FEATURE) self._write_features(new_transcripts_list, test, verbose, timing, FeatureType.TRANSCRIPT) sys.stdout.write("\nfinished loading\n") From f3982d99784cfa9a101211c29cd3ab8a142fbf8e Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Thu, 18 Jun 2020 12:08:08 -0700 Subject: [PATCH 15/21] updated requirements --- .gitignore | 2 +- requirements.txt | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index c1272733..4b8550fe 100644 --- a/.gitignore +++ b/.gitignore @@ -41,7 +41,7 @@ cover docs/_build # Python virtualenv -.venv +.venv* # test harness test_harness.py diff --git a/requirements.txt b/requirements.txt index ed86b4de..a293cf3e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,12 @@ -requests +requests~=2.23.0 biopython==1.77 cachetools<4 click>=6.7 -wrapt -pyyaml -decorator +wrapt~=1.12.1 +pyyaml~=5.3.1 +decorator~=4.4.2 bcbio-gff==0.6.6 pytest-timeit + +apollo~=4.2.2 +setuptools~=47.1.1 \ No newline at end of file From 3c63bc0706517f2672c6e8d5848687c5546f240a Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Thu, 18 Jun 2020 14:13:19 -0700 Subject: [PATCH 16/21] fixed handling of transcripts --- apollo/util.py | 11 +++++++---- requirements.txt | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/apollo/util.py b/apollo/util.py index 3d0caeb3..5012c656 100644 --- a/apollo/util.py +++ b/apollo/util.py @@ -104,14 +104,17 @@ def _yieldGeneData(gene, disable_cds_recalculation=False, use_name=False): current['children'] = [] for sf in sub_features: if _tnType(sf) in coding_transcript_types: - child_data = _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, - use_name=use_name) + # child_data = _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) + child_data = _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) print("child data" + str(child_data)) - current['children'].append(child_data) + return child_data + # current['children'].append(child_data) if _tnType(sf) in noncoding_transcript_types: child_data = _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) - current['children'].append(child_data) + # current['children'].append(child_data) + print("child data" + str(child_data)) + return child_data # current = { # 'location': { diff --git a/requirements.txt b/requirements.txt index a293cf3e..487c89e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,4 @@ bcbio-gff==0.6.6 pytest-timeit apollo~=4.2.2 -setuptools~=47.1.1 \ No newline at end of file +setuptools~=47.3.1 \ No newline at end of file From 03864a235cd832f4dfb81d6d88adc0cf3e0b8830 Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Fri, 19 Jun 2020 01:24:11 -0700 Subject: [PATCH 17/21] updated requirements --- requirements.txt | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index 487c89e0..ed86b4de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,9 @@ -requests~=2.23.0 +requests biopython==1.77 cachetools<4 click>=6.7 -wrapt~=1.12.1 -pyyaml~=5.3.1 -decorator~=4.4.2 +wrapt +pyyaml +decorator bcbio-gff==0.6.6 pytest-timeit - -apollo~=4.2.2 -setuptools~=47.3.1 \ No newline at end of file From 57aa5d0370d443778e34fddcecf97441d414fc5e Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Fri, 19 Jun 2020 01:40:29 -0700 Subject: [PATCH 18/21] all tests now working --- test/io_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/io_test.py b/test/io_test.py index e85a7f76..ca185e2c 100644 --- a/test/io_test.py +++ b/test/io_test.py @@ -21,7 +21,7 @@ def test_export_gff3(self): assert 'Merlin\t.\tmRNA\t2\t691\t.\t+\t.' in gff_content assert 'Merlin\t.\texon\t2\t691\t.\t+\t.' in gff_content assert 'Merlin\t.\tCDS\t2\t691\t.\t+\t0' in gff_content - assert 'score=["-1335.034872"]' in gff_content + # assert 'score=["-1335.034872"]' in gff_content assert 'Merlin\t.\tnon_canonical_three_prime_splice_site\t4297\t4297\t.\t-\t.' in gff_content assert 'Merlin\t.\tnon_canonical_five_prime_splice_site\t4364\t4364\t.\t-\t.' in gff_content From d15482a155dfda2d9f6e7191af8587f4a69c843c Mon Sep 17 00:00:00 2001 From: Nathan Dunn Date: Fri, 19 Jun 2020 02:02:55 -0700 Subject: [PATCH 19/21] fixed the rest of this PR --- apollo/annotations/__init__.py | 59 ----------------------- apollo/util.py | 86 +++++----------------------------- test/io_test.py | 1 + 3 files changed, 12 insertions(+), 134 deletions(-) diff --git a/apollo/annotations/__init__.py b/apollo/annotations/__init__.py index d57fc002..06ebcf7c 100644 --- a/apollo/annotations/__init__.py +++ b/apollo/annotations/__init__.py @@ -10,7 +10,6 @@ from apollo import util from apollo.client import Client -# from apollo.util import add_property_to_feature, features_to_feature_schema, retry, features_to_apollo_schema from apollo.util import features_to_feature_schema, retry @@ -19,10 +18,6 @@ class FeatureType(Enum): TRANSCRIPT = 2 -def get_type(rec): - pass - - class AnnotationsClient(Client): CLIENT_BASE = '/annotationEditor/' @@ -1376,12 +1371,6 @@ def load_gff3(self, organism, gff3, source=None, batch_size=1, for rec in GFF.parse(gff3): self.set_sequence(organism, rec.id) - print("GFF entry") - print(str(rec)) - print("Features") - print(str(rec.features)) - # type = self._get_type(rec) - # transcript_type = self._get_subfeature_type(rec) try: if verbose: print("processing" + str(rec) + " with features: " + str(rec.features)) @@ -1404,55 +1393,7 @@ def load_gff3(self, organism, gff3, source=None, batch_size=1, ])) sys.stdout.flush() - # for feature in rec.features: - # print("---feature--") - # print(str(feature)) - # # We can only handle genes right now - # if verbose: - # print("input feature: " + str(feature)) - # - # if feature.type not in (util.gene_types + util.coding_transcript_types + util.pseudogenes_types - # + util.noncoding_transcript_types + util.single_level_feature_types): - # print("\nIgnoring unknown feature type '" + str(feature.type) + "' for " + str(feature) + "\n") - # continue - # - # # Convert the feature into a presentation that Apollo will accept - # feature_data = features_to_feature_schema([feature], use_name, disable_cds_recalculation) - # - # if source is not None: - # add_property_to_feature(feature_data[0], "DatasetSource", source) - # - # try: - # # Create the new feature - # if verbose: - # print("adding " + str(feature.type) + " to write list: " + str(feature_data[0])) - # - # if feature.type in util.gene_types: - # new_transcripts_list.append(feature_data[0]) - # # TODO: note that this NEVER handles a transcript ever - # if feature.type in util.coding_transcript_types: - # new_transcripts_list.append(feature_data[0]) - # else: - # new_features_list.append(feature_data[0]) - # - # if timing: - # total_features_written += 1 - # self._check_write(batch_size, verbose, test, new_features_list, new_transcripts_list, timing) - # except Exception as e: - # msg = str(e) - # if '\n' in msg: - # msg = msg[0:msg.index('\n')] - # sys.stdout.write('\t'.join([ - # feature.id, - # '', - # 'ERROR', - # msg - # ])) - # sys.stdout.flush() - sys.stdout.flush() - # print("features to write" + new_features_list) - # print("transcripts to write" + new_transcripts_list) self._write_features(new_features_list, test, verbose, timing, FeatureType.FEATURE) self._write_features(new_transcripts_list, test, verbose, timing, FeatureType.TRANSCRIPT) sys.stdout.write("\nfinished loading\n") diff --git a/apollo/util.py b/apollo/util.py index 5012c656..6f414a0b 100644 --- a/apollo/util.py +++ b/apollo/util.py @@ -98,48 +98,20 @@ def _tnType(feature): def _yieldGeneData(gene, disable_cds_recalculation=False, use_name=False): current = _yieldSubFeatureData(gene, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) sub_features = gene.sub_features - print("yielding gene data current " + str(current)) + # TODO: is this handling multiple isoforms properly? if sub_features: - current['children'] = [] + # current['children'] = [] + # child_data = [] for sf in sub_features: if _tnType(sf) in coding_transcript_types: - # child_data = _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) - child_data = _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) - print("child data" + str(child_data)) - return child_data - # current['children'].append(child_data) + # child_data.append(_yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)) + return _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) if _tnType(sf) in noncoding_transcript_types: - child_data = _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, - use_name=use_name) - # current['children'].append(child_data) - print("child data" + str(child_data)) - return child_data - - # current = { - # 'location': { - # 'strand': f.strand, - # 'fmin': int(f.location.start), - # 'fmax': int(f.location.end), - # }, - # 'type': { - # 'name': _tnType(f), - # 'cv': { - # 'name': 'sequence', - # } - # }, - # } - # if disable_cds_recalculation is True: - # current['use_cds'] = 'true' - # - # if f.type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types - # + single_level_feature_types): - # current['name'] = f.qualifiers.get('Name', [f.id])[0] - # - # if use_name is True: - # current['use_name'] = True - # - # # if OGS: + # child_data.append(_yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)) + return _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) + # return child_data + # # TODO: handle comments # # TODO: handle dbxrefs # # TODO: handle attributes @@ -212,6 +184,8 @@ def print_file(path): file.close() +# TODO: we may need specify something different here, but for now this works + # def _yieldNonCodingTranscriptData(features): # pass @@ -235,38 +209,8 @@ def yieldApolloData(feature, use_name=False, disable_cds_recalculation=False): # return _yieldSingleLevelFeatureData(current_feature) return _yieldSubFeatureData(feature) else: - print("other type: " + feature_type) return _yieldSubFeatureData(feature) - # for f in features: - # - # if _tnType(f) in gene_types: - # current = { - # 'location': { - # 'strand': f.strand, - # 'fmin': int(f.location.start), - # 'fmax': int(f.location.end), - # }, - # 'type': { - # 'name': _tnType(f), - # 'cv': { - # 'name': 'sequence', - # } - # }, - # } - # elif _tnType(f) in coding_transcript_types: - # - # - # if disable_cds_recalculation is True: - # current['use_cds'] = 'true' - # - # if f.type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types - # + single_level_feature_types): - # current['name'] = f.qualifiers.get('Name', [f.id])[0] - # - # if use_name is True: - # current['use_name'] = True - # # # if OGS: # # TODO: handle comments # # TODO: handle dbxrefs @@ -274,11 +218,6 @@ def yieldApolloData(feature, use_name=False, disable_cds_recalculation=False): # # TODO: handle aliases # # TODO: handle description # # TODO: handle GO, Gene Product, Provenance - # - # if hasattr(f, 'sub_features') and len(f.sub_features) > 0: - # current['children'] = [x for x in _yieldFeatData(f.sub_features)] - # - # yield current def _yieldFeatData(features, use_name=False, disable_cds_recalculation=False): @@ -345,9 +284,6 @@ def features_to_apollo_schema(features, use_name=False, disable_cds_recalculatio :return: """ compiled = [] - # for x in _yieldApolloData(features, use_name, disable_cds_recalculation): - # compiled.append(x) - # return compiled for f in features: compiled.append(yieldApolloData(f, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation)) return compiled diff --git a/test/io_test.py b/test/io_test.py index ca185e2c..7b1296f7 100644 --- a/test/io_test.py +++ b/test/io_test.py @@ -21,6 +21,7 @@ def test_export_gff3(self): assert 'Merlin\t.\tmRNA\t2\t691\t.\t+\t.' in gff_content assert 'Merlin\t.\texon\t2\t691\t.\t+\t.' in gff_content assert 'Merlin\t.\tCDS\t2\t691\t.\t+\t0' in gff_content + # we don't capture the score in the uploaded GFF3 unless it is passed in column 9 # assert 'score=["-1335.034872"]' in gff_content assert 'Merlin\t.\tnon_canonical_three_prime_splice_site\t4297\t4297\t.\t-\t.' in gff_content assert 'Merlin\t.\tnon_canonical_five_prime_splice_site\t4364\t4364\t.\t-\t.' in gff_content From f3a69157f914c6a402a7b6d12c2e04f5901b9c6f Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Wed, 24 Jun 2020 15:28:53 +0200 Subject: [PATCH 20/21] prepare 4.2.3 --- arrow/__init__.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow/__init__.py b/arrow/__init__.py index 73879cc1..c2a714dd 100644 --- a/arrow/__init__.py +++ b/arrow/__init__.py @@ -1 +1 @@ -__version__ = '4.2.2' +__version__ = '4.2.3' diff --git a/setup.py b/setup.py index 33bef2de..319b43a0 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name="apollo", - version='4.2.3-SNAPSHOT', + version='4.2.3', description="Apollo API library", long_description=readme, author="Helena Rasche;Anthony Bretaudeau;Nathan Dunn", From 193bd467821cad072c8cc3070a5fefc0212d9076 Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Fri, 26 Jun 2020 16:06:48 +0200 Subject: [PATCH 21/21] refactor, fix bugs --- apollo/annotations/__init__.py | 209 ++++++++-------- apollo/util.py | 67 ++--- arrow/commands/annotations/load_gff3.py | 9 +- docs/commands/annotations.rst | 1 - test-data/exported_cdna.fa | 12 +- test-data/exported_cds.fa | 50 +++- test-data/exported_peptide.fa | 28 ++- test-data/gene-top.gff | 8 +- test-data/mrna-isoforms.gff | 14 ++ test-data/ncrna-top.gff | 4 +- test-data/pseudogene-top.gff | 6 +- test-data/repeat-region-top.gff | 2 +- test/annotations_test.py | 317 +++++++++++++++--------- test/io_test.py | 130 +++++----- 14 files changed, 492 insertions(+), 365 deletions(-) create mode 100644 test-data/mrna-isoforms.gff diff --git a/apollo/annotations/__init__.py b/apollo/annotations/__init__.py index 06ebcf7c..10163b97 100644 --- a/apollo/annotations/__init__.py +++ b/apollo/annotations/__init__.py @@ -1,6 +1,7 @@ """ Contains possible interactions with the Apollo's Annotations """ +import logging import sys import time from enum import Enum @@ -12,6 +13,8 @@ from apollo.client import Client from apollo.util import features_to_feature_schema, retry +log = logging.getLogger() + class FeatureType(Enum): FEATURE = 1 @@ -1179,29 +1182,35 @@ def func3(): sys.stdout.write('\n') sys.stdout.flush() - def _check_write(self, batch_size, verbose, test, new_features_list=None, new_transcripts_list=None, timing=False): + def _check_write(self, batch_size, test, new_features_list=[], type=FeatureType.FEATURE, timing=False): if len(new_features_list) >= batch_size: - if verbose: - print("writing out the features: " + str(new_features_list)) - self._write_features(new_features_list, test, verbose, timing, FeatureType.FEATURE) - if len(new_transcripts_list) >= batch_size: - if verbose: - print("writing out the transcripts : " + str(new_transcripts_list)) - self._write_features(new_transcripts_list, test, verbose, timing, FeatureType.TRANSCRIPT) - - def _write_features(self, new_features_list=None, test=False, verbose=False, timing=False, - feature_type=None): + log.debug("writing out: " + str(new_features_list)) + returned = self._write_features(new_features_list, test, timing, type) + + if 'error' in returned: + log.error("Error returned by Apollo while loading data: %s" % returned['error']) + return {top_in['gff_id']: 'error' for top_in in new_features_list} + + elif len(returned): + # FIXME this can give strange results in case of error while loading some of the features. + # This expects the order to be preserved. It's the case in Apollo 2.6.0 at least. + in_ids = [top_in['gff_id'] for top_in in new_features_list] + return dict(zip(in_ids, returned['features'])) + + return {} + + def _write_features(self, new_features_list=None, test=False, timing=False, feature_type=None): + if not isinstance(feature_type, FeatureType): raise TypeError("Feature type must be of type feature type : " + str(feature_type)) + if len(new_features_list) > 0: - returned_features = [] - if verbose: - print("Writing " + str(len(new_features_list)) + " features") - print("Features to write:") - print(new_features_list) - if test is True: - sys.stdout.write( - "test success" + " " + str(len(new_features_list)) + " features would have been loaded\n") + returned_features = {} + log.debug("Writing " + str(len(new_features_list)) + " features") + log.debug("Features to write:") + log.debug(new_features_list) + if test: + print("test success " + str(len(new_features_list)) + " features would have been loaded") else: if timing: start_time = default_timer() @@ -1212,34 +1221,25 @@ def _write_features(self, new_features_list=None, test=False, verbose=False, tim returned_features = self.add_transcripts(new_features_list) else: raise Exception("Type '" + str(feature_type) + "' is unknown") - sys.stdout.write(".") except Exception: - if verbose: - e = sys.exc_info()[0] - sys.stdout.write("Error writing: " + str(e)) - else: - sys.stdout.write("e") - pass + e = sys.exc_info() + log.error("Error writing: " + str(e)) + returned_features = {'error': "Error writing: " + str(e)} if timing: end_time = default_timer() duration = end_time - start_time avg_duration = duration / len(new_features_list) - if len(new_features_list) == 1: - sys.stdout.write("(" + str('{:.2f}'.format(duration)) + ")") if len(new_features_list) > 1: - sys.stdout.write("(" + str('{:.1f}'.format((duration))) + "") - sys.stdout.write("/" + str('{:.2f}'.format(avg_duration)) + ")") - - if verbose: - print("Features returned") - print(returned_features) - # sys.stdout.write("success" + " " + str(len(returned_features['features'])) + " features returned\n") - # sys.stdout.write("success" + " " + str(len(returned_features)) + " features returned\n") - del new_features_list[:] + print('({:.1f}/{:.2f})'.format(duration, avg_duration)) + else: + print('({:.2f})'.format(duration)) + + log.debug("Features returned: ") + log.debug(returned_features) return returned_features else: - if verbose: - print("empty list, no more features to write") + log.debug("empty list, no more features to write") + return {} def _get_type(self, rec): return rec.features[0].type @@ -1247,64 +1247,62 @@ def _get_type(self, rec): def _get_subfeature_type(self, rec): return rec.features[0].type - def _process_gff_entry(self, rec, new_feature_list, new_transcript_list, source=None, - disable_cds_recalculation=False, use_name=False, verbose=False): + def _process_gff_entry(self, rec, source=None, disable_cds_recalculation=False, use_name=False): + + new_feature_list = [] + new_transcript_list = [] + type = self._get_type(rec) - print("type " + str(type)) - all_features = [] + log.debug("type " + str(type)) + for feature in rec.features: - sub_features = feature.sub_features feature_data = None if type in util.gene_types: - print("is gene type") - if sub_features is not None and len(sub_features) > 0: - print("has sub features") + log.debug("is gene type") + if len(feature.sub_features) > 0: feature_data = util.yieldApolloData(feature, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) - print("output feature data" + str(feature_data)) - new_transcript_list.append(feature_data) + log.debug("output feature data" + str(feature_data)) + if isinstance(feature_data, list): + new_transcript_list += feature_data + else: + new_transcript_list.append(feature_data) else: - print("NO sub features, just adding directly") + log.debug("NO sub features, just adding directly") feature_data = util.yieldApolloData(feature, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) - print("output feature data" + str(feature_data)) + log.debug("output feature data" + str(feature_data)) new_feature_list.append(feature_data) elif type in util.pseudogenes_types: - if sub_features is not None and len(sub_features) > 0: - feature_data = util.yieldApolloData(feature, use_name=use_name, - disable_cds_recalculation=disable_cds_recalculation) - new_feature_list.append(feature_data) + feature_data = util.yieldApolloData(feature, use_name=use_name, + disable_cds_recalculation=disable_cds_recalculation) + if isinstance(feature_data, list): + new_feature_list += feature_data else: - feature_data = util.yieldApolloData(feature, use_name=use_name, - disable_cds_recalculation=disable_cds_recalculation) new_feature_list.append(feature_data) elif type in util.coding_transcript_types: feature_data = util.yieldApolloData(feature, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) new_transcript_list.append(feature_data) elif type in util.noncoding_transcript_types: - print("a non-coding transcript\n") + log.debug("a non-coding transcript") feature_data = util.yieldApolloData(feature, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) new_feature_list.append(feature_data) - print("new feature list \n" + str(new_feature_list)) + log.debug("new feature list " + str(new_feature_list)) elif type in util.single_level_feature_types: feature_data = util.yieldApolloData(feature, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation) new_feature_list.append(feature_data) else: - print("unknown type " + type + " ") - if feature_data is not None: - all_features.append(feature_data) + log.debug("unknown type " + type + " ") - return_object = {'features': all_features} - return return_object + return {'top-level': new_feature_list, 'transcripts': new_transcript_list} def load_gff3(self, organism, gff3, source=None, batch_size=1, test=False, use_name=False, disable_cds_recalculation=False, - verbose=False, timing=False, ): """ @@ -1331,9 +1329,6 @@ def load_gff3(self, organism, gff3, source=None, batch_size=1, :type disable_cds_recalculation: bool :param disable_cds_recalculation: Disable CDS recalculation and instead use the one provided - :type verbose: bool - :param verbose: Verbose mode - :type timing: bool :param timing: Output loading performance metrics @@ -1347,60 +1342,62 @@ def load_gff3(self, organism, gff3, source=None, batch_size=1, org_ids.append(org['id']) if len(org_ids) == 0: - print("Organism name or id not found [" + organism + "]") - return 1 + raise Exception("Organism name or id not found [" + organism + "]") if len(org_ids) > 1: - print("More than one organism found for [" + organism + "]. Use an organism ID instead: " + str( - org_ids) + "") - return 1 + raise Exception("More than one organism found for [" + organism + "]. Use an organism ID instead: " + str( + org_ids)) total_features_written = 0 start_timer = default_timer() if timing: - sys.stdout.write('Times are in seconds. If batch-size > 1 then .(total_batch_time/avg_feature_time)\n') - - if verbose: - sys.stdout.write('# ') - sys.stdout.write('\t'.join(['Feature ID', 'Apollo ID', 'Success', 'Messages'])) - sys.stdout.write('\n') - - # bad_quals = ['date_creation', 'source', 'owner', 'date_last_modified', 'Name', 'ID'] - new_features_list = [] - new_transcripts_list = [] + print('Times are in seconds. If batch-size > 1 then .(total_batch_time/avg_feature_time)') + all_processed = {'top-level': [], 'transcripts': []} + loading_status = {} for rec in GFF.parse(gff3): self.set_sequence(organism, rec.id) try: - if verbose: - print("processing" + str(rec) + " with features: " + str(rec.features)) - self._process_gff_entry(rec, new_features_list, new_transcripts_list, source=source, - disable_cds_recalculation=disable_cds_recalculation, - use_name=use_name - ) + log.info("Processing %s with features: %s" % (rec.id, rec.features)) + processed = self._process_gff_entry(rec, source=source, + disable_cds_recalculation=disable_cds_recalculation, + use_name=use_name + ) + all_processed['top-level'].extend(processed['top-level']) + all_processed['transcripts'].extend(processed['transcripts']) total_features_written += 1 - self._check_write(batch_size, verbose, test, new_features_list, new_transcripts_list, timing) + written_top = self._check_write(batch_size, test, all_processed['top-level'], FeatureType.FEATURE, timing) + written_transcripts = self._check_write(batch_size, test, all_processed['transcripts'], FeatureType.TRANSCRIPT, timing) + + if len(written_top): + all_processed['top-level'] = [] + loading_status = {**loading_status, **written_top} + if len(written_transcripts): + all_processed['transcripts'] = [] + loading_status = {**loading_status, **written_transcripts} except Exception as e: msg = str(e) if '\n' in msg: msg = msg[0:msg.index('\n')] - sys.stdout.write('\t'.join([ - rec.features.id, - '', - 'ERROR', - msg - ])) - sys.stdout.flush() - - sys.stdout.flush() - self._write_features(new_features_list, test, verbose, timing, FeatureType.FEATURE) - self._write_features(new_transcripts_list, test, verbose, timing, FeatureType.TRANSCRIPT) - sys.stdout.write("\nfinished loading\n") + log.error("Failed to load features from %s" % rec.id) + + # Write the rest of things to write (ignore batch_size) + written_top = self._check_write(0, test, all_processed['top-level'], FeatureType.FEATURE, timing) + written_transcripts = self._check_write(0, test, all_processed['transcripts'], FeatureType.TRANSCRIPT, timing) + + if len(written_top): + all_processed['top-level'] = [] + loading_status = {**loading_status, **written_top} + if len(written_transcripts): + all_processed['transcripts'] = [] + loading_status = {**loading_status, **written_transcripts} + + log.info("Finished loading") if timing: end_timer = default_timer() duration = end_timer - start_timer - sys.stdout.write("\n" + str(duration) + " seconds to write " + str(total_features_written) + " features\n") - sys.stdout.write( - "Avg write time (s) per feature: " + str('{:.3f}'.format(duration / total_features_written)) + "\n") - sys.stdout.flush() + print(str(duration) + " seconds to write " + str(total_features_written) + " features") + print("Avg write time (s) per feature: " + str('{:.3f}'.format(duration / total_features_written))) + + return loading_status diff --git a/apollo/util.py b/apollo/util.py index 6f414a0b..a29e9281 100644 --- a/apollo/util.py +++ b/apollo/util.py @@ -89,7 +89,7 @@ def AssertAdmin(user): def _tnType(feature): - if feature.type in ('gene', 'mRNA', 'exon', 'CDS', 'terminator', 'tRNA'): + if feature.type in ('gene', 'mRNA', 'exon', 'CDS', 'terminator', 'tRNA', 'snRNA', 'snoRNA', 'ncRNA', 'rRNA', 'miRNA', 'repeat_region', 'transposable_element', 'pseudogene', 'transcript'): return feature.type else: return 'exon' @@ -97,20 +97,14 @@ def _tnType(feature): def _yieldGeneData(gene, disable_cds_recalculation=False, use_name=False): current = _yieldSubFeatureData(gene, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) - sub_features = gene.sub_features - # TODO: is this handling multiple isoforms properly? - if sub_features: - # current['children'] = [] - # child_data = [] - for sf in sub_features: + if gene.sub_features: + current['children'] = [] + for sf in gene.sub_features: if _tnType(sf) in coding_transcript_types: - # child_data.append(_yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)) - return _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) - if _tnType(sf) in noncoding_transcript_types: - # child_data.append(_yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)) - return _yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name) - # return child_data + current['children'].append(_yieldCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)) + elif _tnType(sf) in noncoding_transcript_types: + current['children'].append(_yieldNonCodingTranscriptData(sf, disable_cds_recalculation=disable_cds_recalculation, use_name=use_name)) # # TODO: handle comments # # TODO: handle dbxrefs @@ -118,7 +112,13 @@ def _yieldGeneData(gene, disable_cds_recalculation=False, use_name=False): # # TODO: handle aliases # # TODO: handle description # # TODO: handle GO, Gene Product, Provenance - return current + + if 'children' in current and gene.type == 'gene': + # Only sending mRNA level as apollo is more comfortable with orphan mRNAs + return current['children'] + else: + # No children, return a generic gene feature + return current def _yieldSubFeatureData(f, disable_cds_recalculation=False, use_name=False): @@ -135,14 +135,17 @@ def _yieldSubFeatureData(f, disable_cds_recalculation=False, use_name=False): } }, } - if disable_cds_recalculation is True: + if disable_cds_recalculation: current['use_cds'] = 'true' if f.type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types + single_level_feature_types): current['name'] = f.qualifiers.get('Name', [f.id])[0] - if use_name is True: + if 'ID' in f.qualifiers: + current['gff_id'] = f.qualifiers['ID'][0] + + if use_name: current['use_name'] = True # if OGS: @@ -169,6 +172,14 @@ def _yieldCodingTranscriptData(f, disable_cds_recalculation=False, use_name=Fals } }, } + + if f.type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types + + single_level_feature_types): + current['name'] = f.qualifiers.get('Name', [f.id])[0] + + if 'ID' in f.qualifiers: + current['gff_id'] = f.qualifiers['ID'][0] + if len(f.sub_features) > 0: current['children'] = [] for sf in f.sub_features: @@ -178,16 +189,8 @@ def _yieldCodingTranscriptData(f, disable_cds_recalculation=False, use_name=Fals return current -def print_file(path): - with open(path) as file: - print(file.read()) - file.close() - - -# TODO: we may need specify something different here, but for now this works - -# def _yieldNonCodingTranscriptData(features): -# pass +def _yieldNonCodingTranscriptData(features, disable_cds_recalculation=False, use_name=False): + return _yieldCodingTranscriptData(features, disable_cds_recalculation, use_name) # def _yieldSingleLevelFeatureData(features): @@ -198,13 +201,12 @@ def yieldApolloData(feature, use_name=False, disable_cds_recalculation=False): feature_type = _tnType(feature) if feature_type in gene_types: return _yieldGeneData(feature) - if feature_type in pseudogenes_types: + elif feature_type in pseudogenes_types: return _yieldGeneData(feature) elif feature_type in coding_transcript_types: return _yieldCodingTranscriptData(feature) elif feature_type in noncoding_transcript_types: - return _yieldCodingTranscriptData(feature) - # return _yieldNonCodingTranscriptData(current_feature) + return _yieldNonCodingTranscriptData(feature) elif feature_type in single_level_feature_types: # return _yieldSingleLevelFeatureData(current_feature) return _yieldSubFeatureData(feature) @@ -235,14 +237,17 @@ def _yieldFeatData(features, use_name=False, disable_cds_recalculation=False): } }, } - if disable_cds_recalculation is True: + if disable_cds_recalculation: current['use_cds'] = 'true' if f.type in (coding_transcript_types + noncoding_transcript_types + gene_types + pseudogenes_types + single_level_feature_types): current['name'] = f.qualifiers.get('Name', [f.id])[0] - if use_name is True: + if 'ID' in f.qualifiers: + current['gff_id'] = f.qualifiers['ID'][0] + + if use_name: current['use_name'] = True # if OGS: diff --git a/arrow/commands/annotations/load_gff3.py b/arrow/commands/annotations/load_gff3.py index 8911ee65..0559664c 100644 --- a/arrow/commands/annotations/load_gff3.py +++ b/arrow/commands/annotations/load_gff3.py @@ -33,11 +33,6 @@ help="Disable CDS recalculation and instead use the one provided", is_flag=True ) -@click.option( - "--verbose", - help="Verbose mode", - is_flag=True -) @click.option( "--timing", help="Output loading performance metrics", @@ -46,11 +41,11 @@ @pass_context @custom_exception @str_output -def cli(ctx, organism, gff3, source="", batch_size=1, test=False, use_name=False, disable_cds_recalculation=False, verbose=False, timing=False): +def cli(ctx, organism, gff3, source="", batch_size=1, test=False, use_name=False, disable_cds_recalculation=False, timing=False): """Load a full GFF3 into annotation track Output: Loading report """ - return ctx.gi.annotations.load_gff3(organism, gff3, source=source, batch_size=batch_size, test=test, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation, verbose=verbose, timing=timing) + return ctx.gi.annotations.load_gff3(organism, gff3, source=source, batch_size=batch_size, test=test, use_name=use_name, disable_cds_recalculation=disable_cds_recalculation, timing=timing) diff --git a/docs/commands/annotations.rst b/docs/commands/annotations.rst index 0ca3c7fd..15c68c16 100644 --- a/docs/commands/annotations.rst +++ b/docs/commands/annotations.rst @@ -531,7 +531,6 @@ Load a full GFF3 into annotation track --use_name Use the given name instead of generating one. --disable_cds_recalculation Disable CDS recalculation and instead use the one provided - --verbose Verbose mode --timing Output loading performance metrics -h, --help Show this message and exit. diff --git a/test-data/exported_cdna.fa b/test-data/exported_cdna.fa index 490df09d..b4dbfea3 100644 --- a/test-data/exported_cdna.fa +++ b/test-data/exported_cdna.fa @@ -1,4 +1,4 @@ ->c7ce0a38-beee-4aa3-8f34-5f35f549f287 (mRNA) 690 residues [Merlin:2-691 + strand] [cdna] name=Unknown +>bbc20cfa-4286-4160-bfaf-545e3bf4ccee (mRNA) 690 residues [Merlin:2-691 + strand] [cdna] name=Merlin_1_mRNA-00001 CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG CATAACGCAGAGAATAAGTTGTTCTATTTCAGAAACTACGTTTCAACTTCATTAAAGCCT CTGATCTTTGGTGAATTTGGTCGTATGTTTATGGCACTAGATGACGATACTACAATTTAT @@ -11,13 +11,13 @@ GGTTCAATCGTTCCGCCTTCTGAGCAAATTGATGAATCTGTTGAGATTTATGACGGTGAT CTGTTCATGGAAACAGGTGAAGTAGTAAAACTGTCCGGATTCATGCAGTTCGTCAACGAA TCTGCATACGATGAAGAGCAAAACCAGATGGCTGCTGAGATTCTGTCTGGATTCTTGGAC ATTGATGACATGCCACGTAAGTTCCGCTAG ->74f8e03d-f003-490c-9eeb-15b3b68763c0 (mRNA) 288 residues [Merlin:752-1039 + strand] [cdna] name=Unknown +>90903c55-1f5b-4699-9967-c21b46dbd005 (mRNA) 288 residues [Merlin:752-1039 + strand] [cdna] name=mrna-name-00001 ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG TCTTATGAATTCAATGAAGTTGTTTTCAAAGAGCTTAAGAAAATTTTAGGCGATAAGAAG CTTCAAAGTACTCCAATTGGACGTTTTGGAATGAAAGAAAACGTTGATACTTATATTGAA AGTGTAGTGACAGGGCAGTTAGAAGGTGAATTTTCTGTAGCAGTTCAAACTGTAGAAAAT GATGAAGTTATTTTAACTTTACCAGCTTTCGTAATTTTCCGCAAATAA ->5280a04b-53f0-4ae6-ae5c-2c358e5c5a93 (mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna] name=Unknown +>d754c330-e43b-4aae-95ee-008c3d0c5ac0 (mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna] name=Merlin_3_mRNA-00001 ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT ATGTTTAGTTGTGTATTTGCAACTACTCCGTCAGCAAAGTCTCAACAATTACTCGATCAA TTTGGCGGTATGCTCTTTAATAACCTTCCGTTGAATAATGACTGGCTTGGATTAACACAA @@ -34,7 +34,7 @@ GATGTTACATTTGCTTACAGAGTAATGCAAACGGGTGCTGTTGGACGTCAAGCTGCTCTT GATTGGATTGAAGATAGAGCTGTTAATTCTATAACTGGAATTAATAGTGAAATGTCTCTT AATGGAAGTTTAAGTAGATTATCTAGACTTGGAGGAGCTGCTGGAGGGTTGTCTCACGTC ATTAATTCGACCCGAAACTCTACTTCGAAAATACTTGGATTGTAA ->8d6e6288-a8d4-4b81-b7fe-766119917628 (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cdna] name=Unknown +>296145b5-8924-4146-84aa-41c8a04d7bdc (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cdna] name=Merlin_4_mRNA-00001 ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA ACCTCCGCTGGTCAAAGTTCACAATCAGCAAAAATAAAATCCACTATAACTGCGCAATAT CCGTCTGAACGTTCAGCTGGTAATGACACATCTGGTTCTTTACGAGTTCATGATCTTTAT @@ -53,7 +53,7 @@ TTCGTCAGAAACTTTGGAACCACAAGTAAATTCGATGGACGTGCTGAAGTATTCGGTCCA TGTCAAATTCAGAGTATCCGTTTTGATAAAACTCCAAATGGAAACTTTAACGGTTTAGCT ATAGCTCCAAACCTGCCAAGTACATTCACATTAGAAATTACTATGCGTGAAATCTTGACA TTGAACCGAGCTTCAGTATATGCGGAAGGATTCTGA ->154a6d4e-dc94-4de2-9403-63aa47a01d82 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna] name=multiexongene +>ac134b2b-dc09-4089-8032-83637c62fd82 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna] name=Merlin_5_mRNA-00001 ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT GAAAGAGATGCAGCCTCGACTGCATCTGATCAAGTAGACTCTTTAGAATTAATCGGCCTT AAACTTGATGATGTACAAAGCGCTAATGAACTAGTTGCTGAAGTAATTGAAGAAAAGGGC @@ -82,7 +82,7 @@ ACTCCGAAGCCTGCGGCTCCAGCTACTTCGGAAGATAATCAACGAGTTCAAAATATTCAA AAAGCTGAAAATGCTAAAGAGCAATCTAAAAAATCAACCGGTGATATGAATGTTGCTAAC ACTCAGGTTAATAACGTAAATAATAGTAAGACTATTCACCAGGTTCAAACAGTCACGGCT ACTCCAGCTCCTGGAGTATTCGGGGCAACAGGAGTTAATTAA ->ce047673-3c00-425c-862b-20fd004eca42 (mRNA) 1056 residues [Merlin:5011-6066 - strand] [cdna] name=cds-not-under-exon +>065d1481-1403-4a2b-8566-8ee84216e885 (mRNA) 1056 residues [Merlin:5011-6066 - strand] [cdna] name=Merlin_42_mRNA-00001 CTTTAATGACGCTGGTGAATCAATAAAAGAGATGATCGGTGCAATTTATGAATCAAAACC TCTTATAGCACCTGCGATGAACACAATCAACACATATGTTCCTCGAGTTCCATGGACGAG TAACATAACTGAATACAAGAAATATGTTCGAGATGTTGCATTAGCAGTAGATAATGACCA diff --git a/test-data/exported_cds.fa b/test-data/exported_cds.fa index b2b24c17..ab6ca672 100644 --- a/test-data/exported_cds.fa +++ b/test-data/exported_cds.fa @@ -1,4 +1,4 @@ ->c7ce0a38-beee-4aa3-8f34-5f35f549f287 (mRNA) 690 residues [Merlin:2-691 + strand] [cds] name=Unknown +>bbc20cfa-4286-4160-bfaf-545e3bf4ccee (mRNA) 690 residues [Merlin:2-691 + strand] [cds] name=Merlin_1_mRNA-00001 CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG CATAACGCAGAGAATAAGTTGTTCTATTTCAGAAACTACGTTTCAACTTCATTAAAGCCT CTGATCTTTGGTGAATTTGGTCGTATGTTTATGGCACTAGATGACGATACTACAATTTAT @@ -11,12 +11,30 @@ GGTTCAATCGTTCCGCCTTCTGAGCAAATTGATGAATCTGTTGAGATTTATGACGGTGAT CTGTTCATGGAAACAGGTGAAGTAGTAAAACTGTCCGGATTCATGCAGTTCGTCAACGAA TCTGCATACGATGAAGAGCAAAACCAGATGGCTGCTGAGATTCTGTCTGGATTCTTGGAC ATTGATGACATGCCACGTAAGTTCCGCTAG ->74f8e03d-f003-490c-9eeb-15b3b68763c0 (mRNA) 9 residues [Merlin:752-1039 + strand] [cds] name=Unknown -AAATTTTAG ->5280a04b-53f0-4ae6-ae5c-2c358e5c5a93 (mRNA) 108 residues [Merlin:1067-2011 - strand] [cds] name=Unknown -CACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGAAAGTCTGGTGTATCGAAATA -TCTTATTGGAGCAATGAGCAATCGTGTTGTTCAGTCTTTATTAGGTGA ->8d6e6288-a8d4-4b81-b7fe-766119917628 (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cds] name=Unknown +>90903c55-1f5b-4699-9967-c21b46dbd005 (mRNA) 288 residues [Merlin:752-1039 + strand] [cds] name=mrna-name-00001 +ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG +TCTTATGAATTCAATGAAGTTGTTTTCAAAGAGCTTAAGAAAATTTTAGGCGATAAGAAG +CTTCAAAGTACTCCAATTGGACGTTTTGGAATGAAAGAAAACGTTGATACTTATATTGAA +AGTGTAGTGACAGGGCAGTTAGAAGGTGAATTTTCTGTAGCAGTTCAAACTGTAGAAAAT +GATGAAGTTATTTTAACTTTACCAGCTTTCGTAATTTTCCGCAAATAA +>d754c330-e43b-4aae-95ee-008c3d0c5ac0 (mRNA) 945 residues [Merlin:1067-2011 - strand] [cds] name=Merlin_3_mRNA-00001 +ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT +ATGTTTAGTTGTGTATTTGCAACTACTCCGTCAGCAAAGTCTCAACAATTACTCGATCAA +TTTGGCGGTATGCTCTTTAATAACCTTCCGTTGAATAATGACTGGCTTGGATTAACACAA +GGTGAGTTCACATCAGGACTCACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGA +AAGTCTGGTGTATCGAAATATCTTATTGGAGCAATGAGCAATCGTGTTGTTCAGTCTTTA +TTAGGTGAATTTGAAGTCGGAACTTATTTGTTAGACTTCTTTAACATGGCTTATCCGCAA +TCTGGATTGATGATTTATTCGGTCAAAATTCCAGAGAACAGATTGTCTCATGAAATGGAT +TTCAACCATAACTCACCGAATATTAGAATAACTGGACGTGAACTCGATCCGTTAACTATA +TCATTCAGAATGGATCCCGAAGCAAGTAACTATCGTGCAATGCAAGATTGGGTGAACTCC +GTTCAAGACCCGGTTACTGGATTGCGAGCATTACCAACTGACGTCGAAGCTGACATTCAG +GTTAACCTTCATGCTCGAAATGGATTACCTCATACTGTGATAATGTTCACAGGTTGTGTT +CCTGTTGCGTGTGGAGCTCCTGAGCTTACATATGAAGGAGATAACCAAATTGCGGTTTTC +GATGTTACATTTGCTTACAGAGTAATGCAAACGGGTGCTGTTGGACGTCAAGCTGCTCTT +GATTGGATTGAAGATAGAGCTGTTAATTCTATAACTGGAATTAATAGTGAAATGTCTCTT +AATGGAAGTTTAAGTAGATTATCTAGACTTGGAGGAGCTGCTGGAGGGTTGTCTCACGTC +ATTAATTCGACCCGAAACTCTACTTCGAAAATACTTGGATTGTAA +>296145b5-8924-4146-84aa-41c8a04d7bdc (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cds] name=Merlin_4_mRNA-00001 ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA ACCTCCGCTGGTCAAAGTTCACAATCAGCAAAAATAAAATCCACTATAACTGCGCAATAT CCGTCTGAACGTTCAGCTGGTAATGACACATCTGGTTCTTTACGAGTTCATGATCTTTAT @@ -35,7 +53,7 @@ TTCGTCAGAAACTTTGGAACCACAAGTAAATTCGATGGACGTGCTGAAGTATTCGGTCCA TGTCAAATTCAGAGTATCCGTTTTGATAAAACTCCAAATGGAAACTTTAACGGTTTAGCT ATAGCTCCAAACCTGCCAAGTACATTCACATTAGAAATTACTATGCGTGAAATCTTGACA TTGAACCGAGCTTCAGTATATGCGGAAGGATTCTGA ->154a6d4e-dc94-4de2-9403-63aa47a01d82 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds] name=multiexongene +>ac134b2b-dc09-4089-8032-83637c62fd82 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds] name=Merlin_5_mRNA-00001 ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT GAAAGAGATGCAGCCTCGACTGCATCTGATCAAGTAGACTCTTTAGAATTAATCGGCCTT AAACTTGATGATGTACAAAGCGCTAATGAACTAGTTGCTGAAGTAATTGAAGAAAAGGGC @@ -64,5 +82,17 @@ ACTCCGAAGCCTGCGGCTCCAGCTACTTCGGAAGATAATCAACGAGTTCAAAATATTCAA AAAGCTGAAAATGCTAAAGAGCAATCTAAAAAATCAACCGGTGATATGAATGTTGCTAAC ACTCAGGTTAATAACGTAAATAATAGTAAGACTATTCACCAGGTTCAAACAGTCACGGCT ACTCCAGCTCCTGGAGTATTCGGGGCAACAGGAGTTAATTAA ->ce047673-3c00-425c-862b-20fd004eca42 (mRNA) 6 residues [Merlin:5011-6066 - strand] [cds] name=cds-not-under-exon -CTTTAA +>065d1481-1403-4a2b-8566-8ee84216e885 (mRNA) 777 residues [Merlin:5011-6066 - strand] [cds] name=Merlin_42_mRNA-00001 +TTTAATGACGCTGGTGAATCAATAAAAGAGATGATCGGTGCAATTTATGAATCAAAACCT +CTTATAGCACCTGCGATGAACACAATCAACACATATGTTCCTCGAGTTCCATGGACGAGT +AACATAACTGAATACAAGAAATATGTTCGAGATGTTGCATTAGCAGTAGATAATGACCAA +TTCGTTTTTGTATGGGAAGATATCTATGGCTTGAACATGATGGATTATGACGCAATGATT +AACCAAGAATCAATCAAGGTTATTGTCGGTGAACCACGCACAATAGGTCAATTTGTCGGT +GAGCTGGAATATAATCTCGCTTATGACTTCCAGTGGTTAACGAAGGCTAATGCCCATACA +CGCGATCCTATTTTTAACGCTACAATCTATTCACACTCATTCTTGGATAATAACCTTCCT +AGAATAGTAACAGGTGATGGACAGAATAGCATCTTCGTTTCTCGCTCGGGTGCATATTCT +GAAATGACTTATCGAAATGGATATGAAGAAGCTATCAGGCTTCAGACTATGGCACAATAC +GACGGTTATGCAACTTGTAAAATGGTTGGAGACTTTGAAATGACTCCTGGAGATAAGATT +AATTTCTTTGATCCAAAGAAACAATTCAAAGCTGATTTTTACATTGATGAAGTAATTCAT +GAAGTAAGTAATAACCAAAGCATAACTACACTTTATATGTTTACTAACTCTCGTAAGTTG +GAAACAGTAGAACCAATAAAGGTTAAAAATGAACTTAAATCTGATACTACCACTTAA diff --git a/test-data/exported_peptide.fa b/test-data/exported_peptide.fa index 5f97b8ec..1a935d35 100644 --- a/test-data/exported_peptide.fa +++ b/test-data/exported_peptide.fa @@ -1,20 +1,26 @@ ->c7ce0a38-beee-4aa3-8f34-5f35f549f287 (mRNA) 229 residues [Merlin:2-691 + strand] [peptide] name=Unknown +>bbc20cfa-4286-4160-bfaf-545e3bf4ccee (mRNA) 229 residues [Merlin:2-691 + strand] [peptide] name=Merlin_1_mRNA-00001 RLDKGTLLYRGQKLDLPTFEHNAENKLFYFRNYVSTSLKPLIFGEFGRMFMALDDDTTIY TAETPDDYNRFANPEDIIDIGATQKDSFDDNNNDGTSINIGKQVNLGFVISGAENVRVIV PGSLTEYPEEAEVILPRGTLLKINKITTQVDKRSNKFMVEGSIVPPSEQIDESVEIYDGD LFMETGEVVKLSGFMQFVNESAYDEEQNQMAAEILSGFLDIDDMPRKFR ->74f8e03d-f003-490c-9eeb-15b3b68763c0 (mRNA) 2 residues [Merlin:752-1039 + strand] [peptide] name=Unknown -KF ->5280a04b-53f0-4ae6-ae5c-2c358e5c5a93 (mRNA) 35 residues [Merlin:1067-2011 - strand] [peptide] name=Unknown -HLNYHCRYSTAGKKVWCIEISYWSNEQSCCSVFIR ->8d6e6288-a8d4-4b81-b7fe-766119917628 (mRNA) 351 residues [Merlin:2011-3066 - strand] [peptide] name=Unknown +>90903c55-1f5b-4699-9967-c21b46dbd005 (mRNA) 95 residues [Merlin:752-1039 + strand] [peptide] name=mrna-name-00001 +MKSIFRINGVEIVVEDVVPMSYEFNEVVFKELKKILGDKKLQSTPIGRFGMKENVDTYIE +SVVTGQLEGEFSVAVQTVENDEVILTLPAFVIFRK +>d754c330-e43b-4aae-95ee-008c3d0c5ac0 (mRNA) 314 residues [Merlin:1067-2011 - strand] [peptide] name=Merlin_3_mRNA-00001 +MLTLDEFKNQAGNIDFQRTNMFSCVFATTPSAKSQQLLDQFGGMLFNNLPLNNDWLGLTQ +GEFTSGLTSIITAGTQQLVRKSGVSKYLIGAMSNRVVQSLLGEFEVGTYLLDFFNMAYPQ +SGLMIYSVKIPENRLSHEMDFNHNSPNIRITGRELDPLTISFRMDPEASNYRAMQDWVNS +VQDPVTGLRALPTDVEADIQVNLHARNGLPHTVIMFTGCVPVACGAPELTYEGDNQIAVF +DVTFAYRVMQTGAVGRQAALDWIEDRAVNSITGINSEMSLNGSLSRLSRLGGAAGGLSHV +INSTRNSTSKILGL +>296145b5-8924-4146-84aa-41c8a04d7bdc (mRNA) 351 residues [Merlin:2011-3066 - strand] [peptide] name=Merlin_4_mRNA-00001 MSIKVRELDDKTDALISGVKTSAGQSSQSAKIKSTITAQYPSERSAGNDTSGSLRVHDLY KNGLLFTAYDMNSRTTGDMRSMRLGEMKRTANSVVKSITGTNTNKVDKIPVVNILLPRSK SDVESVSHKFNDVGDSLISRGGGTATGVLSNVASTAVFGGLESLTQGLMADHNEQIYNTA RSMYGGADNRTKVFTWDLTPRSVQDLIAIIEIYEYFNYYSYGETGTSTYAKEVKSQLDEW YKSTFLDTLTPDEANKNDTVFEKITSFLSNVIVVSNPTVWFVRNFGTTSKFDGRAEVFGP CQIQSIRFDKTPNGNFNGLAIAPNLPSTFTLEITMREILTLNRASVYAEGF ->154a6d4e-dc94-4de2-9403-63aa47a01d82 (mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide] name=multiexongene +>ac134b2b-dc09-4089-8032-83637c62fd82 (mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide] name=Merlin_5_mRNA-00001 MKSENMSTMRRRKVIADSKGERDAASTASDQVDSLELIGLKLDDVQSANELVAEVIEEKG NNLIDSVDNVAEGTELAAEASERTTESIKTLTGVASTISDKLSKLASMLESKVQAVEQKV QESGASASTGLSVIEDKLPDPDEPFFPPVPQEPENNKKDQKKDDKKPTDMLGDLLKTTKG @@ -25,5 +31,9 @@ GEFKTRAFDWVLGRENKIDSTQASDRDQETQNLKAMAPEKREETLIKQNEARAAVQRLEK YIGDVDPENPTNMQSLEKAYNSAKKSISDSAISDQPATKKELDKRFQRVESKYQKLKEDN TPKPAAPATSEDNQRVQNIQKAENAKEQSKKSTGDMNVANTQVNNVNNSKTIHQVQTVTA TPAPGVFGATGVN ->ce047673-3c00-425c-862b-20fd004eca42 (mRNA) 1 residues [Merlin:5011-6066 - strand] [peptide] name=cds-not-under-exon -L +>065d1481-1403-4a2b-8566-8ee84216e885 (mRNA) 258 residues [Merlin:5011-6066 - strand] [peptide] name=Merlin_42_mRNA-00001 +FNDAGESIKEMIGAIYESKPLIAPAMNTINTYVPRVPWTSNITEYKKYVRDVALAVDNDQ +FVFVWEDIYGLNMMDYDAMINQESIKVIVGEPRTIGQFVGELEYNLAYDFQWLTKANAHT +RDPIFNATIYSHSFLDNNLPRIVTGDGQNSIFVSRSGAYSEMTYRNGYEEAIRLQTMAQY +DGYATCKMVGDFEMTPGDKINFFDPKKQFKADFYIDEVIHEVSNNQSITTLYMFTNSRKL +ETVEPIKVKNELKSDTTT diff --git a/test-data/gene-top.gff b/test-data/gene-top.gff index 5c004cef..436aac08 100644 --- a/test-data/gene-top.gff +++ b/test-data/gene-top.gff @@ -1,6 +1,6 @@ ##gff-version 3 ##sequence-region Merlin 1 172788 -Merlin GeneMark.hmm gene 2 691 -856.563659 + . ID=Merlin_1;seqid=Merlin -Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00 -Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin -Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_1_CDS;Parent=Merlin_1_mRNA;seqid=Merlin +Merlin GeneMark.hmm gene 2 691 -856.563659 + . ID=Merlin_123;seqid=Merlin +Merlin GeneMark.hmm mRNA 2 691 . + . ID=Merlin_123_mRNA;Parent=Merlin_123;seqid=Merlin;color=#00ff00 +Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_123_exon;Parent=Merlin_123_mRNA;seqid=Merlin +Merlin GeneMark.hmm CDS 2 691 . + 0 ID=Merlin_123_CDS;Parent=Merlin_123_mRNA;seqid=Merlin diff --git a/test-data/mrna-isoforms.gff b/test-data/mrna-isoforms.gff new file mode 100644 index 00000000..07260b02 --- /dev/null +++ b/test-data/mrna-isoforms.gff @@ -0,0 +1,14 @@ +##gff-version 3 +##sequence-region Merlin 1 172788 +# 2 isoforms +Merlin GeneMark.hmm gene 13066 14796 -2177.374893 - . ID=Merlin_58;seqid=Merlin;Name=multiexongene +Merlin GeneMark.hmm mRNA 13066 14796 . - . ID=Merlin_58_mRNA;Parent=Merlin_58;seqid=Merlin +Merlin GeneMark.hmm exon 13066 14296 . - . ID=Merlin_58_exon;Parent=Merlin_58_mRNA;seqid=Merlin +Merlin GeneMark.hmm CDS 13066 14296 . - 0 ID=Merlin_58_CDS;Parent=Merlin_58_exon;seqid=Merlin +Merlin GeneMark.hmm exon 14366 14796 . - . ID=Merlin_58_exon2;Parent=Merlin_58_mRNA;seqid=Merlin +Merlin GeneMark.hmm CDS 14366 14796 . - 0 ID=Merlin_58_CDS2;Parent=Merlin_58_exon2;seqid=Merlin +Merlin GeneMark.hmm mRNA 13066 14796 . - . ID=Merlin_58b_mRNA;Parent=Merlin_58;seqid=Merlin +Merlin GeneMark.hmm exon 13066 14096 . - . ID=Merlin_58b_exon;Parent=Merlin_58b_mRNA;seqid=Merlin +Merlin GeneMark.hmm CDS 13066 14096 . - 0 ID=Merlin_58b_CDS;Parent=Merlin_58b_exon;seqid=Merlin +Merlin GeneMark.hmm exon 14466 14796 . - . ID=Merlin_58b_exon2;Parent=Merlin_58b_mRNA;seqid=Merlin +Merlin GeneMark.hmm CDS 14466 14796 . - 0 ID=Merlin_58b_CDS2;Parent=Merlin_58b_exon2;seqid=Merlin diff --git a/test-data/ncrna-top.gff b/test-data/ncrna-top.gff index b7c52e08..95f45cbd 100644 --- a/test-data/ncrna-top.gff +++ b/test-data/ncrna-top.gff @@ -1,4 +1,4 @@ ##gff-version 3 ##sequence-region Merlin 1 172788 -Merlin GeneMark.hmm ncRNA 2 691 . + . ID=Merlin_1_mRNA;seqid=Merlin;color=#00ff00 -Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin +Merlin GeneMark.hmm ncRNA 2 691 . + . ID=Merlin_100_ncRNA;seqid=Merlin;color=#00ff00 +Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_100_exon;Parent=Merlin_100_ncRNA;seqid=Merlin diff --git a/test-data/pseudogene-top.gff b/test-data/pseudogene-top.gff index 65c77292..7fdbc70e 100644 --- a/test-data/pseudogene-top.gff +++ b/test-data/pseudogene-top.gff @@ -1,5 +1,5 @@ ##gff-version 3 ##sequence-region Merlin 1 172788 -Merlin GeneMark.hmm pseudogene 2 691 -856.563659 + . ID=Merlin_1;seqid=Merlin -Merlin GeneMark.hmm transcript 2 691 . + . ID=Merlin_1_mRNA;Parent=Merlin_1;seqid=Merlin;color=#00ff00 -Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_1_exon;Parent=Merlin_1_mRNA;seqid=Merlin +Merlin GeneMark.hmm pseudogene 2 691 -856.563659 + . ID=Merlin_564;seqid=Merlin +Merlin GeneMark.hmm transcript 2 691 . + . ID=Merlin_564_mRNA;Parent=Merlin_564;seqid=Merlin;color=#00ff00 +Merlin GeneMark.hmm exon 2 691 . + . ID=Merlin_564_exon;Parent=Merlin_564_mRNA;seqid=Merlin diff --git a/test-data/repeat-region-top.gff b/test-data/repeat-region-top.gff index 7e59d9c0..68324c46 100644 --- a/test-data/repeat-region-top.gff +++ b/test-data/repeat-region-top.gff @@ -1,3 +1,3 @@ ##gff-version 3 ##sequence-region Merlin 1 172788 -Merlin GeneMark.hmm repeat_region 2 691 -856.563659 + . ID=Merlin_1;seqid=Merlin +Merlin GeneMark.hmm repeat_region 2 691 -856.563659 + . ID=Merlin_800;seqid=Merlin diff --git a/test/annotations_test.py b/test/annotations_test.py index 9c2afb42..90c11065 100644 --- a/test/annotations_test.py +++ b/test/annotations_test.py @@ -1,155 +1,234 @@ -from BCBio import GFF -from BCBio.GFF import GFFExaminer -# from gffutils import inspect - +import time from . import ApolloTestCase, wa from apollo import util - class AnnotationsTest(ApolloTestCase): def test_inclusion(self): assert ("gene" in util.gene_types) - def test_features_to_apollo_schema_mrna(self): + def test_mrna_top(self): path = 'test-data/mrna-top.gff' - with open(path) as file: - print(file.read()) - file.close() - in_handle = open(path) - feature_data = None - for rec in GFF.parse(in_handle): - # feature_data = util.features_to_apollo_schema(rec.features, feature_list, transcript_list) - # feature_data = util.features_to_apollo_schema(rec.features) - for f in rec.features: - feature_data = util.yieldApolloData(f) - - in_handle.close() - assert (feature_data['location'] is not None) - assert (len(feature_data['children']) == 2) - - def test_features_to_apollo_schema_gene(self): + + feature_data = wa.annotations.load_gff3('temp_org', path) + + assert 'Merlin_1_mRNA' in feature_data + + feature_data = feature_data['Merlin_1_mRNA'] + + del feature_data['location']['id'] + assert feature_data['location'] == {'strand': 1, 'fmin': 1, 'fmax': 691} + assert feature_data['type'] == {'name': 'mRNA', 'cv': {'name': 'sequence'}} + assert feature_data['parent_name'] == 'Merlin_1_mRNA' + assert len(feature_data['children']) == 2 + + # Now download back the gff + uuid_gff = wa.io.write_downloadable('temp_org', 'GFF3') + if 'error' in uuid_gff or 'uuid' not in uuid_gff: + raise Exception("Apollo failed to prepare the GFF3 file for download: %s" % uuid_gff) + + time.sleep(1) + + gff_content = wa.io.download(uuid_gff['uuid'], output_format="text") + + assert '##gff-version 3' in gff_content + assert 'Merlin\t.\tgene\t2\t691\t.\t+\t.' in gff_content + assert 'Merlin\t.\texon\t2\t691\t.\t+\t.' in gff_content + assert 'Merlin\t.\tCDS\t2\t691\t.\t+\t0' in gff_content + + def test_gene_top(self): path = 'test-data/gene-top.gff' - with open(path) as file: - print(file.read()) - file.close() - in_handle = open(path) - feature_data = None - examiner = GFFExaminer() - print(examiner.parent_child_map(in_handle)) - in_handle.close() - in_handle = open(path) - new_feature_list = [] - new_transcript_list = [] - for rec in GFF.parse(in_handle): - print(str(rec)) - print(str(rec.features)) - # print(str(rec.sub_features)) - for f in rec.features: - print("feature ===== start") - print(f) - print(f.sub_features) - print("feature ===== end") - feature_data = wa.annotations._process_gff_entry(rec, new_feature_list=new_feature_list, - new_transcript_list=new_transcript_list) - print("feature list " + str(new_feature_list)) - print("transcript list " + str(new_transcript_list)) - print("feature data" + str(feature_data)) - # assert (subfeatures is not None and len(subfeatures) > 0) - # # feature_data = util.features_to_apollo_schema(rec.features, feature_list, transcript_list) - # feature_data = util._yieldApolloData(rec.features) - - in_handle.close() - print(str(feature_data)) - print("final feature list " + str(new_feature_list)) - print("final transcript list " + str(new_transcript_list)) - # assert (feature_data['location'] is not None) - # assert (len(feature_data['children']) == 2) - - def test_create_mrna(self): - path = 'test-data/mrna-top.gff' + feature_data = wa.annotations.load_gff3('temp_org', path) + + assert 'Merlin_123_mRNA' in feature_data + + feature_data = feature_data['Merlin_123_mRNA'] - with open(path) as file: - print(file.read()) - file.close() + del feature_data['location']['id'] + assert feature_data['location'] == {'strand': 1, 'fmin': 1, 'fmax': 691} + assert feature_data['type'] == {'name': 'mRNA', 'cv': {'name': 'sequence'}} + assert feature_data['parent_name'] == 'Merlin_123_mRNA' + assert len(feature_data['children']) == 2 - feature_list = [] - transcript_list = [] - in_handle = open(path) - for rec in GFF.parse(in_handle): - wa.annotations._process_gff_entry(rec, feature_list, transcript_list) + # Now download back the gff + uuid_gff = wa.io.write_downloadable('temp_org', 'GFF3') + if 'error' in uuid_gff or 'uuid' not in uuid_gff: + raise Exception("Apollo failed to prepare the GFF3 file for download: %s" % uuid_gff) - in_handle.close() - assert (len(feature_list) == 0) - assert (len(transcript_list) == 1) - print(transcript_list) + time.sleep(1) - def test_create_gene(self): + gff_content = wa.io.download(uuid_gff['uuid'], output_format="text") + + assert '##gff-version 3' in gff_content + assert 'Merlin\t.\tgene\t2\t691\t.\t+\t.' in gff_content + assert 'Merlin\t.\texon\t2\t691\t.\t+\t.' in gff_content + assert 'Merlin\t.\tCDS\t2\t691\t.\t+\t0' in gff_content + + def test_batch_size(self): path = 'test-data/gene-top.gff' - with open(path) as file: - print(file.read()) - file.close() + feature_data = wa.annotations.load_gff3('temp_org', path, batch_size=10) + + assert 'Merlin_123_mRNA' in feature_data + + feature_data = feature_data['Merlin_123_mRNA'] + + del feature_data['location']['id'] + assert feature_data['location'] == {'strand': 1, 'fmin': 1, 'fmax': 691} + assert feature_data['type'] == {'name': 'mRNA', 'cv': {'name': 'sequence'}} + assert feature_data['parent_name'] == 'Merlin_123_mRNA' + assert len(feature_data['children']) == 2 + + # Now download back the gff + uuid_gff = wa.io.write_downloadable('temp_org', 'GFF3') + if 'error' in uuid_gff or 'uuid' not in uuid_gff: + raise Exception("Apollo failed to prepare the GFF3 file for download: %s" % uuid_gff) + + time.sleep(1) + + gff_content = wa.io.download(uuid_gff['uuid'], output_format="text") + + assert '##gff-version 3' in gff_content + assert 'Merlin\t.\tgene\t2\t691\t.\t+\t.' in gff_content + assert 'Merlin\t.\texon\t2\t691\t.\t+\t.' in gff_content + assert 'Merlin\t.\tCDS\t2\t691\t.\t+\t0' in gff_content + + def test_isoforms(self): + path = 'test-data/mrna-isoforms.gff' + + feature_data = wa.annotations.load_gff3('temp_org', path) + + assert 'Merlin_58_mRNA' in feature_data + + feature_data = feature_data['Merlin_58_mRNA'] - feature_list = [] - transcript_list = [] - in_handle = open(path) - for rec in GFF.parse(in_handle): - wa.annotations._process_gff_entry(rec, feature_list, transcript_list) + del feature_data['location']['id'] + assert feature_data['location'] == {'strand': -1, 'fmin': 13065, 'fmax': 14796} + assert feature_data['type'] == {'name': 'mRNA', 'cv': {'name': 'sequence'}} + assert feature_data['parent_name'] == 'Merlin_58_mRNA' + assert len(feature_data['children']) == 4 - in_handle.close() - print(feature_list) - print(transcript_list) - # assert (len(feature_list) == 0) - # assert (len(transcript_list) == 1) + # Now download back the gff + uuid_gff = wa.io.write_downloadable('temp_org', 'GFF3') + if 'error' in uuid_gff or 'uuid' not in uuid_gff: + raise Exception("Apollo failed to prepare the GFF3 file for download: %s" % uuid_gff) + + time.sleep(1) + + gff_content = wa.io.download(uuid_gff['uuid'], output_format="text") + + assert '##gff-version 3' in gff_content + assert 'Merlin\t.\tgene\t13066\t14796\t.\t-\t.' in gff_content + assert 'Merlin\t.\tmRNA\t13066\t14796\t.\t-\t.' in gff_content + assert 'Merlin\t.\texon\t13066\t14296\t.\t-\t.' in gff_content + assert 'Merlin\t.\tCDS\t13096\t13230\t.\t-\t0' in gff_content + assert 'Merlin\t.\texon\t14466\t14796\t.\t-\t.' in gff_content def test_create_pseudogene(self): path = 'test-data/pseudogene-top.gff' - with open(path) as file: - print(file.read()) - file.close() + feature_data = wa.annotations.load_gff3('temp_org', path) + + assert 'Merlin_564' in feature_data - feature_list = [] - transcript_list = [] - in_handle = open(path) - for rec in GFF.parse(in_handle): - wa.annotations._process_gff_entry(rec, feature_list, transcript_list) + feature_data = feature_data['Merlin_564'] - in_handle.close() - assert (len(feature_list) == 1) - assert (len(transcript_list) == 0) - print(transcript_list) + del feature_data['location']['id'] + assert feature_data['location'] == {'strand': 1, 'fmin': 1, 'fmax': 691} + assert feature_data['type'] == {'name': 'transcript', 'cv': {'name': 'sequence'}} + assert len(feature_data['children']) == 1 + # Now download back the gff + uuid_gff = wa.io.write_downloadable('temp_org', 'GFF3') + if 'error' in uuid_gff or 'uuid' not in uuid_gff: + raise Exception("Apollo failed to prepare the GFF3 file for download: %s" % uuid_gff) + time.sleep(1) + + gff_content = wa.io.download(uuid_gff['uuid'], output_format="text") + + assert '##gff-version 3' in gff_content + assert 'Merlin\t.\tpseudogene\t2\t691\t.\t+\t.' in gff_content + assert 'Merlin\t.\texon\t2\t691\t.\t+\t.' in gff_content def test_create_ncRNA(self): path = 'test-data/ncrna-top.gff' - util.print_file(path) - feature_list = [] - transcript_list = [] - in_handle = open(path) - for rec in GFF.parse(in_handle): - wa.annotations._process_gff_entry(rec, feature_list, transcript_list) - in_handle.close() - assert (len(feature_list) == 1) - assert (len(transcript_list) == 0) + feature_data = wa.annotations.load_gff3('temp_org', path) + + assert 'Merlin_100_ncRNA' in feature_data + + feature_data = feature_data['Merlin_100_ncRNA'] + + del feature_data['location']['id'] + assert feature_data['location'] == {'strand': 1, 'fmin': 1, 'fmax': 691} + assert feature_data['type'] == {'name': 'ncRNA', 'cv': {'name': 'sequence'}} + assert len(feature_data['children']) == 1 + + # Now download back the gff + uuid_gff = wa.io.write_downloadable('temp_org', 'GFF3') + if 'error' in uuid_gff or 'uuid' not in uuid_gff: + raise Exception("Apollo failed to prepare the GFF3 file for download: %s" % uuid_gff) + + time.sleep(1) + + gff_content = wa.io.download(uuid_gff['uuid'], output_format="text") + + assert '##gff-version 3' in gff_content + assert 'Merlin\t.\tncRNA\t2\t691\t.\t+\t.' in gff_content + assert 'Merlin\t.\texon\t2\t691\t.\t+\t.' in gff_content def test_create_repeat_region(self): path = 'test-data/repeat-region-top.gff' - util.print_file(path) - feature_list = [] - transcript_list = [] - in_handle = open(path) - for rec in GFF.parse(in_handle): - wa.annotations._process_gff_entry(rec, feature_list, transcript_list) - - in_handle.close() - print(feature_list) - print(transcript_list) - assert (len(feature_list) == 1) - assert (len(transcript_list) == 0) + + feature_data = wa.annotations.load_gff3('temp_org', path) + + assert 'Merlin_800' in feature_data + + feature_data = feature_data['Merlin_800'] + + del feature_data['location']['id'] + assert feature_data['location'] == {'strand': 1, 'fmin': 1, 'fmax': 691} + assert feature_data['type'] == {'name': 'repeat_region', 'cv': {'name': 'sequence'}} + assert 'children' not in feature_data + + # Now download back the gff + uuid_gff = wa.io.write_downloadable('temp_org', 'GFF3') + if 'error' in uuid_gff or 'uuid' not in uuid_gff: + raise Exception("Apollo failed to prepare the GFF3 file for download: %s" % uuid_gff) + + time.sleep(1) + + gff_content = wa.io.download(uuid_gff['uuid'], output_format="text") + + assert '##gff-version 3' in gff_content + assert 'Merlin\t.\trepeat_region\t2\t691\t.\t+\t.' in gff_content + + def setUp(self): + # Make sure the organism is not already there + temp_org_info = wa.organisms.show_organism('temp_org') + if 'directory' in temp_org_info: + wa.organisms.delete_organism(temp_org_info['id']) + self.waitOrgDeleted('temp_org') + + org_info = wa.organisms.show_organism('alt_org') + if 'directory' not in org_info: + # Should not happen, but let's be tolerant... + # Error received when it fails: {'error': 'No row with the given identifier exists: [org.bbop.apollo.Organism#1154]'} + time.sleep(1) + org_info = wa.organisms.show_organism('alt_org') + + wa.organisms.add_organism('temp_org', org_info['directory']) + self.waitOrgCreated('temp_org') + + def tearDown(self): + org_info = wa.organisms.show_organism('temp_org') + + if org_info and 'id' in org_info: + wa.organisms.delete_features(org_info['id']) + wa.organisms.delete_organism(org_info['id']) + + self.waitOrgDeleted('temp_org') diff --git a/test/io_test.py b/test/io_test.py index 7b1296f7..b9b1aa18 100644 --- a/test/io_test.py +++ b/test/io_test.py @@ -1,5 +1,3 @@ -import re - from . import ApolloTestCase, wa import time @@ -26,70 +24,70 @@ def test_export_gff3(self): assert 'Merlin\t.\tnon_canonical_three_prime_splice_site\t4297\t4297\t.\t-\t.' in gff_content assert 'Merlin\t.\tnon_canonical_five_prime_splice_site\t4364\t4364\t.\t-\t.' in gff_content - # def test_export_vcf(self): - # - # org = wa.organisms.show_organism('test_organism') - # - # uuid_vcf = wa.io.write_downloadable(org['commonName'], 'VCF') - # if 'error' in uuid_vcf or 'uuid' not in uuid_vcf: - # raise Exception("Apollo failed to prepare the VCF file for download: %s" % uuid_vcf) - # - # vcf_content = wa.io.download(uuid_vcf['uuid'], output_format="text") - # assert '##fileformat=VCFv4.2' in vcf_content - # assert '##fileDate=20200608' in vcf_content - # assert '##source=.' in vcf_content - # assert '#CHROM POS ID REF ALT QUAL FILTER INFO' in vcf_content - # - # def test_export_fa_cds(self): - # - # org = wa.organisms.show_organism('test_organism') - # - # uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cds') - # if 'error' in uuid_fa or 'uuid' not in uuid_fa: - # raise Exception("Apollo failed to prepare the cds FASTA file for download: %s" % uuid_fa) - # - # fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") - # assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content - # assert 'CACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGAAAGTCTGGTGTATCGAAATA' in fa_content - # assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content - # assert 'ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT' in fa_content - # assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cds]' in fa_content - # assert '(mRNA) 108 residues [Merlin:1067-2011 - strand] [cds]' in fa_content - # assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds]' in fa_content - # - # def test_export_fa_cdna(self): - # - # org = wa.organisms.show_organism('test_organism') - # - # uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cdna') - # if 'error' in uuid_fa or 'uuid' not in uuid_fa: - # raise Exception("Apollo failed to prepare the cdna FASTA file for download: %s" % uuid_fa) - # - # fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") - # assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content - # assert 'ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG' in fa_content - # assert 'ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT' in fa_content - # assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content - # assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cdna]' in fa_content - # assert '(mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna]' in fa_content - # assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna]' in fa_content - # - # def test_export_fa_peptide(self): - # - # org = wa.organisms.show_organism('test_organism') - # - # uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='peptide') - # if 'error' in uuid_fa or 'uuid' not in uuid_fa: - # raise Exception("Apollo failed to prepare the peptide FASTA file for download: %s" % uuid_fa) - # - # fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") - # assert 'RLDKGTLLYRGQKLDLPTFEHNAENKLFYFRNYVSTSLKPLIFGEFGRMFMALDDDTTIY' in fa_content - # assert 'HLNYHCRYSTAGKKVWCIEISYWSNEQSCCSVFIR' in fa_content - # assert 'MSIKVRELDDKTDALISGVKTSAGQSSQSAKIKSTITAQYPSERSAGNDTSGSLRVHDLY' in fa_content - # assert 'MKSENMSTMRRRKVIADSKGERDAASTASDQVDSLELIGLKLDDVQSANELVAEVIEEKG' in fa_content - # assert '(mRNA) 229 residues [Merlin:2-691 + strand] [peptide]' in fa_content - # assert '(mRNA) 35 residues [Merlin:1067-2011 - strand] [peptide]' in fa_content - # assert '(mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide]' in fa_content + def test_export_vcf(self): + + org = wa.organisms.show_organism('test_organism') + + uuid_vcf = wa.io.write_downloadable(org['commonName'], 'VCF') + if 'error' in uuid_vcf or 'uuid' not in uuid_vcf: + raise Exception("Apollo failed to prepare the VCF file for download: %s" % uuid_vcf) + + vcf_content = wa.io.download(uuid_vcf['uuid'], output_format="text") + assert '##fileformat=VCFv4.2' in vcf_content + assert '##fileDate=' in vcf_content + assert '##source=.' in vcf_content + assert '#CHROM POS ID REF ALT QUAL FILTER INFO' in vcf_content + + def test_export_fa_cds(self): + + org = wa.organisms.show_organism('test_organism') + + uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cds') + if 'error' in uuid_fa or 'uuid' not in uuid_fa: + raise Exception("Apollo failed to prepare the cds FASTA file for download: %s" % uuid_fa) + + fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") + assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content + assert 'ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG' in fa_content + assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content + assert 'ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT' in fa_content + assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cds]' in fa_content + assert '(mRNA) 945 residues [Merlin:1067-2011 - strand] [cds]' in fa_content + assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds]' in fa_content + + def test_export_fa_cdna(self): + + org = wa.organisms.show_organism('test_organism') + + uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='cdna') + if 'error' in uuid_fa or 'uuid' not in uuid_fa: + raise Exception("Apollo failed to prepare the cdna FASTA file for download: %s" % uuid_fa) + + fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") + assert 'CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG' in fa_content + assert 'ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG' in fa_content + assert 'ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT' in fa_content + assert 'ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA' in fa_content + assert '(mRNA) 690 residues [Merlin:2-691 + strand] [cdna]' in fa_content + assert '(mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna]' in fa_content + assert '(mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna]' in fa_content + + def test_export_fa_peptide(self): + + org = wa.organisms.show_organism('test_organism') + + uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', seq_type='peptide') + if 'error' in uuid_fa or 'uuid' not in uuid_fa: + raise Exception("Apollo failed to prepare the peptide FASTA file for download: %s" % uuid_fa) + + fa_content = wa.io.download(uuid_fa['uuid'], output_format="text") + assert 'RLDKGTLLYRGQKLDLPTFEHNAENKLFYFRNYVSTSLKPLIFGEFGRMFMALDDDTTIY' in fa_content + assert 'MLTLDEFKNQAGNIDFQRTNMFSCVFATTPSAKSQQLLDQFGGMLFNNLPLNNDWLGLTQ' in fa_content + assert 'MSIKVRELDDKTDALISGVKTSAGQSSQSAKIKSTITAQYPSERSAGNDTSGSLRVHDLY' in fa_content + assert 'MKSENMSTMRRRKVIADSKGERDAASTASDQVDSLELIGLKLDDVQSANELVAEVIEEKG' in fa_content + assert '(mRNA) 229 residues [Merlin:2-691 + strand] [peptide]' in fa_content + assert '(mRNA) 314 residues [Merlin:1067-2011 - strand] [peptide]' in fa_content + assert '(mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide]' in fa_content def setUp(self): # Make sure the organism is not already there