diff --git a/README.md b/README.md
index 789cd17..3967463 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,82 @@
-# Digest
-C++ library which supports various minimizer schemes for digestion of DNA sequences
+# ✂️ Digest: fast, multi-use $k$-mer sub-sampling library
+
+
+
+ Visualization of different minimizer schemes supported in Digest and code example using library
+
+
+
+## What is the Digest library?
+- a `C++` library that supports various sub-sampling schemes for $k$-mers in DNA sequences.
+ - `Digest` library utilizes the rolling hash-function from [ntHash](https://github.com/bcgsc/ntHash) to order the $k$-mers in a window.
+
+## How to install and build into your project?
+
+
+### Step 1: Install library
+
+After cloning from GitHub, we use the [Meson](https://mesonbuild.com) build-system to install the library.
+- `PREFIX` is an absolute path to library files will be install (`*.h` and `*.a` files)
+ - **IMPORTANT**: `PREFIX` should not be the root directory of the `Digest/` repo to avoid any issues with installation.
+- These commands generate an `include` and `lib` folders in `PREFIX` folder
+
+```bash
+git clone https://github.com/VeryAmazed/digest.git
+
+meson setup --prefix= --buildtype=release build
+meson install -C build
+```
+
+### Step 2: Include Digest in your project
+
+#### (a) Using `Meson`:
+
+If your coding project uses `Meson` to build the executable(s), you can include a file called `subprojects/digest.wrap` in your repository and let Meson install it for you.
+
+#### (b) Using `g++`:
+
+To use Digest in your C++ project, you just need to include the header files (`*.h`) and library file (`*.a`) that were installed in the first step. Assuming that `install/` is the directory you installed them in, here is how you can compile.
+
+```bash
+g++ -std=c++17 -o main main.cpp -I install/include/ -L install/lib -lnthash
+```
+
+## Detailed Look at Example Usage (2 ways):
+
+There are three types of minimizer schemes that can be used:
+
+1. Windowed Minimizer
+2. Modimizer
+3. Syncmer
+
+The general steps to use Digest is as follows: (1) include the relevant header files, (2) declare the Digest object and (3) find the positions where the minimizers are present in the sequence.
+
+### 1. Find positions of minimizers:
+```cpp
+#include "digest/digester.hpp"
+#include "digest/window_minimizer.hpp"
+
+digest::WindowMin digester (dna, 15, 7);
+
+std::vector output;
+digester.roll_minimizer(100, output);
+```
+- This code snippet will find up to 100 Windowed Minimizers and store their positions in the vector called `output`.
+- `digest::BadCharPolicy::WRITEOVER` means that anytime the code encounters an non-`ACTG` character, it will replace it with an `A`.
+ - `digest::BadCharPolicy::SKIPOVER` will skip any $k$-mers with non-`ACTG` characters
+- `digest::ds::Adaptive` is our recommended data-structure for finding the minimum value in a window (see wiki for other options)
+
+### 2. Find both positions and hash values of minimizers
+If you would like to obtain both the positions and hash values for each minimizer, you can pass a vector of paired integers to do so.
+
+```
+std::vector> output;
+digester.roll_minimizer(100, output);
+```
+
+
+
diff --git a/include/digest/data_structure.hpp b/include/digest/data_structure.hpp
index 34f097b..9070ba8 100644
--- a/include/digest/data_structure.hpp
+++ b/include/digest/data_structure.hpp
@@ -115,7 +115,7 @@ template struct Naive {
std::array arr;
unsigned int i = 0;
- Naive(uint32_t){};
+ Naive(uint32_t) {};
Naive(const Naive &other) = default;
Naive &operator=(const Naive &other) = default;
@@ -183,7 +183,7 @@ template struct Naive2 {
unsigned int last = 0;
std::vector arr = std::vector(k);
- Naive2(uint32_t){};
+ Naive2(uint32_t) {};
Naive2(const Naive2 &other) = default;
Naive2 &operator=(const Naive2 &other) = default;
diff --git a/tests/density/ACTG.cpp b/tests/density/ACTG.cpp
index 626117d..b06c8fc 100644
--- a/tests/density/ACTG.cpp
+++ b/tests/density/ACTG.cpp
@@ -31,83 +31,87 @@ int dir2[] = {0, 1, 0, -1, 1, -1, 1, -1};
int main() {
- std::cout << std::fixed << std::setprecision(8);
- // if you use ld, use the above and don't use string stream
-
- std::string str;
-
- std::vector strs;
- assert(freopen("../tests/density/ACTG.txt", "r", stdin));
- for (int i = 0; i < 100; i++) {
- std::cin >> str;
- strs.pb(str);
- }
-
- std::vector> mod_min_vec(4, std::vector());
- std::vector> wind_min_vec(4, std::vector());
- std::vector> sync_vec(4, std::vector());
-
- uint64_t mods[4] = {109, 128, 1009, 1024};
- unsigned l_winds[4] = {7, 8, 17, 16};
-
- double kmers = 100000 - 16 + 1;
-
- for (int i = 0; i < 4; i++) {
- for (int j = 0; j < 100; j++) {
- digest::ModMin mm(
- strs[j], 16, mods[i], 0, 0, digest::MinimizedHashType::CANON);
- std::vector temp;
- mm.roll_minimizer(100000, temp);
- double am = temp.size();
- am /= kmers;
- mod_min_vec[i].pb(am);
- }
- }
-
- for (int i = 0; i < 4; i++) {
- for (int j = 0; j < 100; j++) {
- digest::WindowMin
- wm(strs[j], 16, l_winds[i], 0, digest::MinimizedHashType::CANON);
- std::vector temp;
- wm.roll_minimizer(100000, temp);
- double am = temp.size();
- am /= kmers;
- wind_min_vec[i].pb(am);
- }
- }
-
- for (int i = 0; i < 4; i++) {
- for (int j = 0; j < 100; j++) {
- digest::Syncmer
- syn(strs[j], 16, l_winds[i], 0, digest::MinimizedHashType::CANON);
- std::vector temp;
- syn.roll_minimizer(100000, temp);
- double am = temp.size();
- am /= kmers;
- sync_vec[i].pb(am);
- }
- }
- assert(freopen("../tests/density/out1.txt", "w", stdout));
- for (int i = 0; i < 4; i++) {
- for (size_t j = 0; j < 100; j++) {
- std::cout << mod_min_vec[i][j] << " ";
- }
- std::cout << std::endl;
- }
-
- for (int i = 0; i < 4; i++) {
- for (size_t j = 0; j < 100; j++) {
- std::cout << wind_min_vec[i][j] << " ";
- }
- std::cout << std::endl;
- }
-
- for (int i = 0; i < 4; i++) {
- for (size_t j = 0; j < 100; j++) {
- std::cout << sync_vec[i][j] << " ";
- }
- std::cout << std::endl;
- }
-
- return 0;
+ std::cout << std::fixed << std::setprecision(8);
+ // if you use ld, use the above and don't use string stream
+
+ std::string str;
+
+ std::vector strs;
+ assert(freopen("../tests/density/ACTG.txt", "r", stdin));
+ for (int i = 0; i < 100; i++) {
+ std::cin >> str;
+ strs.pb(str);
+ }
+
+ std::vector> mod_min_vec(4, std::vector());
+ std::vector> wind_min_vec(4, std::vector());
+ std::vector> sync_vec(4, std::vector());
+
+ uint64_t mods[4] = {109, 128, 1009, 1024};
+ unsigned l_winds[4] = {7, 8, 17, 16};
+
+ double kmers = 100000 - 16 + 1;
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 100; j++) {
+ digest::ModMin mm(
+ strs[j], 16, mods[i], 0, 0, digest::MinimizedHashType::CANON);
+ std::vector temp;
+ mm.roll_minimizer(100000, temp);
+ double am = temp.size();
+ am /= kmers;
+ mod_min_vec[i].pb(am);
+ }
+ }
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 100; j++) {
+ digest::WindowMin
+ wm(strs[j], 16, l_winds[i], 0,
+ digest::MinimizedHashType::CANON);
+ std::vector temp;
+ wm.roll_minimizer(100000, temp);
+ double am = temp.size();
+ am /= kmers;
+ wind_min_vec[i].pb(am);
+ }
+ }
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 100; j++) {
+ digest::Syncmer
+ syn(strs[j], 16, l_winds[i], 0,
+ digest::MinimizedHashType::CANON);
+ std::vector temp;
+ syn.roll_minimizer(100000, temp);
+ double am = temp.size();
+ am /= kmers;
+ sync_vec[i].pb(am);
+ }
+ }
+ assert(freopen("../tests/density/out1.txt", "w", stdout));
+ for (int i = 0; i < 4; i++) {
+ for (size_t j = 0; j < 100; j++) {
+ std::cout << mod_min_vec[i][j] << " ";
+ }
+ std::cout << std::endl;
+ }
+
+ for (int i = 0; i < 4; i++) {
+ for (size_t j = 0; j < 100; j++) {
+ std::cout << wind_min_vec[i][j] << " ";
+ }
+ std::cout << std::endl;
+ }
+
+ for (int i = 0; i < 4; i++) {
+ for (size_t j = 0; j < 100; j++) {
+ std::cout << sync_vec[i][j] << " ";
+ }
+ std::cout << std::endl;
+ }
+
+ return 0;
}
\ No newline at end of file
diff --git a/tests/density/non-ACTG.cpp b/tests/density/non-ACTG.cpp
index df255f7..7bf42cb 100644
--- a/tests/density/non-ACTG.cpp
+++ b/tests/density/non-ACTG.cpp
@@ -54,105 +54,109 @@ int dir2[] = {0, 1, 0, -1, 1, -1, 1, -1};
int main() {
- std::cout << std::fixed << std::setprecision(8);
- // if you use ld, use the above and don't use string stream
-
- std::string str;
-
- std::vector strs;
- assert(freopen("../tests/density/non-ACTG.txt", "r", stdin));
- for (int i = 0; i < 100; i++) {
- std::cin >> str;
- strs.pb(str);
- }
-
- std::vector> mod_min_vec(4, std::vector());
- std::vector> wind_min_vec(4, std::vector());
- std::vector> sync_vec(4, std::vector());
-
- uint64_t mods[4] = {109, 128, 1009, 1024};
- unsigned l_winds[4] = {7, 8, 17, 16};
-
- std::vector kmers(100, 0);
-
- for (int i = 0; i < 100; i++) {
- int start = 0;
- while (start + 7 < 1e5) {
- bool works = true;
- for (int j = 0; j < 16; j++) {
- if (strs[i][start + j] == 'N') {
- works = false;
- start = start + j;
- break;
- }
- }
- if (works) {
- kmers[i]++;
- }
- start++;
- }
- // std::cout << kmers[i] << " ";
- }
- // std::cout << std::endl;
-
- for (int i = 0; i < 4; i++) {
- for (int j = 0; j < 100; j++) {
- digest::ModMin mm(
- strs[j], 16, mods[i], 0, 0, digest::MinimizedHashType::CANON);
- std::vector temp;
- mm.roll_minimizer(100000, temp);
- double am = temp.size();
- am /= kmers[i];
- mod_min_vec[i].pb(am);
- }
- }
-
- for (int i = 0; i < 4; i++) {
- for (int j = 0; j < 100; j++) {
- digest::WindowMin
- wm(strs[j], 16, l_winds[i], 0, digest::MinimizedHashType::CANON);
- std::vector temp;
- wm.roll_minimizer(100000, temp);
- double am = temp.size();
- am /= kmers[i];
-
- wind_min_vec[i].pb(am);
- }
- }
-
- for (int i = 0; i < 4; i++) {
- for (int j = 0; j < 100; j++) {
- digest::Syncmer
- syn(strs[j], 16, l_winds[i], 0, digest::MinimizedHashType::CANON);
- std::vector temp;
- syn.roll_minimizer(100000, temp);
- double am = temp.size();
- am /= kmers[i];
-
- sync_vec[i].pb(am);
- }
- }
- assert(freopen("../tests/density/out2.txt", "w", stdout));
- for (int i = 0; i < 4; i++) {
- for (size_t j = 0; j < 100; j++) {
- std::cout << mod_min_vec[i][j] << " ";
- }
- std::cout << std::endl;
- }
-
- for (int i = 0; i < 4; i++) {
- for (size_t j = 0; j < 100; j++) {
- std::cout << wind_min_vec[i][j] << " ";
- }
- std::cout << std::endl;
- }
-
- for (int i = 0; i < 4; i++) {
- for (size_t j = 0; j < 100; j++) {
- std::cout << sync_vec[i][j] << " ";
- }
- std::cout << std::endl;
- }
-
- return 0;
+ std::cout << std::fixed << std::setprecision(8);
+ // if you use ld, use the above and don't use string stream
+
+ std::string str;
+
+ std::vector strs;
+ assert(freopen("../tests/density/non-ACTG.txt", "r", stdin));
+ for (int i = 0; i < 100; i++) {
+ std::cin >> str;
+ strs.pb(str);
+ }
+
+ std::vector> mod_min_vec(4, std::vector());
+ std::vector> wind_min_vec(4, std::vector());
+ std::vector> sync_vec(4, std::vector());
+
+ uint64_t mods[4] = {109, 128, 1009, 1024};
+ unsigned l_winds[4] = {7, 8, 17, 16};
+
+ std::vector kmers(100, 0);
+
+ for (int i = 0; i < 100; i++) {
+ int start = 0;
+ while (start + 7 < 1e5) {
+ bool works = true;
+ for (int j = 0; j < 16; j++) {
+ if (strs[i][start + j] == 'N') {
+ works = false;
+ start = start + j;
+ break;
+ }
+ }
+ if (works) {
+ kmers[i]++;
+ }
+ start++;
+ }
+ // std::cout << kmers[i] << " ";
+ }
+ // std::cout << std::endl;
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 100; j++) {
+ digest::ModMin mm(
+ strs[j], 16, mods[i], 0, 0, digest::MinimizedHashType::CANON);
+ std::vector temp;
+ mm.roll_minimizer(100000, temp);
+ double am = temp.size();
+ am /= kmers[i];
+ mod_min_vec[i].pb(am);
+ }
+ }
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 100; j++) {
+ digest::WindowMin
+ wm(strs[j], 16, l_winds[i], 0,
+ digest::MinimizedHashType::CANON);
+ std::vector temp;
+ wm.roll_minimizer(100000, temp);
+ double am = temp.size();
+ am /= kmers[i];
+
+ wind_min_vec[i].pb(am);
+ }
+ }
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 100; j++) {
+ digest::Syncmer
+ syn(strs[j], 16, l_winds[i], 0,
+ digest::MinimizedHashType::CANON);
+ std::vector temp;
+ syn.roll_minimizer(100000, temp);
+ double am = temp.size();
+ am /= kmers[i];
+
+ sync_vec[i].pb(am);
+ }
+ }
+ assert(freopen("../tests/density/out2.txt", "w", stdout));
+ for (int i = 0; i < 4; i++) {
+ for (size_t j = 0; j < 100; j++) {
+ std::cout << mod_min_vec[i][j] << " ";
+ }
+ std::cout << std::endl;
+ }
+
+ for (int i = 0; i < 4; i++) {
+ for (size_t j = 0; j < 100; j++) {
+ std::cout << wind_min_vec[i][j] << " ";
+ }
+ std::cout << std::endl;
+ }
+
+ for (int i = 0; i < 4; i++) {
+ for (size_t j = 0; j < 100; j++) {
+ std::cout << sync_vec[i][j] << " ";
+ }
+ std::cout << std::endl;
+ }
+
+ return 0;
}
\ No newline at end of file
diff --git a/tests/test/test.cpp b/tests/test/test.cpp
index cc1c72b..00ca8ed 100644
--- a/tests/test/test.cpp
+++ b/tests/test/test.cpp
@@ -1246,7 +1246,7 @@ TEST_CASE("ModMin Testing") {
{ F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 32) } \
{ F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 33) } \
{ F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 63) } \
- { F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 64) }
+ {F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 64)}
TEST_CASE("WindowMin Testing") {
SECTION("Constructor Testing") {