Skip to content

Commit

Permalink
Merge pull request #43 from VeryAmazed/39
Browse files Browse the repository at this point in the history
fixes #39
  • Loading branch information
ishmeals authored Sep 22, 2024
2 parents 4012378 + 589383e commit 62dd43e
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 59 deletions.
4 changes: 2 additions & 2 deletions include/digest/data_structure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ template <uint32_t k> struct Naive {
std::array<uint64_t, k> arr;
unsigned int i = 0;

Naive(uint32_t) {};
Naive(uint32_t){};
Naive(const Naive &other) = default;
Naive &operator=(const Naive &other) = default;

Expand Down Expand Up @@ -183,7 +183,7 @@ template <uint32_t k> struct Naive2 {
unsigned int last = 0;
std::vector<uint64_t> arr = std::vector<uint64_t>(k);

Naive2(uint32_t) {};
Naive2(uint32_t){};
Naive2(const Naive2 &other) = default;
Naive2 &operator=(const Naive2 &other) = default;

Expand Down
4 changes: 2 additions & 2 deletions include/digest/digester.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ template <BadCharPolicy P> class Digester {
* @throws BadConstructionException thrown if the starting position is
* greater than the length of the string
*/
void new_seq(const char *seq, size_t len, size_t start) {
virtual void new_seq(const char *seq, size_t len, size_t start) {
this->seq = seq;
this->len = len;
this->offset = 0;
Expand All @@ -231,7 +231,7 @@ template <BadCharPolicy P> class Digester {
* @throws BadConstructionException thrown if the starting position is
* greater than the length of the string
*/
void new_seq(const std::string &seq, size_t pos) {
virtual void new_seq(const std::string &seq, size_t pos) {
    // Hand the string's buffer and size to the (const char *, len, start)
    // overload so both entry points share a single implementation.
    const char *const raw = seq.c_str();
    const size_t total = seq.size();
    new_seq(raw, total, pos);
}

Expand Down
20 changes: 16 additions & 4 deletions include/digest/window_minimizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,16 @@ template <BadCharPolicy P, class T> class WindowMin : public Digester<P> {
}
}

void new_seq(const char *seq, size_t len, size_t start) override {
ds = T(large_window);
Digester<P>::new_seq(seq, len, start);
}

void new_seq(const std::string &seq, size_t pos) override {
ds = T(large_window);
Digester<P>::new_seq(seq.c_str(), seq.size(), pos);
}

/**
*
* @return unsigned, the value of large_window
Expand All @@ -139,16 +149,18 @@ template <BadCharPolicy P, class T> class WindowMin : public Digester<P> {

// function is mainly to help with tests
/**
* @brief gets the size of the internal rmq data structure being used. Mainly used to help with tests (so you probably shouldn't use it).
*
* @brief gets the size of the internal rmq data structure being used.
* Mainly used to help with tests (so you probably shouldn't use it).
*
* @return size_t, the size of the internal rmq data structure object
*/
size_t get_ds_size() {
    // Returns the stored ds_size member unchanged.
    return ds_size;
}

// function is mainly to help with tests
/**
* @brief checks if we have generated the first minimizer. Mainly used to help with tests (so you probably shouldn't use it).
*
* @brief checks if we have generated the first minimizer. Mainly used to
* help with tests (so you probably shouldn't use it).
*
* @return bool, if we have already obtained a minimizer
*/
bool get_is_minimized() {
    // Returns the stored is_minimized flag unchanged.
    return is_minimized;
}
Expand Down
21 changes: 13 additions & 8 deletions pybind/bindings.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
#include <digest_utils.hpp>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <digest_utils.hpp>

namespace py = pybind11;

PYBIND11_MODULE(Digest, m) {
m.doc() = "bindings for Digest";
m.def("window_minimizer", &window_minimizer, "A function that runs window minimizer digestion",
py::arg("seq"), py::arg("k") = 31, py::arg("w") = 11, py::arg("include_hash") = false);
m.def("modimizer", &modimizer, "A function that runs mod-minimizer digestion",
py::arg("seq"), py::arg("k") = 31, py::arg("mod") = 100, py::arg("include_hash") = false);
m.def("syncmer", &syncmer, "A function that runs syncmer digestion",
py::arg("seq"), py::arg("k") = 31, py::arg("w") = 11, py::arg("include_hash") = false);
m.doc() = "bindings for Digest";
m.def("window_minimizer", &window_minimizer,
"A function that runs window minimizer digestion", py::arg("seq"),
py::arg("k") = 31, py::arg("w") = 11,
py::arg("include_hash") = false);
m.def("modimizer", &modimizer,
"A function that runs mod-minimizer digestion", py::arg("seq"),
py::arg("k") = 31, py::arg("mod") = 100,
py::arg("include_hash") = false);
m.def("syncmer", &syncmer, "A function that runs syncmer digestion",
py::arg("seq"), py::arg("k") = 31, py::arg("w") = 11,
py::arg("include_hash") = false);
}
86 changes: 44 additions & 42 deletions pybind/digest_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,50 +1,52 @@
#include <digest/window_minimizer.hpp>
#include <digest/syncmer.hpp>
#include <digest/mod_minimizer.hpp>
#include <digest/syncmer.hpp>
#include <digest/window_minimizer.hpp>
#include <variant>

std::variant<std::vector<uint32_t>, std::vector<std::pair<uint32_t, uint32_t>>> window_minimizer (
const std::string &seq, unsigned k, unsigned large_window, bool include_hash=false) {
digest::WindowMin<digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive> digester (seq, k, large_window);
if (include_hash) {
std::vector<std::pair<uint32_t, uint32_t>> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
else {
std::vector<uint32_t> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
/**
 * @brief Runs window-minimizer digestion over a whole sequence.
 *
 * Builds a digest::WindowMin (SKIPOVER bad-character policy, Adaptive data
 * structure) on seq and calls roll_minimizer with seq.length() as the amount.
 *
 * Declared inline because this function is defined in a header; without it,
 * including the header from more than one translation unit violates the
 * one-definition rule.
 *
 * @param seq sequence to digest
 * @param k k value forwarded to the digester constructor
 * @param large_window window size forwarded to the digester constructor
 * @param include_hash selects which alternative of the variant is returned
 * @return std::vector<uint32_t> when include_hash is false, otherwise
 * std::vector<std::pair<uint32_t, uint32_t>>; either one is filled by
 * roll_minimizer
 */
inline std::variant<std::vector<uint32_t>,
                    std::vector<std::pair<uint32_t, uint32_t>>>
window_minimizer(const std::string &seq, unsigned k, unsigned large_window,
                 bool include_hash = false) {
    digest::WindowMin<digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive>
        digester(seq, k, large_window);
    if (include_hash) {
        std::vector<std::pair<uint32_t, uint32_t>> output;
        digester.roll_minimizer(seq.length(), output);
        return output;
    }
    std::vector<uint32_t> output;
    digester.roll_minimizer(seq.length(), output);
    return output;
}
//std::vector<std::pair<size_t, size_t>> output;
// std::vector<std::pair<size_t, size_t>> output;

std::variant<std::vector<uint32_t>, std::vector<std::pair<uint32_t, uint32_t>>> modimizer (
const std::string &seq, unsigned k, uint32_t mod, bool include_hash=false) {
digest::ModMin<digest::BadCharPolicy::SKIPOVER> digester (seq, k, mod);
if (include_hash) {
std::vector<std::pair<uint32_t, uint32_t>> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
else {
std::vector<uint32_t> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
/**
 * @brief Runs mod-minimizer digestion over a whole sequence.
 *
 * Builds a digest::ModMin (SKIPOVER bad-character policy) on seq and calls
 * roll_minimizer with seq.length() as the amount.
 *
 * Declared inline because this function is defined in a header; without it,
 * including the header from more than one translation unit violates the
 * one-definition rule.
 *
 * @param seq sequence to digest
 * @param k k value forwarded to the digester constructor
 * @param mod mod value forwarded to the digester constructor
 * @param include_hash selects which alternative of the variant is returned
 * @return std::vector<uint32_t> when include_hash is false, otherwise
 * std::vector<std::pair<uint32_t, uint32_t>>; either one is filled by
 * roll_minimizer
 */
inline std::variant<std::vector<uint32_t>,
                    std::vector<std::pair<uint32_t, uint32_t>>>
modimizer(const std::string &seq, unsigned k, uint32_t mod,
          bool include_hash = false) {
    digest::ModMin<digest::BadCharPolicy::SKIPOVER> digester(seq, k, mod);
    if (include_hash) {
        std::vector<std::pair<uint32_t, uint32_t>> output;
        digester.roll_minimizer(seq.length(), output);
        return output;
    }
    std::vector<uint32_t> output;
    digester.roll_minimizer(seq.length(), output);
    return output;
}

std::variant<std::vector<uint32_t>, std::vector<std::pair<uint32_t, uint32_t>>> syncmer (
const std::string &seq, unsigned k, unsigned large_window, bool include_hash=false) {
digest::Syncmer<digest::BadCharPolicy::WRITEOVER, digest::ds::Adaptive> digester (seq, k, large_window);
if (include_hash) {
std::vector<std::pair<uint32_t, uint32_t>> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
else {
std::vector<uint32_t> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
/**
 * @brief Runs syncmer digestion over a whole sequence.
 *
 * Builds a digest::Syncmer (WRITEOVER bad-character policy, Adaptive data
 * structure) on seq and calls roll_minimizer with seq.length() as the amount.
 *
 * Declared inline because this function is defined in a header; without it,
 * including the header from more than one translation unit violates the
 * one-definition rule.
 *
 * @param seq sequence to digest
 * @param k k value forwarded to the digester constructor
 * @param large_window window size forwarded to the digester constructor
 * @param include_hash selects which alternative of the variant is returned
 * @return std::vector<uint32_t> when include_hash is false, otherwise
 * std::vector<std::pair<uint32_t, uint32_t>>; either one is filled by
 * roll_minimizer
 */
inline std::variant<std::vector<uint32_t>,
                    std::vector<std::pair<uint32_t, uint32_t>>>
syncmer(const std::string &seq, unsigned k, unsigned large_window,
        bool include_hash = false) {
    digest::Syncmer<digest::BadCharPolicy::WRITEOVER, digest::ds::Adaptive>
        digester(seq, k, large_window);
    if (include_hash) {
        std::vector<std::pair<uint32_t, uint32_t>> output;
        digester.roll_minimizer(seq.length(), output);
        return output;
    }
    std::vector<uint32_t> output;
    digester.roll_minimizer(seq.length(), output);
    return output;
}
2 changes: 1 addition & 1 deletion tests/test/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1246,7 +1246,7 @@ TEST_CASE("ModMin Testing") {
{ F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 32) } \
{ F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 33) } \
{ F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 63) } \
{F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 64)}
{ F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 64) }

TEST_CASE("WindowMin Testing") {
SECTION("Constructor Testing") {
Expand Down

0 comments on commit 62dd43e

Please sign in to comment.