Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixes #39 #43

Merged
merged 2 commits into from
Sep 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/digest/data_structure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ template <uint32_t k> struct Naive {
std::array<uint64_t, k> arr;
unsigned int i = 0;

Naive(uint32_t) {};
Naive(uint32_t){};
Naive(const Naive &other) = default;
Naive &operator=(const Naive &other) = default;

Expand Down Expand Up @@ -183,7 +183,7 @@ template <uint32_t k> struct Naive2 {
unsigned int last = 0;
std::vector<uint64_t> arr = std::vector<uint64_t>(k);

Naive2(uint32_t) {};
Naive2(uint32_t){};
Naive2(const Naive2 &other) = default;
Naive2 &operator=(const Naive2 &other) = default;

Expand Down
4 changes: 2 additions & 2 deletions include/digest/digester.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ template <BadCharPolicy P> class Digester {
* @throws BadConstructionException thrown if the starting position is
* greater than the length of the string
*/
void new_seq(const char *seq, size_t len, size_t start) {
virtual void new_seq(const char *seq, size_t len, size_t start) {
this->seq = seq;
this->len = len;
this->offset = 0;
Expand All @@ -231,7 +231,7 @@ template <BadCharPolicy P> class Digester {
* @throws BadConstructionException thrown if the starting position is
* greater than the length of the string
*/
void new_seq(const std::string &seq, size_t pos) {
virtual void new_seq(const std::string &seq, size_t pos) {
new_seq(seq.c_str(), seq.size(), pos);
}

Expand Down
20 changes: 16 additions & 4 deletions include/digest/window_minimizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,16 @@ template <BadCharPolicy P, class T> class WindowMin : public Digester<P> {
}
}

/**
 * @brief Re-targets this window minimizer at a new sequence.
 *
 * Replaces the window data structure `ds` with a freshly constructed
 * T(large_window) and then delegates to Digester<P>::new_seq with the same
 * arguments.
 *
 * @param seq pointer to the characters of the new sequence (forwarded
 * unchanged to the base class)
 * @param len length of the new sequence (forwarded unchanged)
 * @param start starting position within the new sequence (forwarded
 * unchanged)
 *
 * @throws BadConstructionException propagated from Digester<P>::new_seq,
 * which documents it as thrown if the starting position is greater than the
 * length of the string
 */
void new_seq(const char *seq, size_t len, size_t start) override {
// Discard whatever the previous sequence left in the window structure.
// This happens before the base call, so `ds` is already replaced even if the
// base call throws.
// NOTE(review): ds_size and is_minimized (exposed via get_ds_size() /
// get_is_minimized()) are not touched here -- confirm whether they also need
// to be reset when `ds` is rebuilt.
ds = T(large_window);
Digester<P>::new_seq(seq, len, start);
}

/**
 * @brief std::string convenience overload of new_seq.
 *
 * Replaces the window data structure `ds` with a freshly constructed
 * T(large_window) and then hands the string's character buffer and size to
 * the three-argument Digester<P>::new_seq.
 *
 * @param seq the new sequence; its c_str() and size() are forwarded to the
 * base class
 * @param pos starting position within the new sequence (forwarded unchanged)
 *
 * @throws BadConstructionException propagated from Digester<P>::new_seq,
 * which documents it as thrown if the starting position is greater than the
 * length of the string
 */
void new_seq(const std::string &seq, size_t pos) override {
ds = T(large_window);
// The call is qualified with Digester<P>::, so it is not dispatched
// virtually back into this class's three-argument override; `ds` is
// therefore rebuilt once here rather than twice.
// NOTE(review): the base implementation stores the pointer it is given
// (this->seq = seq), so `seq` presumably has to outlive further use of this
// digester -- confirm against callers.
Digester<P>::new_seq(seq.c_str(), seq.size(), pos);
}

/**
*
* @return unsigned, the value of large_window
Expand All @@ -139,16 +149,18 @@ template <BadCharPolicy P, class T> class WindowMin : public Digester<P> {

// function is mainly to help with tests
/**
* @brief gets the size of the internal rmq data structure being used. Mainly used to help with tests (so you probably shouldn't use it).
*
* @brief gets the size of the internal rmq data structure being used.
* Mainly used to help with tests (so you probably shouldn't use it).
*
* @return size_t, the size of the internal rmq data structure object
*/
size_t get_ds_size() { return ds_size; }

// function is mainly to help with tests
/**
* @brief checks if we have generated the first minimizer. Mainly used to help with tests (so you probably shouldn't use it).
*
* @brief checks if we have generated the first minimizer. Mainly used to
* help with tests (so you probably shouldn't use it).
*
* @return bool, if we have already obtained a minimizer
*/
bool get_is_minimized() { return is_minimized; }
Expand Down
21 changes: 13 additions & 8 deletions pybind/bindings.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
#include <digest_utils.hpp>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <digest_utils.hpp>

namespace py = pybind11;

PYBIND11_MODULE(Digest, m) {
m.doc() = "bindings for Digest";
m.def("window_minimizer", &window_minimizer, "A function that runs window minimizer digestion",
py::arg("seq"), py::arg("k") = 31, py::arg("w") = 11, py::arg("include_hash") = false);
m.def("modimizer", &modimizer, "A function that runs mod-minimizer digestion",
py::arg("seq"), py::arg("k") = 31, py::arg("mod") = 100, py::arg("include_hash") = false);
m.def("syncmer", &syncmer, "A function that runs syncmer digestion",
py::arg("seq"), py::arg("k") = 31, py::arg("w") = 11, py::arg("include_hash") = false);
m.doc() = "bindings for Digest";
m.def("window_minimizer", &window_minimizer,
"A function that runs window minimizer digestion", py::arg("seq"),
py::arg("k") = 31, py::arg("w") = 11,
py::arg("include_hash") = false);
m.def("modimizer", &modimizer,
"A function that runs mod-minimizer digestion", py::arg("seq"),
py::arg("k") = 31, py::arg("mod") = 100,
py::arg("include_hash") = false);
m.def("syncmer", &syncmer, "A function that runs syncmer digestion",
py::arg("seq"), py::arg("k") = 31, py::arg("w") = 11,
py::arg("include_hash") = false);
}
86 changes: 44 additions & 42 deletions pybind/digest_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,50 +1,52 @@
#include <digest/window_minimizer.hpp>
#include <digest/syncmer.hpp>
#include <digest/mod_minimizer.hpp>
#include <digest/syncmer.hpp>
#include <digest/window_minimizer.hpp>
#include <variant>

std::variant<std::vector<uint32_t>, std::vector<std::pair<uint32_t, uint32_t>>> window_minimizer (
const std::string &seq, unsigned k, unsigned large_window, bool include_hash=false) {
digest::WindowMin<digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive> digester (seq, k, large_window);
if (include_hash) {
std::vector<std::pair<uint32_t, uint32_t>> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
else {
std::vector<uint32_t> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
std::variant<std::vector<uint32_t>, std::vector<std::pair<uint32_t, uint32_t>>>
window_minimizer(const std::string &seq, unsigned k, unsigned large_window,
bool include_hash = false) {
digest::WindowMin<digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive>
digester(seq, k, large_window);
if (include_hash) {
std::vector<std::pair<uint32_t, uint32_t>> output;
digester.roll_minimizer(seq.length(), output);
return output;
} else {
std::vector<uint32_t> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
}
//std::vector<std::pair<size_t, size_t>> output;
// std::vector<std::pair<size_t, size_t>> output;

std::variant<std::vector<uint32_t>, std::vector<std::pair<uint32_t, uint32_t>>> modimizer (
const std::string &seq, unsigned k, uint32_t mod, bool include_hash=false) {
digest::ModMin<digest::BadCharPolicy::SKIPOVER> digester (seq, k, mod);
if (include_hash) {
std::vector<std::pair<uint32_t, uint32_t>> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
else {
std::vector<uint32_t> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
std::variant<std::vector<uint32_t>, std::vector<std::pair<uint32_t, uint32_t>>>
modimizer(const std::string &seq, unsigned k, uint32_t mod,
bool include_hash = false) {
digest::ModMin<digest::BadCharPolicy::SKIPOVER> digester(seq, k, mod);
if (include_hash) {
std::vector<std::pair<uint32_t, uint32_t>> output;
digester.roll_minimizer(seq.length(), output);
return output;
} else {
std::vector<uint32_t> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
}

std::variant<std::vector<uint32_t>, std::vector<std::pair<uint32_t, uint32_t>>> syncmer (
const std::string &seq, unsigned k, unsigned large_window, bool include_hash=false) {
digest::Syncmer<digest::BadCharPolicy::WRITEOVER, digest::ds::Adaptive> digester (seq, k, large_window);
if (include_hash) {
std::vector<std::pair<uint32_t, uint32_t>> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
else {
std::vector<uint32_t> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
std::variant<std::vector<uint32_t>, std::vector<std::pair<uint32_t, uint32_t>>>
syncmer(const std::string &seq, unsigned k, unsigned large_window,
bool include_hash = false) {
digest::Syncmer<digest::BadCharPolicy::WRITEOVER, digest::ds::Adaptive>
digester(seq, k, large_window);
if (include_hash) {
std::vector<std::pair<uint32_t, uint32_t>> output;
digester.roll_minimizer(seq.length(), output);
return output;
} else {
std::vector<uint32_t> output;
digester.roll_minimizer(seq.length(), output);
return output;
}
}
2 changes: 1 addition & 1 deletion tests/test/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1246,7 +1246,7 @@ TEST_CASE("ModMin Testing") {
{ F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 32) } \
{ F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 33) } \
{ F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 63) } \
{F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 64)}
{ F(digest::BadCharPolicy::SKIPOVER, digest::ds::Adaptive, 64) }

TEST_CASE("WindowMin Testing") {
SECTION("Constructor Testing") {
Expand Down