Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt7: prefetching #226

Merged
merged 1 commit into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions libursa/OnDiskDataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ QueryResult OnDiskDataset::query(const Query &query,
}
throw std::runtime_error("Unexpected ngram type in query");
},
// Prefetch callback: forwards the primitive's trigram to every index whose
// type matches the primitive. It only issues hints and yields no result.
// Captures `this` alone; the callback body does not use any other local.
[this](PrimitiveQuery primitive) {
    for (const auto &ndx : indices) {
        if (ndx.index_type() == primitive.itype) {
            ndx.prefetch(primitive.trigram);
        }
    }
},
counters);
}

Expand Down
6 changes: 6 additions & 0 deletions libursa/OnDiskIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ QueryResult OnDiskIndex::query(TriGram trigram, QueryCounters *counters) const {
return QueryResult(std::move(query_primitive(trigram, &counters->reads())));
}

// Hint that the on-disk run for `trigram` is about to be read.
// The byte range of the run comes from get_run_offsets(); this function
// reads no data and returns nothing.
void OnDiskIndex::prefetch(TriGram trigram) const {
    const auto [run_start, run_end] = get_run_offsets(trigram);
    // An empty (or malformed, end-before-start) run has nothing to prefetch.
    // It must not be forwarded as a zero length: the length ends up in
    // posix_fadvise(), where len == 0 means "until end of file" and would
    // queue readahead for everything after run_start.
    if (run_end <= run_start) {
        return;
    }
    ndxfile.prefetch(run_end - run_start, run_start);
}

std::pair<uint64_t, uint64_t> OnDiskIndex::get_run_offsets(
TriGram trigram) const {
uint64_t ptrs[2];
Expand Down
2 changes: 2 additions & 0 deletions libursa/OnDiskIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class OnDiskIndex {
const fs::path &get_fpath() const { return fpath; }
IndexType index_type() const { return ntype; }
QueryResult query(TriGram trigram, QueryCounters *counters) const;
// Issues a prefetch hint for the byte range of `trigram`'s run in the index
// file. Does not read the data and returns nothing.
void prefetch(TriGram trigram) const;

uint64_t real_size() const;
static void on_disk_merge(const fs::path &db_base, const std::string &fname,
IndexType merge_type,
Expand Down
38 changes: 34 additions & 4 deletions libursa/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,17 +211,46 @@ Query Query::plan(const std::unordered_set<IndexType> &types_to_query) const {
return plan_qstring(types_to_query, value);
}

// Issue prefetch hints for the subqueries in the window
// [from_index, from_index + howmany), clipped to the end of `queries`.
//
//   from_index - index of the first subquery in the window.
//   howmany    - size of the window.
//   only_last  - when true, only the final slot of the window
//                (from_index + howmany - 1) is hinted; the earlier slots
//                are skipped.
//   prefetcher - callback invoked with the ngram of each PRIMITIVE subquery.
//
// This doesn't recurse into other queries: non-primitive subqueries are
// skipped. It's not a big problem, because all primitives that we can fetch
// are in long AND sequences. But in the future we may consider improving this.
void Query::prefetch(int from_index, int howmany, bool only_last,
                     const PrefetchFunc &prefetcher) const {
    // A negative start or an empty window selects nothing.
    if (from_index < 0 || howmany <= 0) {
        return;
    }
    // With only_last set, every slot but the final one is skipped anyway,
    // so start directly at the final slot.
    const int first = only_last ? howmany - 1 : 0;
    for (int i = first; i < howmany; i++) {
        const int ndx = from_index + i;
        // ndx is non-negative here, so the cast cannot wrap around.
        if (static_cast<size_t>(ndx) >= queries.size()) {
            break;
        }
        if (queries[ndx].type != QueryType::PRIMITIVE) {
            continue;
        }
        spdlog::debug("prefetching {}", ndx);
        prefetcher(queries[ndx].ngram);
    }
}

QueryResult Query::run(const QueryPrimitive &primitive,
const PrefetchFunc &prefetcher,
QueryCounters *counters) const {
// Case: primitive query - reduces to AND with tokens from query plan.
if (type == QueryType::PRIMITIVE) {
return primitive(ngram, counters);
}

constexpr int PRETECTH_RANGE = 3;
prefetch(0, PRETECTH_RANGE, false, prefetcher);

// Case: and. Short circuits when result is already empty.
if (type == QueryType::AND) {
auto result = QueryResult::everything();
for (const auto &query : queries) {
result.do_and(query.run(primitive, counters), &counters->ands());
for (int i = 0; i < queries.size(); i++) {
prefetch(i + 1, PRETECTH_RANGE, true, prefetcher);
const auto &query = queries[i];
result.do_and(query.run(primitive, prefetcher, counters),
&counters->ands());
if (result.is_empty()) {
break;
}
Expand All @@ -232,7 +261,8 @@ QueryResult Query::run(const QueryPrimitive &primitive,
if (type == QueryType::OR) {
auto result = QueryResult::empty();
for (const auto &query : queries) {
result.do_or(query.run(primitive, counters), &counters->ors());
result.do_or(query.run(primitive, prefetcher, counters),
&counters->ors());
if (result.is_everything()) {
break;
}
Expand All @@ -252,7 +282,7 @@ QueryResult Query::run(const QueryPrimitive &primitive,
int cutoff = count;
int nonempty_sources = queries.size();
for (const auto &query : queries) {
QueryResult next = query.run(primitive, counters);
QueryResult next = query.run(primitive, prefetcher, counters);
if (next.is_everything()) {
cutoff -= 1;
if (cutoff <= 0) {
Expand Down
6 changes: 6 additions & 0 deletions libursa/Query.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class PrimitiveQuery {
using QueryPrimitive =
std::function<QueryResult(PrimitiveQuery, QueryCounters *counter)>;

// Callback that receives a primitive (ngram) query whose data should be
// prefetched. It returns nothing.
using PrefetchFunc = std::function<void(PrimitiveQuery)>;

// Query represents the query as provided by the user.
// Query can contain subqueries (using AND/OR/MINOF) or be a literal query.
// There are actually two types of literal query objects - "plain" and
Expand Down Expand Up @@ -60,10 +62,14 @@ class Query {
bool operator==(const Query &other) const;

// Evaluates this query.
//   primitive  - callback that resolves a single primitive (ngram) query.
//   prefetcher - callback used to issue prefetch hints for upcoming
//                primitive subqueries (named to match the definition and to
//                avoid clashing with the private prefetch() member).
//   counters   - query counters, passed on to `primitive` and updated by
//                the combining operations.
QueryResult run(const QueryPrimitive &primitive,
                const PrefetchFunc &prefetcher,
                QueryCounters *counters) const;
Query plan(const std::unordered_set<IndexType> &types_to_query) const;

private:
// Issues prefetch hints for the primitive subqueries in the window
// [from_index, from_index + howmany). When `only_last` is true, only the
// final slot of that window is hinted. `prefetcher` receives the ngram of
// each selected primitive subquery.
void prefetch(int from_index, int howmany, bool only_last,
              const PrefetchFunc &prefetcher) const;

QueryType type;
// used for QueryType::PRIMITIVE before plan()
QString value;
Expand Down
4 changes: 4 additions & 0 deletions libursa/RawFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ void RawFile::pread(void *buf, size_t to_read, off_t offset) const {
}
}

// Advise the kernel that `size` bytes starting at `offset` will be needed
// soon (POSIX_FADV_WILLNEED). This is only a hint: the result is ignored on
// purpose, because a failed prefetch does not change what later reads return.
void RawFile::prefetch(size_t size, off_t offset) const {
    // posix_fadvise() treats len == 0 as "through end of file". An empty
    // request must stay empty rather than prefetch the rest of the file.
    if (size == 0) {
        return;
    }
    static_cast<void>(::posix_fadvise(fd, offset, static_cast<off_t>(size),
                                      POSIX_FADV_WILLNEED));
}

template <typename T>
void RawFile::write(const T *buf, size_t count) {
const auto *buf_raw = reinterpret_cast<const char *>(buf);
Expand Down
1 change: 1 addition & 0 deletions libursa/RawFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class RawFile {

uint64_t size() const;
void pread(void *buf, size_t to_read, off_t offset) const;
// Hints that `size` bytes starting at `offset` will be read soon.
void prefetch(size_t size, off_t offset) const;

template <typename T>
void write(const T *buf, size_t count);
Expand Down
2 changes: 1 addition & 1 deletion libursa/Version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ constexpr std::string_view ursadb_format_version = "1.5.0";
// Project version.
// Consider updating the version tag when doing PRs.
// clang-format off
constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt6";
constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt7";
// clang-format on
Loading