Skip to content

Commit

Permalink
opt7: prefetching (#226)
Browse files Browse the repository at this point in the history
  • Loading branch information
msm-cert authored Oct 1, 2024
1 parent d64bc15 commit 93b169a
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 5 deletions.
7 changes: 7 additions & 0 deletions libursa/OnDiskDataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ QueryResult OnDiskDataset::query(const Query &query,
}
throw std::runtime_error("Unexpected ngram type in query");
},
// Prefetch callback: forwards the hint to every index whose type matches
// the primitive's index type. Only `this` is captured, because the body
// reads nothing except `indices`.
[this](PrimitiveQuery primitive) {
    for (auto &ndx : indices) {
        if (ndx.index_type() == primitive.itype) {
            ndx.prefetch(primitive.trigram);
        }
    }
},
counters);
}

Expand Down
6 changes: 6 additions & 0 deletions libursa/OnDiskIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ QueryResult OnDiskIndex::query(TriGram trigram, QueryCounters *counters) const {
return QueryResult(std::move(query_primitive(trigram, &counters->reads())));
}

// Hints that the data stored for `trigram` will be read soon.
// The byte range is taken from get_run_offsets() and handed to
// ndxfile.prefetch() as (length, start offset).
void OnDiskIndex::prefetch(TriGram trigram) const {
    const std::pair<uint64_t, uint64_t> offsets = get_run_offsets(trigram);
    // Skip empty (or inverted) ranges. A zero length handed down to
    // posix_fadvise() means "until the end of the file", which would turn
    // a no-op into readahead of everything after this offset, and an
    // inverted range would wrap around in the unsigned subtraction below.
    if (offsets.second <= offsets.first) {
        return;
    }
    const uint64_t length = offsets.second - offsets.first;
    ndxfile.prefetch(length, offsets.first);
}

std::pair<uint64_t, uint64_t> OnDiskIndex::get_run_offsets(
TriGram trigram) const {
uint64_t ptrs[2];
Expand Down
2 changes: 2 additions & 0 deletions libursa/OnDiskIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class OnDiskIndex {
const fs::path &get_fpath() const { return fpath; }
IndexType index_type() const { return ntype; }
QueryResult query(TriGram trigram, QueryCounters *counters) const;
// Asks the underlying index file to prefetch the byte range that holds the
// data for `trigram`. Returns nothing; no query result is produced.
void prefetch(TriGram trigram) const;

uint64_t real_size() const;
static void on_disk_merge(const fs::path &db_base, const std::string &fname,
IndexType merge_type,
Expand Down
38 changes: 34 additions & 4 deletions libursa/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,17 +211,46 @@ Query Query::plan(const std::unordered_set<IndexType> &types_to_query) const {
return plan_qstring(types_to_query, value);
}

// Issues prefetch hints for a window of `howmany` subqueries that starts at
// `from_index`. Only PRIMITIVE subqueries are handed to `prefetcher`; with
// `only_last` set, just the final slot of the window is considered.
// Nested (non-primitive) subqueries are not descended into. That is
// acceptable for now, since the primitives we can fetch sit in long AND
// sequences, but it is a candidate for future improvement.
void Query::prefetch(int from_index, int howmany, bool only_last,
                     const PrefetchFunc &prefetcher) const {
    for (int offset = 0; offset < howmany; ++offset) {
        const int position = from_index + offset;
        // The window ran past the end of the subquery list: nothing left.
        if (position >= queries.size()) {
            return;
        }
        const auto &candidate = queries[position];
        if (candidate.type != QueryType::PRIMITIVE) {
            continue;
        }
        const bool is_last_slot = (offset + 1 == howmany);
        if (!only_last || is_last_slot) {
            spdlog::debug("prefetching {}", position);
            prefetcher(candidate.ngram);
        }
    }
}

QueryResult Query::run(const QueryPrimitive &primitive,
const PrefetchFunc &prefetcher,
QueryCounters *counters) const {
// Case: primitive query - reduces to AND with tokens from query plan.
if (type == QueryType::PRIMITIVE) {
return primitive(ngram, counters);
}

constexpr int PRETECTH_RANGE = 3;
prefetch(0, PRETECTH_RANGE, false, prefetcher);

// Case: and. Short circuits when result is already empty.
if (type == QueryType::AND) {
auto result = QueryResult::everything();
for (const auto &query : queries) {
result.do_and(query.run(primitive, counters), &counters->ands());
for (int i = 0; i < queries.size(); i++) {
prefetch(i + 1, PRETECTH_RANGE, true, prefetcher);
const auto &query = queries[i];
result.do_and(query.run(primitive, prefetcher, counters),
&counters->ands());
if (result.is_empty()) {
break;
}
Expand All @@ -232,7 +261,8 @@ QueryResult Query::run(const QueryPrimitive &primitive,
if (type == QueryType::OR) {
auto result = QueryResult::empty();
for (const auto &query : queries) {
result.do_or(query.run(primitive, counters), &counters->ors());
result.do_or(query.run(primitive, prefetcher, counters),
&counters->ors());
if (result.is_everything()) {
break;
}
Expand All @@ -252,7 +282,7 @@ QueryResult Query::run(const QueryPrimitive &primitive,
int cutoff = count;
int nonempty_sources = queries.size();
for (const auto &query : queries) {
QueryResult next = query.run(primitive, counters);
QueryResult next = query.run(primitive, prefetcher, counters);
if (next.is_everything()) {
cutoff -= 1;
if (cutoff <= 0) {
Expand Down
6 changes: 6 additions & 0 deletions libursa/Query.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class PrimitiveQuery {
using QueryPrimitive =
std::function<QueryResult(PrimitiveQuery, QueryCounters *counter)>;

// Callback that receives a primitive query expected to be evaluated soon,
// so its data can be requested ahead of time. It returns nothing.
using PrefetchFunc = std::function<void(PrimitiveQuery)>;

// Query represents the query as provided by the user.
// Query can contain subqueries (using AND/OR/MINOF) or be a literal query.
// There are actually two types of literal query objects - "plain" and
Expand Down Expand Up @@ -60,10 +62,14 @@ class Query {
bool operator==(const Query &other) const;

// Evaluates this query and returns the result.
// `primitive` resolves a single primitive query (it also receives
// `counters`); `prefetch` is called ahead of evaluation for upcoming
// primitive subqueries.
QueryResult run(const QueryPrimitive &primitive,
const PrefetchFunc &prefetch,
QueryCounters *counters) const;
Query plan(const std::unordered_set<IndexType> &types_to_query) const;

private:
// Calls `prefetch` for PRIMITIVE subqueries in the window of `howmany`
// entries starting at `from_index`. When `only_last` is true, only the
// last entry of the window is considered.
void prefetch(int from_index, int howmany, bool only_last,
const PrefetchFunc &prefetch) const;

QueryType type;
// used for QueryType::PRIMITIVE before plan()
QString value;
Expand Down
4 changes: 4 additions & 0 deletions libursa/RawFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ void RawFile::pread(void *buf, size_t to_read, off_t offset) const {
}
}

// Advises the kernel (POSIX_FADV_WILLNEED) that `size` bytes starting at
// `offset` in this file are expected to be accessed soon.
void RawFile::prefetch(size_t size, off_t offset) const {
    // posix_fadvise() interprets a length of 0 as "through the end of the
    // file", so an empty request has to be dropped to remain a no-op.
    if (size == 0) {
        return;
    }
    // Best effort: the call is only a hint, so its result is deliberately
    // ignored.
    static_cast<void>(::posix_fadvise(fd, offset, static_cast<off_t>(size),
                                      POSIX_FADV_WILLNEED));
}

template <typename T>
void RawFile::write(const T *buf, size_t count) {
const auto *buf_raw = reinterpret_cast<const char *>(buf);
Expand Down
1 change: 1 addition & 0 deletions libursa/RawFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class RawFile {

uint64_t size() const;
void pread(void *buf, size_t to_read, off_t offset) const;
// Hints that `size` bytes starting at `offset` will be read soon.
void prefetch(size_t size, off_t offset) const;

template <typename T>
void write(const T *buf, size_t count);
Expand Down
2 changes: 1 addition & 1 deletion libursa/Version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ constexpr std::string_view ursadb_format_version = "1.5.0";
// Project version.
// Consider updating the version tag when doing PRs.
// clang-format off
constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt6";
constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt7";
// clang-format on

0 comments on commit 93b169a

Please sign in to comment.