Prefetch upcoming index reads while a query is being evaluated.

Query::run() gains a PrefetchFunc callback. For every non-primitive query it
requests a prefetch of the first PREFETCH_RANGE (3) subqueries, and while
walking an AND it requests one more subquery per iteration, so the hints stay
PREFETCH_RANGE subqueries ahead of evaluation. Only direct PRIMITIVE children
are prefetched; nested queries are skipped.

The request travels OnDiskDataset::query() (lambda: every index whose type
matches the primitive) -> OnDiskIndex::prefetch() (byte range taken from
get_run_offsets()) -> RawFile::prefetch() (posix_fadvise with
POSIX_FADV_WILLNEED). The version tag moves from +opt6 to +opt7.

Review fixes folded into the added lines (hunk line counts are unchanged):
  * PRETECTH_RANGE was misspelled; it is now PREFETCH_RANGE.
  * RawFile::prefetch() declared its parameter as `offse`; now `offset`.
  * Query.h named the callback parameter `prefetch`, shadowing the method of
    the same name; it is now `prefetcher`, as in Query.cpp.
  * The prefetch lambda captured `&seen` without using it.
  * int-vs-size_t comparisons against queries.size() use explicit casts.
  * PrefetchFunc is spelled out as std::function<void(PrimitiveQuery)>.
  * OnDiskIndex::prefetch() uses `auto` instead of a bare `std::pair`.

NOTE(review): context lines keep the tokens they were received with, and some
are not valid C++ (`std::function;`, bare `template`, `reinterpret_cast(buf)`),
so template argument lists seem to have been lost. Indentation and blank
context lines were reconstructed from the hunk counts, and the blob hashes
predate the fixes above. Regenerate with `git diff` from a real tree before
applying.
NOTE(review): the posix_fadvise() result is ignored, so a failed hint is
silent - probably fine for an advisory call, but confirm.

diff --git a/libursa/OnDiskDataset.cpp b/libursa/OnDiskDataset.cpp
index fe6fc44..9e9b05b 100644
--- a/libursa/OnDiskDataset.cpp
+++ b/libursa/OnDiskDataset.cpp
@@ -83,6 +83,13 @@ QueryResult OnDiskDataset::query(const Query &query,
             }
             throw std::runtime_error("Unexpected ngram type in query");
         },
+        [this](PrimitiveQuery primitive) {
+            for (const auto &ndx : indices) {
+                if (ndx.index_type() == primitive.itype) {
+                    ndx.prefetch(primitive.trigram);
+                }
+            }
+        },
         counters);
 }
 
diff --git a/libursa/OnDiskIndex.cpp b/libursa/OnDiskIndex.cpp
index 445d7aa..da83dd4 100644
--- a/libursa/OnDiskIndex.cpp
+++ b/libursa/OnDiskIndex.cpp
@@ -60,6 +60,12 @@ QueryResult OnDiskIndex::query(TriGram trigram, QueryCounters *counters) const {
     return QueryResult(std::move(query_primitive(trigram, &counters->reads())));
 }
 
+void OnDiskIndex::prefetch(TriGram trigram) const {
+    const auto offsets = get_run_offsets(trigram);
+    const uint64_t length = offsets.second - offsets.first;
+    ndxfile.prefetch(length, offsets.first);
+}
+
 std::pair OnDiskIndex::get_run_offsets(
     TriGram trigram) const {
     uint64_t ptrs[2];
diff --git a/libursa/OnDiskIndex.h b/libursa/OnDiskIndex.h
index 5549328..e73152f 100644
--- a/libursa/OnDiskIndex.h
+++ b/libursa/OnDiskIndex.h
@@ -38,6 +38,8 @@ class OnDiskIndex {
     const fs::path &get_fpath() const { return fpath; }
     IndexType index_type() const { return ntype; }
     QueryResult query(TriGram trigram, QueryCounters *counters) const;
+    void prefetch(TriGram trigram) const;
+
     uint64_t real_size() const;
     static void on_disk_merge(const fs::path &db_base, const std::string &fname,
                               IndexType merge_type,
diff --git a/libursa/Query.cpp b/libursa/Query.cpp
index c48898a..29eef83 100644
--- a/libursa/Query.cpp
+++ b/libursa/Query.cpp
@@ -211,17 +211,46 @@ Query Query::plan(const std::unordered_set &types_to_query) const {
     return plan_qstring(types_to_query, value);
 }
 
+// Prefetch the primitive ngrams among `howmany` subqueries starting at
+// `from_index`; with `only_last`, only the final slot of that window.
+// This doesn't recurse into nested queries. That's acceptable for now, since
+// prefetchable primitives sit in long AND sequences, but it could be improved.
+void Query::prefetch(int from_index, int howmany, bool only_last,
+                     const PrefetchFunc &prefetcher) const {
+    for (int i = 0; i < howmany; i++) {
+        int ndx = i + from_index;
+        if (static_cast<size_t>(ndx) >= queries.size()) {
+            break;
+        }
+        if (queries[ndx].type == QueryType::PRIMITIVE) {
+            if (only_last && (i + 1 != howmany)) {
+                continue;
+            }
+            spdlog::debug("prefetching {}", ndx);
+            prefetcher(queries[ndx].ngram);
+        }
+    }
+}
+
 QueryResult Query::run(const QueryPrimitive &primitive,
+                       const PrefetchFunc &prefetcher,
                        QueryCounters *counters) const {
     // Case: primitive query - reduces to AND with tokens from query plan.
     if (type == QueryType::PRIMITIVE) {
         return primitive(ngram, counters);
     }
+
+    constexpr int PREFETCH_RANGE = 3;
+    prefetch(0, PREFETCH_RANGE, false, prefetcher);
+
     // Case: and. Short circuits when result is already empty.
     if (type == QueryType::AND) {
         auto result = QueryResult::everything();
-        for (const auto &query : queries) {
-            result.do_and(query.run(primitive, counters), &counters->ands());
+        for (int i = 0; i < static_cast<int>(queries.size()); i++) {
+            prefetch(i + 1, PREFETCH_RANGE, true, prefetcher);
+            const auto &query = queries[i];
+            result.do_and(query.run(primitive, prefetcher, counters),
+                          &counters->ands());
             if (result.is_empty()) {
                 break;
             }
@@ -232,7 +261,8 @@ QueryResult Query::run(const QueryPrimitive &primitive,
     if (type == QueryType::OR) {
         auto result = QueryResult::empty();
         for (const auto &query : queries) {
-            result.do_or(query.run(primitive, counters), &counters->ors());
+            result.do_or(query.run(primitive, prefetcher, counters),
+                         &counters->ors());
             if (result.is_everything()) {
                 break;
             }
@@ -252,7 +282,7 @@ QueryResult Query::run(const QueryPrimitive &primitive,
         int cutoff = count;
         int nonempty_sources = queries.size();
         for (const auto &query : queries) {
-            QueryResult next = query.run(primitive, counters);
+            QueryResult next = query.run(primitive, prefetcher, counters);
             if (next.is_everything()) {
                 cutoff -= 1;
                 if (cutoff <= 0) {
diff --git a/libursa/Query.h b/libursa/Query.h
index c7d138a..11fa221 100644
--- a/libursa/Query.h
+++ b/libursa/Query.h
@@ -33,6 +33,8 @@ class PrimitiveQuery {
 
 using QueryPrimitive = std::function;
 
+using PrefetchFunc = std::function<void(PrimitiveQuery)>;
+
 // Query represents the query as provided by the user.
 // Query can contain subqueries (using AND/OR/MINOF) or be a literal query.
 // There are actually two types of literal query objects - "plain" and
@@ -60,10 +62,14 @@ class Query {
     bool operator==(const Query &other) const;
 
     QueryResult run(const QueryPrimitive &primitive,
+                    const PrefetchFunc &prefetcher,
                     QueryCounters *counters) const;
     Query plan(const std::unordered_set &types_to_query) const;
 
   private:
+    void prefetch(int from_index, int howmany, bool only_last,
+                  const PrefetchFunc &prefetcher) const;
+
     QueryType type;
     // used for QueryType::PRIMITIVE before plan()
     QString value;
diff --git a/libursa/RawFile.cpp b/libursa/RawFile.cpp
index 9c427ed..ca91ff8 100644
--- a/libursa/RawFile.cpp
+++ b/libursa/RawFile.cpp
@@ -44,6 +44,10 @@ void RawFile::pread(void *buf, size_t to_read, off_t offset) const {
     }
 }
 
+void RawFile::prefetch(size_t size, off_t offset) const {
+    ::posix_fadvise(fd, offset, size, POSIX_FADV_WILLNEED);
+}
+
 template
 void RawFile::write(const T *buf, size_t count) {
     const auto *buf_raw = reinterpret_cast(buf);
diff --git a/libursa/RawFile.h b/libursa/RawFile.h
index a644cae..eb53e66 100644
--- a/libursa/RawFile.h
+++ b/libursa/RawFile.h
@@ -13,6 +13,7 @@ class RawFile {
 
     uint64_t size() const;
     void pread(void *buf, size_t to_read, off_t offset) const;
+    void prefetch(size_t size, off_t offset) const;
 
     template
     void write(const T *buf, size_t count);
diff --git a/libursa/Version.h.in b/libursa/Version.h.in
index 57dda41..b2be872 100644
--- a/libursa/Version.h.in
+++ b/libursa/Version.h.in
@@ -9,5 +9,5 @@ constexpr std::string_view ursadb_format_version = "1.5.0";
 // Project version.
 // Consider updating the version tag when doing PRs.
 // clang-format off
-constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt6";
+constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt7";
 // clang-format on