Skip to content

Commit

Permalink
Fix OCDBT node arity limit enforcement
Browse files Browse the repository at this point in the history
Previously, the node arity was not properly constrained, such that if
`max_decoded_node_bytes` was set to a large value, the maximum node
arity could be exceeded.

PiperOrigin-RevId: 579972860
Change-Id: I2d1404b5bfd09f87374169efded35379c895a74e
  • Loading branch information
jbms authored and copybara-github committed Nov 6, 2023
1 parent 81aef76 commit 41812f3
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 7 deletions.
1 change: 1 addition & 0 deletions tensorstore/kvstore/ocdbt/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ tensorstore_cc_test(
"//tensorstore/util:status_testutil",
"@com_github_nlohmann_json//:nlohmann_json",
"@com_google_absl//absl/strings:cord",
"@com_google_absl//absl/strings:str_format",
"@com_google_googletest//:gtest_main",
],
)
Expand Down
1 change: 1 addition & 0 deletions tensorstore/kvstore/ocdbt/driver_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/strings/cord.h"
#include "absl/strings/str_format.h"
#include <nlohmann/json.hpp>
#include "tensorstore/context.h"
#include "tensorstore/internal/global_initializer.h"
Expand Down
3 changes: 3 additions & 0 deletions tensorstore/kvstore/ocdbt/format/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,11 @@ tensorstore_cc_test(
"//tensorstore/util:quote_string",
"//tensorstore/util:status_testutil",
"//tensorstore/util:str_cat",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings:cord",
"@com_google_absl//absl/strings:str_format",
"@com_google_googletest//:gtest_main",
"@com_google_riegeli//riegeli/bytes:writer",
],
)

Expand Down
19 changes: 12 additions & 7 deletions tensorstore/kvstore/ocdbt/format/btree_node_encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <variant>
#include <vector>

#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/cord.h"
#include "riegeli/bytes/writer.h"
Expand Down Expand Up @@ -320,13 +321,15 @@ Result<std::vector<EncodedNode>> BtreeNodeEncoder<Entry>::Finalize(
auto& a = buffered_entries_[i - 1];
auto& b = buffered_entries_[i];
if (a.existing == b.existing) {
assert(a.entry.key < b.entry.key);
ABSL_DCHECK_LT(a.entry.key, b.entry.key);
} else if (a.existing) {
assert(ComparePrefixedKeyToUnprefixedKey{existing_prefix_}(
a.entry.key, b.entry.key) < 0);
ABSL_DCHECK_LT(ComparePrefixedKeyToUnprefixedKey{existing_prefix_}(
a.entry.key, b.entry.key),
0);
} else {
assert(ComparePrefixedKeyToUnprefixedKey{existing_prefix_}(
b.entry.key, a.entry.key) > 0);
ABSL_DCHECK_GT(ComparePrefixedKeyToUnprefixedKey{existing_prefix_}(
b.entry.key, a.entry.key),
0);
}
}
#endif // TENSORSTORE_INTERNAL_OCDBT_DEBUG
Expand All @@ -345,15 +348,16 @@ Result<std::vector<EncodedNode>> BtreeNodeEncoder<Entry>::Finalize(
while (start_i < buffered_entries_.size()) {
size_t size_upper_bound = get_range_size(buffered_entries_.size());
size_t num_nodes = tensorstore::CeilOfRatio<size_t>(
buffered_entries_.size() - start_i, kMinArity);
buffered_entries_.size() - start_i, kMaxNodeArity);
if (config_.max_decoded_node_bytes != 0) {
num_nodes = std::min(
num_nodes = std::max(
num_nodes, tensorstore::CeilOfRatio<size_t>(
size_upper_bound, config_.max_decoded_node_bytes));
}
size_t target_size = tensorstore::CeilOfRatio(size_upper_bound, num_nodes);
size_t end_i;
for (end_i = start_i + 1; end_i < buffered_entries_.size(); ++end_i) {
if (end_i - start_i >= kMaxNodeArity) break;
size_t size = get_range_size(end_i);
if (size >= target_size && end_i >= start_i + kMinArity) {
if (size > config_.max_decoded_node_bytes &&
Expand All @@ -364,6 +368,7 @@ Result<std::vector<EncodedNode>> BtreeNodeEncoder<Entry>::Finalize(
}
}
assert(end_i > start_i);
assert(end_i - start_i <= kMaxNodeArity);
TENSORSTORE_ASSIGN_OR_RETURN(
auto encoded_node,
EncodeEntries<Entry>(
Expand Down
43 changes: 43 additions & 0 deletions tensorstore/kvstore/ocdbt/format/btree_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

#include "tensorstore/kvstore/ocdbt/format/btree.h"

#include <stddef.h>

#include <algorithm>
#include <string>
#include <string_view>
#include <type_traits>
Expand All @@ -22,7 +25,10 @@

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/status/status.h"
#include "absl/strings/cord.h"
#include "absl/strings/str_format.h"
#include "riegeli/bytes/writer.h"
#include "tensorstore/kvstore/ocdbt/format/btree_codec.h"
#include "tensorstore/kvstore/ocdbt/format/btree_node_encoder.h"
#include "tensorstore/kvstore/ocdbt/format/codec_util.h"
Expand All @@ -41,6 +47,7 @@ using ::tensorstore::internal_ocdbt::Config;
using ::tensorstore::internal_ocdbt::DecodeBtreeNode;
using ::tensorstore::internal_ocdbt::EncodedNode;
using ::tensorstore::internal_ocdbt::InteriorNodeEntry;
using ::tensorstore::internal_ocdbt::kMaxNodeArity;
using ::tensorstore::internal_ocdbt::LeafNodeEntry;

Result<std::vector<EncodedNode>> EncodeExistingNode(const Config& config,
Expand Down Expand Up @@ -275,4 +282,40 @@ TEST(BtreeNodeTest, CorruptInteriorZeroNumEntries) {
"Error decoding b-tree node: Empty b-tree node; .*"));
}

TEST(BtreeNodeTest, MaxArity) {
Config config;
config.compression = Config::NoCompression{};
config.max_decoded_node_bytes = 1000000000;
BtreeNode node;
node.height = 0;
auto& entries = node.entries.emplace<BtreeNode::LeafNodeEntries>();
std::vector<std::string> keys;
for (size_t i = 0; i <= kMaxNodeArity; ++i) {
keys.push_back(absl::StrFormat("%07d", i));
}
std::sort(keys.begin(), keys.end());
const auto add_entry = [&](size_t i) {
entries.push_back({/*.key=*/keys[i],
/*.value_reference=*/absl::Cord()});
};
for (size_t i = 0; i < kMaxNodeArity; ++i) {
add_entry(i);
}
TestBtreeNodeRoundTrip(config, node);
add_entry(kMaxNodeArity);
TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto encoded_nodes,
EncodeExistingNode(config, node));
ASSERT_EQ(2, encoded_nodes.size());
TENSORSTORE_ASSERT_OK_AND_ASSIGN(
auto decoded_node1,
DecodeBtreeNode(encoded_nodes[0].encoded_node, /*base_path=*/{}));
TENSORSTORE_ASSERT_OK_AND_ASSIGN(
auto decoded_node2,
DecodeBtreeNode(encoded_nodes[1].encoded_node, /*base_path=*/{}));
EXPECT_EQ(kMaxNodeArity / 2 + 1,
std::get<BtreeNode::LeafNodeEntries>(decoded_node1.entries).size());
EXPECT_EQ(kMaxNodeArity / 2,
std::get<BtreeNode::LeafNodeEntries>(decoded_node2.entries).size());
}

} // namespace

0 comments on commit 41812f3

Please sign in to comment.