From 6a8729c5e572146981387275aec62d6b8c0e15ac Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Fri, 27 Oct 2023 11:26:17 +0800 Subject: [PATCH 1/3] add Reserve for column. Optimize large block insertion --- clickhouse/columns/array.cpp | 6 ++++++ clickhouse/columns/array.h | 3 +++ clickhouse/columns/column.h | 3 +++ clickhouse/columns/date.cpp | 5 +++++ clickhouse/columns/date.h | 26 +++++++++++++++----------- clickhouse/columns/decimal.cpp | 5 +++++ clickhouse/columns/decimal.h | 2 ++ clickhouse/columns/enum.cpp | 6 ++++++ clickhouse/columns/enum.h | 3 +++ clickhouse/columns/geo.cpp | 6 ++++++ clickhouse/columns/geo.h | 3 +++ clickhouse/columns/ip4.cpp | 5 +++++ clickhouse/columns/ip4.h | 3 +++ clickhouse/columns/ip6.cpp | 5 +++++ clickhouse/columns/ip6.h | 3 +++ clickhouse/columns/lowcardinality.cpp | 6 ++++++ clickhouse/columns/lowcardinality.h | 3 +++ clickhouse/columns/map.cpp | 5 +++++ clickhouse/columns/map.h | 3 +++ clickhouse/columns/nothing.h | 3 +++ clickhouse/columns/nullable.cpp | 6 ++++++ clickhouse/columns/nullable.h | 3 +++ clickhouse/columns/numeric.h | 6 +++--- clickhouse/columns/string.cpp | 7 +++++++ clickhouse/columns/string.h | 6 ++++++ clickhouse/columns/tuple.cpp | 5 +++++ clickhouse/columns/tuple.h | 3 +++ clickhouse/columns/uuid.cpp | 5 +++++ clickhouse/columns/uuid.h | 3 +++ 29 files changed, 134 insertions(+), 14 deletions(-) diff --git a/clickhouse/columns/array.cpp b/clickhouse/columns/array.cpp index 5e5e72e7..cf088186 100644 --- a/clickhouse/columns/array.cpp +++ b/clickhouse/columns/array.cpp @@ -52,6 +52,12 @@ ColumnRef ColumnArray::CloneEmpty() const { return std::make_shared(data_->CloneEmpty()); } +void ColumnArray::Reserve(size_t new_cap) +{ + data_->Reserve(new_cap); + offsets_->Reserve(new_cap); +} + void ColumnArray::Append(ColumnRef column) { if (auto col = column->As()) { for (size_t i = 0; i < col->Size(); ++i) { diff --git a/clickhouse/columns/array.h b/clickhouse/columns/array.h index 0ea33d5a..ea51c778 100644 --- a/clickhouse/columns/array.h +++ b/clickhouse/columns/array.h @@ -47,6 +47,9 @@ class ColumnArray : public Column { } public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/column.h b/clickhouse/columns/column.h index b54cbdee..475df89a 100644 --- a/clickhouse/columns/column.h +++ b/clickhouse/columns/column.h @@ -52,6 +52,9 @@ class Column : public std::enable_shared_from_this { /// Appends content of given column to the end of current one. virtual void Append(ColumnRef column) = 0; + /// Increase the capacity of the column for large block insertion. + virtual void Reserve(size_t new_cap) = 0; + /// Template method to load column data from input stream. It'll call LoadPrefix and LoadBody. /// Should be called only once from the client. Derived classes should not call it. bool Load(InputStream* input, size_t rows); diff --git a/clickhouse/columns/date.cpp b/clickhouse/columns/date.cpp index c3476c7f..132c6fc7 100644 --- a/clickhouse/columns/date.cpp +++ b/clickhouse/columns/date.cpp @@ -303,6 +303,11 @@ std::string ColumnDateTime64::Timezone() const { return type_->As()->Timezone(); } +void ColumnDateTime64::Reserve(size_t new_cap) +{ + data_->Reserve(new_cap); +} + void ColumnDateTime64::Append(ColumnRef column) { if (auto col = column->As()) { data_->Append(col->data_); diff --git a/clickhouse/columns/date.h b/clickhouse/columns/date.h index c6e32234..bf501723 100644 --- a/clickhouse/columns/date.h +++ b/clickhouse/columns/date.h @@ -34,8 +34,8 @@ class ColumnDate : public Column { /// Get Raw Vector Contents std::vector& GetWritableData(); - /// Increase the capacity of the column - void Reserve(size_t new_cap); + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; /// Returns the capacity of the column size_t Capacity() const; @@ -79,9 +79,6 @@ class ColumnDate32 : public Column { /// The implementation is fundamentally wrong, ignores timezones, leap years and daylight saving. std::time_t At(size_t n) const; - /// Appends content of given column to the end of current one. - void Append(ColumnRef column) override; - inline std::time_t operator [] (size_t n) const { return At(n); } /// Do append data as is -- number of day in Unix epoch (32bit signed), no conversions performed. @@ -91,12 +88,16 @@ class ColumnDate32 : public Column { /// Get Raw Vector Contents std::vector& GetWritableData(); - /// Increase the capacity of the column - void Reserve(size_t new_cap); - /// Returns the capacity of the column size_t Capacity() const; +public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + + /// Appends content of given column to the end of current one. + void Append(ColumnRef column) override; + /// Loads column data from input stream. bool LoadBody(InputStream* input, size_t rows) override; @@ -148,13 +149,13 @@ class ColumnDateTime : public Column { /// Get Raw Vector Contents std::vector& GetWritableData(); - /// Increase the capacity of the column - void Reserve(size_t new_cap); - /// Returns the capacity of the column size_t Capacity() const; public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; @@ -205,6 +206,9 @@ class ColumnDateTime64 : public Column { std::string Timezone() const; public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/decimal.cpp b/clickhouse/columns/decimal.cpp index d44dc0c0..37de8647 100644 --- a/clickhouse/columns/decimal.cpp +++ b/clickhouse/columns/decimal.cpp @@ -191,6 +191,11 @@ Int128 ColumnDecimal::At(size_t i) const { } } +void ColumnDecimal::Reserve(size_t new_cap) +{ + data_->Reserve(new_cap); +} + void ColumnDecimal::Append(ColumnRef column) { if (auto col = column->As()) { data_->Append(col->data_); diff --git a/clickhouse/columns/decimal.h b/clickhouse/columns/decimal.h index 4b09553a..aa499a12 100644 --- a/clickhouse/columns/decimal.h +++ b/clickhouse/columns/decimal.h @@ -21,6 +21,8 @@ class ColumnDecimal : public Column { inline auto operator[](size_t i) const { return At(i); } public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; void Append(ColumnRef column) override; bool LoadBody(InputStream* input, size_t rows) override; void SaveBody(OutputStream* output) override; diff --git a/clickhouse/columns/enum.cpp b/clickhouse/columns/enum.cpp index c84d9847..c6729e64 100644 --- a/clickhouse/columns/enum.cpp +++ b/clickhouse/columns/enum.cpp @@ -68,6 +68,12 @@ void ColumnEnum::SetNameAt(size_t n, const std::string& name) { data_.at(n) = static_cast(type_->As()->GetEnumValue(name)); } +template +void ColumnEnum::Reserve(size_t new_cap) +{ + data_.reserve(new_cap); +} + template void ColumnEnum::Append(ColumnRef column) { if (auto col = column->As>()) { diff --git a/clickhouse/columns/enum.h b/clickhouse/columns/enum.h index 1d962751..43900f6c 100644 --- a/clickhouse/columns/enum.h +++ b/clickhouse/columns/enum.h @@ -30,6 +30,9 @@ class ColumnEnum : public Column { void SetNameAt(size_t n, const std::string& name); public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/geo.cpp b/clickhouse/columns/geo.cpp index e618fbe5..b7bd2fc4 100644 --- a/clickhouse/columns/geo.cpp +++ b/clickhouse/columns/geo.cpp @@ -54,6 +54,12 @@ const typename ColumnGeo::ValueType ColumnGeoAt(n); } +template +void ColumnGeo::Reserve(size_t new_cap) +{ + data_->Reserve(new_cap); +} + template void ColumnGeo::Append(ColumnRef column) { if (auto col = column->template As()) { diff --git a/clickhouse/columns/geo.h b/clickhouse/columns/geo.h index c3757f8a..1b129739 100644 --- a/clickhouse/columns/geo.h +++ b/clickhouse/columns/geo.h @@ -29,6 +29,9 @@ class ColumnGeo : public Column { inline const ValueType operator[](size_t n) const { return At(n); } public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/ip4.cpp b/clickhouse/columns/ip4.cpp index 5e7ca892..bc70ece6 100644 --- a/clickhouse/columns/ip4.cpp +++ b/clickhouse/columns/ip4.cpp @@ -74,6 +74,11 @@ std::string ColumnIPv4::AsString(size_t n) const { return ip_str; } +void ColumnIPv4::Reserve(size_t new_cap) +{ + data_->Reserve(new_cap); +} + void ColumnIPv4::Append(ColumnRef column) { if (auto col = column->As()) { data_->Append(col->data_); diff --git a/clickhouse/columns/ip4.h b/clickhouse/columns/ip4.h index 103be527..2253e305 100644 --- a/clickhouse/columns/ip4.h +++ b/clickhouse/columns/ip4.h @@ -39,6 +39,9 @@ class ColumnIPv4 : public Column { std::string AsString(size_t n) const; public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/ip6.cpp b/clickhouse/columns/ip6.cpp index 838bcce0..1400ed01 100644 --- a/clickhouse/columns/ip6.cpp +++ b/clickhouse/columns/ip6.cpp @@ -65,6 +65,11 @@ in6_addr ColumnIPv6::operator [] (size_t n) const { return *reinterpret_cast(data_->At(n).data()); } +void ColumnIPv6::Reserve(size_t new_cap) +{ + data_->Reserve(new_cap); +} + void ColumnIPv6::Append(ColumnRef column) { if (auto col = column->As()) { data_->Append(col->data_); diff --git a/clickhouse/columns/ip6.h b/clickhouse/columns/ip6.h index 74d8c1e1..41af0d58 100644 --- a/clickhouse/columns/ip6.h +++ b/clickhouse/columns/ip6.h @@ -35,6 +35,9 @@ class ColumnIPv6 : public Column { std::string AsString(size_t n) const; public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index c0c12319..95fce6a7 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -174,6 +174,12 @@ ColumnLowCardinality::ColumnLowCardinality(std::shared_ptr dicti ColumnLowCardinality::~ColumnLowCardinality() {} +void ColumnLowCardinality::Reserve(size_t new_cap) +{ + dictionary_column_->Reserve(new_cap); + index_column_->Reserve(new_cap); +} + void ColumnLowCardinality::Setup(ColumnRef dictionary_column) { AppendDefaultItem(); diff --git a/clickhouse/columns/lowcardinality.h b/clickhouse/columns/lowcardinality.h index afadae22..17e3ce99 100644 --- a/clickhouse/columns/lowcardinality.h +++ b/clickhouse/columns/lowcardinality.h @@ -65,6 +65,9 @@ class ColumnLowCardinality : public Column { ~ColumnLowCardinality(); + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends another LowCardinality column to the end of this one, updating dictionary. void Append(ColumnRef /*column*/) override; diff --git a/clickhouse/columns/map.cpp b/clickhouse/columns/map.cpp index 3f5616df..f98902c3 100644 --- a/clickhouse/columns/map.cpp +++ b/clickhouse/columns/map.cpp @@ -33,6 +33,11 @@ ColumnMap::ColumnMap(ColumnRef data) : Column(GetMapType(data->GetType())), data_(data->As()) { } +void ColumnMap::Reserve(size_t new_cap) +{ + data_->Reserve(new_cap); +} + void ColumnMap::Clear() { data_->Clear(); } diff --git a/clickhouse/columns/map.h b/clickhouse/columns/map.h index ac5dc0a7..4d644802 100644 --- a/clickhouse/columns/map.h +++ b/clickhouse/columns/map.h @@ -25,6 +25,9 @@ class ColumnMap : public Column { */ explicit ColumnMap(ColumnRef data); + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/nothing.h b/clickhouse/columns/nothing.h index 0b28d572..8e1a4e30 100644 --- a/clickhouse/columns/nothing.h +++ b/clickhouse/columns/nothing.h @@ -26,6 +26,9 @@ class ColumnNothing : public Column { { } + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t) override {}; + /// Appends one element to the column. void Append(std::unique_ptr) { ++size_; } diff --git a/clickhouse/columns/nullable.cpp b/clickhouse/columns/nullable.cpp index dd863545..d02b7f2c 100644 --- a/clickhouse/columns/nullable.cpp +++ b/clickhouse/columns/nullable.cpp @@ -34,6 +34,12 @@ ColumnRef ColumnNullable::Nulls() const return nulls_; } +void ColumnNullable::Reserve(size_t new_cap) +{ + nested_->Reserve(new_cap); + nulls_->Reserve(new_cap); +} + void ColumnNullable::Append(ColumnRef column) { if (auto col = column->As()) { if (!col->nested_->Type()->IsEqual(nested_->Type())) { diff --git a/clickhouse/columns/nullable.h b/clickhouse/columns/nullable.h index c1924af0..1946e8b9 100644 --- a/clickhouse/columns/nullable.h +++ b/clickhouse/columns/nullable.h @@ -27,6 +27,9 @@ class ColumnNullable : public Column { ColumnRef Nulls() const; public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/numeric.h b/clickhouse/columns/numeric.h index 1cbcca9c..e2a7675e 100644 --- a/clickhouse/columns/numeric.h +++ b/clickhouse/columns/numeric.h @@ -19,6 +19,9 @@ class ColumnVector : public Column { explicit ColumnVector(const std::vector& data); explicit ColumnVector(std::vector && data); + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends one element to the end of column. void Append(const T& value); @@ -33,9 +36,6 @@ class ColumnVector : public Column { /// Get Raw Vector Contents std::vector& GetWritableData(); - /// Increase the capacity of the column - void Reserve(size_t new_cap); - /// Returns the capacity of the column size_t Capacity() const; diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 62ec464b..07655967 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -190,6 +190,13 @@ ColumnString::ColumnString(std::vector&& data) ColumnString::~ColumnString() {} +void ColumnString::Reserve(size_t new_cap) +{ + items_.reserve(new_cap); + // 16 is arbitrary number, assumption that string values are about ~256 bytes long. + blocks_.reserve(std::max(1, new_cap / 16)); +} + void ColumnString::Append(std::string_view str) { if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) { blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size())); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index aa78270e..6d6d5e77 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -27,6 +27,9 @@ class ColumnFixedString : public Column { Append(v); } + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t) override {}; + /// Appends one element to the column. void Append(std::string_view str); @@ -84,6 +87,9 @@ class ColumnString : public Column { ColumnString& operator=(const ColumnString&) = delete; ColumnString(const ColumnString&) = delete; + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends one element to the column. void Append(std::string_view str); diff --git a/clickhouse/columns/tuple.cpp b/clickhouse/columns/tuple.cpp index 42dc6e63..24864063 100644 --- a/clickhouse/columns/tuple.cpp +++ b/clickhouse/columns/tuple.cpp @@ -20,6 +20,11 @@ size_t ColumnTuple::TupleSize() const { return columns_.size(); } +void ColumnTuple::Reserve(size_t new_cap) +{ + columns_.reserve(new_cap); +} + void ColumnTuple::Append(ColumnRef column) { if (!this->Type()->IsEqual(column->Type())) { throw ValidationError( diff --git a/clickhouse/columns/tuple.h b/clickhouse/columns/tuple.h index c9795565..ebc1b895 100644 --- a/clickhouse/columns/tuple.h +++ b/clickhouse/columns/tuple.h @@ -26,6 +26,9 @@ class ColumnTuple : public Column { } public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; diff --git a/clickhouse/columns/uuid.cpp b/clickhouse/columns/uuid.cpp index 36a7229c..72d3d2b8 100644 --- a/clickhouse/columns/uuid.cpp +++ b/clickhouse/columns/uuid.cpp @@ -34,6 +34,11 @@ const UUID ColumnUUID::At(size_t n) const { return UUID(data_->At(n * 2), data_->At(n * 2 + 1)); } +void ColumnUUID::Reserve(size_t new_cap) +{ + data_->Reserve(new_cap); +} + void ColumnUUID::Append(ColumnRef column) { if (auto col = column->As()) { data_->Append(col->data_); diff --git a/clickhouse/columns/uuid.h b/clickhouse/columns/uuid.h index 4f6c9192..ccd03f84 100644 --- a/clickhouse/columns/uuid.h +++ b/clickhouse/columns/uuid.h @@ -26,6 +26,9 @@ class ColumnUUID : public Column { inline const UUID operator [] (size_t n) const { return At(n); } public: + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + /// Appends content of given column to the end of current one. void Append(ColumnRef column) override; From 79f311f781310dff47ed0c6968d611243acc7802 Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Wed, 1 Nov 2023 16:13:03 +0800 Subject: [PATCH 2/3] code change for review --- clickhouse/columns/array.cpp | 3 +-- clickhouse/columns/decimal.cpp | 3 +-- clickhouse/columns/enum.cpp | 3 +-- clickhouse/columns/geo.cpp | 3 +-- clickhouse/columns/ip4.cpp | 3 +-- clickhouse/columns/ip6.cpp | 3 +-- clickhouse/columns/lowcardinality.cpp | 3 +-- clickhouse/columns/map.cpp | 3 +-- clickhouse/columns/nullable.cpp | 3 +-- clickhouse/columns/string.cpp | 17 +++++++++++------ clickhouse/columns/string.h | 2 +- clickhouse/columns/tuple.cpp | 7 ++++--- clickhouse/columns/uuid.cpp | 3 +-- 13 files changed, 26 insertions(+), 30 deletions(-) diff --git a/clickhouse/columns/array.cpp b/clickhouse/columns/array.cpp index cf088186..1867d778 100644 --- a/clickhouse/columns/array.cpp +++ b/clickhouse/columns/array.cpp @@ -52,8 +52,7 @@ ColumnRef ColumnArray::CloneEmpty() const { return std::make_shared(data_->CloneEmpty()); } -void ColumnArray::Reserve(size_t new_cap) -{ +void ColumnArray::Reserve(size_t new_cap) { data_->Reserve(new_cap); offsets_->Reserve(new_cap); } diff --git a/clickhouse/columns/decimal.cpp b/clickhouse/columns/decimal.cpp index 37de8647..2d214ecf 100644 --- a/clickhouse/columns/decimal.cpp +++ b/clickhouse/columns/decimal.cpp @@ -191,8 +191,7 @@ Int128 ColumnDecimal::At(size_t i) const { } } -void ColumnDecimal::Reserve(size_t new_cap) -{ +void ColumnDecimal::Reserve(size_t new_cap) { data_->Reserve(new_cap); } diff --git a/clickhouse/columns/enum.cpp b/clickhouse/columns/enum.cpp index c6729e64..43fab893 100644 --- a/clickhouse/columns/enum.cpp +++ b/clickhouse/columns/enum.cpp @@ -69,8 +69,7 @@ void ColumnEnum::SetNameAt(size_t n, const std::string& name) { } template -void ColumnEnum::Reserve(size_t new_cap) -{ +void ColumnEnum::Reserve(size_t new_cap) { data_.reserve(new_cap); } diff --git a/clickhouse/columns/geo.cpp b/clickhouse/columns/geo.cpp index b7bd2fc4..fa987732 100644 --- a/clickhouse/columns/geo.cpp +++ b/clickhouse/columns/geo.cpp @@ -55,8 +55,7 @@ const typename ColumnGeo::ValueType ColumnGeo -void ColumnGeo::Reserve(size_t new_cap) -{ +void ColumnGeo::Reserve(size_t new_cap) { data_->Reserve(new_cap); } diff --git a/clickhouse/columns/ip4.cpp b/clickhouse/columns/ip4.cpp index bc70ece6..8790afb6 100644 --- a/clickhouse/columns/ip4.cpp +++ b/clickhouse/columns/ip4.cpp @@ -74,8 +74,7 @@ std::string ColumnIPv4::AsString(size_t n) const { return ip_str; } -void ColumnIPv4::Reserve(size_t new_cap) -{ +void ColumnIPv4::Reserve(size_t new_cap) { data_->Reserve(new_cap); } diff --git a/clickhouse/columns/ip6.cpp b/clickhouse/columns/ip6.cpp index 1400ed01..0d47b5e8 100644 --- a/clickhouse/columns/ip6.cpp +++ b/clickhouse/columns/ip6.cpp @@ -65,8 +65,7 @@ in6_addr ColumnIPv6::operator [] (size_t n) const { return *reinterpret_cast(data_->At(n).data()); } -void ColumnIPv6::Reserve(size_t new_cap) -{ +void ColumnIPv6::Reserve(size_t new_cap) { data_->Reserve(new_cap); } diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index 95fce6a7..19369d33 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -174,8 +174,7 @@ ColumnLowCardinality::ColumnLowCardinality(std::shared_ptr dicti ColumnLowCardinality::~ColumnLowCardinality() {} -void ColumnLowCardinality::Reserve(size_t new_cap) -{ +void ColumnLowCardinality::Reserve(size_t new_cap) { dictionary_column_->Reserve(new_cap); index_column_->Reserve(new_cap); } diff --git a/clickhouse/columns/map.cpp b/clickhouse/columns/map.cpp index f98902c3..839b0668 100644 --- a/clickhouse/columns/map.cpp +++ b/clickhouse/columns/map.cpp @@ -33,8 +33,7 @@ ColumnMap::ColumnMap(ColumnRef data) : Column(GetMapType(data->GetType())), data_(data->As()) { } -void ColumnMap::Reserve(size_t new_cap) -{ +void ColumnMap::Reserve(size_t new_cap) { data_->Reserve(new_cap); } diff --git a/clickhouse/columns/nullable.cpp b/clickhouse/columns/nullable.cpp index d02b7f2c..23940c12 100644 --- a/clickhouse/columns/nullable.cpp +++ b/clickhouse/columns/nullable.cpp @@ -34,8 +34,7 @@ ColumnRef ColumnNullable::Nulls() const return nulls_; } -void ColumnNullable::Reserve(size_t new_cap) -{ +void ColumnNullable::Reserve(size_t new_cap) { nested_->Reserve(new_cap); nulls_->Reserve(new_cap); } diff --git a/clickhouse/columns/string.cpp b/clickhouse/columns/string.cpp index 07655967..791c2c6c 100644 --- a/clickhouse/columns/string.cpp +++ b/clickhouse/columns/string.cpp @@ -30,6 +30,10 @@ ColumnFixedString::ColumnFixedString(size_t n) { } +void ColumnFixedString::Reserve(size_t new_cap) { + data_.reserve(string_size_ * new_cap); +} + void ColumnFixedString::Append(std::string_view str) { if (str.size() > string_size_) { throw ValidationError("Expected string of length not greater than " @@ -45,8 +49,10 @@ void ColumnFixedString::Append(std::string_view str) { data_.insert(data_.size(), str); // Pad up to string_size_ with zeroes. - const auto padding_size = string_size_ - str.size(); - data_.resize(data_.size() + padding_size, char(0)); + if (str.size() < string_size_) { + const auto padding_size = string_size_ - str.size(); + data_.resize(data_.size() + padding_size, char(0)); + } } void ColumnFixedString::Clear() { @@ -160,8 +166,8 @@ ColumnString::ColumnString(size_t element_count) : Column(Type::CreateString()) { items_.reserve(element_count); - // 100 is arbitrary number, assumption that string values are about ~40 bytes long. - blocks_.reserve(std::max(1, element_count / 100)); + // 16 is arbitrary number, assumption that string values are about ~256 bytes long. + blocks_.reserve(std::max(1, element_count / 16)); } ColumnString::ColumnString(const std::vector& data) @@ -190,8 +196,7 @@ ColumnString::ColumnString(std::vector&& data) ColumnString::~ColumnString() {} -void ColumnString::Reserve(size_t new_cap) -{ +void ColumnString::Reserve(size_t new_cap) { items_.reserve(new_cap); // 16 is arbitrary number, assumption that string values are about ~256 bytes long. blocks_.reserve(std::max(1, new_cap / 16)); diff --git a/clickhouse/columns/string.h b/clickhouse/columns/string.h index 6d6d5e77..d6006556 100644 --- a/clickhouse/columns/string.h +++ b/clickhouse/columns/string.h @@ -28,7 +28,7 @@ class ColumnFixedString : public Column { } /// Increase the capacity of the column for large block insertion. - void Reserve(size_t) override {}; + void Reserve(size_t) override; /// Appends one element to the column. void Append(std::string_view str); diff --git a/clickhouse/columns/tuple.cpp b/clickhouse/columns/tuple.cpp index 24864063..56858590 100644 --- a/clickhouse/columns/tuple.cpp +++ b/clickhouse/columns/tuple.cpp @@ -20,9 +20,10 @@ size_t ColumnTuple::TupleSize() const { return columns_.size(); } -void ColumnTuple::Reserve(size_t new_cap) -{ - columns_.reserve(new_cap); +void ColumnTuple::Reserve(size_t new_cap) { + for (auto& column : columns_) { + column->Reserve(new_cap); + } } void ColumnTuple::Append(ColumnRef column) { diff --git a/clickhouse/columns/uuid.cpp b/clickhouse/columns/uuid.cpp index 72d3d2b8..fbaff97d 100644 --- a/clickhouse/columns/uuid.cpp +++ b/clickhouse/columns/uuid.cpp @@ -34,8 +34,7 @@ const UUID ColumnUUID::At(size_t n) const { return UUID(data_->At(n * 2), data_->At(n * 2 + 1)); } -void ColumnUUID::Reserve(size_t new_cap) -{ +void ColumnUUID::Reserve(size_t new_cap) { data_->Reserve(new_cap); } From d343428241380537f7212406f051c0d5057fbd1e Mon Sep 17 00:00:00 2001 From: wangwei <1261385937@qq.com> Date: Wed, 1 Nov 2023 17:55:49 +0800 Subject: [PATCH 3/3] add ut --- ut/Column_ut.cpp | 50 +++++++++++++----------------------------------- ut/utils_meta.h | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 37 deletions(-) diff --git a/ut/Column_ut.cpp b/ut/Column_ut.cpp index 9eb3c7cf..bac666ec 100644 --- a/ut/Column_ut.cpp +++ b/ut/Column_ut.cpp @@ -376,25 +376,18 @@ TYPED_TEST(GenericColumnTest, Swap) { #endif TYPED_TEST(GenericColumnTest, ReserveAndCapacity) { - if constexpr ( - // TODO(venemkov): test that ColumnType has Reserve() and Capacity() methods - is_one_of_v) { - + using column_type = typename TestFixture::ColumnType; + auto [column0, values] = this->MakeColumnWithValues(2); + auto values_copy = values; + EXPECT_NO_THROW(column0->Reserve(0u)); + EXPECT_EQ(2u, column0->Size()); + EXPECT_TRUE(CompareRecursive(values, values_copy)); + + auto column1 = this->MakeColumn(); + column1->Reserve(10u); + EXPECT_EQ(0u, column1->Size()); + + if constexpr (has_method_Reserve_v && has_method_Capacity_v) { auto column = this->MakeColumn(); EXPECT_EQ(0u, column->Capacity()); EXPECT_NO_THROW(column->Reserve(100u)); @@ -408,24 +401,7 @@ TYPED_TEST(GenericColumnTest, ReserveAndCapacity) { TYPED_TEST(GenericColumnTest, GetWritableData) { - if constexpr ( - // TODO(venemkov): test that ColumnType has GetWritableData() method - is_one_of_v) { + if constexpr (has_method_GetWritableData_v) { auto [column, values] = this->MakeColumnWithValues(111); // Do conversion from time_t to internal representation, similar to what ColumnDate and ColumnDate32 do if constexpr (is_one_of_v { template inline constexpr bool is_one_of_v = is_one_of::value; + + +#define HAS_METHOD(FUN) \ +template \ +struct has_method_##FUN : std::false_type {}; \ +template \ +struct has_method_##FUN>> \ +: std::true_type {}; \ +template \ +constexpr bool has_method_##FUN##_v = has_method_##FUN::value; + +HAS_METHOD(Reserve); +HAS_METHOD(Capacity); +HAS_METHOD(GetWritableData);