Skip to content

Commit

Permalink
count only sort slots
Browse files Browse the repository at this point in the history
  • Loading branch information
murphyatwork committed Dec 20, 2024
1 parent 34adfe4 commit f9c908c
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
29 changes: 22 additions & 7 deletions be/src/exec/chunks_sorter_full_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,9 @@
#include "exec/sorting/merge.h"
#include "exec/sorting/sort_permute.h"
#include "exec/sorting/sorting.h"
#include "exprs/column_ref.h"
#include "exprs/expr.h"
#include "gutil/strings/substitute.h"
#include "runtime/current_thread.h"
#include "runtime/runtime_state.h"
#include "util/stopwatch.hpp"

namespace starrocks {

Expand All @@ -34,7 +31,14 @@ ChunksSorterFullSort::ChunksSorterFullSort(RuntimeState* state, const std::vecto
: ChunksSorter(state, sort_exprs, is_asc_order, is_null_first, sort_keys, false),
max_buffered_rows(static_cast<size_t>(max_buffered_rows)),
max_buffered_bytes(max_buffered_bytes),
_early_materialized_slots(early_materialized_slots.begin(), early_materialized_slots.end()) {}
_early_materialized_slots(early_materialized_slots.begin(), early_materialized_slots.end()) {
// initialize _sort_slots
for (auto& expr : *sort_exprs) {
std::vector<SlotId> slots;
expr->root()->get_slot_ids(&slots);
_sort_slots.insert(slots.begin(), slots.end());
}
}

ChunksSorterFullSort::~ChunksSorterFullSort() = default;

Expand All @@ -43,8 +47,8 @@ void ChunksSorterFullSort::setup_runtime(RuntimeState* state, RuntimeProfile* pr
_runtime_profile = profile;
_parent_mem_tracker = parent_mem_tracker;
_object_pool = std::make_unique<ObjectPool>();
_runtime_profile->add_info_string("MaxBufferedRows", strings::Substitute("$0", max_buffered_rows));
_runtime_profile->add_info_string("MaxBufferedBytes", strings::Substitute("$0", max_buffered_bytes));
_runtime_profile->add_info_string("MaxBufferedRows", std::to_string(max_buffered_rows));
_runtime_profile->add_info_string("MaxBufferedBytes", std::to_string(max_buffered_bytes));
_profiler = _object_pool->add(new ChunksSorterFullSortProfiler(profile, parent_mem_tracker));
}

Expand All @@ -60,7 +64,18 @@ Status ChunksSorterFullSort::_merge_unsorted(RuntimeState* state, const ChunkPtr
SCOPED_TIMER(_build_timer);
_staging_unsorted_chunks.push_back(std::move(chunk));
_staging_unsorted_rows += chunk->num_rows();
_staging_unsorted_bytes += chunk->bytes_usage();

// Only consider the memory usage of columns used as SORT_KEY, as they are more critical for CPU-cache
size_t mem_usage = 0;
if (!_sort_slots.empty()) {
auto& slot_map = chunk->get_slot_id_to_index_map();
for (auto [slot_id, index] : slot_map) {
if (_sort_slots.contains(slot_id)) {
mem_usage += chunk->get_column_by_slot_id(slot_id)->byte_size();
}
}
}
_staging_unsorted_bytes += mem_usage;
return Status::OK();
}

Expand Down
3 changes: 2 additions & 1 deletion be/src/exec/chunks_sorter_full_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "column/vectorized_fwd.h"
#include "exec/chunks_sorter.h"
#include "exec/sorting/merge.h"
#include "gtest/gtest_prod.h"

namespace starrocks {
class ExprContext;
Expand Down Expand Up @@ -90,9 +89,11 @@ class ChunksSorterFullSort : public ChunksSorter {
std::unique_ptr<ObjectPool> _object_pool = nullptr;
ChunksSorterFullSortProfiler* _profiler = nullptr;

// Parameters that control buffering: several chunks are buffered before each partial sort to reduce random memory access
// TODO: further tune the buffer parameters
const size_t max_buffered_rows; // Max buffer 1024000 rows
const size_t max_buffered_bytes; // Max buffer 16MB bytes
std::set<SlotId> _sort_slots; // Slots participating in the sorting procedure

// only when order-by columns(_sort_exprs) are all ColumnRefs and the cost of eager-materialization of
// other columns is larger than ordinal column, then we materialize order-by columns and ordinal columns eagerly,
Expand Down

0 comments on commit f9c908c

Please sign in to comment.