Skip to content

Commit

Permalink
Raise an error for invalid repetition levels when delimiting records
Browse files Browse the repository at this point in the history
  • Loading branch information
adamreeve committed Jan 7, 2025
1 parent f41f590 commit be347da
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
6 changes: 6 additions & 0 deletions cpp/src/parquet/arrow/arrow_reader_writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3990,6 +3990,12 @@ TEST(TestArrowReaderAdHoc, CorruptedSchema) {
TryReadDataFile(path, ::arrow::StatusCode::IOError);
}

TEST(TestArrowReaderAdHoc, InvalidRepetitionLevels) {
  // Regression test for GH-45185: the data file's repetition levels begin
  // with 1 rather than 0, so reading it is expected to fail with an IOError.
  auto bad_file_path =
      test::get_data_file("ARROW-GH-45185.parquet", /*is_good=*/false);
  TryReadDataFile(bad_file_path, ::arrow::StatusCode::IOError);
}

TEST(TestArrowReaderAdHoc, LARGE_MEMORY_TEST(LargeStringColumn)) {
// ARROW-3762
::arrow::StringBuilder builder;
Expand Down
7 changes: 6 additions & 1 deletion cpp/src/parquet/column_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1603,7 +1603,12 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,
// another record start or exhausting the ColumnChunk
int64_t level = levels_position_;
if (at_record_start_) {
ARROW_DCHECK_EQ(0, rep_levels[levels_position_]);
if (rep_levels[levels_position_] != 0) {
std::stringstream ss;
ss << "The repetition level at the start of a record must be 0 but got "
<< rep_levels[levels_position_];
throw ParquetException(ss.str());
}
++levels_position_;
// We have decided to consume the level at this position; therefore we
// must advance until we find another record boundary
Expand Down

0 comments on commit be347da

Please sign in to comment.