From b394ff92102a4d1d81270c4bac703ca7fe531d04 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Fri, 3 Jan 2025 11:17:44 -0600 Subject: [PATCH] update --- parquet/src/arrow/async_reader/arrow_reader.rs | 2 -- parquet/src/arrow/async_reader/mod.rs | 2 +- parquet/src/file/serialized_reader.rs | 3 +++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/parquet/src/arrow/async_reader/arrow_reader.rs b/parquet/src/arrow/async_reader/arrow_reader.rs index a654928f0b2..b8a1466c838 100644 --- a/parquet/src/arrow/async_reader/arrow_reader.rs +++ b/parquet/src/arrow/async_reader/arrow_reader.rs @@ -120,7 +120,6 @@ impl FilteredParquetRecordBatchReader { self.row_filter.take() } - #[inline(never)] /// Take a selection, and return the new selection where the rows are filtered by the predicate. fn build_predicate_filter( &mut self, @@ -372,7 +371,6 @@ impl Iterator for CachedPageReader { impl PageReader for CachedPageReader { fn get_next_page(&mut self) -> Result, ParquetError> { - // self.inner.get_next_page() let next_page_offset = self.inner.peek_next_page_offset()?; let Some(offset) = next_page_offset else { diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 31335fe56f4..8f66c5ddd74 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -686,7 +686,7 @@ where /// - `Ok(None)` if the stream has ended. /// - `Err(error)` if the stream has errored. All subsequent calls will return `Ok(None)`. /// - `Ok(Some(reader))` which holds all the data for the row group. 
- pub async fn next_row_group(&mut self) -> Result> { + pub async fn next_row_group(&mut self) -> Result> { loop { match &mut self.state { StreamState::Decoding(_) | StreamState::Reading(_) => { diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index e50b520c55d..cc1ab74028a 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -568,6 +568,9 @@ impl SerializedPageReader { }) } + /// Similar to `peek_next_page`, but returns the offset of the next page instead of the page metadata. + /// Unlike page metadata, an offset can uniquely identify a page. + /// Useful when we want to know whether the next page is already cached or was read previously. #[cfg(feature = "async")] pub(crate) fn peek_next_page_offset(&mut self) -> Result> { match &mut self.state {