From 52cbdfb50aaa70f2c66a71231c65633f5218aeb4 Mon Sep 17 00:00:00 2001 From: Mingzhuo Yin Date: Mon, 30 Dec 2024 23:18:02 +0800 Subject: [PATCH] fix: page read counts exceed maximum locks (#23) close #22 ## bench results trec-covid dataset prev: [pg_bm25.rs] Query: 15.8767s (31.49/s) after: [pg_bm25.rs] Query: 15.7073s (31.83/s) --------- Signed-off-by: Mingzhuo Yin --- src/page/virtual.rs | 21 +++++++++++++++------ src/segment/posting/reader.rs | 16 +++++----------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/page/virtual.rs b/src/page/virtual.rs index 4c38194..d6b77ef 100644 --- a/src/page/virtual.rs +++ b/src/page/virtual.rs @@ -1,6 +1,6 @@ use super::{ bm25_page_size, page_alloc_init_forknum, page_alloc_with_fsm, page_read, page_write, PageFlags, - PageReadGuard, PageWriteGuard, + PageWriteGuard, }; const DIRECT_COUNT: usize = bm25_page_size() / 4; @@ -9,14 +9,24 @@ const INDIRECT2_COUNT: usize = INDIRECT1_COUNT * DIRECT_COUNT; pub struct VirtualPageReader { relation: pgrx::pg_sys::Relation, - direct_inode: PageReadGuard, + direct_inode: Box<[u32]>, + indirect1_inode_blkno: u32, } impl VirtualPageReader { pub fn new(relation: pgrx::pg_sys::Relation, blkno: u32) -> Self { + assert!(blkno != pgrx::pg_sys::InvalidBlockNumber); + let direct_inode_page = page_read(relation, blkno); + let data = direct_inode_page.data(); + let mut direct_inode: Vec<u32> = Vec::with_capacity(data.len() / 4); + direct_inode.extend_from_slice(bytemuck::cast_slice(data)); + let direct_inode = direct_inode.into_boxed_slice(); + let indirect1_inode_blkno = direct_inode_page.opaque.next_blkno; + Self { relation, - direct_inode: page_read(relation, blkno), + direct_inode, + indirect1_inode_blkno, } } @@ -43,12 +53,11 @@ impl VirtualPageReader { pub fn get_block_id(&self, virtual_id: u32) -> u32 { let mut virtual_id = virtual_id as usize; if virtual_id < DIRECT_COUNT { - let slice = &self.direct_inode.data()[virtual_id * 4..][..4]; - return 
u32::from_le_bytes(slice.try_into().unwrap()); + return self.direct_inode[virtual_id]; } virtual_id -= DIRECT_COUNT; - let indirect1_inode = page_read(self.relation, self.direct_inode.opaque.next_blkno); + let indirect1_inode = page_read(self.relation, self.indirect1_inode_blkno); if virtual_id < INDIRECT1_COUNT { let indirect1_id = virtual_id / DIRECT_COUNT; let indirect1_offset = virtual_id % DIRECT_COUNT; diff --git a/src/segment/posting/reader.rs b/src/segment/posting/reader.rs index ec53e57..143f4fd 100644 --- a/src/segment/posting/reader.rs +++ b/src/segment/posting/reader.rs @@ -1,7 +1,7 @@ use std::{fmt::Debug, io::Read, mem::MaybeUninit}; use crate::{ - page::{page_read, PageReadGuard, PageReader, VirtualPageReader}, + page::{page_read, PageReader, VirtualPageReader}, segment::{field_norm::id_to_fieldnorm, posting::SkipBlockFlags}, utils::compress_block::BlockDecoder, weight::Bm25Weight, @@ -52,7 +52,6 @@ pub struct PostingReader { block_data_reader: VirtualPageReader, cur_page: pgrx::pg_sys::BlockNumber, page_offset: usize, - page_inner: Option<PageReadGuard>, cur_block: usize, block_offset: usize, remain_doc_cnt: u32, @@ -108,7 +107,6 @@ impl PostingReader { block_data_reader, cur_page: 0, page_offset: 0, - page_inner: None, cur_block: 0, block_offset: 0, remain_doc_cnt: term_info.doc_count, @@ -234,12 +232,10 @@ impl PostingReader { self.skip_blocks[self.cur_block - 1].last_doc }; - let page = self.page_inner.get_or_insert_with(|| { - page_read( - self.index, - self.block_data_reader.get_block_id(self.cur_page), - ) - }); + let page = page_read( + self.index, + self.block_data_reader.get_block_id(self.cur_page), + ); if self.remain_doc_cnt < COMPRESSION_BLOCK_SIZE as u32 { debug_assert!(skip.flag.contains(SkipBlockFlags::UNFULLED)); @@ -285,7 +281,6 @@ impl PostingReader { if self.completed() { self.page_offset = 0; - self.page_inner = None; return; } @@ -295,7 +290,6 @@ impl PostingReader { { self.cur_page += 1; self.page_offset = 0; - self.page_inner = None; } } }