diff --git a/parser/src/earley/grammar.rs b/parser/src/earley/grammar.rs index a1c0cd42..3c7dcc71 100644 --- a/parser/src/earley/grammar.rs +++ b/parser/src/earley/grammar.rs @@ -547,14 +547,14 @@ impl CSymIdx { } } -/// This is a pointer into rules[] array, and represents a particular +/// This is a pointer into rhs_elements[] array, and represents a particular /// element in the rhs of a rule (and thus by implication also a unique lhs). #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct RhsEltIdx(u32); +pub struct RhsPtr(u32); -impl RhsEltIdx { +impl RhsPtr { pub fn from_index(idx: u32) -> Self { - RhsEltIdx(idx) + RhsPtr(idx) } pub fn as_index(&self) -> usize { @@ -571,7 +571,7 @@ pub struct CSymbol { pub props: SymbolProps, pub gen_grammar: Option, // this points to the first element of rhs of each rule - pub rules: Vec, + pub rules: Vec, pub sym_flags: SymFlags, pub lexeme: Option, } @@ -648,16 +648,16 @@ pub struct CGrammar { lexer_spec: LexerSpec, // indexed by CSymIdx symbols: Vec, - // This is rhs of rules, indexed by RhsEltIdx (CSymbol::rules) + // This is rhs of rules, indexed by RhsPtr (CSymbol::rules) // Each rhs ends with CSymIdx::NULL // A pointer into this array represents an Earley item: - // the dot is before rules[rhs_elt_idx]; when it points at CSymIdx::NULL, the item is complete - rules: Vec, - // given a pointer into rules[] (shifted by RULE_SHIFT), + // the dot is before rhs_elements[rhs_ptr]; when it points at CSymIdx::NULL, the item is complete + rhs_elements: Vec, + // given a pointer into rhs_elements[] (shifted by RULE_SHIFT), // this gives the index of the lhs symbol - rhs_elt_idx_to_sym_idx: Vec, - // this is cache, rhs_elt_idx_to_sym_flags[x] == symbols[rhs_elt_idx_to_sym_idx[x]].sym_flags - rhs_elt_idx_to_sym_flags: Vec, + rhs_ptr_to_sym_idx: Vec, + // this is cache, rhs_ptr_to_sym_flags[x] == symbols[rhs_ptr_to_sym_idx[x]].sym_flags + rhs_ptr_to_sym_flags: Vec, } const RULE_SHIFT: usize = 2; @@ -667,26 +667,26 @@ impl CGrammar { &self.lexer_spec } - pub fn sym_idx_lhs(&self, rule: RhsEltIdx) -> CSymIdx { - self.rhs_elt_idx_to_sym_idx[rule.as_index() >> RULE_SHIFT] + pub fn sym_idx_lhs(&self, rule: RhsPtr) -> CSymIdx { + self.rhs_ptr_to_sym_idx[rule.as_index() >> RULE_SHIFT] } - pub fn sym_flags_lhs(&self, rule: RhsEltIdx) -> SymFlags { - self.rhs_elt_idx_to_sym_flags[rule.as_index() >> RULE_SHIFT] + pub fn sym_flags_lhs(&self, rule: RhsPtr) -> SymFlags { + self.rhs_ptr_to_sym_flags[rule.as_index() >> RULE_SHIFT] } - pub fn rule_rhs(&self, rule: RhsEltIdx) -> (&[CSymIdx], usize) { + pub fn rule_rhs(&self, rule: RhsPtr) -> (&[CSymIdx], usize) { let idx = rule.as_index(); let mut start = idx - 1; - while self.rules[start] != CSymIdx::NULL { + while self.rhs_elements[start] != CSymIdx::NULL { start -= 1; } start += 1; let mut stop = idx; - while self.rules[stop] != CSymIdx::NULL { + while self.rhs_elements[stop] != CSymIdx::NULL { stop += 1; } - (&self.rules[start..stop], idx - start) + (&self.rhs_elements[start..stop], idx - start) } pub fn sym_data(&self, sym: CSymIdx) -> &CSymbol { @@ -697,12 +697,12 @@ impl CGrammar { &mut self.symbols[sym.0 as usize] } - pub fn sym_idx_dot(&self, idx: RhsEltIdx) -> CSymIdx { - self.rules[idx.0 as usize] + pub fn sym_idx_dot(&self, idx: RhsPtr) -> CSymIdx { + self.rhs_elements[idx.0 as usize] } #[inline(always)] - pub fn sym_data_dot(&self, idx: RhsEltIdx) -> &CSymbol { + pub fn sym_data_dot(&self, idx: RhsPtr) -> &CSymbol { self.sym_data(self.sym_idx_dot(idx)) } @@ -710,7 +710,7 @@ impl CGrammar { self.start_symbol } - pub fn rules_of(&self, sym: CSymIdx) -> &[RhsEltIdx] { + pub fn rules_of(&self, sym: CSymIdx) -> &[RhsPtr] { &self.sym_data(sym).rules } @@ -726,9 +726,9 @@ impl CGrammar { start_symbol: CSymIdx::NULL, // replaced lexer_spec, symbols: vec![], - rules: vec![CSymIdx::NULL], // make sure RhsEltIdx::NULL is invalid - rhs_elt_idx_to_sym_idx: vec![], - rhs_elt_idx_to_sym_flags: vec![], + rhs_elements: vec![CSymIdx::NULL], // make sure RhsPtr::NULL is invalid + rhs_ptr_to_sym_idx: vec![], + rhs_ptr_to_sym_flags: vec![], }; outp.add_symbol(CSymbol { idx: CSymIdx::NULL, @@ -794,20 +794,20 @@ impl CGrammar { if rule.rhs.is_empty() { continue; } - let curr = RhsEltIdx(outp.rules.len().try_into().unwrap()); + let curr = RhsPtr(outp.rhs_elements.len().try_into().unwrap()); outp.sym_data_mut(idx).rules.push(curr); // outp.rules.push(idx); for r in &rule.rhs { - outp.rules.push(sym_map[r]); + outp.rhs_elements.push(sym_map[r]); } - outp.rules.push(CSymIdx::NULL); + outp.rhs_elements.push(CSymIdx::NULL); } - while outp.rules.len() % (1 << RULE_SHIFT) != 0 { - outp.rules.push(CSymIdx::NULL); + while outp.rhs_elements.len() % (1 << RULE_SHIFT) != 0 { + outp.rhs_elements.push(CSymIdx::NULL); } - let rlen = outp.rules.len() >> RULE_SHIFT; - while outp.rhs_elt_idx_to_sym_idx.len() < rlen { - outp.rhs_elt_idx_to_sym_idx.push(idx); + let rlen = outp.rhs_elements.len() >> RULE_SHIFT; + while outp.rhs_ptr_to_sym_idx.len() < rlen { + outp.rhs_ptr_to_sym_idx.push(idx); } } @@ -815,8 +815,8 @@ impl CGrammar { sym.sym_flags = SymFlags::from_csymbol(sym); } - outp.rhs_elt_idx_to_sym_flags = outp - .rhs_elt_idx_to_sym_idx + outp.rhs_ptr_to_sym_flags = outp + .rhs_ptr_to_sym_idx .iter() .map(|s| outp.sym_data(*s).sym_flags) .collect(); @@ -853,7 +853,7 @@ impl CGrammar { &self.symbols[sym.0 as usize].name } - pub fn rule_to_string(&self, rule: RhsEltIdx) -> String { + pub fn rule_to_string(&self, rule: RhsPtr) -> String { let sym = self.sym_idx_lhs(rule); let symdata = self.sym_data(sym); let lhs = self.sym_name(sym); diff --git a/parser/src/earley/parser.rs b/parser/src/earley/parser.rs index 8f807d7f..6f1115f0 100644 --- a/parser/src/earley/parser.rs +++ b/parser/src/earley/parser.rs @@ -25,7 +25,7 @@ use crate::{ }; use super::{ - grammar::{CGrammar, CSymIdx, CSymbol, RhsEltIdx}, + grammar::{CGrammar, CSymIdx, CSymbol, RhsPtr}, lexer::{LexerResult, PreLexeme}, lexerspec::{Lexeme, LexemeIdx, LexerSpec}, }; @@ -125,14 +125,14 @@ impl Item { #[allow(dead_code)] const NULL: Self = Item { data: 0 }; - fn new(rule: RhsEltIdx, start: usize) -> Self { + fn new(rule: RhsPtr, start: usize) -> Self { Item { data: rule.as_index() as u64 | ((start as u64) << 32), } } - fn rhs_elt_idx(&self) -> RhsEltIdx { - RhsEltIdx::from_index(self.data as u32) + fn rhs_ptr(&self) -> RhsPtr { + RhsPtr::from_index(self.data as u32) } fn start_pos(&self) -> usize { @@ -495,10 +495,10 @@ impl ParserState { set } - fn after_dots(&self) -> impl Iterator + '_ { + fn after_dots(&self) -> impl Iterator + '_ { self.curr_row() .item_indices() - .map(|i| self.scratch.items[i].rhs_elt_idx()) + .map(|i| self.scratch.items[i].rhs_ptr()) } fn after_dots_symdata(&self) -> impl Iterator + '_ { @@ -627,7 +627,7 @@ impl ParserState { } fn item_lhs(&self, item: &Item) -> CSymIdx { - self.grammar.sym_idx_lhs(item.rhs_elt_idx()) + self.grammar.sym_idx_lhs(item.rhs_ptr()) } fn item_sym_data(&self, item: &Item) -> &CSymbol { @@ -1180,7 +1180,7 @@ impl ParserState { // each row while i < last { let item = self.scratch.items[i]; - let sym = self.grammar.sym_data_dot(item.rhs_elt_idx()); + let sym = self.grammar.sym_data_dot(item.rhs_ptr()); if sym.lexeme == Some(lexeme.idx) { self.scratch.just_add(item.advance_dot(), i, "scan"); } @@ -1218,7 +1218,7 @@ impl ParserState { debug!(" agenda: {}", self.item_to_string(item_idx)); } - let rule = item.rhs_elt_idx(); + let rule = item.rhs_ptr(); let after_dot = self.grammar.sym_idx_dot(rule); // If 'rule' is a complete Earley item ... @@ -1271,7 +1271,7 @@ impl ParserState { // The main completion inference rule (slide 21 in Kallmeyer 2018) for i in self.rows[item.start_pos()].item_indices() { let item = self.scratch.items[i]; - if self.grammar.sym_idx_dot(item.rhs_elt_idx()) == lhs { + if self.grammar.sym_idx_dot(item.rhs_ptr()) == lhs { self.scratch.add_unique(item.advance_dot(), i, "complete"); } } @@ -1823,7 +1823,7 @@ impl<'a> Recognizer for ParserRecognizer<'a> { fn item_to_string(g: &CGrammar, item: &Item) -> String { format!( "{} @{}", - g.rule_to_string(item.rhs_elt_idx()), + g.rule_to_string(item.rhs_ptr()), item.start_pos(), ) }