Skip to content

Commit

Permalink
fix zero_or_more()
Browse files Browse the repository at this point in the history
add RegexBuilder to GrammarBuilder
  • Loading branch information
mmoskal committed Nov 4, 2024
1 parent 7b279f3 commit 6fecfdb
Showing 1 changed file with 87 additions and 11 deletions.
98 changes: 87 additions & 11 deletions parser/src/grammar_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,81 @@ pub struct GrammarBuilder {
strings: HashMap<String, NodeRef>,
curr_grammar_id: u32,
nodes: Vec<Node>,
rx_nodes: Vec<RegexNode>,
pub regex: RegexBuilder,
}

pub struct RegexBuilder {
node_ids: HashMap<RegexNode, RegexId>,
nodes: Vec<RegexNode>,
}

impl RegexBuilder {
pub fn new() -> Self {
Self {
nodes: vec![],
node_ids: HashMap::new(),
}
}

pub fn add_node(&mut self, node: RegexNode) -> RegexId {
if let Some(id) = self.node_ids.get(&node) {
return *id;
}
let id = RegexId(self.nodes.len());
self.nodes.push(node.clone());
self.node_ids.insert(node, id);
id
}

pub fn regex(&mut self, rx: String) -> RegexId {
self.add_node(RegexNode::Regex(rx))
}

pub fn literal(&mut self, s: String) -> RegexId {
self.add_node(RegexNode::Literal(s))
}

pub fn concat(&mut self, nodes: Vec<RegexId>) -> RegexId {
if nodes.len() == 1 {
return nodes[0];
}
if nodes.len() == 0 {
return self.add_node(RegexNode::NoMatch);
}
self.add_node(RegexNode::Concat(nodes))
}

pub fn select(&mut self, nodes: Vec<RegexId>) -> RegexId {
if nodes.len() == 1 {
return nodes[0];
}
if nodes.len() == 0 {
return self.add_node(RegexNode::NoMatch);
}
self.add_node(RegexNode::Or(nodes))
}

pub fn zero_or_more(&mut self, node: RegexId) -> RegexId {
self.repeat(node, 0, None)
}

pub fn one_or_more(&mut self, node: RegexId) -> RegexId {
self.repeat(node, 1, None)
}

pub fn optional(&mut self, node: RegexId) -> RegexId {
self.repeat(node, 0, Some(1))
}

pub fn repeat(&mut self, node: RegexId, min: u32, max: Option<u32>) -> RegexId {
self.add_node(RegexNode::Repeat(node, min, max))
}

fn finalize(&mut self) -> Vec<RegexNode> {
let r = std::mem::take(&mut self.nodes);
*self = Self::new();
r
}
}

impl GrammarBuilder {
Expand Down Expand Up @@ -52,16 +126,10 @@ impl GrammarBuilder {
strings: HashMap::new(),
curr_grammar_id: 0,
nodes: vec![],
rx_nodes: vec![],
regex: RegexBuilder::new(),
}
}

/// Appends `node` to `rx_nodes` and returns a [`RegexId`] wrapping the index it was stored at.
pub fn add_regex_node(&mut self, node: RegexNode) -> RegexId {
    // Capture the index before pushing: it is the position the new node will occupy.
    let next_index = self.rx_nodes.len();
    self.rx_nodes.push(node);
    RegexId(next_index)
}

fn shift_nodes(&mut self) {
if self.top_grammar.grammars.len() == 0 {
assert!(self.nodes.is_empty(), "nodes added before add_grammar()");
Expand All @@ -72,8 +140,7 @@ impl GrammarBuilder {
"no nodes added before add_grammar() or finalize()"
);
self.top_grammar.grammars.last_mut().unwrap().nodes = nodes;
self.top_grammar.grammars.last_mut().unwrap().rx_nodes =
std::mem::take(&mut self.rx_nodes);
self.top_grammar.grammars.last_mut().unwrap().rx_nodes = self.regex.finalize();
}
}

Expand Down Expand Up @@ -170,10 +237,19 @@ impl GrammarBuilder {
self.select(&[value, empty])
}

/// Returns a node for one or more repetitions of `elt`.
///
/// A placeholder is allocated first so the rule can refer to itself; it is then bound to
/// `select(elt, join(placeholder, elt))`, i.e. either a single `elt` or the rule followed by
/// another `elt`.
pub fn one_or_more(&mut self, elt: NodeRef) -> NodeRef {
    let rule = self.placeholder();
    let rule_then_elt = self.join(&[rule, elt]);
    let body = self.select(&[elt, rule_then_elt]);
    self.set_placeholder(rule, body);
    rule
}

/// Returns a node for zero or more repetitions of `elt`.
///
/// A placeholder `p` is allocated first so the rule can refer to itself; it is then bound to
/// `select(self.empty(), join(p, elt))`, i.e. either the empty node or the rule followed by
/// another `elt`.
pub fn zero_or_more(&mut self, elt: NodeRef) -> NodeRef {
    let p = self.placeholder();
    let empty = self.empty();
    // Only the recursive alternative is built; the earlier non-recursive
    // `select(&[empty, elt])` was immediately shadowed and its result never used.
    let p_elt = self.join(&[p, elt]);
    let inner = self.select(&[empty, p_elt]);
    self.set_placeholder(p, inner);
    p
}
Expand Down

0 comments on commit 6fecfdb

Please sign in to comment.