Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lark syntax -> llguidance translator #37

Merged
merged 12 commits into from
Nov 4, 2024
98 changes: 87 additions & 11 deletions parser/src/grammar_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,81 @@ pub struct GrammarBuilder {
strings: HashMap<String, NodeRef>,
curr_grammar_id: u32,
nodes: Vec<Node>,
rx_nodes: Vec<RegexNode>,
pub regex: RegexBuilder,
}

pub struct RegexBuilder {
node_ids: HashMap<RegexNode, RegexId>,
nodes: Vec<RegexNode>,
}

impl RegexBuilder {
pub fn new() -> Self {
Self {
nodes: vec![],
node_ids: HashMap::new(),
}
}

pub fn add_node(&mut self, node: RegexNode) -> RegexId {
if let Some(id) = self.node_ids.get(&node) {
return *id;
}
let id = RegexId(self.nodes.len());
self.nodes.push(node.clone());
self.node_ids.insert(node, id);
id
}

pub fn regex(&mut self, rx: String) -> RegexId {
self.add_node(RegexNode::Regex(rx))
}

pub fn literal(&mut self, s: String) -> RegexId {
self.add_node(RegexNode::Literal(s))
}

pub fn concat(&mut self, nodes: Vec<RegexId>) -> RegexId {
if nodes.len() == 1 {
return nodes[0];
}
if nodes.len() == 0 {
return self.add_node(RegexNode::NoMatch);
}
self.add_node(RegexNode::Concat(nodes))
}

pub fn select(&mut self, nodes: Vec<RegexId>) -> RegexId {
if nodes.len() == 1 {
return nodes[0];
}
if nodes.len() == 0 {
return self.add_node(RegexNode::NoMatch);
}
self.add_node(RegexNode::Or(nodes))
}

pub fn zero_or_more(&mut self, node: RegexId) -> RegexId {
self.repeat(node, 0, None)
}

pub fn one_or_more(&mut self, node: RegexId) -> RegexId {
self.repeat(node, 1, None)
}

pub fn optional(&mut self, node: RegexId) -> RegexId {
self.repeat(node, 0, Some(1))
}

pub fn repeat(&mut self, node: RegexId, min: u32, max: Option<u32>) -> RegexId {
self.add_node(RegexNode::Repeat(node, min, max))
}

fn finalize(&mut self) -> Vec<RegexNode> {
let r = std::mem::take(&mut self.nodes);
*self = Self::new();
r
}
}

impl GrammarBuilder {
Expand Down Expand Up @@ -52,16 +126,10 @@ impl GrammarBuilder {
strings: HashMap::new(),
curr_grammar_id: 0,
nodes: vec![],
rx_nodes: vec![],
regex: RegexBuilder::new(),
}
}

/// Appends `node` to `rx_nodes` and returns a [`RegexId`] wrapping the index
/// the node now occupies. No de-duplication is performed.
pub fn add_regex_node(&mut self, node: RegexNode) -> RegexId {
    // The length before the push is the index the new node will land at.
    let slot = self.rx_nodes.len();
    self.rx_nodes.push(node);
    RegexId(slot)
}

fn shift_nodes(&mut self) {
if self.top_grammar.grammars.len() == 0 {
assert!(self.nodes.is_empty(), "nodes added before add_grammar()");
Expand All @@ -72,8 +140,7 @@ impl GrammarBuilder {
"no nodes added before add_grammar() or finalize()"
);
self.top_grammar.grammars.last_mut().unwrap().nodes = nodes;
self.top_grammar.grammars.last_mut().unwrap().rx_nodes =
std::mem::take(&mut self.rx_nodes);
self.top_grammar.grammars.last_mut().unwrap().rx_nodes = self.regex.finalize();
}
}

Expand Down Expand Up @@ -170,10 +237,19 @@ impl GrammarBuilder {
self.select(&[value, empty])
}

/// Builds a node matching one or more consecutive occurrences of `elt`.
///
/// The result is a left-recursive rule `R -> elt | R elt`, created as a
/// placeholder first so that the rule can refer to itself.
pub fn one_or_more(&mut self, elt: NodeRef) -> NodeRef {
    let rule = self.placeholder();
    // Recursive alternative: everything matched so far, followed by one more `elt`.
    let rule_then_elt = self.join(&[rule, elt]);
    let body = self.select(&[elt, rule_then_elt]);
    self.set_placeholder(rule, body);
    rule
}

pub fn zero_or_more(&mut self, elt: NodeRef) -> NodeRef {
let p = self.placeholder();
let empty = self.empty();
let inner = self.select(&[empty, elt]);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hudson-ai be aware that `zero_or_more` was broken (it was really zero_or_one). The fix is included in this PR, and I'll be merging this initial lark stuff soon.

let p_elt = self.join(&[p, elt]);
let inner = self.select(&[empty, p_elt]);
self.set_placeholder(p, inner);
p
}
Expand Down
Loading