diff --git a/parser/src/grammar_builder.rs b/parser/src/grammar_builder.rs
index 8a43a457..55e214a2 100644
--- a/parser/src/grammar_builder.rs
+++ b/parser/src/grammar_builder.rs
@@ -2,7 +2,9 @@ use std::{collections::HashMap, sync::atomic::AtomicU32};
 
 use anyhow::{ensure, Result};
 
-use crate::api::{GrammarWithLexer, Node, NodeId, NodeProps, RegexSpec, TopLevelGrammar};
+use crate::api::{
+    GrammarWithLexer, Node, NodeId, NodeProps, RegexId, RegexNode, RegexSpec, TopLevelGrammar,
+};
 
 #[derive(Clone, Copy, PartialEq, Eq, Debug)]
 pub struct NodeRef {
@@ -16,6 +18,7 @@ pub struct GrammarBuilder {
     strings: HashMap<String, NodeRef>,
     curr_grammar_id: u32,
     nodes: Vec<Node>,
+    rx_nodes: Vec<RegexNode>,
 }
 
 impl GrammarBuilder {
@@ -49,9 +52,18 @@ impl GrammarBuilder {
             strings: HashMap::new(),
             curr_grammar_id: 0,
             nodes: vec![],
+            rx_nodes: vec![],
         }
     }
 
+    /// Stores `node` in the pending regex-node list and returns its id,
+    /// which is the node's index in that list.
+    pub fn add_regex_node(&mut self, node: RegexNode) -> RegexId {
+        let id = RegexId(self.rx_nodes.len());
+        self.rx_nodes.push(node);
+        id
+    }
+
     fn shift_nodes(&mut self) {
         if self.top_grammar.grammars.len() == 0 {
             assert!(self.nodes.is_empty(), "nodes added before add_grammar()");
@@ -62,6 +74,8 @@ impl GrammarBuilder {
                 "no nodes added before add_grammar() or finalize()"
             );
             self.top_grammar.grammars.last_mut().unwrap().nodes = nodes;
+            self.top_grammar.grammars.last_mut().unwrap().rx_nodes =
+                std::mem::take(&mut self.rx_nodes);
         }
     }
 
diff --git a/parser/src/lark/compiler.rs b/parser/src/lark/compiler.rs
new file mode 100644
index 00000000..6262be8f
--- /dev/null
+++ b/parser/src/lark/compiler.rs
@@ -0,0 +1,71 @@
+//! Lowers parsed Lark grammar items into an llguidance grammar.
+
+use std::collections::HashMap;
+
+use anyhow::{bail, Result};
+
+use crate::{
+    api::{RegexSpec, TopLevelGrammar},
+    GrammarBuilder, NodeRef,
+};
+
+use super::ast::*;
+
+/// Working state for translating Lark items through a `GrammarBuilder`.
+struct Compiler {
+    builder: GrammarBuilder,
+    items: Vec<Item>,
+    /// Every rule and token definition registered so far, keyed by name.
+    nodes: HashMap<String, NodeInfo>,
+}
+
+/// Bookkeeping for one named rule or token.
+struct NodeInfo {
+    /// Placeholder node reserved in the builder for this definition.
+    id: NodeRef,
+    /// `true` for token definitions, `false` for rules.
+    is_terminal: bool,
+    /// Not filled in yet: every entry is currently created with `None`.
+    regex: Option<RegexSpec>,
+}
+
+/// Compiles parsed Lark `items` into a `TopLevelGrammar`.
+///
+/// # Errors
+///
+/// Returns an error if `items` contains a statement (not supported yet) or
+/// if finalizing the grammar builder fails.
+pub fn lark_to_llguidance(items: Vec<Item>) -> Result<TopLevelGrammar> {
+    let mut c = Compiler {
+        builder: GrammarBuilder::new(),
+        items,
+        nodes: HashMap::new(),
+    };
+    c.execute()?;
+    c.builder.finalize()
+}
+
+impl Compiler {
+    /// Reserves a placeholder node for each rule and token and records it by name.
+    fn execute(&mut self) -> Result<()> {
+        for item in self.items.iter() {
+            let (name, is_terminal) = match item {
+                Item::Rule(rule) => (&rule.name, false),
+                Item::Token(token_def) => (&token_def.name, true),
+                // TODO: compile statements. Until then fail with an error instead
+                // of panicking, since `items` is caller-supplied input.
+                Item::Statement(_) => bail!("lark statements are not supported yet"),
+            };
+            let id = self.builder.placeholder();
+            self.nodes.insert(
+                name.clone(),
+                NodeInfo {
+                    id,
+                    is_terminal,
+                    regex: None,
+                },
+            );
+        }
+        Ok(())
+    }
+}
diff --git a/parser/src/lark/mod.rs b/parser/src/lark/mod.rs
index cb8a0a4c..a1e9a1a5 100644
--- a/parser/src/lark/mod.rs
+++ b/parser/src/lark/mod.rs
@@ -1,3 +1,4 @@
 mod ast;
+mod compiler;
 mod lexer;
 mod parser;