Skip to content

Commit

Permalink
start on compiler
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Nov 1, 2024
1 parent 446a292 commit 7b279f3
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 1 deletion.
14 changes: 13 additions & 1 deletion parser/src/grammar_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use std::{collections::HashMap, sync::atomic::AtomicU32};

use anyhow::{ensure, Result};

use crate::api::{GrammarWithLexer, Node, NodeId, NodeProps, RegexSpec, TopLevelGrammar};
use crate::api::{
GrammarWithLexer, Node, NodeId, NodeProps, RegexId, RegexNode, RegexSpec, TopLevelGrammar,
};

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct NodeRef {
Expand All @@ -16,6 +18,7 @@ pub struct GrammarBuilder {
strings: HashMap<String, NodeRef>,
curr_grammar_id: u32,
nodes: Vec<Node>,
rx_nodes: Vec<RegexNode>,
}

impl GrammarBuilder {
Expand Down Expand Up @@ -49,9 +52,16 @@ impl GrammarBuilder {
strings: HashMap::new(),
curr_grammar_id: 0,
nodes: vec![],
rx_nodes: vec![],
}
}

/// Stores `node` in the builder's list of regex nodes and returns its id.
///
/// The returned [`RegexId`] wraps the index at which the node was stored,
/// i.e. the length of the list just before the push.
pub fn add_regex_node(&mut self, node: RegexNode) -> RegexId {
    let next_index = self.rx_nodes.len();
    self.rx_nodes.push(node);
    RegexId(next_index)
}

fn shift_nodes(&mut self) {
if self.top_grammar.grammars.len() == 0 {
assert!(self.nodes.is_empty(), "nodes added before add_grammar()");
Expand All @@ -62,6 +72,8 @@ impl GrammarBuilder {
"no nodes added before add_grammar() or finalize()"
);
self.top_grammar.grammars.last_mut().unwrap().nodes = nodes;
self.top_grammar.grammars.last_mut().unwrap().rx_nodes =
std::mem::take(&mut self.rx_nodes);
}
}

Expand Down
65 changes: 65 additions & 0 deletions parser/src/lark/compiler.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
use std::collections::HashMap;

use anyhow::Result;

use crate::{
api::{RegexSpec, TopLevelGrammar},
GrammarBuilder, NodeRef,
};

use super::ast::*;

/// Working state used while turning parsed Lark items into a grammar.
///
/// Built by `lark_to_llguidance`, driven through `execute()`, after which the
/// builder is finalized to produce the resulting grammar.
struct Compiler {
// Grammar builder that hands out placeholder nodes and is finalized at the end.
builder: GrammarBuilder,
// Parsed items to compile, as passed in by the caller.
items: Vec<Item>,
// Node information keyed by the name of the rule or token it was created for.
nodes: HashMap<String, NodeInfo>,
}

/// Per-name bookkeeping recorded for each rule or token that has been seen.
struct NodeInfo {
// Placeholder node obtained from the grammar builder for this name.
id: NodeRef,
// `true` for entries created from `Item::Token`, `false` for `Item::Rule`.
is_terminal: bool,
// Optional regex for this entry; the code shown here always stores `None`.
regex: Option<RegexSpec>,
}

/// Compiles parsed Lark `items` into a [`TopLevelGrammar`].
///
/// A fresh [`GrammarBuilder`] and an empty name table are wrapped in a
/// `Compiler`, the compiler is run over the items, and the builder is then
/// finalized.
///
/// # Errors
///
/// Propagates any error returned by `Compiler::execute` or by the builder's
/// `finalize`.
pub fn lark_to_llguidance(items: Vec<Item>) -> Result<TopLevelGrammar> {
    let builder = GrammarBuilder::new();
    let nodes = HashMap::new();
    let mut compiler = Compiler {
        builder,
        items,
        nodes,
    };
    compiler.execute()?;
    compiler.builder.finalize()
}

impl Compiler {
fn execute(&mut self) -> Result<()> {
for item in self.items.iter() {
match item {
Item::Rule(rule) => {
let id = self.builder.placeholder();
self.nodes.insert(
rule.name.clone(),
NodeInfo {
id,
is_terminal: false,
regex: None,
},
);
}
Item::Token(token_def) => {
let id = self.builder.placeholder();
self.nodes.insert(
token_def.name.clone(),
NodeInfo {
id,
is_terminal: true,
regex: None,
},
);
}
Item::Statement(statement) => todo!(),
}
}
Ok(())
}
}
1 change: 1 addition & 0 deletions parser/src/lark/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod ast;
mod compiler;
mod lexer;
mod parser;

Expand Down

0 comments on commit 7b279f3

Please sign in to comment.