add support for json_schema and lark_grammar in GrammarWithLexer
mmoskal committed Nov 4, 2024
1 parent 9c9ab24 commit 075714b
Showing 2 changed files with 45 additions and 2 deletions.
13 changes: 13 additions & 0 deletions parser/src/api.rs
@@ -1,6 +1,7 @@
use std::fmt::Debug;

use serde::{Deserialize, Serialize};
use serde_json::Value;

/// This represents a collection of grammars, with a designated
/// "start" grammar at first position.
@@ -19,8 +20,18 @@ pub const DEFAULT_CONTEXTUAL: bool = true;
#[derive(Serialize, Deserialize, Clone, Default)]
pub struct GrammarWithLexer {
/// The start symbol is at nodes[0]
/// When nodes is empty, then one of json_schema or lark_grammar must be set.
#[serde(default)]
pub nodes: Vec<Node>,

/// The JSON schema that the grammar's output should conform to.
/// When this is set, nodes and rx_nodes must be empty.
pub json_schema: Option<Value>,

/// The Lark grammar source to compile this grammar from.
/// When this is set, nodes and rx_nodes must be empty.
pub lark_grammar: Option<String>,

/// This is no longer used.
/// When enabled, the grammar can use `Lexeme` but not `Gen`.
/// When disabled, the grammar can use `Gen` but not `Lexeme`.
@@ -374,6 +385,8 @@ impl TopLevelGrammar {
json_allowed_escapes: None,
json_raw: None,
}],
json_schema: None,
lark_grammar: None,
greedy_lexer: true,
greedy_skip_rx: None,
contextual: None,
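For illustration only (not part of the commit): with the fields above and serde's default field naming, the serialized form of a GrammarWithLexer driven by a JSON schema might look like the sketch below. The schema itself is a made-up example.

```rust
use serde_json::json;

fn main() {
    // Hypothetical serialized GrammarWithLexer: `nodes` is empty, so the
    // grammar is defined entirely by `json_schema`; `lark_grammar` stays unset.
    let grammar = json!({
        "nodes": [],
        "json_schema": {
            "type": "object",
            "properties": { "name": { "type": "string" } },
            "required": ["name"]
        }
    });
    println!("{}", serde_json::to_string_pretty(&grammar).unwrap());
}
```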
34 changes: 32 additions & 2 deletions parser/src/earley/from_guidance.rs
@@ -6,7 +6,8 @@ use crate::api::{
GrammarWithLexer, Node, ParserLimits, RegexId, RegexNode, RegexSpec, TopLevelGrammar,
DEFAULT_CONTEXTUAL,
};
use crate::{loginfo, Logger};
use crate::lark::{lark_to_llguidance, parse_lark};
use crate::{loginfo, JsonCompileOptions, Logger};
use anyhow::{bail, ensure, Result};
use derivre::{ExprRef, JsonQuoteOptions, RegexAst, RegexBuilder};
use instant::Instant;
@@ -84,8 +85,37 @@ fn map_rx_nodes(
fn grammar_from_json(
tok_env: &TokEnv,
limits: &mut ParserLimits,
input: GrammarWithLexer,
mut input: GrammarWithLexer,
) -> Result<(LexerSpec, Grammar)> {
if input.json_schema.is_some() || input.lark_grammar.is_some() {
ensure!(
input.nodes.is_empty() && input.rx_nodes.is_empty(),
"cannot have both json_schema/lark_grammar and nodes/rx_nodes"
);

let mut new_grm = if let Some(json_schema) = input.json_schema.as_ref() {
ensure!(
input.lark_grammar.is_none(),
"cannot have both json_schema and lark_grammar"
);
let opts = JsonCompileOptions { compact: false };
opts.json_to_llg_no_validate(json_schema)?
} else {
let items = parse_lark(input.lark_grammar.as_ref().unwrap())?;
lark_to_llguidance(items)?
};

let g = new_grm.grammars.pop().unwrap();

input.greedy_skip_rx = g.greedy_skip_rx;
input.nodes = g.nodes;
input.rx_nodes = g.rx_nodes;
input.contextual = g.contextual;

input.lark_grammar = None;
input.json_schema = None;
}

ensure!(input.nodes.len() > 0, "empty grammar");

let (builder, rx_nodes) = map_rx_nodes(limits, input.rx_nodes, input.allow_invalid_utf8)?;
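Again as a sketch rather than part of the commit: the Lark path can be exercised with a payload like the one below. The `grammars` array is implied by `new_grm.grammars.pop()` above; whether TopLevelGrammar requires any other fields is not visible in this diff, so treat the envelope as an assumption.

```rust
use serde_json::json;

fn main() {
    // Hypothetical top-level payload: the single GrammarWithLexer carries only
    // `lark_grammar`. grammar_from_json() parses it (parse_lark +
    // lark_to_llguidance) and splices the resulting nodes, rx_nodes,
    // greedy_skip_rx and contextual back into the input before normal node
    // compilation proceeds.
    let top_level = json!({
        "grammars": [
            { "lark_grammar": "start: \"hello, world\"" }
        ]
    });
    println!("{}", serde_json::to_string_pretty(&top_level).unwrap());
}
```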
