From 529c7b1934f2ffbcc809ebfdf5991e071f2d8768 Mon Sep 17 00:00:00 2001 From: Michal Moskal Date: Fri, 8 Nov 2024 13:48:11 -0800 Subject: [PATCH] add jsonschema_validation feature --- parser/Cargo.toml | 8 +++---- parser/src/earley/from_guidance.rs | 2 +- parser/src/ffi.rs | 2 +- parser/src/json.rs | 37 +++++------------------------- parser/src/json_validation.rs | 32 ++++++++++++++++++++++++++ parser/src/lib.rs | 2 ++ sample_parser/src/minimal.rs | 2 +- 7 files changed, 47 insertions(+), 38 deletions(-) create mode 100644 parser/src/json_validation.rs diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 833a8230..1d7a763c 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -13,14 +13,14 @@ rustc-hash = "2.0.0" instant = "0.1.13" jsonschema = { version = "0.24.0", default-features = false, optional = true } url = "2.5.2" -lazy_static = "1.5.0" +lazy_static = { version = "1.5.0", optional = true } regex-syntax = "0.8.5" [features] default = ["jsonschema_validation", "lark"] -logging = [] -lark = [] # ~115k of code -jsonschema_validation = ["jsonschema"] +logging = [] # this is extensive debug logging +lark = [] # ~115k (binary) +jsonschema_validation = ["jsonschema", "lazy_static"] # ~2.5M (binary) [lib] crate-type = ["staticlib", "rlib", "cdylib"] diff --git a/parser/src/earley/from_guidance.rs b/parser/src/earley/from_guidance.rs index e945aeff..f4b82ac6 100644 --- a/parser/src/earley/from_guidance.rs +++ b/parser/src/earley/from_guidance.rs @@ -98,7 +98,7 @@ fn grammar_from_json( "cannot have both json_schema and lark_grammar" ); let opts = JsonCompileOptions { compact: false }; - opts.json_to_llg_no_validate(json_schema)? + opts.json_to_llg(json_schema)? } else { lark_to_llguidance(input.lark_grammar.as_ref().unwrap())? }; diff --git a/parser/src/ffi.rs b/parser/src/ffi.rs index 655c4136..7cd1a362 100644 --- a/parser/src/ffi.rs +++ b/parser/src/ffi.rs @@ -290,7 +290,7 @@ fn new_constraint_json(init: &LlgConstraintInit, json_schema: *const c_char) -> .map_err(|e| anyhow::anyhow!("Invalid JSON in json_schema: {e}"))?; let opts = JsonCompileOptions { compact: false }; let grammar = opts - .json_to_llg_no_validate(&json_schema) + .json_to_llg(&json_schema) .map_err(|e| anyhow::anyhow!("Error compiling JSON schema to LLG: {e}"))?; new_constraint_core(init, grammar) } diff --git a/parser/src/json.rs b/parser/src/json.rs index cd097981..0fde7f56 100644 --- a/parser/src/json.rs +++ b/parser/src/json.rs @@ -1,6 +1,4 @@ use anyhow::{anyhow, bail, Result}; -use jsonschema::Validator; -use lazy_static::lazy_static; use serde_json::{json, Value}; use std::{collections::HashMap, vec}; @@ -122,7 +120,12 @@ macro_rules! cache { impl JsonCompileOptions { pub fn json_to_llg(&self, schema: &Value) -> Result { let mut compiler = Compiler::new(self.clone()); - compiler.validate(schema)?; + #[cfg(feature = "jsonschema_validation")] + { + use crate::json_validation::validate_schema; + validate_schema(schema)?; + } + compiler.execute(schema)?; compiler.builder.finalize() } @@ -208,28 +211,6 @@ impl OptionalField for Value { } } -struct DummyResolver {} -impl jsonschema::Retrieve for DummyResolver { - fn retrieve( - &self, - uri: &jsonschema::Uri<&str>, - ) -> std::result::Result> { - Err(anyhow!("external resolver disabled (url: {})", uri).into()) - } -} - -lazy_static! { - static ref SCHEMA_VALIDATOR: Validator = { - Validator::options() - .with_draft(jsonschema::Draft::Draft7) - .with_retriever(DummyResolver {}) - .build(&json!({ - "$ref": "http://json-schema.org/draft-07/schema" - })) - .unwrap() - }; -} - impl Compiler { pub fn new(options: JsonCompileOptions) -> Self { Self { @@ -242,12 +223,6 @@ impl Compiler { } } - pub fn validate(&mut self, schema: &Value) -> Result<()> { - SCHEMA_VALIDATOR - .validate(schema) - .map_err(|mut e| anyhow!("Invalid schema: {}", e.next().unwrap())) - } - pub fn execute(&mut self, schema: &Value) -> Result<()> { self.builder.add_grammar(GrammarWithLexer { greedy_skip_rx: if self.options.compact { diff --git a/parser/src/json_validation.rs b/parser/src/json_validation.rs new file mode 100644 index 00000000..e5484b70 --- /dev/null +++ b/parser/src/json_validation.rs @@ -0,0 +1,32 @@ +use anyhow::{anyhow, Result}; +use jsonschema::Validator; +use lazy_static::lazy_static; +use serde_json::{json, Value}; + +struct DummyResolver {} +impl jsonschema::Retrieve for DummyResolver { + fn retrieve( + &self, + uri: &jsonschema::Uri<&str>, + ) -> std::result::Result> { + Err(anyhow!("external resolver disabled (url: {})", uri).into()) + } +} + +lazy_static! { + static ref SCHEMA_VALIDATOR: Validator = { + Validator::options() + .with_draft(jsonschema::Draft::Draft7) + .with_retriever(DummyResolver {}) + .build(&json!({ + "$ref": "http://json-schema.org/draft-07/schema" + })) + .unwrap() + }; +} + +pub fn validate_schema(schema: &Value) -> Result<()> { + SCHEMA_VALIDATOR + .validate(schema) + .map_err(|mut e| anyhow!("Invalid schema: {}", e.next().unwrap())) +} diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 7b491abe..2dd37dcb 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -24,6 +24,8 @@ pub mod ffi; mod grammar_builder; mod json; +#[cfg(feature = "jsonschema_validation")] +mod json_validation; pub use grammar_builder::{GrammarBuilder, NodeRef}; pub use json::JsonCompileOptions; pub use tokenizer_json::token_bytes_from_tokenizer_json; diff --git a/sample_parser/src/minimal.rs b/sample_parser/src/minimal.rs index 24eb25e3..a1d4ff65 100644 --- a/sample_parser/src/minimal.rs +++ b/sample_parser/src/minimal.rs @@ -60,7 +60,7 @@ fn main() { compact: false, }; let val = serde_json::from_str(&schema_file).expect("Invalid JSON in schema"); - opts.json_to_llg_no_validate(&val) + opts.json_to_llg(&val) .expect("Failed to convert JSON to LLG") } else { panic!("Unknown schema file extension")