From 67ae422aa4510bfe4ba98e7ca4251ce9e36cd1b7 Mon Sep 17 00:00:00 2001 From: Michal Moskal Date: Wed, 11 Dec 2024 22:20:15 +0100 Subject: [PATCH] optimize enums --- parser/src/grammar_builder.rs | 4 +++ parser/src/json/compiler.rs | 18 +++++++++--- parser/src/json/schema.rs | 53 ++++++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/parser/src/grammar_builder.rs b/parser/src/grammar_builder.rs index f97d2d28..5025da74 100644 --- a/parser/src/grammar_builder.rs +++ b/parser/src/grammar_builder.rs @@ -102,6 +102,10 @@ impl RegexBuilder { self.add_node(RegexNode::And(nodes)) } + pub fn or(&mut self, nodes: Vec) -> RegexId { + self.add_node(RegexNode::Or(nodes)) + } + fn finalize(&mut self) -> Vec { let r = std::mem::take(&mut self.nodes); *self = Self::new(); diff --git a/parser/src/json/compiler.rs b/parser/src/json/compiler.rs index 2e3cc4d5..60e2bbdc 100644 --- a/parser/src/json/compiler.rs +++ b/parser/src/json/compiler.rs @@ -216,6 +216,7 @@ impl Compiler { max_length, pattern, format, + const_string: _, } => self.gen_json_string( *min_length, *max_length, @@ -260,6 +261,18 @@ impl Compiler { } fn process_any_of(&mut self, options: Vec) -> Result { + let consts = options + .iter() + .filter_map(|schema| schema.const_compile()) + .collect::>(); + if consts.len() == options.len() { + let consts = consts + .into_iter() + .map(|c| self.builder.regex.add_node(c)) + .collect::>(); + let rx = self.builder.regex.or(consts); + return Ok(self.builder.lexeme(RegexSpec::RegexId(rx), false)); + } let mut nodes = vec![]; let mut errors = vec![]; for option in options.into_iter() { @@ -447,10 +460,7 @@ impl Compiler { .collect::>(); let taken = self.builder.regex.select(taken_name_ids); let not_taken = self.builder.regex.not(taken); - let valid = self - .builder - .regex - .regex(format!("\"({})*\"", CHAR_REGEX)); + let valid = self.builder.regex.regex(format!("\"({})*\"", CHAR_REGEX)); let valid_and_not_taken = self.builder.regex.and(vec![valid, not_taken]); let rx = RegexSpec::RegexId(valid_and_not_taken); self.builder.lexeme(rx, false) diff --git a/parser/src/json/schema.rs b/parser/src/json/schema.rs index 06ba72fa..0114aeec 100644 --- a/parser/src/json/schema.rs +++ b/parser/src/json/schema.rs @@ -10,6 +10,8 @@ use referencing::{Draft, Registry, Resolver, ResourceRef}; use regex_syntax::escape; use serde_json::Value; +use crate::api::RegexNode; + const DEFAULT_ROOT_URI: &str = "json-schema:///"; const DEFAULT_DRAFT: Draft = Draft::Draft202012; const TYPES: [&str; 6] = ["null", "boolean", "number", "string", "array", "object"]; @@ -95,6 +97,7 @@ pub enum Schema { max_length: Option, pattern: Option, format: Option, + const_string: Option, }, Array { min_items: u64, @@ -128,6 +131,40 @@ impl Schema { } } + pub fn const_compile(&self) -> Option { + let str = match self { + Schema::Null => "null", + Schema::Boolean => return Some(RegexNode::Regex("true|false".to_string())), + Schema::Number { + minimum: Some(x), + maximum: Some(y), + .. + } if x == y => return Some(RegexNode::Literal(x.to_string())), + Schema::String { + const_string: Some(s), + .. + } => s, + Schema::LiteralBool { value } => { + if *value { + "true" + } else { + "false" + } + } + + Schema::Any + | Schema::Number { .. } + | Schema::String { .. } + | Schema::Unsatisfiable { .. } + | Schema::Array { .. } + | Schema::Object { .. } + | Schema::AnyOf { .. } + | Schema::OneOf { .. } + | Schema::Ref { .. } => return None, + }; + Some(RegexNode::Literal(str.to_string())) + } + /// Shallowly normalize the schema, removing any unnecessary nesting or empty options. fn normalize(self) -> Schema { match self { @@ -423,7 +460,13 @@ fn compile_contents_map(ctx: &Context, mut schemadict: HashMap<&str, &Value>) -> .iter() .map(|value| compile_resource(&ctx, ctx.as_resource_ref(value))) .collect::>>()?; - let merged = intersect(ctx, vec![siblings].into_iter().chain(options.into_iter()).collect())?; + let merged = intersect( + ctx, + vec![siblings] + .into_iter() + .chain(options.into_iter()) + .collect(), + )?; return Ok(merged); } @@ -556,6 +599,7 @@ fn compile_const(instance: &Value) -> Result { max_length: None, pattern: Some(format!("^{}$", escape(s))), format: None, + const_string: Some(s.clone()), }), Value::Array(items) => { let prefix_items = items @@ -717,6 +761,7 @@ fn compile_string( max_length, pattern, format, + const_string: None, }) } @@ -904,12 +949,14 @@ fn intersect_two(ctx: &Context, schema0: Schema, schema1: Schema) -> Result Schema::String { min_length: min1.max(min2), @@ -938,6 +985,10 @@ fn intersect_two(ctx: &Context, schema0: Schema, schema1: Schema) -> Result Some(s1), + _ => None, + }, }, ( Schema::Array {