diff --git a/parser/src/earley/from_guidance.rs b/parser/src/earley/from_guidance.rs index a03dccd7..b976e6f8 100644 --- a/parser/src/earley/from_guidance.rs +++ b/parser/src/earley/from_guidance.rs @@ -5,6 +5,7 @@ use super::{grammar::SymbolProps, lexerspec::LexerSpec, CGrammar, Grammar}; use crate::api::{ GrammarWithLexer, Node, RegexId, RegexNode, RegexSpec, TopLevelGrammar, DEFAULT_CONTEXTUAL, }; +use crate::Logger; use anyhow::{bail, Result}; use derivre::{ExprRef, RegexAst, RegexBuilder}; @@ -202,8 +203,8 @@ fn grammar_from_json(input: GrammarWithLexer) -> Result<(LexerSpec, Grammar)> { pub fn grammars_from_json( input: TopLevelGrammar, - print_out: bool, -) -> Result<(Vec<Arc<CGrammar>>, String)> { + logger: &mut Logger, +) -> Result<Vec<Arc<CGrammar>>> { let grammars = input .grammars .into_iter() @@ -214,24 +215,23 @@ pub fn grammars_from_json( g.validate_grammar_refs(&grammars)?; } - let mut out = String::new(); let grammars = grammars .into_iter() .enumerate() .map(|(idx, (lex, mut grm))| { - if print_out { - writeln!(out, "Grammar #{}:\n{:?}\n{:?}\n", idx, lex, grm).unwrap(); + if logger.level_enabled(2) { + writeln!(logger, "Grammar #{}:\n{:?}\n{:?}\n", idx, lex, grm).unwrap(); } grm = grm.optimize(); - if print_out { - write!(out, " == Optimize ==>\n{:?}", grm).unwrap(); + if logger.level_enabled(2) { + write!(logger, " == Optimize ==>\n{:?}", grm).unwrap(); } Arc::new(grm.compile(lex)) }) .collect::<Vec<_>>(); - Ok((grammars, out)) + Ok(grammars) } diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 693aec83..0c2c3d8e 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -3,4 +3,28 @@ mod tokenparser; pub use tokenparser::TokenParser; pub mod api; pub mod output; -pub use toktrie; \ No newline at end of file +pub use toktrie; + +mod logging; +pub use logging::Logger; + +#[macro_export] +macro_rules! infoln { + ($s:expr, $($arg:tt)*) => { + if $s.logger.level_enabled(2) { + use std::fmt::Write; + writeln!($s.logger, $($arg)*).unwrap(); + } + }; +} + +#[macro_export] +macro_rules! 
warn { + ($s:expr, $($arg:tt)*) => { + if $s.logger.level_enabled(1) { + use std::fmt::Write; + $s.logger.write_str("Warning: ").unwrap(); + writeln!($s.logger, $($arg)*).unwrap(); + } + }; +} diff --git a/parser/src/logging.rs b/parser/src/logging.rs new file mode 100644 index 00000000..893f7ed6 --- /dev/null +++ b/parser/src/logging.rs @@ -0,0 +1,95 @@ +use std::fmt::Write; + +pub struct Logger { + effective_level: u32, + buffer_level: u32, + stderr_level: u32, + buffer: String, +} + +impl Clone for Logger { + fn clone(&self) -> Self { + Self { + effective_level: self.effective_level, + buffer_level: self.buffer_level, + stderr_level: self.stderr_level, + buffer: String::new(), // clean logs on clone + } + } +} + +impl Logger { + pub fn new(buffer_level: u32, stderr_level: u32) -> Self { + Self { + buffer_level, + stderr_level, + effective_level: std::cmp::max(buffer_level, stderr_level), + buffer: String::new(), + } + } + + pub fn warn(&mut self, s: &str) { + if self.level_enabled(1) { + self.write_str("Warning: ").unwrap(); + self.write_str(s).unwrap(); + self.write_str("\n").unwrap(); + } + } + + pub fn info(&mut self, s: &str) { + if self.level_enabled(2) { + self.write_str(s).unwrap(); + self.write_str("\n").unwrap(); + } + } + + #[inline(always)] + pub fn level_enabled(&self, level: u32) -> bool { + level <= self.effective_level + } + + #[inline(always)] + pub fn effective_level(&self) -> u32 { + self.effective_level + } + + #[inline(always)] + pub fn buffer_level(&self) -> u32 { + self.buffer_level + } + + #[inline(always)] + pub fn stderr_level(&self) -> u32 { + self.stderr_level + } + + pub fn set_buffer_level(&mut self, buffer_level: u32) { + self.buffer_level = buffer_level; + self.effective_level = std::cmp::max(self.effective_level, self.buffer_level); + } + + pub fn set_stderr_level(&mut self, stderr_level: u32) { + self.stderr_level = stderr_level; + self.effective_level = std::cmp::max(self.effective_level, self.stderr_level); + } + + pub fn 
get_buffer(&self) -> &str { + &self.buffer + } + + pub fn get_and_clear_logs(&mut self) -> String { + std::mem::take(&mut self.buffer) + } +} + +impl Write for Logger { + fn write_str(&mut self, s: &str) -> std::fmt::Result { + if self.effective_level >= self.buffer_level { + self.buffer.push_str(s); + } + if self.effective_level >= self.stderr_level { + eprint!("{}", s); + } + Ok(()) + } +} diff --git a/parser/src/tokenparser.rs b/parser/src/tokenparser.rs index 92a6966f..eb3e5fd7 100644 --- a/parser/src/tokenparser.rs +++ b/parser/src/tokenparser.rs @@ -1,49 +1,21 @@ -use std::fmt::Write; use std::sync::Arc; use crate::{ api::{GenGrammarOptions, StopReason, TopLevelGrammar}, earley::{grammars_from_json, CGrammar, CSymIdx, ModelVariable, Parser, ParserStats}, + infoln, warn, Logger, }; use anyhow::Result; use serde_json::json; use toktrie::{InferenceCapabilities, SimpleVob, StepArg, StepResult, TokenId, TokenizerEnv}; -macro_rules! infoln { - ($s:expr, $($arg:tt)*) => { - if $s.log_level >= 2 { - if $s.save_logs { - writeln!($s.logs, $($arg)*).unwrap(); - } else { - eprintln!($($arg)*); - } - } - }; -} - -macro_rules! 
warn { - ($s:expr, $($arg:tt)*) => { - if $s.log_level >= 1 { - if $s.save_logs { - $s.logs.push_str("Warning: "); - writeln!($s.logs, $($arg)*).unwrap(); - } else { - eprint!("Warning: "); - eprintln!($($arg)*); - } - } - }; -} - #[derive(Clone)] pub struct TokenParser { pub token_env: Arc<dyn TokenizerEnv + Sync>, pub parser: Parser, - pub log_level: isize, pub mid_process_start_time: std::time::Instant, pub inference_caps: InferenceCapabilities, - save_logs: bool, - logs: String, + pub logger: Logger, pending_bogus_backtrack: u32, // sampling any of these will pop the parser stack: pop_tokens: Option<SimpleVob>, @@ -83,23 +55,20 @@ impl TokenParser { pub fn from_llguidance_json( token_env: Arc<dyn TokenizerEnv + Sync>, buf: TopLevelGrammar, - log_level: isize, - save_logs: bool, + mut logger: Logger, inference_caps: InferenceCapabilities, ) -> Result<Self> { let mid_process_start_time = std::time::Instant::now(); let test_trace = buf.test_trace; let max_tokens = buf.max_tokens.unwrap_or(usize::MAX); - let (compiled_grammars, grammar_log) = grammars_from_json(buf, log_level >= 2)?; + let compiled_grammars = grammars_from_json(buf, &mut logger)?; let parser = Parser::new( Arc::clone(&compiled_grammars[0]), GenGrammarOptions::default(), )?; Ok(TokenParser { - log_level, - logs: grammar_log, - save_logs, + logger, test_trace, token_env, inference_caps, @@ -122,10 +91,6 @@ impl TokenParser { }) } - pub fn get_and_clear_logs(&mut self) -> String { - std::mem::replace(&mut self.logs, String::new()) - } - pub fn stop_reason(&self) -> StopReason { self.stop_reason } diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 448acf34..819d2c41 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -368,7 +368,7 @@ checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" [[package]] name = "toktrie" version = "0.1.0" -source = "git+https://github.com/microsoft/toktrie?rev=d8d179ae2bbfe41fcd140d583781efd9fedfcfb0#d8d179ae2bbfe41fcd140d583781efd9fedfcfb0" +source = 
"git+https://github.com/microsoft/toktrie?rev=7550e792ba9af7d22ee6a9bf4fd7631e1ca659f9#7550e792ba9af7d22ee6a9bf4fd7631e1ca659f9" dependencies = [ "anyhow", "bytemuck", diff --git a/rust/src/py.rs b/rust/src/py.rs index faf23815..ec2c9946 100644 --- a/rust/src/py.rs +++ b/rust/src/py.rs @@ -3,6 +3,7 @@ use std::{borrow::Cow, sync::Arc}; use llguidance_parser::toktrie::{ self, InferenceCapabilities, StepArg, StepResult, TokRxInfo, TokTrie, TokenId, TokenizerEnv, }; +use llguidance_parser::Logger; use llguidance_parser::{ api::TopLevelGrammar, output::{ParserOutput, Reporter}, @@ -50,9 +51,9 @@ impl LLInterpreter { conditional_ff_tokens: true, fork: false, }; - let inner = - TokenParser::from_llguidance_json(Arc::new(env), arg, log_level, false, inference_caps) - .map_err(|e| PyValueError::new_err(e.to_string()))?; + let logger = Logger::new(0, std::cmp::max(0, log_level) as u32); + let inner = TokenParser::from_llguidance_json(Arc::new(env), arg, logger, inference_caps) + .map_err(|e| PyValueError::new_err(e.to_string()))?; let reporter = Reporter::new(&inner); Ok(LLInterpreter { inner,