Skip to content

Commit

Permalink
more robust logging infra
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Jul 31, 2024
1 parent b36f954 commit 20e7fae
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 53 deletions.
16 changes: 8 additions & 8 deletions parser/src/earley/from_guidance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use super::{grammar::SymbolProps, lexerspec::LexerSpec, CGrammar, Grammar};
use crate::api::{
GrammarWithLexer, Node, RegexId, RegexNode, RegexSpec, TopLevelGrammar, DEFAULT_CONTEXTUAL,
};
use crate::Logger;
use anyhow::{bail, Result};
use derivre::{ExprRef, RegexAst, RegexBuilder};

Expand Down Expand Up @@ -202,8 +203,8 @@ fn grammar_from_json(input: GrammarWithLexer) -> Result<(LexerSpec, Grammar)> {

pub fn grammars_from_json(
input: TopLevelGrammar,
print_out: bool,
) -> Result<(Vec<Arc<CGrammar>>, String)> {
logger: &mut Logger,
) -> Result<Vec<Arc<CGrammar>>> {
let grammars = input
.grammars
.into_iter()
Expand All @@ -214,24 +215,23 @@ pub fn grammars_from_json(
g.validate_grammar_refs(&grammars)?;
}

let mut out = String::new();
let grammars = grammars
.into_iter()
.enumerate()
.map(|(idx, (lex, mut grm))| {
if print_out {
writeln!(out, "Grammar #{}:\n{:?}\n{:?}\n", idx, lex, grm).unwrap();
if logger.level_enabled(2) {
writeln!(logger, "Grammar #{}:\n{:?}\n{:?}\n", idx, lex, grm).unwrap();
}

grm = grm.optimize();

if print_out {
write!(out, " == Optimize ==>\n{:?}", grm).unwrap();
if logger.level_enabled(2) {
write!(logger, " == Optimize ==>\n{:?}", grm).unwrap();
}

Arc::new(grm.compile(lex))
})
.collect::<Vec<_>>();

Ok((grammars, out))
Ok(grammars)
}
26 changes: 25 additions & 1 deletion parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,28 @@ mod tokenparser;
pub use tokenparser::TokenParser;
pub mod api;
pub mod output;
pub use toktrie;
pub use toktrie;

mod logging;
pub use logging::Logger;

/// Logs a formatted line at info verbosity (level 2).
///
/// `$s` is any value with a public `logger: Logger` field (e.g.
/// `TokenParser`); the remaining arguments are `format!`-style. The
/// level check happens first, so when info logging is disabled the
/// message is never formatted.
#[macro_export]
macro_rules! infoln {
    ($s:expr, $($arg:tt)*) => {
        if $s.logger.level_enabled(2) {
            // `writeln!` needs the fmt::Write trait in scope for Logger.
            use std::fmt::Write;
            writeln!($s.logger, $($arg)*).unwrap();
        }
    };
}

/// Logs a formatted line at warning verbosity (level 1), prefixed with
/// `"Warning: "`.
///
/// `$s` is any value with a public `logger: Logger` field; the remaining
/// arguments are `format!`-style. The level check happens first, so when
/// warnings are disabled the message is never formatted.
#[macro_export]
macro_rules! warn {
    ($s:expr, $($arg:tt)*) => {
        if $s.logger.level_enabled(1) {
            // `writeln!`/`write_str` need the fmt::Write trait in scope.
            use std::fmt::Write;
            $s.logger.write_str("Warning: ").unwrap();
            writeln!($s.logger, $($arg)*).unwrap();
        }
    };
}
95 changes: 95 additions & 0 deletions parser/src/logging.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
use std::fmt::Write;

/// Logger with two output sinks — an in-memory buffer and stderr — each
/// gated by its own verbosity level (0 = off, 1 = warnings, 2 = info).
///
/// Formatted output goes through the `std::fmt::Write` impl below, which
/// routes each write to every enabled sink.
pub struct Logger {
    // Cached max(buffer_level, stderr_level); a message is worth
    // formatting at all iff its level passes level_enabled().
    effective_level: u32,
    // Verbosity of the in-memory buffer sink (0 disables buffering).
    buffer_level: u32,
    // Verbosity of the stderr sink (0 disables stderr output).
    stderr_level: u32,
    // Accumulated buffered log text; drained by get_and_clear_logs().
    buffer: String,
}

impl Clone for Logger {
    /// Copies the level configuration but deliberately starts with an
    /// empty buffer, so a cloned parser does not inherit old log output.
    fn clone(&self) -> Self {
        Self {
            effective_level: self.effective_level,
            buffer_level: self.buffer_level,
            stderr_level: self.stderr_level,
            buffer: String::new(), // clean logs on clone
        }
    }
}

impl Logger {
    /// Creates a logger with the given per-sink verbosity levels.
    pub fn new(buffer_level: u32, stderr_level: u32) -> Self {
        Self {
            buffer_level,
            stderr_level,
            effective_level: std::cmp::max(buffer_level, stderr_level),
            buffer: String::new(),
        }
    }

    /// Logs `s` as a warning (level 1), prefixed with "Warning: ".
    pub fn warn(&mut self, s: &str) {
        if self.level_enabled(1) {
            self.write_str("Warning: ").unwrap();
            self.write_str(s).unwrap();
            self.write_str("\n").unwrap();
        }
    }

    /// Logs `s` at info verbosity (level 2).
    pub fn info(&mut self, s: &str) {
        if self.level_enabled(2) {
            self.write_str(s).unwrap();
            self.write_str("\n").unwrap();
        }
    }

    /// True if a message at `level` would be emitted by at least one sink.
    #[inline(always)]
    pub fn level_enabled(&self, level: u32) -> bool {
        level <= self.effective_level
    }

    /// Highest verbosity enabled on any sink.
    #[inline(always)]
    pub fn effective_level(&self) -> u32 {
        self.effective_level
    }

    /// Verbosity of the in-memory buffer sink.
    #[inline(always)]
    pub fn buffer_level(&self) -> u32 {
        self.buffer_level
    }

    /// Verbosity of the stderr sink.
    #[inline(always)]
    pub fn stderr_level(&self) -> u32 {
        self.stderr_level
    }

    /// Sets the buffer sink verbosity; both raising and lowering it take
    /// effect immediately.
    pub fn set_buffer_level(&mut self, buffer_level: u32) {
        self.buffer_level = buffer_level;
        // BUGFIX: recompute from both sink levels. The previous
        // max(effective_level, buffer_level) was a one-way ratchet, so
        // lowering a level could never reduce the effective level.
        self.effective_level = std::cmp::max(self.buffer_level, self.stderr_level);
    }

    /// Sets the stderr sink verbosity; both raising and lowering it take
    /// effect immediately.
    pub fn set_stderr_level(&mut self, stderr_level: u32) {
        self.stderr_level = stderr_level;
        // BUGFIX: see set_buffer_level — recompute instead of ratcheting.
        self.effective_level = std::cmp::max(self.buffer_level, self.stderr_level);
    }

    /// Read-only view of the accumulated buffered logs.
    pub fn get_buffer(&self) -> &str {
        &self.buffer
    }

    /// Returns the buffered logs and clears the buffer without
    /// reallocating (mem::take leaves an empty String behind).
    pub fn get_and_clear_logs(&mut self) -> String {
        std::mem::take(&mut self.buffer)
    }
}

impl Write for Logger {
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        // BUGFIX: route to a sink only when that sink is enabled at all.
        // The old checks compared effective_level (== max of both sink
        // levels) against each sink level, which is always true — so e.g.
        // Logger::new(0, n) still accumulated text in `buffer` forever.
        // NOTE(review): callers pre-filter via level_enabled(), which uses
        // the max of both sinks, so a message may still reach a sink whose
        // level is below the message's; exact per-message routing would
        // require plumbing the message level into write_str.
        if self.buffer_level > 0 {
            self.buffer.push_str(s);
        }
        if self.stderr_level > 0 {
            eprint!("{}", s);
        }
        Ok(())
    }
}
45 changes: 5 additions & 40 deletions parser/src/tokenparser.rs
Original file line number Diff line number Diff line change
@@ -1,49 +1,21 @@
use std::fmt::Write;
use std::sync::Arc;

use crate::{
api::{GenGrammarOptions, StopReason, TopLevelGrammar},
earley::{grammars_from_json, CGrammar, CSymIdx, ModelVariable, Parser, ParserStats},
infoln, warn, Logger,
};
use anyhow::Result;
use serde_json::json;
use toktrie::{InferenceCapabilities, SimpleVob, StepArg, StepResult, TokenId, TokenizerEnv};

macro_rules! infoln {
($s:expr, $($arg:tt)*) => {
if $s.log_level >= 2 {
if $s.save_logs {
writeln!($s.logs, $($arg)*).unwrap();
} else {
eprintln!($($arg)*);
}
}
};
}

macro_rules! warn {
($s:expr, $($arg:tt)*) => {
if $s.log_level >= 1 {
if $s.save_logs {
$s.logs.push_str("Warning: ");
writeln!($s.logs, $($arg)*).unwrap();
} else {
eprint!("Warning: ");
eprintln!($($arg)*);
}
}
};
}

#[derive(Clone)]
pub struct TokenParser {
pub token_env: Arc<dyn TokenizerEnv + Sync>,
pub parser: Parser,
pub log_level: isize,
pub mid_process_start_time: std::time::Instant,
pub inference_caps: InferenceCapabilities,
save_logs: bool,
logs: String,
pub logger: Logger,
pending_bogus_backtrack: u32,
// sampling any of these will pop the parser stack:
pop_tokens: Option<SimpleVob>,
Expand Down Expand Up @@ -83,23 +55,20 @@ impl TokenParser {
pub fn from_llguidance_json(
token_env: Arc<dyn TokenizerEnv + Sync>,
buf: TopLevelGrammar,
log_level: isize,
save_logs: bool,
mut logger: Logger,
inference_caps: InferenceCapabilities,
) -> Result<Self> {
let mid_process_start_time = std::time::Instant::now();
let test_trace = buf.test_trace;
let max_tokens = buf.max_tokens.unwrap_or(usize::MAX);
let (compiled_grammars, grammar_log) = grammars_from_json(buf, log_level >= 2)?;
let compiled_grammars = grammars_from_json(buf, &mut logger)?;
let parser = Parser::new(
Arc::clone(&compiled_grammars[0]),
GenGrammarOptions::default(),
)?;

Ok(TokenParser {
log_level,
logs: grammar_log,
save_logs,
logger,
test_trace,
token_env,
inference_caps,
Expand All @@ -122,10 +91,6 @@ impl TokenParser {
})
}

pub fn get_and_clear_logs(&mut self) -> String {
std::mem::replace(&mut self.logs, String::new())
}

pub fn stop_reason(&self) -> StopReason {
self.stop_reason
}
Expand Down
2 changes: 1 addition & 1 deletion rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions rust/src/py.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::{borrow::Cow, sync::Arc};
use llguidance_parser::toktrie::{
self, InferenceCapabilities, StepArg, StepResult, TokRxInfo, TokTrie, TokenId, TokenizerEnv,
};
use llguidance_parser::Logger;
use llguidance_parser::{
api::TopLevelGrammar,
output::{ParserOutput, Reporter},
Expand Down Expand Up @@ -50,9 +51,9 @@ impl LLInterpreter {
conditional_ff_tokens: true,
fork: false,
};
let inner =
TokenParser::from_llguidance_json(Arc::new(env), arg, log_level, false, inference_caps)
.map_err(|e| PyValueError::new_err(e.to_string()))?;
let logger = Logger::new(0, std::cmp::max(0, log_level) as u32);
let inner = TokenParser::from_llguidance_json(Arc::new(env), arg, logger, inference_caps)
.map_err(|e| PyValueError::new_err(e.to_string()))?;
let reporter = Reporter::new(&inner);
Ok(LLInterpreter {
inner,
Expand Down

0 comments on commit 20e7fae

Please sign in to comment.