Skip to content
This repository has been archived by the owner on Nov 30, 2024. It is now read-only.

Commit

Permalink
limit debug tokens to a maximum of 200 and indicate truncation in output
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Nov 28, 2024
1 parent 2a90302 commit d8545d3
Showing 1 changed file with 18 additions and 2 deletions.
20 changes: 18 additions & 2 deletions core/src/toktree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -422,8 +422,16 @@ impl TokTrie {
.join("‧")
}

/// Maximum number of tokens `tokens_dbg` renders; longer inputs are cut to
/// their first `MAX_DBG_TOKENS` entries and the output gets a trailing `…`.
pub const MAX_DBG_TOKENS: usize = 200;

pub fn tokens_dbg(&self, toks: &[u32]) -> String {
let joined = toks
let (limited, toks) = if toks.len() > Self::MAX_DBG_TOKENS {
(true, &toks[0..Self::MAX_DBG_TOKENS])
} else {
(false, toks)
};

let mut joined = toks
.iter()
.map(|t| {
let s = self.token_dbg(*t);
Expand All @@ -436,6 +444,10 @@ impl TokTrie {
.collect::<Vec<_>>()
.join("‧");

if limited {
joined.push_str("…");
}

format!("\"{}\"", joined)
}

Expand Down Expand Up @@ -516,6 +528,9 @@ impl TokTrie {
for c in self.node_children(n) {
if let Some(tok) = c.token_id() {
res.push(tok);
if res.len() > Self::MAX_DBG_TOKENS + 1 {
break;
}
}
stack.push(c);
}
Expand Down Expand Up @@ -756,7 +771,8 @@ impl TokTrie {
}

/// Calls `child_at_bytes` starting from `self.root()` with `bytes` and returns
/// the `token_id()` of the node it yields.
///
/// Returns `None` when `child_at_bytes` yields no node, or when that node's
/// `token_id()` is `None`.
pub fn token_id_at_bytes(&self, bytes: &[u8]) -> Option<TokenId> {
self.child_at_bytes(self.root(), bytes).and_then(|n| n.token_id())
self.child_at_bytes(self.root(), bytes)
.and_then(|n| n.token_id())
}

pub fn compute_bias(&self, r: &mut impl Recognizer, logits: &mut SimpleVob) {
Expand Down

0 comments on commit d8545d3

Please sign in to comment.