Skip to content
This repository has been archived by the owner on Nov 30, 2024. It is now read-only.

Commit

Permalink
optimize decode_raw()
Browse files (browse the repository at this point in the history)
  • Loading branch information
mmoskal committed Nov 20, 2024
1 parent 6172936 commit 148399b
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions core/src/toktree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ impl TokTrie {
let max_examples = 50;

let ts_neg = ts.negated();
let use_neg = ts_neg.num_set() * 20 < ts.num_set();
let use_neg = ts_neg.num_set() * 10 < ts.num_set();
let ts1 = if use_neg { &ts_neg } else { &ts };
let num_set = ts1.num_set();
let max_tok = std::cmp::min(max_examples, num_set);
Expand Down Expand Up @@ -472,10 +472,12 @@ impl TokTrie {
}

/// Decodes `tokens` into raw bytes by concatenating, in order, the bytes
/// that `self.token` yields for each token id.
///
/// NOTE(review): this is a rendered diff whose +/- markers were lost, so two
/// bodies appear back to back. Going by the commit summary (7 additions,
/// 5 deletions) and the title "optimize decode_raw()", the iterator chain is
/// the removed version and the explicit loop is the added one.
pub fn decode_raw(&self, tokens: &[TokenId]) -> Vec<u8> {
// Removed by this commit: `to_vec()` copied each token's bytes into a
// temporary Vec before `flat_map` flattened it into the result.
tokens
.iter()
.flat_map(|t| self.token(*t).to_vec())
.collect()
// Added by this commit: a single output buffer that token bytes are
// appended to directly, with no per-token temporary.
let mut res = Vec::new();
// Capacity hint of 6 bytes per token plus 32 bytes of slack. `reserve`
// only pre-allocates; the Vec still grows if the estimate is too low.
res.reserve(tokens.len() * 6 + 32); // approximately
for &tok in tokens {
res.extend_from_slice(self.token(tok));
}
res
}

pub fn decode_str(&self, tokens: &[TokenId]) -> String {
Expand Down

0 comments on commit 148399b

Please sign in to comment.