Skip to content
This repository has been archived by the owner on Nov 30, 2024. It is now read-only.

Commit

Permalink
more stats
Browse files (browse the repository at this point in the history)
  • Loading branch information
mmoskal committed Aug 10, 2024
1 parent c1b18f6 commit 6934722
Showing 1 changed file with 14 additions and 8 deletions.
22 changes: 14 additions & 8 deletions core/src/toktree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -809,20 +809,24 @@ impl TokTrie {
next_pop
}

/// Walks the trie from the root with an explicit stack and tallies every node
/// found at depths `1..=depth` (the root itself, at depth 0, is never counted).
/// In the tuple-returning form, the first element is that node count and the
/// second is how many of those nodes have a token id.
// NOTE(review): this listing looks like a rendered diff whose +/- markers were
// lost — the two signatures and the two trailing return expressions appear to
// be the before/after versions of the same line; confirm against the commit.
fn count_until_depth(&self, depth: usize) -> usize {
fn count_until_depth(&self, depth: usize) -> (usize, usize) {
let mut count = 0;
let mut num_tokens = 0;
// Work list of (node, depth of that node); the root starts at depth 0.
let mut stack = vec![(self.root(), 0)];
while let Some((n, d)) = stack.pop() {
if d == depth {
// Depth limit reached: this node's children are not visited.
continue;
} else {
for c in self.node_children(n) {
// Every child visited counts as a node, token-bearing or not.
count += 1;
// Only children for which `token_id()` is `Some` count as tokens.
if c.token_id().is_some() {
num_tokens += 1;
}
stack.push((c, d + 1));
}
}
}
count
(count, num_tokens)
}

pub fn trie_stats(&self) -> String {
Expand Down Expand Up @@ -876,17 +880,19 @@ impl TokTrie {
}

for depth in 0..30 {
let count = self.count_until_depth(depth);
if count > 0 {
histogram.push_str(&format!("\ndepth {}: {} nodes", depth, count));
}
let (count, num_tokens) = self.count_until_depth(depth);
histogram.push_str(&format!(
"\ndepth {}: {} nodes {} tokens",
depth, count, num_tokens
));
}

format!(
"{} nodes, {} token nodes,\n{}",
"{}\n{} nodes, {} token nodes, {} token bytes",
histogram,
self.nodes.len(),
token_nodes,
histogram
self.token_data.len(),
)
}
}
Expand Down

0 comments on commit 6934722

Please sign in to comment.