Skip to content
This repository has been archived by the owner on Nov 30, 2024. It is now read-only.

Commit

Permalink
bugfix and small API usability
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Jul 24, 2024
1 parent 158139a commit ae506a0
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 5 deletions.
9 changes: 8 additions & 1 deletion core/src/svob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ impl SimpleVob {
r
}

pub fn all_true(size: usize) -> Self {
pub fn alloc_ones(size: usize) -> Self {
let mut r = Self::alloc(size);
r.set_all(true);
r
Expand Down Expand Up @@ -304,6 +304,13 @@ impl SimpleVob {
.all(|(a, b)| *a & *b == 0)
}

/// In-place set difference: clears in `self` every bit that is set in `other`.
///
/// # Panics
///
/// Panics if `self.size` and `other.size` differ.
pub fn sub(&mut self, other: &SimpleVob) {
    assert_eq!(self.size, other.size);
    self.data
        .iter_mut()
        .zip(other.data.iter())
        // Keep only the bits of each word that are NOT present in the matching word of `other`.
        .for_each(|(word, mask)| *word &= !*mask);
}

pub fn first_bit_set_here_and_in(&self, other: &SimpleVob) -> Option<usize> {
assert_eq!(self.size, other.size);
for (idx, (a, b)) in self.data.iter().zip(other.data.iter()).enumerate() {
Expand Down
14 changes: 10 additions & 4 deletions core/src/toktree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,7 @@ impl TokTrie {
}

pub fn alloc_token_set(&self) -> SimpleVob {
let mut r = SimpleVob::new();
r.resize(self.vocab_size() + 1);
r
SimpleVob::alloc(self.vocab_size() + 1)
}

pub fn singleton_token_set(&self, tok: TokenId) -> SimpleVob {
Expand Down Expand Up @@ -324,6 +322,9 @@ impl TokTrie {
}

pub fn token(&self, idx: u32) -> &[u8] {
if idx >= self.token_offsets.len() as u32 {
return &[];
}
let off = self.token_offsets[idx as usize];
let len = off & ((1 << LEN_BITS) - 1);
let off = (off >> LEN_BITS) as usize;
Expand Down Expand Up @@ -711,8 +712,13 @@ impl TokTrie {
}
}

let n = self.child_at_bytes(self.root(), start);
if n.is_none() {
return;
}
let n = n.unwrap();

r.trie_started();
let n = self.child_at_bytes(self.root(), start).unwrap();
let defl_tok = self.vocab_size() as u32;
let off = self.node_offset(n);
let mut p = off + 1;
Expand Down

0 comments on commit ae506a0

Please sign in to comment.