Skip to content

Commit

Permalink
use new marker bit in StateID
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Aug 17, 2024
1 parent 6d3ba4a commit c344d51
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 11 deletions.
4 changes: 2 additions & 2 deletions parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ version = "0.1.6"
edition = "2021"

[dependencies]
toktrie = { git = "https://github.com/microsoft/toktrie", rev = "59641076bc86504317f07f99465a0f600e957fd3" }
derivre = { git = "https://github.com/microsoft/derivre", rev = "ad363698cc95d7e63c5116aa114596f18dc79385" }
toktrie = { git = "https://github.com/microsoft/toktrie", rev = "bafe0f49e4334cc82c8a0234270dfbac71697db1" }
derivre = { git = "https://github.com/microsoft/derivre", rev = "401b25ff73a738dcf5e2d94f73539f97ed9aed59" }
serde = { version = "1.0.192", features = ["derive"] }
serde_json = "1.0.108"
anyhow = "1.0.75"
Expand Down
8 changes: 5 additions & 3 deletions parser/src/earley/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use toktrie::SimpleVob;
use anyhow::Result;
use std::fmt::Debug;
use toktrie::SimpleVob;

use super::{
lexerspec::{LexemeIdx, LexerSpec},
Expand Down Expand Up @@ -152,7 +152,7 @@ impl Lexer {
} else {
LexerResult::Error
}
} else {
} else if state.has_lowest_match() {
if let Some((idx, hidden_len)) = self.dfa.lowest_match(state) {
LexerResult::Lexeme(PreLexeme {
idx: LexemeIdx::new(idx),
Expand All @@ -161,8 +161,10 @@ impl Lexer {
hidden_len,
})
} else {
LexerResult::State(state, byte)
unreachable!()
}
} else {
LexerResult::State(state, byte)
}
}
}
Expand Down
12 changes: 8 additions & 4 deletions parser/src/earley/regexvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,13 @@ impl RegexVec {

#[inline(always)]
pub fn transition(&mut self, state: StateID, b: u8) -> StateID {
let mapped = self.alpha.map(b);
let idx = state.as_usize() * self.alpha.len() + mapped;
let idx = self.alpha.map_state(state, b);
// let new_state = unsafe { std::ptr::read(self.state_table_ptr.add(idx)) };
let new_state = self.state_table[idx];
if new_state != StateID::MISSING {
new_state
} else {
self.transition_inner(state, mapped as u8, idx)
self.transition_inner(state, self.alpha.map(b) as u8, idx)
}
}

Expand Down Expand Up @@ -329,7 +329,11 @@ impl RegexVec {
state_desc.lowest_match = self.lowest_match_inner(id);
self.append_state(state_desc);
}
id
if self.state_desc(id).lowest_match.is_some() {
id._set_lowest_match()
} else {
id
}
}

fn compute_state_desc(&self, state: StateID) -> StateDesc {
Expand Down
4 changes: 2 additions & 2 deletions rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions scripts/update-git.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,7 @@ def get_latest_commit_hash(repo_path):
with open(cargo_toml_path, "w") as file:
file.write(cargo_toml_contents)

# run cargo fetch to update Cargo.lock
subprocess.run(["cargo", "fetch", "--manifest-path", "rust/Cargo.toml"], check=True)

print("Cargo.toml updated successfully.")

0 comments on commit c344d51

Please sign in to comment.