Skip to content

Commit

Permalink
better grammar printout
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Dec 3, 2024
1 parent 71de1ef commit 0b98ead
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 55 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ edition = "2021"

[dependencies]
toktrie = { workspace = true }
derivre = { git = "https://github.com/microsoft/derivre", rev = "02ee497e6e404a0b402b4f68a9abf599d22ed2ed" }
derivre = { git = "https://github.com/microsoft/derivre", rev = "4748aad0b763a033bccbfa96a0e8f9f7d8a90949" }
serde = { version = "1.0.210", features = ["derive"] }
serde_json = { version = "1.0.132", features = ["preserve_order"] }
anyhow = "1.0.90"
Expand Down
29 changes: 19 additions & 10 deletions parser/src/earley/lexerspec.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use anyhow::Result;
use derivre::{ExprRef, JsonQuoteOptions, RegexAst, RegexBuilder};
use derivre::{raw::ExprSet, ExprRef, JsonQuoteOptions, RegexAst, RegexBuilder};
use std::{fmt::Debug, hash::Hash};
use toktrie::{bytes::limit_str, SimpleVob, TokTrie};

Expand Down Expand Up @@ -67,7 +67,7 @@ impl LexemeIdx {
}

impl LexemeSpec {
/// Check if the lexeme always matches bytes, and has at least one more byte to spare.
/// Check if the lexeme always matches bytes.
pub fn has_forced_bytes(&self, bytes: &[u8]) -> bool {
match &self.rx {
RegexAst::Literal(s) if s.len() >= bytes.len() => {
Expand All @@ -80,18 +80,26 @@ impl LexemeSpec {
pub fn class(&self) -> LexemeClass {
self.class
}
}

impl Debug for LexemeSpec {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "[{}] {} {:?}", self.idx.0, self.name, self.rx)?;
pub fn to_string(&self, max_len: usize, exprset: Option<&ExprSet>) -> String {
use std::fmt::Write;
let mut f = String::new();
write!(f, "[{}] {} ", self.idx.0, self.name).unwrap();
self.rx.write_to_str(&mut f, max_len, exprset);
if self.lazy {
write!(f, " lazy")?;
f.push_str(" lazy");
}
if self.contextual {
write!(f, " contextual")?;
f.push_str(" contextual");
}
Ok(())
f
}
}

impl Debug for LexemeSpec {
    /// Writes the text produced by the inherent `to_string` method, called
    /// with a `max_len` of 512 and no `ExprSet`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.to_string(512, None))
    }
}

Expand Down Expand Up @@ -343,7 +351,8 @@ impl Debug for LexerSpec {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
writeln!(f, "LexerSpec {{ lexemes: [")?;
for lex in &self.lexemes {
writeln!(f, " {:?}", lex)?;
let slex = lex.to_string(512, Some(self.regex_builder.exprset()));
writeln!(f, " {}", slex)?;
}
write!(f, "] }}")
}
Expand Down
49 changes: 6 additions & 43 deletions scripts/update-git.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import subprocess
import re
import sys
import json
import os


# Function to get the latest commit hash of a git repository
Expand All @@ -25,20 +23,14 @@ def get_latest_commit_hash(repo_path):


# Function to update a single Cargo.toml file with commit hashes
def update_cargo_toml(cargo_toml_path, toktrie_commit, derivre_commit):
def update_cargo_toml(cargo_toml_path, derivre_commit):
try:
with open(cargo_toml_path, "r") as file:
cargo_toml_contents = file.read()

# Patterns for replacing the rev in Cargo.toml
toktrie_pattern = r'(toktrie[_a-z]*\s*=\s*\{[^}]*rev\s*=\s*")[^"]*(")'
derivre_pattern = r'(derivre\s*=\s*\{[^}]*rev\s*=\s*")[^"]*(")'

cargo_toml_contents = re.sub(
toktrie_pattern,
lambda m: m.group(1) + toktrie_commit + m.group(2),
cargo_toml_contents,
)
cargo_toml_contents = re.sub(
derivre_pattern,
lambda m: m.group(1) + derivre_commit + m.group(2),
Expand All @@ -57,53 +49,24 @@ def update_cargo_toml(cargo_toml_path, toktrie_commit, derivre_commit):


# Get the latest commit hashes for toktrie and derivre
toktrie_commit = get_latest_commit_hash("../toktrie")
derivre_commit = get_latest_commit_hash("../derivre")

# Check if the commit hashes were retrieved successfully
if not toktrie_commit or not derivre_commit:
if not derivre_commit:
print("Error retrieving commit hashes. Exiting.")
sys.exit(1)

# List of Cargo.toml paths to update
cargo_toml_paths = [
"parser/Cargo.toml",
"sample_parser/Cargo.toml",
"python_ext/Cargo.toml",
]

# Update each Cargo.toml file
for cargo_toml_path in cargo_toml_paths:
update_cargo_toml(cargo_toml_path, toktrie_commit, derivre_commit)
update_cargo_toml(cargo_toml_path, derivre_commit)


def get_workspace_cargo_toml():
try:
result = subprocess.run(
["cargo", "locate-project", "--workspace"],
capture_output=True,
text=True,
check=True,
)
data = json.loads(result.stdout)
return data["root"]
except subprocess.CalledProcessError as e:
print("Error running cargo command:", e)
except KeyError:
print("Unexpected JSON structure from cargo command.")
return None


ws = get_workspace_cargo_toml()
if ws:
os.rename(ws, ws + ".tmp")

try:
# Run cargo fetch for each path to update Cargo.lock
for path in cargo_toml_paths:
subprocess.run(["cargo", "fetch", "--manifest-path", path], check=True)
finally:
if ws:
os.rename(ws + ".tmp", ws)
# Run cargo fetch for each path to update Cargo.lock
for path in cargo_toml_paths:
subprocess.run(["cargo", "fetch", "--manifest-path", path], check=True)

print("All Cargo.toml files updated and cargo fetch run successfully.")

0 comments on commit 0b98ead

Please sign in to comment.