diff --git a/Cargo.lock b/Cargo.lock
index 369f888..91b0ec3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1165,6 +1165,7 @@ dependencies = [
  "pyo3",
  "serde",
  "serde_json",
+ "toktrie_hf_tokenizers",
 ]
 
 [[package]]
diff --git a/python_ext/Cargo.toml b/python_ext/Cargo.toml
index 1dcc3b4..6f80f1f 100644
--- a/python_ext/Cargo.toml
+++ b/python_ext/Cargo.toml
@@ -5,7 +5,7 @@ edition = "2021"
 
 [dependencies]
 llguidance = { workspace = true }
-# toktrie_hf_tokenizers = { workspace = true }
+toktrie_hf_tokenizers = { workspace = true, optional = true }
 bytemuck = "1.19.0"
 pyo3 = {version = "0.21.2", features = ["extension-module", "abi3-py39"]}
 serde = { version = "1.0.210", features = ["derive"] }
@@ -16,3 +16,7 @@ serde_json = "1.0.132"
 name = "_lib"
 crate-type = ["cdylib"]
 path = "src/lib.rs"
+
+[features]
+default = []
+tokenizers = ["toktrie_hf_tokenizers"]
\ No newline at end of file
diff --git a/python_ext/src/py.rs b/python_ext/src/py.rs
index ac71bf5..2c1951d 100644
--- a/python_ext/src/py.rs
+++ b/python_ext/src/py.rs
@@ -157,13 +157,21 @@ impl LLTokenizer {
         slices: Option<Vec<String>>,
     ) -> PyResult<Self> {
         let tok_env: TokEnv = if let Some(_tokenizer_str) = tokenizer.extract::<String>().ok() {
-            let _ = n_vocab;
-            return Err(PyValueError::new_err(
-                "Expecting a TokenizerWrapper() class, not a string",
-            ));
-            // let tok = toktrie_hf_tokenizers::ByteTokenizerEnv::from_name(&tokenizer_str, n_vocab)
-            //     .map_err(val_error)?;
-            // tok.to_env()
+            #[cfg(feature = "tokenizers")]
+            {
+                let tok =
+                    toktrie_hf_tokenizers::ByteTokenizerEnv::from_name(&_tokenizer_str, n_vocab)
+                        .map_err(val_error)?;
+                tok.to_env()
+            }
+
+            #[cfg(not(feature = "tokenizers"))]
+            {
+                let _ = n_vocab;
+                return Err(PyValueError::new_err(
+                    "Expecting a TokenizerWrapper() class, not a string",
+                ));
+            }
         } else {
             Arc::new(PyTokenizer::py_new(tokenizer)?)
         };
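
A note on the py.rs hunk, since the pattern is easy to misread: the two #[cfg(...)]-gated blocks sit in expression position inside the `if let` arm. After cfg expansion exactly one of them survives and becomes the arm's tail expression; the not("tokenizers") branch still type-checks as TokEnv because its `return` diverges. Below is a minimal standalone sketch of the same pattern; only the `tokenizers` feature name comes from this diff, while the function, strings, and error type are illustrative.

// Standalone sketch of the cfg-gating used above (illustrative names).
fn backend_name() -> Result<String, String> {
    let name: String = {
        #[cfg(feature = "tokenizers")]
        {
            // Compiled only when the crate is built with the feature on.
            "toktrie_hf_tokenizers".to_string()
        }

        #[cfg(not(feature = "tokenizers"))]
        {
            // This block diverges, so `name` still type-checks
            // when the feature is disabled.
            return Err("built without the `tokenizers` feature".to_string());
        }
    };
    Ok(name)
}

fn main() {
    match backend_name() {
        Ok(name) => println!("tokenizer backend: {name}"),
        Err(err) => eprintln!("{err}"),
    }
}

With the [features] table added to python_ext/Cargo.toml, the gate is off by default (default = []); enabling it pulls in the optional toktrie_hf_tokenizers dependency, e.g. via `cargo build --features tokenizers` or maturin's corresponding --features flag when building the wheel.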