From 9351183c122f397f138610c8beefc8a8c02eea33 Mon Sep 17 00:00:00 2001
From: Michal Moskal
Date: Mon, 9 Sep 2024 11:58:14 -0700
Subject: [PATCH] check in the generated .h file for reference

---
 .gitignore          |   1 -
 parser/llguidance.h | 225 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 225 insertions(+), 1 deletion(-)
 create mode 100644 parser/llguidance.h

diff --git a/.gitignore b/.gitignore
index 3fcca90a..07045adb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,4 +15,3 @@ dist
 *.egg-info
 build
 .intentionally-empty-file.o
-parser/llguidance.h
diff --git a/parser/llguidance.h b/parser/llguidance.h
new file mode 100644
index 00000000..507b9185
--- /dev/null
+++ b/parser/llguidance.h
@@ -0,0 +1,225 @@
+#ifndef LLGUIDANCE_H
+#define LLGUIDANCE_H
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+typedef struct LlgConstraint LlgConstraint;
+
+typedef struct LlgTokenizer LlgTokenizer;
+
+typedef struct LlgParserLimits {
+  /**
+   * For non-ambiguous grammars, this is the maximum "branching factor" of the grammar.
+   * For ambiguous grammars, this might get hit much quicker.
+   * Default: 200
+   */
+  size_t max_items_in_row;
+  /**
+   * How much "fuel" are we willing to spend to build initial lexer regex AST nodes.
+   * Default: 1_000_000 (~20ms)
+   */
+  uint64_t initial_lexer_fuel;
+  /**
+   * Maximum lexer fuel for computation of the whole token mask.
+   * Default: 500_000 (~10ms)
+   */
+  uint64_t step_lexer_fuel;
+  /**
+   * Maximum number of lexer states.
+   * Default: 10_000
+   */
+  size_t max_lexer_states;
+  /**
+   * Maximum size of the grammar (symbols in productions)
+   * Default: 500_000 (a few megabytes of JSON)
+   */
+  size_t max_grammar_size;
+} LlgParserLimits;
+
+typedef struct LlgConstraintInit {
+  /**
+   * The tokenizer to use, created with llg_new_tokenizer()
+   */
+  const struct LlgTokenizer *tokenizer;
+  /**
+   * The log level for the buffer that is kept inside of the constraint
+   * 0 - no logging, 1 - warnings only, 2 - info
+   */
+  uint32_t log_buffer_level;
+  /**
+   * The log level for writing to stderr
+   */
+  uint32_t log_stderr_level;
+  /**
+   * Does the engine support fast-forward tokens?
+   * (Appending more than one token to output at once)
+   */
+  bool ff_tokens_ok;
+  /**
+   * Does the engine support backtracking?
+   * (Removing tokens from the output)
+   */
+  bool backtrack_ok;
+  /**
+   * The resource limits for the parser
+   * Default values will be used for all fields that are 0
+   */
+  struct LlgParserLimits limits;
+} LlgConstraintInit;
+
+typedef struct LlgMaskResult {
+  /**
+   * One bit per vocab token
+   * This is valid until any call to llg_*() on the current constraint
+   */
+  const uint32_t *sample_mask;
+  /**
+   * Temperature to use for sampling
+   */
+  float temperature;
+  /**
+   * Should the sequence stop?
+   */
+  bool is_stop;
+} LlgMaskResult;
+
+typedef uint32_t LlgToken;
+
+/**
+ * Represents result from llg_commit_token()
+ */
+typedef struct LlgCommitResult {
+  /**
+   * The tokens to append to the output if any
+   * This is valid until any call to llg_*() on the current constraint
+   */
+  const uint32_t *tokens;
+  /**
+   * The number of tokens in the tokens array (can be 0)
+   */
+  uint32_t n_tokens;
+  /**
+   * Should the sequence stop?
+   */
+  bool is_stop;
+} LlgCommitResult;
+
+/**
+ * Tokenization function
+ * Will not write more than output_tokens_len tokens (which can be 0)
+ * Returns the total number of tokens (which can be more than output_tokens_len)
+ */
+typedef size_t (*LlgTokenizeFn)(const uint8_t *bytes,
+                                size_t bytes_len,
+                                uint32_t *output_tokens,
+                                size_t output_tokens_len);
+
+typedef struct LlgTokenizerInit {
+  /**
+   * The number of tokens in the vocabulary
+   */
+  uint32_t vocab_size;
+  /**
+   * The token ID for the end of sentence token
+   * For chat mode, set it to end-of-turn token
+   */
+  LlgToken tok_eos;
+  /**
+   * An array of the lengths of the token strings (vocab_size elements)
+   */
+  const uint32_t *token_lens;
+  /**
+   * A pointer to the token strings
+   * The length of this is the sum of all token_lens
+   */
+  const uint8_t *token_bytes;
+  /**
+   * Set to true to enable hack that works around the tokenize_fn only
+   * accepting valid UTF-8 strings and possibly adding <BOS> etc.
+   * TODO: the <BOS> bit not implemented yet
+   */
+  bool tokenize_assumes_string;
+  /**
+   * Tokenization function, see LlgTokenizeFn docs.
+   * It should only tokenize the bytes and not add
+   * any <BOS> etc. It should also work on any byte sequence, including
+   * invalid UTF-8. If this is not the case, set tokenize_assumes_string to true.
+   * Either way, this function has to be thread-safe!
+   */
+  LlgTokenizeFn tokenize_fn;
+} LlgTokenizerInit;
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+/**
+ * Set the default values for the ConstraintInit
+ * Disables ff_tokens and backtracking, enables warnings on stderr
+ * and all logging to the buffer (get with llg_flush_logs()).
+ * You need to set the tokenizer field manually.
+ */
+void llg_constraint_init_set_defaults(struct LlgConstraintInit *init);
+
+/**
+ * Create a new constraint from a grammar JSON string
+ * Always returns a non-null value. Call llg_get_error() on the result to check for errors.
+ */
+struct LlgConstraint *llg_new_constraint(const struct LlgConstraintInit *init,
+                                         const char *grammar_json);
+
+/**
+ * Get the error message from the constraint or null if there is no error.
+ * After it returns a non-null value, it will always return it until the constraint is freed
+ * using llg_free_constraint() (at which point the pointer will be invalid).
+ */
+const char *llg_get_error(const struct LlgConstraint *cc);
+
+/**
+ * Compute mask for the next token sampling
+ * It typically takes up to a millisecond for a 100k tokenizer, so should be called in background.
+ * Returns 0 on success and -1 on error (use llg_get_error() to get the exact error).
+ * When 0 is returned, the result is written to *res_p.
+ */
+int32_t llg_compute_mask(struct LlgConstraint *cc, struct LlgMaskResult *res_p);
+
+/**
+ * Commit the token sampled with the mask returned from llg_compute_mask().
+ * Can be run on the critical path of sampling (is fast).
+ * Returns 0 on success and -1 on error (use llg_get_error() to get the exact error).
+ * When 0 is returned, the result is written to *res_p.
+ */
+int32_t llg_commit_token(struct LlgConstraint *cc, LlgToken token, struct LlgCommitResult *res_p);
+
+/**
+ * Construct a new tokenizer from the given TokenizerInit
+ */
+struct LlgTokenizer *llg_new_tokenizer(const struct LlgTokenizerInit *tok_init);
+
+/**
+ * Free the tokenizer. Should *NOT* be called while there are still constraints using it.
+ */
+void llg_free_tokenizer(struct LlgTokenizer *tok);
+
+/**
+ * Free the constraint
+ */
+void llg_free_constraint(struct LlgConstraint *cc);
+
+/**
+ * Get the logs from the constraint, since last call to this function.
+ * The logs are null-terminated.
+ * The logs are kept in the constraint until the next call to this function
+ * or until the constraint is freed.
+ */
+const char *llg_flush_logs(struct LlgConstraint *cc);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif /* LLGUIDANCE_H */
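
The header above only documents the API surface, so a minimal C sketch of the intended call sequence may help: build a tokenizer, create a constraint, then alternate llg_compute_mask() (ideally off the critical path) with llg_commit_token() (fast enough for the sampling loop). This is not part of the patch; my_tokenize, pick_token_from_mask, the example parameters, and the assumed bit layout of sample_mask (token i in bit i%32 of word i/32) are placeholders, and only the llg_* functions and struct fields come from the header itself.

#include <stdio.h>
#include "llguidance.h"

/* Hypothetical tokenizer callback: wrap your real tokenizer here.
 * Must write at most output_tokens_len tokens and return the total count. */
static size_t my_tokenize(const uint8_t *bytes, size_t bytes_len,
                          uint32_t *output_tokens, size_t output_tokens_len) {
  (void)bytes; (void)bytes_len; (void)output_tokens; (void)output_tokens_len;
  return 0; /* stub */
}

/* Placeholder sampling step: a real engine samples from model logits restricted
 * by the mask; here we just take the first allowed token, assuming token i is
 * bit i%32 of word i/32. */
static LlgToken pick_token_from_mask(const uint32_t *mask, uint32_t vocab_size,
                                     float temperature) {
  (void)temperature;
  for (uint32_t i = 0; i < vocab_size; i++)
    if (mask[i / 32] & (1u << (i % 32)))
      return i;
  return 0;
}

int constrained_generation_example(uint32_t vocab_size, LlgToken tok_eos,
                                   const uint32_t *token_lens,
                                   const uint8_t *token_bytes,
                                   const char *grammar_json) {
  struct LlgTokenizerInit tok_init = {
      .vocab_size = vocab_size,
      .tok_eos = tok_eos,
      .token_lens = token_lens,   /* vocab_size entries */
      .token_bytes = token_bytes, /* concatenated token strings */
      .tokenize_assumes_string = false,
      .tokenize_fn = my_tokenize,
  };
  struct LlgTokenizer *tok = llg_new_tokenizer(&tok_init);

  struct LlgConstraintInit init;
  llg_constraint_init_set_defaults(&init); /* ff_tokens/backtracking off, warnings to stderr */
  init.tokenizer = tok;

  struct LlgConstraint *cc = llg_new_constraint(&init, grammar_json);
  if (llg_get_error(cc)) {
    fprintf(stderr, "constraint error: %s\n", llg_get_error(cc));
    llg_free_constraint(cc);
    llg_free_tokenizer(tok);
    return -1;
  }

  for (;;) {
    struct LlgMaskResult mask;
    if (llg_compute_mask(cc, &mask) != 0) {
      fprintf(stderr, "mask error: %s\n", llg_get_error(cc));
      break;
    }
    if (mask.is_stop)
      break;

    LlgToken t = pick_token_from_mask(mask.sample_mask, vocab_size, mask.temperature);

    struct LlgCommitResult commit;
    if (llg_commit_token(cc, t, &commit) != 0) {
      fprintf(stderr, "commit error: %s\n", llg_get_error(cc));
      break;
    }
    /* commit.tokens / commit.n_tokens is what should be appended to the output
     * (normally just t, since the defaults disable ff_tokens and backtracking). */
    if (commit.is_stop)
      break;
  }

  const char *logs = llg_flush_logs(cc); /* drain the in-constraint log buffer */
  if (logs && *logs)
    fputs(logs, stderr);

  llg_free_constraint(cc); /* free constraints before the tokenizer they use */
  llg_free_tokenizer(tok);
  return 0;
}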