diff --git a/llm/server/server/data/processor.py b/llm/server/server/data/processor.py
index a470742c6e..423fe6b614 100644
--- a/llm/server/server/data/processor.py
+++ b/llm/server/server/data/processor.py
@@ -16,7 +16,7 @@
 from abc import ABC, abstractmethod
 
 from paddlenlp.transformers import Llama3Tokenizer, LlamaTokenizer
-from paddlenlp.utils.llm_utils import get_eos_token_id
+from paddlenlp.trl.llm_utils import get_eos_token_id
 from server.engine.config import Config
 from server.utils import data_processor_logger
 
@@ -282,7 +282,7 @@ def _load_tokenizer(self):
         """
         if self.config.use_hf_tokenizer:
             from transformers import AutoTokenizer
-            return AutoTokenizer.from_pretrained(self.config.model_dir, use_fast=False)
+            return AutoTokenizer.from_pretrained(self.config.model_dir, use_fast=False, vocab_file=os.path.join(self.config.model_dir, "sentencepiece.bpe.model"))
         else:
             from paddlenlp.transformers import AutoTokenizer
             return AutoTokenizer.from_pretrained(self.config.model_dir)
diff --git a/llm/server/server/engine/infer.py b/llm/server/server/engine/infer.py
index 5d1f9bd33b..655ec060e7 100644
--- a/llm/server/server/engine/infer.py
+++ b/llm/server/server/engine/infer.py
@@ -25,7 +25,7 @@
 import paddle
 import paddle.distributed as dist
 import paddle.distributed.fleet as fleet
-from paddlenlp.utils.llm_utils import get_rotary_position_embedding
+from paddlenlp.trl.llm_utils import get_rotary_position_embedding
 from paddlenlp_ops import step_paddle
 from server.data.processor import DataProcessor
 from server.engine.config import Config
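
Reviewer note, not part of the patch: both hunks track the same rename, the llm_utils helpers moving from paddlenlp.utils to paddlenlp.trl. If this server needs to run against PaddleNLP versions on either side of that move, a minimal import shim is one option; this is a hypothetical sketch, assuming older releases still expose the functions under the old path:

    # Hypothetical compatibility shim, not in this PR. Assumes newer PaddleNLP
    # exposes the helpers under paddlenlp.trl.llm_utils (as this diff targets)
    # and older releases still have them under paddlenlp.utils.llm_utils.
    try:
        from paddlenlp.trl.llm_utils import (get_eos_token_id,
                                             get_rotary_position_embedding)
    except ImportError:
        # Fall back for PaddleNLP versions that predate the module move.
        from paddlenlp.utils.llm_utils import (get_eos_token_id,
                                               get_rotary_position_embedding)

The patch instead pins the import to the new location directly, which is simpler but makes the new PaddleNLP release a hard requirement.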