diff --git a/samples/python/visual_language_chat/visual_language_chat.py b/samples/python/visual_language_chat/visual_language_chat.py
index b024c8b7fe..6c222938a4 100755
--- a/samples/python/visual_language_chat/visual_language_chat.py
+++ b/samples/python/visual_language_chat/visual_language_chat.py
@@ -61,7 +61,7 @@ def main():
         # Cache compiled models on disk for GPU to save time on the
         # next run. It's not beneficial for CPU.
         enable_compile_cache["CACHE_DIR"] = "vlm_cache"
-    pipe = openvino_genai.VLMPipeline(args.model_dir, device, **enable_compile_cache)
+    pipe = openvino_genai.VLMPipeline(args.model_dir, device, enable_compile_cache)
 
     config = openvino_genai.GenerationConfig()
     config.max_new_tokens = 100
diff --git a/src/python/py_generate_pipeline.cpp b/src/python/py_generate_pipeline.cpp
index 2f40b042ae..ff651f0117 100644
--- a/src/python/py_generate_pipeline.cpp
+++ b/src/python/py_generate_pipeline.cpp
@@ -402,18 +402,17 @@ PYBIND11_MODULE(py_generate_pipeline, m) {
     m.doc() = "Pybind11 binding for LLM Pipeline";
 
     py::class_<LLMPipeline>(m, "LLMPipeline", "This class is used for generation with LLMs")
-        // init(model_path, tokenizer, device, config, kwargs) should be defined before init(model_path, device, config, kwargs)
-        // to prevent tokenizer treated as kwargs argument
         .def(py::init([](
             const std::string& model_path,
             const std::string& device,
-            const py::kwargs& kwargs
+            const std::map<std::string, py::object>& config
         ) {
             ScopedVar env_manager(utils::ov_tokenizers_module_path());
-            return std::make_unique<LLMPipeline>(model_path, device, utils::kwargs_to_any_map(kwargs));
+            return std::make_unique<LLMPipeline>(model_path, device, utils::properties_to_any_map(config));
         }),
         py::arg("model_path"), "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
         py::arg("device") = "CPU", "device on which inference will be done",
+        py::arg("config") = ov::AnyMap({}), "openvino.properties map",
         R"(
             LLMPipeline class constructor.
             model_path (str): Path to the model file.
@@ -422,60 +421,40 @@ PYBIND11_MODULE(py_generate_pipeline, m) {
         )")
         .def(py::init([](
-            const std::string& model_path,
-            const Tokenizer& tokenizer,
+            const std::string& model_path,
             const std::string& device,
-            const std::map<std::string, py::object>& config,
             const py::kwargs& kwargs
         ) {
             ScopedVar env_manager(utils::ov_tokenizers_module_path());
-            auto kwargs_properies = utils::kwargs_to_any_map(kwargs);
-            if (config.size()) {
-                PyErr_WarnEx(PyExc_DeprecationWarning, "'config' parameters is deprecated, please use kwargs to pass config properties instead.", 1);
-                auto properies = utils::properties_to_any_map(config);
-                kwargs_properies.insert(properies.begin(), properies.end());
-            }
-            return std::make_unique<LLMPipeline>(model_path, tokenizer, device, kwargs_properies);
+            return std::make_unique<LLMPipeline>(model_path, device, utils::kwargs_to_any_map(kwargs));
         }),
-        py::arg("model_path"),
-        py::arg("tokenizer"),
-        py::arg("device") = "CPU",
-        py::arg("config") = ov::AnyMap({}), "openvino.properties map",
+        py::arg("model_path"), "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
+        py::arg("device") = "CPU", "device on which inference will be done",
         R"(
-            LLMPipeline class constructor for manualy created openvino_genai.Tokenizer.
+            LLMPipeline class constructor.
             model_path (str): Path to the model file.
-            tokenizer (openvino_genai.Tokenizer): tokenizer object.
             device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
-            kwargs: Device properties. Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline.
)") + .def(py::init([]( - const std::string& model_path, + const std::string& model_path, + const Tokenizer& tokenizer, const std::string& device, - const std::map& config, - const py::kwargs& kwargs + const std::map& config ) { ScopedVar env_manager(utils::ov_tokenizers_module_path()); - auto kwargs_properies = utils::kwargs_to_any_map(kwargs); - if (config.size()) { - PyErr_WarnEx(PyExc_DeprecationWarning, - "'config' parameters is deprecated, please use kwargs to pass config properties instead.", - 1); - auto properies = utils::properties_to_any_map(config); - kwargs_properies.insert(properies.begin(), properies.end()); - } - return std::make_unique(model_path, device, kwargs_properies); + return std::make_unique(model_path, tokenizer, device, utils::properties_to_any_map(config)); }), - py::arg("model_path"), "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files", - py::arg("device") = "CPU", "device on which inference will be done", + py::arg("model_path"), + py::arg("tokenizer"), + py::arg("device") = "CPU", py::arg("config") = ov::AnyMap({}), "openvino.properties map", R"( - LLMPipeline class constructor. + LLMPipeline class constructor for manualy created openvino_genai.Tokenizer. model_path (str): Path to the model file. + tokenizer (openvino_genai.Tokenizer): tokenizer object. device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'. - kwargs: Device properties. Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline. )") diff --git a/src/python/py_text2image_pipeline.cpp b/src/python/py_text2image_pipeline.cpp index 052c2cba95..63877ba2e2 100644 --- a/src/python/py_text2image_pipeline.cpp +++ b/src/python/py_text2image_pipeline.cpp @@ -144,6 +144,8 @@ ov::AnyMap text2image_kwargs_to_any_map(const py::kwargs& kwargs, bool allow_com "Use help(openvino_genai.Text2ImagePipeline.generate) to get list of acceptable parameters.")); } } + + } return params; } diff --git a/src/python/py_vlm_pipeline.cpp b/src/python/py_vlm_pipeline.cpp index 0cde6fe045..7f6d639e01 100644 --- a/src/python/py_vlm_pipeline.cpp +++ b/src/python/py_vlm_pipeline.cpp @@ -55,8 +55,25 @@ auto vlm_generate_kwargs_docstring = R"( :rtype: DecodedResults )"; +py::object call_vlm_generate( + ov::genai::VLMPipeline& pipe, + const std::string& prompt, + const std::vector& images, + const ov::genai::GenerationConfig& generation_config, + const utils::PyBindStreamerVariant& py_streamer, + const py::kwargs& kwargs +) { + auto updated_config = *ov::genai::pybind::utils::update_config_from_kwargs(generation_config, kwargs); + ov::genai::StreamerVariant streamer = ov::genai::pybind::utils::pystreamer_to_streamer(py_streamer); + + return py::cast(pipe.generate(prompt, images, updated_config, streamer)); +} -ov::AnyMap vlm_kwargs_to_any_map(const py::kwargs& kwargs, bool allow_compile_properties=true) { +py::object call_vlm_generate( + ov::genai::VLMPipeline& pipe, + const std::string& prompt, + const py::kwargs& kwargs +) { ov::AnyMap params = {}; for (const auto& item : kwargs) { @@ -64,48 +81,22 @@ ov::AnyMap vlm_kwargs_to_any_map(const py::kwargs& kwargs, bool allow_compile_pr py::object value = py::cast(item.second); if (key == "images") { - params.insert({ov::genai::images(std::move(py::cast>(value)))}); + params.insert({ov::genai::images(std::move(py::cast>(item.second)))}); } else if (key == "image") { - params.insert({ov::genai::image(std::move(py::cast(value)))}); + 
params.insert({ov::genai::image(std::move(py::cast(item.second)))}); } else if (key == "generation_config") { - params.insert({ov::genai::generation_config(std::move(py::cast(value)))}); + params.insert({ov::genai::generation_config(std::move(py::cast(item.second)))}); } else if (key == "streamer") { auto py_streamer = py::cast(value); params.insert({ov::genai::streamer(std::move(ov::genai::pybind::utils::pystreamer_to_streamer(py_streamer)))}); - } - else { - if (allow_compile_properties) { - // convert arbitrary objects to ov::Any - // not supported properties are not checked, as these properties are passed to compile(), which will throw exception in case of unsupported property - if (utils::py_object_is_any_map(value)) { - auto map = utils::py_object_to_any_map(value); - params.insert(map.begin(), map.end()); - } else { - params[key] = utils::py_object_to_any(value); - } - } - else { - // generate doesn't run compile(), so only VLMPipeline specific properties are allowed - throw(std::invalid_argument("'" + key + "' is unexpected parameter name. " + + } else { + throw(std::invalid_argument("'" + key + "' is unexpected parameter name. " "Use help(openvino_genai.VLMPipeline.generate) to get list of acceptable parameters.")); - } } } - return params; -} - -py::object call_vlm_generate( - ov::genai::VLMPipeline& pipe, - const std::string& prompt, - const std::vector& images, - const ov::genai::GenerationConfig& generation_config, - const utils::PyBindStreamerVariant& py_streamer, - const py::kwargs& kwargs -) { - auto updated_config = *ov::genai::pybind::utils::update_config_from_kwargs(generation_config, kwargs); - ov::genai::StreamerVariant streamer = ov::genai::pybind::utils::pystreamer_to_streamer(py_streamer); - return py::cast(pipe.generate(prompt, images, updated_config, streamer)); + return py::cast(pipe.generate(prompt, params)); } void init_vlm_pipeline(py::module_& m) { @@ -113,18 +104,18 @@ void init_vlm_pipeline(py::module_& m) { .def(py::init([]( const std::string& model_path, const std::string& device, - const py::kwargs& kwargs + const std::map& config ) { ScopedVar env_manager(utils::ov_tokenizers_module_path()); - return std::make_unique(model_path, device, vlm_kwargs_to_any_map(kwargs, true)); + return std::make_unique(model_path, device, utils::properties_to_any_map(config)); }), py::arg("model_path"), "folder with exported model files", py::arg("device") = "CPU", "device on which inference will be done", + py::arg("config") = ov::AnyMap({}), "openvino.properties map" R"( VLMPipeline class constructor. model_path (str): Path to the folder with exported model files. device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'. 
-            kwargs: Device properties
         )")
         .def("start_chat", &ov::genai::VLMPipeline::start_chat, py::arg("system_message") = "")
@@ -155,7 +146,7 @@ void init_vlm_pipeline(py::module_& m) {
             const std::string& prompt,
             const py::kwargs& kwargs
         ) {
-            return py::cast(pipe.generate(prompt, vlm_kwargs_to_any_map(kwargs, false)));
+            return call_vlm_generate(pipe, prompt, kwargs);
         },
         py::arg("prompt"), "Input string",
         (vlm_generate_kwargs_docstring + std::string(" \n ")).c_str()
diff --git a/src/python/py_whisper_pipeline.cpp b/src/python/py_whisper_pipeline.cpp
index 2f779b1836..f10d5c86d6 100644
--- a/src/python/py_whisper_pipeline.cpp
+++ b/src/python/py_whisper_pipeline.cpp
@@ -255,44 +255,43 @@ void init_whisper_pipeline(py::module_& m) {
         .def_readonly("chunks", &WhisperDecodedResults::chunks);
 
     py::class_<WhisperPipeline>(m, "WhisperPipeline")
-        // init(model_path, tokenizer, device, kwargs) should be defined before init(model_path, device, kwargs)
-        // to prevent tokenizer treated as kwargs argument
         .def(py::init([](const std::string& model_path,
-                         const Tokenizer& tokenizer,
                          const std::string& device,
-                         const py::kwargs& kwargs) {
-                 return std::make_unique<WhisperPipeline>(model_path,
-                                                          tokenizer,
-                                                          device,
-                                                          utils::kwargs_to_any_map(kwargs));
+                         const std::map<std::string, py::object>& config) {
+                 ScopedVar env_manager(utils::ov_tokenizers_module_path());
+                 return std::make_unique<WhisperPipeline>(model_path, device, utils::properties_to_any_map(config));
             }),
             py::arg("model_path"),
-            py::arg("tokenizer"),
+            "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
             py::arg("device") = "CPU",
+            "device on which inference will be done",
+            py::arg("config") = ov::AnyMap({}), "openvino.properties map",
             R"(
-            WhisperPipeline class constructor for manualy created openvino_genai.Tokenizer.
+            WhisperPipeline class constructor.
             model_path (str): Path to the model file.
-            tokenizer (openvino_genai.Tokenizer): tokenizer object.
             device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
-            kwargs: Device properties.
         )")
+
         .def(py::init([](const std::string& model_path,
+                         const Tokenizer& tokenizer,
                          const std::string& device,
-                         const py::kwargs& kwargs) {
-                 ScopedVar env_manager(utils::ov_tokenizers_module_path());
-                 return std::make_unique<WhisperPipeline>(model_path, device, utils::kwargs_to_any_map(kwargs));
+                         const std::map<std::string, py::object>& config) {
+                 return std::make_unique<WhisperPipeline>(model_path,
+                                                          tokenizer,
+                                                          device,
+                                                          utils::properties_to_any_map(config));
             }),
             py::arg("model_path"),
-            "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
+            py::arg("tokenizer"),
             py::arg("device") = "CPU",
-            "device on which inference will be done",
+            py::arg("config") = ov::AnyMap({}), "openvino.properties map",
             R"(
-            WhisperPipeline class constructor.
+            WhisperPipeline class constructor for manualy created openvino_genai.Tokenizer.
             model_path (str): Path to the model file.
+            tokenizer (openvino_genai.Tokenizer): tokenizer object.
             device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
-            kwargs: Device properties.
)") .def( diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py index cc259233e0..1195d0b04e 100644 --- a/tests/python_tests/ov_genai_test_utils.py +++ b/tests/python_tests/ov_genai_test_utils.py @@ -197,7 +197,7 @@ def read_model(params, **tokenizer_kwargs): path, tokenizer, opt_model, - ov_genai.LLMPipeline(str(path), device='CPU', **{"ENABLE_MMAP": False}), + ov_genai.LLMPipeline(str(path), device='CPU', config={"ENABLE_MMAP": False}), ) @@ -252,4 +252,4 @@ def load_pipe(configs: List[Tuple], temp_path): def get_continuous_batching(path): scheduler_config = ov_genai.SchedulerConfig() scheduler_config.cache_size = 1 - return ov_genai.LLMPipeline(str(path), ov_genai.Tokenizer(str(path)), device='CPU', **{"scheduler_config": scheduler_config}) + return ov_genai.LLMPipeline(str(path), ov_genai.Tokenizer(str(path)), device='CPU', config={"scheduler_config": scheduler_config}) diff --git a/tests/python_tests/test_chat_generate_api.py b/tests/python_tests/test_chat_generate_api.py index b033d18642..b68de6372d 100644 --- a/tests/python_tests/test_chat_generate_api.py +++ b/tests/python_tests/test_chat_generate_api.py @@ -118,7 +118,7 @@ def test_chat_compare_statefull_vs_text_history(model_descr, generation_config: # HF in chat scenario does not add special tokens, but openvino tokenizer by default is converted with add_special_tokens=True. # Need to regenerate openvino_tokenizer/detokenizer. model_id, path, tokenizer, model_opt, pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'), add_special_tokens=False) - pipe_with_kv_cache = ov_genai.LLMPipeline(str(path), device, **{"ENABLE_MMAP": False}) + pipe_with_kv_cache = ov_genai.LLMPipeline(str(path), device, config={"ENABLE_MMAP": False}) pipe_with_kv_cache.start_chat() for question in quenstions: diff --git a/tests/python_tests/test_whisper_generate_api.py b/tests/python_tests/test_whisper_generate_api.py index ed32b8823b..6eeea76d31 100644 --- a/tests/python_tests/test_whisper_generate_api.py +++ b/tests/python_tests/test_whisper_generate_api.py @@ -68,7 +68,7 @@ def read_whisper_model(params, **tokenizer_kwargs): path, opt_pipe, ov_genai.WhisperPipeline( - str(path), device="CPU", **{"ENABLE_MMAP": False} + str(path), device="CPU", config={"ENABLE_MMAP": False} ), ) @@ -201,7 +201,7 @@ def test_whisper_constructors(model_descr, test_sample): expected = opt_pipe(test_sample)["text"] genai_result = ov_genai.WhisperPipeline( - str(path), device="CPU", **{"ENABLE_MMAP": False} + str(path), device="CPU", config={"ENABLE_MMAP": False} ).generate(test_sample) assert genai_result.texts[0] == expected @@ -213,7 +213,7 @@ def test_whisper_constructors(model_descr, test_sample): tokenizer = ov_genai.Tokenizer(str(path)) genai_result = ov_genai.WhisperPipeline( - str(path), tokenizer=tokenizer, device="CPU", **{"ENABLE_MMAP": False} + str(path), tokenizer=tokenizer, device="CPU", config={"ENABLE_MMAP": False} ).generate(test_sample) assert genai_result.texts[0] == expected @@ -237,7 +237,7 @@ def test_max_new_tokens(model_descr, test_sample): tokenizer = ov_genai.Tokenizer(str(path)) genai_pipeline = ov_genai.WhisperPipeline( - str(path), tokenizer=tokenizer, device="CPU", **{"ENABLE_MMAP": False} + str(path), tokenizer=tokenizer, device="CPU", config={"ENABLE_MMAP": False} ) config = genai_pipeline.get_generation_config() config.max_new_tokens = 30 diff --git a/tools/llm_bench/llm_bench_utils/ov_utils.py b/tools/llm_bench/llm_bench_utils/ov_utils.py index 21a58a6e3b..da77f5da22 100644 --- 
a/tools/llm_bench/llm_bench_utils/ov_utils.py +++ b/tools/llm_bench/llm_bench_utils/ov_utils.py @@ -201,7 +201,7 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs): setattr(scheduler_config, param, value) ov_config["scheduler_config"] = scheduler_config start = time.perf_counter() - llm_pipe = openvino_genai.LLMPipeline(str(model_path), device.upper(), **ov_config) + llm_pipe = openvino_genai.LLMPipeline(str(model_path), device.upper(), ov_config) end = time.perf_counter() log.info(f'Pipeline initialization time: {end - start:.2f}s')
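
Usage sketch (not part of the patch; assumptions noted): the two constructor-call styles this diff toggles between, written only from calls that already appear in the tests above. The model folder path is hypothetical, and the trailing `generate` call is illustrative of the LLMPipeline API rather than taken from this diff.

```python
import openvino_genai as ov_genai

# Hypothetical folder containing openvino_model.xml and openvino_tokenizer/detokenizer files.
model_dir = "path/to/exported_model"

# Style restored by this patch: pipeline properties passed as an explicit 'config' dict.
pipe = ov_genai.LLMPipeline(model_dir, device="CPU", config={"ENABLE_MMAP": False})

# Style being reverted: the same properties unpacked as keyword arguments.
# pipe = ov_genai.LLMPipeline(model_dir, device="CPU", **{"ENABLE_MMAP": False})

# Illustrative generation call; generation parameters are passed as keyword arguments.
print(pipe.generate("What is OpenVINO?", max_new_tokens=30))
```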