diff --git a/modules/openvino_code/package-lock.json b/modules/openvino_code/package-lock.json
index 02daeccf9..a438faa43 100644
--- a/modules/openvino_code/package-lock.json
+++ b/modules/openvino_code/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "openvino-code-completion",
-  "version": "0.0.13",
+  "version": "0.0.14",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "openvino-code-completion",
-      "version": "0.0.13",
+      "version": "0.0.14",
       "license": "https://github.com/openvinotoolkit/openvino_contrib/blob/master/LICENSE",
       "workspaces": [
         "side-panel-ui"
@@ -3628,9 +3628,9 @@
       "dev": true
     },
     "node_modules/follow-redirects": {
-      "version": "1.15.4",
-      "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.9.tgz",
-      "integrity": "sha512-MQDfihBQYMcyy5dhRDJUHcw7lb2Pv/TuE6xP1vyraLukNDHKbDxDNaOE3NbCAdKQApno+GPRyo1YAp89yCjK4w==",
+      "version": "1.15.6",
+      "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
+      "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
       "dev": true,
       "funding": [
         {
diff --git a/modules/openvino_code/package.json b/modules/openvino_code/package.json
index ac6cf03be..1950e30fd 100644
--- a/modules/openvino_code/package.json
+++ b/modules/openvino_code/package.json
@@ -1,7 +1,7 @@
 {
   "publisher": "OpenVINO",
   "name": "openvino-code-completion",
-  "version": "0.0.13",
+  "version": "0.0.14",
   "displayName": "OpenVINO Code Completion",
   "description": "VSCode extension for AI code completion with OpenVINO",
   "icon": "media/logo.png",
@@ -57,6 +57,7 @@
     "vsce:publish": "vsce publish",
     "ovsx:publish": "ovsx publish",
     "clear-out": "rimraf ./out"
+
   },
   "devDependencies": {
     "@types/glob": "8.1.0",
@@ -200,6 +201,17 @@
           ],
           "description": "Which model to use for code generation."
         },
+        "openvinoCode.device": {
+          "order": 1,
+          "type": "string",
+          "default": "CPU",
+          "enum": [
+            "CPU",
+            "GPU",
+            "NPU"
+          ],
+          "description": "Which device to use for code generation."
+        },
         "openvinoCode.serverUrl": {
           "order": 1,
           "type": "string",
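The new `openvinoCode.device` contribution surfaces as an ordinary VS Code setting. A minimal sketch of how it can be read back at runtime (an assumption for illustration: the configuration section is `openvinoCode`, matching the contribution ids above; `configuration.ts` later in this patch does the equivalent through its typed wrapper):

```ts
import { workspace } from 'vscode';

// Read the device the user picked in Settings; falls back to the
// default declared in package.json above.
const device = workspace.getConfiguration('openvinoCode').get<'CPU' | 'GPU' | 'NPU'>('device', 'CPU');
```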
}, + "openvinoCode.device": { + "order": 1, + "type": "string", + "default": "CPU", + "enum":[ + "CPU", + "GPU", + "NPU" + ], + "description": "Which device to use for code generation" + }, "openvinoCode.serverUrl": { "order": 1, "type": "string", diff --git a/modules/openvino_code/server/pyproject.toml b/modules/openvino_code/server/pyproject.toml index 4bc1ea4cb..232ca9869 100644 --- a/modules/openvino_code/server/pyproject.toml +++ b/modules/openvino_code/server/pyproject.toml @@ -11,7 +11,7 @@ dependencies = [ 'torch @ https://download.pytorch.org/whl/cpu-cxx11-abi/torch-2.0.1%2Bcpu.cxx11.abi-cp310-cp310-linux_x86_64.whl ; sys_platform=="linux" and python_version == "3.10"', 'torch @ https://download.pytorch.org/whl/cpu-cxx11-abi/torch-2.0.1%2Bcpu.cxx11.abi-cp311-cp311-linux_x86_64.whl ; sys_platform=="linux" and python_version == "3.11"', 'torch ; sys_platform != "linux"', - 'openvino==2023.3.0', + 'openvino==2024.0.0', 'transformers==4.36.0', 'optimum==1.17.1', 'optimum-intel[openvino]==1.15.0', @@ -27,13 +27,18 @@ build-backend = "setuptools.build_meta" [tool.black] line-length = 119 -target-versions = ["py38", "py39", "py310", "py311"] - +target-version = ['py38', 'py39', 'py310', 'py311'] +unstable = true +preview = true [tool.ruff] -ignore = ["C901", "E501", "E741", "W605"] -select = ["C", "E", "F", "I", "W"] +lint.ignore = ["C901", "E501", "E741", "W605", "F401", "W292"] +lint.select = ["C", "E", "F", "I", "W"] +lint.extend-safe-fixes = ["F601"] +lint.extend-unsafe-fixes = ["UP034"] +lint.fixable = ["F401"] line-length = 119 -[tool.ruff.isort] + +[tool.ruff.lint.isort] lines-after-imports = 2 diff --git a/modules/openvino_code/server/src/app.py b/modules/openvino_code/server/src/app.py index bac8f953e..56dfd8bf2 100644 --- a/modules/openvino_code/server/src/app.py +++ b/modules/openvino_code/server/src/app.py @@ -114,7 +114,11 @@ async def generate_stream( generation_request = TypeAdapter(GenerationRequest).validate_python(await request.json()) logger.info(generation_request) return StreamingResponse( - generator.generate_stream(generation_request.inputs, generation_request.parameters.model_dump(), request) + generator.generate_stream( + generation_request.inputs, + generation_request.parameters.model_dump(), + request, + ) ) @@ -127,7 +131,11 @@ async def summarize( start = perf_counter() generated_text: str = generator.summarize( - request.inputs, request.template, request.definition, request.format, request.parameters.model_dump() + request.inputs, + request.template, + request.definition, + request.format, + request.parameters.model_dump(), ) stop = perf_counter() @@ -148,6 +156,10 @@ async def summarize_stream( logger.info(request) return StreamingResponse( generator.summarize_stream( - request.inputs, request.template, request.definition, request.format, request.parameters.model_dump() + request.inputs, + request.template, + request.definition, + request.format, + request.parameters.model_dump(), ) ) diff --git a/modules/openvino_code/server/src/generators.py b/modules/openvino_code/server/src/generators.py index 761d652e2..4acc7fbb3 100644 --- a/modules/openvino_code/server/src/generators.py +++ b/modules/openvino_code/server/src/generators.py @@ -5,7 +5,17 @@ from pathlib import Path from threading import Thread from time import time -from typing import Any, Callable, Container, Dict, Generator, List, Optional, Type, Union +from typing import ( + Any, + Callable, + Container, + Dict, + Generator, + List, + Optional, + Type, + Union, +) import torch from fastapi import 
Request @@ -53,11 +63,20 @@ def get_model(checkpoint: str, device: str = "CPU") -> OVModel: model_class = get_model_class(checkpoint) try: model = model_class.from_pretrained( - checkpoint, ov_config=ov_config, compile=False, device=device, trust_remote_code=True + checkpoint, + ov_config=ov_config, + compile=False, + device=device, + trust_remote_code=True, ) except EntryNotFoundError: model = model_class.from_pretrained( - checkpoint, ov_config=ov_config, export=True, compile=False, device=device, trust_remote_code=True + checkpoint, + ov_config=ov_config, + export=True, + compile=False, + device=device, + trust_remote_code=True, ) model.save_pretrained(model_path) model.compile() @@ -75,10 +94,24 @@ def __call__(self, input_text: str, parameters: Dict[str, Any]) -> str: async def generate_stream(self, input_text: str, parameters: Dict[str, Any], request: Request): raise NotImplementedError - def summarize(self, input_text: str, template: str, signature: str, style: str, parameters: Dict[str, Any]): + def summarize( + self, + input_text: str, + template: str, + signature: str, + style: str, + parameters: Dict[str, Any], + ): raise NotImplementedError - def summarize_stream(self, input_text: str, template: str, signature: str, style: str, parameters: Dict[str, Any]): + def summarize_stream( + self, + input_text: str, + template: str, + signature: str, + style: str, + parameters: Dict[str, Any], + ): raise NotImplementedError @@ -128,13 +161,19 @@ def __call__(self, input_text: str, parameters: Dict[str, Any]) -> str: prompt_len = input_ids.shape[-1] config = GenerationConfig.from_dict({**self.generation_config.to_dict(), **parameters}) output_ids = self.model.generate( - input_ids, generation_config=config, stopping_criteria=stopping_criteria, **self.assistant_model_config + input_ids, + generation_config=config, + stopping_criteria=stopping_criteria, + **self.assistant_model_config, )[0][prompt_len:] logger.info(f"Number of input tokens: {prompt_len}; generated {len(output_ids)} tokens") return self.tokenizer.decode(output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False) async def generate_stream( - self, input_text: str, parameters: Dict[str, Any], request: Optional[Request] = None + self, + input_text: str, + parameters: Dict[str, Any], + request: Optional[Request] = None, ) -> Generator[str, None, None]: input_ids = self.tokenizer.encode(input_text, return_tensors="pt") streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True) @@ -192,7 +231,10 @@ def generate_between( prev_len = prompt.shape[-1] prompt = self.model.generate( - prompt, generation_config=config, stopping_criteria=stopping_criteria, **self.assistant_model_config + prompt, + generation_config=config, + stopping_criteria=stopping_criteria, + **self.assistant_model_config, )[ :, :-1 ] # skip the last token - stop token @@ -219,7 +261,10 @@ async def generate_between_stream( prev_len = prompt.shape[-1] prompt = self.model.generate( - prompt, generation_config=config, stopping_criteria=stopping_criteria, **self.assistant_model_config + prompt, + generation_config=config, + stopping_criteria=stopping_criteria, + **self.assistant_model_config, )[ :, :-1 ] # skip the last token - stop token @@ -237,24 +282,40 @@ def summarization_input(function: str, signature: str, style: str) -> str: signature=signature, ) - def summarize(self, input_text: str, template: str, signature: str, style: str, parameters: Dict[str, Any]) -> str: + def summarize( + self, + input_text: str, + 
template: str, + signature: str, + style: str, + parameters: Dict[str, Any], + ) -> str: prompt = self.summarization_input(input_text, signature, style) splited_template = re.split(r"\$\{.*\}", template) splited_template[0] = prompt + splited_template[0] - return self.generate_between(splited_template, parameters, stopping_criteria=self.summarize_stopping_criteria)[ - len(prompt) : - ] + return self.generate_between( + splited_template, + parameters, + stopping_criteria=self.summarize_stopping_criteria, + )[len(prompt) :] async def summarize_stream( - self, input_text: str, template: str, signature: str, style: str, parameters: Dict[str, Any] + self, + input_text: str, + template: str, + signature: str, + style: str, + parameters: Dict[str, Any], ): prompt = self.summarization_input(input_text, signature, style) splited_template = re.split(r"\$\{.*\}", template) splited_template = [prompt] + splited_template async for token in self.generate_between_stream( - splited_template, parameters, stopping_criteria=self.summarize_stopping_criteria + splited_template, + parameters, + stopping_criteria=self.summarize_stopping_criteria, ): yield token diff --git a/modules/openvino_code/shared/device.ts b/modules/openvino_code/shared/device.ts new file mode 100644 index 000000000..508fa5330 --- /dev/null +++ b/modules/openvino_code/shared/device.ts @@ -0,0 +1,25 @@ +import { Features } from './features'; + +enum DeviceId { + CPU = 'CPU', + GPU = 'GPU', + NPU = 'NPU', + } + +export enum DeviceName { + CPU = 'CPU', + GPU = 'GPU', + NPU = 'NPU', + } + +export const DEVICE_NAME_TO_ID_MAP: Record = { + [DeviceName.CPU]: DeviceId.CPU, + [DeviceName.GPU]: DeviceId.GPU, + [DeviceName.NPU]: DeviceId.NPU, +}; + +export const DEVICE_SUPPORTED_FEATURES: Record = { + [DeviceName.CPU]: [Features.CODE_COMPLETION, Features.SUMMARIZATION, Features.FIM], + [DeviceName.GPU]: [Features.CODE_COMPLETION, Features.SUMMARIZATION, Features.FIM], + [DeviceName.NPU]: [Features.CODE_COMPLETION, Features.SUMMARIZATION, Features.FIM], +}; diff --git a/modules/openvino_code/shared/side-panel-message.ts b/modules/openvino_code/shared/side-panel-message.ts index 0c0720d6d..2ebdf20b6 100644 --- a/modules/openvino_code/shared/side-panel-message.ts +++ b/modules/openvino_code/shared/side-panel-message.ts @@ -10,6 +10,7 @@ export enum SidePanelMessageTypes { GENERATE_COMPLETION_CLICK = `${sidePanelMessagePrefix}.generateCompletionClick`, SETTINGS_CLICK = `${sidePanelMessagePrefix}.settingsClick`, MODEL_CHANGE = `${sidePanelMessagePrefix}.modelChange`, + DEVICE_CHANGE = `${sidePanelMessagePrefix}.deviceChange`, } export interface ISidePanelMessage
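The two maps in `shared/device.ts` are meant to be read together: the UI works with `DeviceName`, the server process receives the corresponding `DeviceId`, and `DEVICE_SUPPORTED_FEATURES` drives what the side panel advertises for the selection. A short usage sketch, not part of the patch, using only symbols defined above:

```ts
import { DEVICE_NAME_TO_ID_MAP, DEVICE_SUPPORTED_FEATURES, DeviceName } from '@shared/device';
import { Features } from '@shared/features';

// UI-facing name -> id handed to the server CLI (currently identical strings).
const deviceId = DEVICE_NAME_TO_ID_MAP[DeviceName.GPU]; // 'GPU'

// Gate a feature on the selected device before enabling it in the panel.
const fimSupported = DEVICE_SUPPORTED_FEATURES[DeviceName.GPU].includes(Features.FIM);
```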

diff --git a/modules/openvino_code/shared/side-panel-message.ts b/modules/openvino_code/shared/side-panel-message.ts
index 0c0720d6d..2ebdf20b6 100644
--- a/modules/openvino_code/shared/side-panel-message.ts
+++ b/modules/openvino_code/shared/side-panel-message.ts
@@ -10,6 +10,7 @@ export enum SidePanelMessageTypes {
   GENERATE_COMPLETION_CLICK = `${sidePanelMessagePrefix}.generateCompletionClick`,
   SETTINGS_CLICK = `${sidePanelMessagePrefix}.settingsClick`,
   MODEL_CHANGE = `${sidePanelMessagePrefix}.modelChange`,
+  DEVICE_CHANGE = `${sidePanelMessagePrefix}.deviceChange`,
 }
 
 export interface ISidePanelMessage {
diff --git a/modules/openvino_code/side-panel-ui/src/components/sections/ServerSection/DeviceSelect/DeviceSelect.tsx b/modules/openvino_code/side-panel-ui/src/components/sections/ServerSection/DeviceSelect/DeviceSelect.tsx
new file mode 100644
index 000000000..6532fe926
--- /dev/null
+++ b/modules/openvino_code/side-panel-ui/src/components/sections/ServerSection/DeviceSelect/DeviceSelect.tsx
@@ -0,0 +1,41 @@
+//import { ModelName } from '@shared/model';
+import { DeviceName } from '@shared/device';
+import { Select, SelectOptionProps } from '../../../shared/Select/Select';
+import { ServerStatus } from '@shared/server-state';
+import { Features } from '@shared/features';
+
+const options: SelectOptionProps<DeviceName>[] = [
+  { value: DeviceName.CPU },
+  { value: DeviceName.GPU },
+  { value: DeviceName.NPU },
+];
+
+interface DeviceSelectProps {
+  disabled: boolean;
+  selectedDeviceName: DeviceName;
+  onChange: (deviceName: DeviceName) => void;
+  supportedFeatures: Features[];
+  serverStatus: ServerStatus;
+}
+
+export const DeviceSelect = ({
+  disabled,
+  selectedDeviceName,
+  onChange,
+  supportedFeatures,
+  serverStatus,
+}: DeviceSelectProps): JSX.Element => {
+  const isServerStopped = serverStatus === ServerStatus.STOPPED;
+  return (
+    <>
+      <Select
+        label="Device"
+        options={options}
+        selectedValue={selectedDeviceName}
+        disabled={disabled}
+        onChange={(value) => onChange(value)}
+      ></Select>
+      {isServerStopped && <span>Supported Features: {supportedFeatures.join(', ')}</span>}
+    </>
+  );
+};
diff --git a/modules/openvino_code/side-panel-ui/src/components/sections/ServerSection/ServerSection.tsx b/modules/openvino_code/side-panel-ui/src/components/sections/ServerSection/ServerSection.tsx
index 68ed0ea33..5895fcf52 100644
--- a/modules/openvino_code/side-panel-ui/src/components/sections/ServerSection/ServerSection.tsx
+++ b/modules/openvino_code/side-panel-ui/src/components/sections/ServerSection/ServerSection.tsx
@@ -7,6 +7,8 @@ import { ServerStatus } from './ServerStatus/ServerStatus';
 import './ServerSection.css';
 import { ModelSelect } from './ModelSelect/ModelSelect';
 import { ModelName } from '@shared/model';
+import { DeviceSelect } from './DeviceSelect/DeviceSelect';
+import { DeviceName } from '@shared/device';
 
 interface ServerSectionProps {
   state: IExtensionState | null;
@@ -46,6 +48,15 @@ export function ServerSection({ state }: ServerSectionProps): JSX.Element {
     });
   };
 
+  const handleDeviceChange = (deviceName: DeviceName) => {
+    vscode.postMessage({
+      type: SidePanelMessageTypes.DEVICE_CHANGE,
+      payload: {
+        deviceName,
+      },
+    });
+  };
+
   if (!state) {
     return <>Extension state is not available</>;
   }
@@ -64,6 +75,13 @@ export function ServerSection({ state }: ServerSectionProps): JSX.Element {
         supportedFeatures={state.features.supportedList}
         serverStatus={state.server.status}
       ></ModelSelect>
+      <DeviceSelect
+        disabled={!isServerStopped}
+        selectedDeviceName={state.config.device}
+        onChange={handleDeviceChange}
+        supportedFeatures={state.features.supportedList}
+        serverStatus={state.server.status}
+      ></DeviceSelect>
diff --git a/modules/openvino_code/src/configuration.ts b/modules/openvino_code/src/configuration.ts
index bd76a32eb..e41e4bc5f 100644
--- a/modules/openvino_code/src/configuration.ts
+++ b/modules/openvino_code/src/configuration.ts
@@ -1,4 +1,5 @@
 import { ModelName } from '@shared/model';
+import { DeviceName } from '@shared/device';
 import { WorkspaceConfiguration, workspace } from 'vscode';
 
 import { CONFIG_KEY } from './constants';
@@ -7,6 +8,7 @@ import { CONFIG_KEY } from './constants';
  */
 export type CustomConfiguration = {
   model: ModelName;
+  device: DeviceName;
   serverUrl: string;
   serverRequestTimeout: number;
   streamInlineCompletion: boolean;
diff --git a/modules/openvino_code/src/python-server/python-server-runner.ts b/modules/openvino_code/src/python-server/python-server-runner.ts
index a65afa10b..a13f82dc7 100644
--- a/modules/openvino_code/src/python-server/python-server-runner.ts
+++ b/modules/openvino_code/src/python-server/python-server-runner.ts
@@ -13,6 +13,7 @@ import { join } from 'path';
 import { MODEL_NAME_TO_ID_MAP, ModelName } from '@shared/model';
 import { extensionState } from '../state';
 import { clearLruCache } from '../lru-cache.decorator';
+import { DEVICE_NAME_TO_ID_MAP, DeviceName } from '@shared/device';
 
 const SERVER_STARTED_STDOUT_ANCHOR = 'OpenVINO Code Server started';
 
@@ -20,7 +21,7 @@ interface ServerHooks {
   onStarted: () => void;
 }
 
-async function runServer(modelName: ModelName, config: PythonServerConfiguration, hooks?: ServerHooks) {
+async function runServer(modelName: ModelName, deviceName: DeviceName, config: PythonServerConfiguration, hooks?: ServerHooks) {
   const { serverDir, proxyEnv, abortSignal, logger } = config;
 
   logger.info('Starting server...');
@@ -40,8 +41,9 @@
   }
 
   const model = MODEL_NAME_TO_ID_MAP[modelName];
+  const device = DEVICE_NAME_TO_ID_MAP[deviceName];
 
-  await spawnCommand(venvPython, ['main.py', '--model', model], {
+  await spawnCommand(venvPython, ['main.py', '--model', model, '--device', device], {
     logger,
     cwd: serverDir,
     abortSignal,
@@ -149,8 +151,9 @@ export class NativePythonServerRunner {
     this._stateController.setStage(ServerStartingStage.START_SERVER);
 
     const modelName = extensionState.config.model;
+    const deviceName = extensionState.config.device;
 
-    await runServer(modelName, config, {
+    await runServer(modelName, deviceName, config, {
       onStarted: () => {
         this._stateController.setStatus(ServerStatus.STARTED);
         this._stateController.setStage(null);
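Net effect of the runner changes: the launch arguments now carry a device id next to the model id. A sketch of the resulting invocation, not part of the patch; `<model-id>` is a placeholder, and `main.py` is assumed to accept `--device`, as the spawn call implies:

```ts
import { DEVICE_NAME_TO_ID_MAP, DeviceName } from '@shared/device';

// Inside runServer, ids resolved from the shared maps feed the CLI.
// With device 'NPU' the server is effectively launched as:
//   <venv python> main.py --model <model-id> --device NPU
const device = DEVICE_NAME_TO_ID_MAP[DeviceName.NPU];
const args = ['main.py', '--model', '<model-id>', '--device', device];
```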
diff --git a/modules/openvino_code/src/side-panel/side-panel-message-handler.ts b/modules/openvino_code/src/side-panel/side-panel-message-handler.ts
index 35f42032b..55a63639c 100644
--- a/modules/openvino_code/src/side-panel/side-panel-message-handler.ts
+++ b/modules/openvino_code/src/side-panel/side-panel-message-handler.ts
@@ -4,6 +4,7 @@ import { Webview, commands } from 'vscode';
 import { settingsService } from '../settings/settings.service';
 import { COMMANDS } from '../constants';
 import { ModelName } from '@shared/model';
+import { DeviceName } from '@shared/device';
 
 type SidePanelMessageHandlerType = (webview: Webview, payload?: ISidePanelMessage['payload']) => void;
 
@@ -12,6 +13,8 @@ const sidePanelMessageHandlers: Record<SidePanelMessageTypes, SidePanelMessageHandlerType> = {
   [SidePanelMessageTypes.SETTINGS_CLICK]: () => settingsService.openSettings(),
   [SidePanelMessageTypes.MODEL_CHANGE]: (_, payload) =>
     settingsService.updateSetting('model', (payload as { modelName: ModelName }).modelName),
+  [SidePanelMessageTypes.DEVICE_CHANGE]: (_, payload) =>
+    settingsService.updateSetting('device', (payload as { deviceName: DeviceName }).deviceName),
   [SidePanelMessageTypes.START_SERVER_CLICK]: () => void commands.executeCommand(COMMANDS.START_SERVER_NATIVE),
   [SidePanelMessageTypes.STOP_SERVER_CLICK]: () => void commands.executeCommand(COMMANDS.STOP_SERVER_NATIVE),
   [SidePanelMessageTypes.SHOW_SERVER_LOG_CLICK]: () => void commands.executeCommand(COMMANDS.SHOW_SERVER_LOG),
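Putting the pieces together, a device change travels: side-panel webview, then a `DEVICE_CHANGE` message, then the handler above, then `settingsService.updateSetting('device', …)`, then `extensionState.config.device`, and finally the `--device` flag on the next server start. A condensed sketch of the webview leg, not part of the patch; the `vscode` handle's shape is an assumption mirroring how the panel already posts `MODEL_CHANGE`:

```ts
import { SidePanelMessageTypes } from '@shared/side-panel-message';
import { DeviceName } from '@shared/device';

// The webview API handle ServerSection.tsx already uses (assumed shape).
declare const vscode: { postMessage(message: unknown): void };

// Same message shape the new handleDeviceChange posts.
vscode.postMessage({
  type: SidePanelMessageTypes.DEVICE_CHANGE, // `${sidePanelMessagePrefix}.deviceChange`
  payload: { deviceName: DeviceName.GPU },
});
```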