[OPENVINO-CODE] add-device-options #895
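Summary (reconstructed from the diff below): this PR adds a device selection option (CPU, GPU, or NPU) to the OpenVINO Code extension: a new openvinoCode.device setting, a shared device module with device-to-feature maps, a DeviceSelect side-panel component, and a deviceChange message type. It also bumps the extension to 0.0.14, moves the server to openvino 2024.0.0, and updates the black/ruff configuration while reformatting the server sources.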

Merged Apr 3, 2024 (62 commits)

Commits (all by kumarijy):
e8dc030  add-device-options (Mar 26, 2024)
8121c20  add device and device select folder (Mar 26, 2024)
899ad97  ignore lint (Mar 26, 2024)
66d5212  remove ModelName (Mar 26, 2024)
e4e3316  remove ModelName (Mar 26, 2024)
9536d7d  fix lines imports (Mar 26, 2024)
b4886bb  fix lint ignores (Mar 26, 2024)
1f7d102  fix lint isort (Mar 26, 2024)
ee0cc6b  add lint fix (Mar 26, 2024)
4843b15  add logging in main.py (Mar 26, 2024)
c538e70  add logging in main.py (Mar 26, 2024)
2425027  change import order in main.py (Mar 26, 2024)
3adf15a  change import order in main.py (Mar 26, 2024)
cf08049  change import order in main.py (Mar 26, 2024)
eb19b2a  change import order in main.py (Mar 26, 2024)
3cc252d  change import order in main.py (Mar 26, 2024)
f31fb24  change import order in main.py (Mar 26, 2024)
3091bdd  update pyproject.toml (Mar 26, 2024)
8a92dfd  update pyproject.toml (Mar 26, 2024)
9e70a6b  npm lint fix (Mar 26, 2024)
c4058cf  updating package-json (Mar 26, 2024)
3120a2b  modify pyproject.toml (Mar 26, 2024)
b5ec305  updating pyproject.toml (Mar 26, 2024)
697e3c5  changed main.py (Mar 27, 2024)
b08812c  add features to CPU device (Mar 27, 2024)
b7c5e2b  update ruff config (Mar 27, 2024)
dcaa095  update main.py (Mar 27, 2024)
4b63cc2  update main.py (Mar 27, 2024)
cf73629  update main.py (Mar 27, 2024)
4a59b72  update main.py (Mar 27, 2024)
aa98410  update main.py (Mar 27, 2024)
5e1f65d  update main.py (Mar 27, 2024)
fe15530  update main.py (Mar 27, 2024)
26f5c28  update main.py (Mar 27, 2024)
e566094  update main.py (Mar 27, 2024)
25d1214  update toml file (Mar 27, 2024)
39d584b  update toml file (Mar 27, 2024)
78453af  update toml file (Mar 27, 2024)
e8288a7  update toml file (Mar 27, 2024)
a0c34a5  update toml file (Mar 27, 2024)
00c0498  update toml file (Mar 27, 2024)
269a2f5  update toml file (Mar 27, 2024)
c0d0bef  update toml file (Mar 27, 2024)
859b6b4  update main py (Mar 28, 2024)
a08e01c  update toml file (Mar 28, 2024)
b3f7948  update toml file (Mar 28, 2024)
124da51  update toml file (Mar 28, 2024)
4d9aa07  update toml file (Mar 28, 2024)
1218502  update toml file (Mar 28, 2024)
8e7e109  update toml file (Mar 28, 2024)
5360f2c  update main py (Mar 28, 2024)
eb64b61  update toml file (Mar 28, 2024)
d9c53e4  update toml file (Mar 28, 2024)
fa94818  update toml file (Mar 28, 2024)
f77d2ba  update toml file and main.py (Mar 29, 2024)
b611ca3  update and reformat main.py (Mar 29, 2024)
17253d0  update and reformat main.py (Mar 29, 2024)
3f31661  update and reformat main.py (Mar 29, 2024)
a0730fa  black reformatted files (Mar 29, 2024)
6e677a2  reformatting results (Apr 2, 2024)
cfe128d  openvino code complete version change (Apr 2, 2024)
e616faf  updated code complete version in package.json (Apr 2, 2024)
10 changes: 5 additions & 5 deletions modules/openvino_code/package-lock.json

(Generated file; diff not rendered.)

14 changes: 13 additions & 1 deletion modules/openvino_code/package.json
@@ -1,7 +1,7 @@
 {
   "publisher": "OpenVINO",
   "name": "openvino-code-completion",
-  "version": "0.0.13",
+  "version": "0.0.14",
   "displayName": "OpenVINO Code Completion",
   "description": "VSCode extension for AI code completion with OpenVINO",
   "icon": "media/logo.png",
@@ -57,6 +57,7 @@
     "vsce:publish": "vsce publish",
     "ovsx:publish": "ovsx publish",
     "clear-out": "rimraf ./out"
+
   },
   "devDependencies": {
     "@types/glob": "8.1.0",
@@ -200,6 +201,17 @@
       ],
       "description": "Which model to use for code generation."
     },
+    "openvinoCode.device": {
+      "order": 1,
+      "type": "string",
+      "default": "CPU",
+      "enum":[
+        "CPU",
+        "GPU",
+        "NPU"
+      ],
+      "description": "Which device to use for code generation"
+    },
     "openvinoCode.serverUrl": {
       "order": 1,
       "type": "string",
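The new openvinoCode.device contribution surfaces as a standard VS Code setting. A minimal sketch of how the extension side could read it, assuming the section name matches the contribution above; the getSelectedDevice helper and the 'CPU' fallback are illustrative, not code from this PR:

import * as vscode from 'vscode';

// Sketch: read the contributed device setting, falling back to the
// declared default of 'CPU' when the user has not set a value.
export function getSelectedDevice(): string {
  const config = vscode.workspace.getConfiguration('openvinoCode');
  return config.get<string>('device', 'CPU');
}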
17 changes: 11 additions & 6 deletions modules/openvino_code/server/pyproject.toml
@@ -11,7 +11,7 @@ dependencies = [
     'torch @ https://download.pytorch.org/whl/cpu-cxx11-abi/torch-2.0.1%2Bcpu.cxx11.abi-cp310-cp310-linux_x86_64.whl ; sys_platform=="linux" and python_version == "3.10"',
     'torch @ https://download.pytorch.org/whl/cpu-cxx11-abi/torch-2.0.1%2Bcpu.cxx11.abi-cp311-cp311-linux_x86_64.whl ; sys_platform=="linux" and python_version == "3.11"',
     'torch ; sys_platform != "linux"',
-    'openvino==2023.3.0',
+    'openvino==2024.0.0',
     'transformers==4.36.0',
     'optimum==1.17.1',
     'optimum-intel[openvino]==1.15.0',
@@ -27,13 +27,18 @@ build-backend = "setuptools.build_meta"
 
 [tool.black]
 line-length = 119
-target-versions = ["py38", "py39", "py310", "py311"]
+target-version = ['py38', 'py39', 'py310', 'py311']
+unstable = true
+preview = true
 
 [tool.ruff]
-ignore = ["C901", "E501", "E741", "W605"]
-select = ["C", "E", "F", "I", "W"]
+lint.ignore = ["C901", "E501", "E741", "W605", "F401", "W292"]
+lint.select = ["C", "E", "F", "I", "W"]
+lint.extend-safe-fixes = ["F601"]
+lint.extend-unsafe-fixes = ["UP034"]
+lint.fixable = ["F401"]
 line-length = 119
 
-[tool.ruff.isort]
+[tool.ruff.lint.isort]
 lines-after-imports = 2
18 changes: 15 additions & 3 deletions modules/openvino_code/server/src/app.py
@@ -114,7 +114,11 @@ async def generate_stream(
     generation_request = TypeAdapter(GenerationRequest).validate_python(await request.json())
     logger.info(generation_request)
     return StreamingResponse(
-        generator.generate_stream(generation_request.inputs, generation_request.parameters.model_dump(), request)
+        generator.generate_stream(
+            generation_request.inputs,
+            generation_request.parameters.model_dump(),
+            request,
+        )
     )


@@ -127,7 +131,11 @@ async def summarize(
 
     start = perf_counter()
     generated_text: str = generator.summarize(
-        request.inputs, request.template, request.definition, request.format, request.parameters.model_dump()
+        request.inputs,
+        request.template,
+        request.definition,
+        request.format,
+        request.parameters.model_dump(),
     )
     stop = perf_counter()

@@ -148,6 +156,10 @@ async def summarize_stream(
     logger.info(request)
     return StreamingResponse(
         generator.summarize_stream(
-            request.inputs, request.template, request.definition, request.format, request.parameters.model_dump()
+            request.inputs,
+            request.template,
+            request.definition,
+            request.format,
+            request.parameters.model_dump(),
         )
     )
91 changes: 76 additions & 15 deletions modules/openvino_code/server/src/generators.py
@@ -5,7 +5,17 @@
 from pathlib import Path
 from threading import Thread
 from time import time
-from typing import Any, Callable, Container, Dict, Generator, List, Optional, Type, Union
+from typing import (
+    Any,
+    Callable,
+    Container,
+    Dict,
+    Generator,
+    List,
+    Optional,
+    Type,
+    Union,
+)
 
 import torch
 from fastapi import Request
@@ -53,11 +63,20 @@ def get_model(checkpoint: str, device: str = "CPU") -> OVModel:
     model_class = get_model_class(checkpoint)
     try:
         model = model_class.from_pretrained(
-            checkpoint, ov_config=ov_config, compile=False, device=device, trust_remote_code=True
+            checkpoint,
+            ov_config=ov_config,
+            compile=False,
+            device=device,
+            trust_remote_code=True,
         )
     except EntryNotFoundError:
         model = model_class.from_pretrained(
-            checkpoint, ov_config=ov_config, export=True, compile=False, device=device, trust_remote_code=True
+            checkpoint,
+            ov_config=ov_config,
+            export=True,
+            compile=False,
+            device=device,
+            trust_remote_code=True,
         )
     model.save_pretrained(model_path)
     model.compile()
@@ -75,10 +94,24 @@ def __call__(self, input_text: str, parameters: Dict[str, Any]) -> str:
     async def generate_stream(self, input_text: str, parameters: Dict[str, Any], request: Request):
         raise NotImplementedError
 
-    def summarize(self, input_text: str, template: str, signature: str, style: str, parameters: Dict[str, Any]):
+    def summarize(
+        self,
+        input_text: str,
+        template: str,
+        signature: str,
+        style: str,
+        parameters: Dict[str, Any],
+    ):
         raise NotImplementedError
 
-    def summarize_stream(self, input_text: str, template: str, signature: str, style: str, parameters: Dict[str, Any]):
+    def summarize_stream(
+        self,
+        input_text: str,
+        template: str,
+        signature: str,
+        style: str,
+        parameters: Dict[str, Any],
+    ):
         raise NotImplementedError


@@ -128,13 +161,19 @@ def __call__(self, input_text: str, parameters: Dict[str, Any]) -> str:
         prompt_len = input_ids.shape[-1]
         config = GenerationConfig.from_dict({**self.generation_config.to_dict(), **parameters})
         output_ids = self.model.generate(
-            input_ids, generation_config=config, stopping_criteria=stopping_criteria, **self.assistant_model_config
+            input_ids,
+            generation_config=config,
+            stopping_criteria=stopping_criteria,
+            **self.assistant_model_config,
         )[0][prompt_len:]
         logger.info(f"Number of input tokens: {prompt_len}; generated {len(output_ids)} tokens")
         return self.tokenizer.decode(output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
 
     async def generate_stream(
-        self, input_text: str, parameters: Dict[str, Any], request: Optional[Request] = None
+        self,
+        input_text: str,
+        parameters: Dict[str, Any],
+        request: Optional[Request] = None,
     ) -> Generator[str, None, None]:
         input_ids = self.tokenizer.encode(input_text, return_tensors="pt")
         streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
@@ -192,7 +231,10 @@ def generate_between(
             prev_len = prompt.shape[-1]
 
             prompt = self.model.generate(
-                prompt, generation_config=config, stopping_criteria=stopping_criteria, **self.assistant_model_config
+                prompt,
+                generation_config=config,
+                stopping_criteria=stopping_criteria,
+                **self.assistant_model_config,
             )[
                 :, :-1
             ]  # skip the last token - stop token
@@ -219,7 +261,10 @@ async def generate_between_stream(
             prev_len = prompt.shape[-1]
 
             prompt = self.model.generate(
-                prompt, generation_config=config, stopping_criteria=stopping_criteria, **self.assistant_model_config
+                prompt,
+                generation_config=config,
+                stopping_criteria=stopping_criteria,
+                **self.assistant_model_config,
             )[
                 :, :-1
             ]  # skip the last token - stop token
@@ -237,24 +282,40 @@ def summarization_input(function: str, signature: str, style: str) -> str:
             signature=signature,
         )
 
-    def summarize(self, input_text: str, template: str, signature: str, style: str, parameters: Dict[str, Any]) -> str:
+    def summarize(
+        self,
+        input_text: str,
+        template: str,
+        signature: str,
+        style: str,
+        parameters: Dict[str, Any],
+    ) -> str:
         prompt = self.summarization_input(input_text, signature, style)
         splited_template = re.split(r"\$\{.*\}", template)
         splited_template[0] = prompt + splited_template[0]
 
-        return self.generate_between(splited_template, parameters, stopping_criteria=self.summarize_stopping_criteria)[
-            len(prompt) :
-        ]
+        return self.generate_between(
+            splited_template,
+            parameters,
+            stopping_criteria=self.summarize_stopping_criteria,
+        )[len(prompt) :]
 
     async def summarize_stream(
-        self, input_text: str, template: str, signature: str, style: str, parameters: Dict[str, Any]
+        self,
+        input_text: str,
+        template: str,
+        signature: str,
+        style: str,
+        parameters: Dict[str, Any],
     ):
         prompt = self.summarization_input(input_text, signature, style)
         splited_template = re.split(r"\$\{.*\}", template)
         splited_template = [prompt] + splited_template
 
         async for token in self.generate_between_stream(
-            splited_template, parameters, stopping_criteria=self.summarize_stopping_criteria
+            splited_template,
+            parameters,
+            stopping_criteria=self.summarize_stopping_criteria,
         ):
             yield token

25 changes: 25 additions & 0 deletions modules/openvino_code/shared/device.ts
@@ -0,0 +1,25 @@
+import { Features } from './features';
+
+enum DeviceId {
+  CPU = 'CPU',
+  GPU = 'GPU',
+  NPU = 'NPU',
+}
+
+export enum DeviceName {
+  CPU = 'CPU',
+  GPU = 'GPU',
+  NPU = 'NPU',
+}
+
+export const DEVICE_NAME_TO_ID_MAP: Record<DeviceName, DeviceId> = {
+  [DeviceName.CPU]: DeviceId.CPU,
+  [DeviceName.GPU]: DeviceId.GPU,
+  [DeviceName.NPU]: DeviceId.NPU,
+};
+
+export const DEVICE_SUPPORTED_FEATURES: Record<DeviceName, Features[]> = {
+  [DeviceName.CPU]: [Features.CODE_COMPLETION, Features.SUMMARIZATION, Features.FIM],
+  [DeviceName.GPU]: [Features.CODE_COMPLETION, Features.SUMMARIZATION, Features.FIM],
+  [DeviceName.NPU]: [Features.CODE_COMPLETION, Features.SUMMARIZATION, Features.FIM],
+};
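Since all three devices currently map to the same feature list, a lookup helper stays trivial. A small usage sketch against the module above; the isFeatureSupported helper is illustrative, not part of this PR:

import { DeviceName, DEVICE_SUPPORTED_FEATURES } from '@shared/device';
import { Features } from '@shared/features';

// Sketch: gate a UI action on whether the selected device supports a feature.
function isFeatureSupported(device: DeviceName, feature: Features): boolean {
  return DEVICE_SUPPORTED_FEATURES[device].includes(feature);
}

// As the tables stand in this PR, this is true for every device.
const fimOnNpu = isFeatureSupported(DeviceName.NPU, Features.FIM);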
1 change: 1 addition & 0 deletions modules/openvino_code/shared/side-panel-message.ts
@@ -10,6 +10,7 @@ export enum SidePanelMessageTypes {
   GENERATE_COMPLETION_CLICK = `${sidePanelMessagePrefix}.generateCompletionClick`,
   SETTINGS_CLICK = `${sidePanelMessagePrefix}.settingsClick`,
   MODEL_CHANGE = `${sidePanelMessagePrefix}.modelChange`,
+  DEVICE_CHANGE = `${sidePanelMessagePrefix}.deviceChange`,
 }
 
 export interface ISidePanelMessage<P = unknown> {
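A sketch of constructing the new message. The ISidePanelMessage body is truncated in this diff, so the type/payload fields and the payload shape shown here are assumptions, as is the '@shared/side-panel-message' import path:

import { ISidePanelMessage, SidePanelMessageTypes } from '@shared/side-panel-message';

// Sketch: a device-change message as the side panel might post it.
// The { deviceName } payload shape is assumed, not defined in this PR.
const message: ISidePanelMessage<{ deviceName: string }> = {
  type: SidePanelMessageTypes.DEVICE_CHANGE,
  payload: { deviceName: 'GPU' },
};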
@@ -0,0 +1,41 @@
+//import { ModelName } from '@shared/model';
+import { DeviceName } from '@shared/device';
+import { Select, SelectOptionProps } from '../../../shared/Select/Select';
+import { ServerStatus } from '@shared/server-state';
+import { Features } from '@shared/features';
+
+const options: SelectOptionProps<DeviceName>[] = [
+  { value: DeviceName.CPU },
+  { value: DeviceName.GPU },
+  { value: DeviceName.NPU },
+];
+
+interface DeviceSelectProps {
+  disabled: boolean;
+  selectedDeviceName: DeviceName;
+  onChange: (deviceName: DeviceName) => void;
+  supportedFeatures: Features[];
+  serverStatus: ServerStatus;
+}
+
+export const DeviceSelect = ({
+  disabled,
+  selectedDeviceName,
+  onChange,
+  supportedFeatures,
+  serverStatus,
+}: DeviceSelectProps): JSX.Element => {
+  const isServerStopped = serverStatus === ServerStatus.STOPPED;
+  return (
+    <>
+      <Select
+        label="Device"
+        options={options}
+        selectedValue={selectedDeviceName}
+        disabled={disabled}
+        onChange={(value) => onChange(value)}
+      ></Select>
+      {isServerStopped && <span>Supported Features: {supportedFeatures.join(', ')}</span>}
+    </>
+  );
+};
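A parent component would own the selected device and pass it down. A minimal wiring sketch, assuming a React useState hook; the DevicePanel name, the relative import path, and the choice to disable the select while the server is not stopped are assumptions, not code from this PR:

import { useState } from 'react';
import { DeviceName, DEVICE_SUPPORTED_FEATURES } from '@shared/device';
import { ServerStatus } from '@shared/server-state';
import { DeviceSelect } from './DeviceSelect';

// Sketch: keep the selected device in local state and feed DeviceSelect.
export const DevicePanel = ({ serverStatus }: { serverStatus: ServerStatus }): JSX.Element => {
  const [device, setDevice] = useState<DeviceName>(DeviceName.CPU);
  return (
    <DeviceSelect
      disabled={serverStatus !== ServerStatus.STOPPED}
      selectedDeviceName={device}
      onChange={setDevice}
      supportedFeatures={DEVICE_SUPPORTED_FEATURES[device]}
      serverStatus={serverStatus}
    />
  );
};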