Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Evaluator] Blueprint #1382

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/deepsparse/eval/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
104 changes: 104 additions & 0 deletions src/deepsparse/eval/evaluator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The main entrypoint for evaluating a target
on a requested dataset
"""

from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Union

from pipeline import DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, Pipeline
from src.deepsparse.eval.registry import EVAL_REGISTRY


@dataclass
class Metric:
    """A single evaluation metric: its identifier and scalar value."""

    # e.g. "accuracy" — TODO confirm naming convention with integrations
    type: str
    value: float


@dataclass
class Dataset:
    """Identifies the dataset an evaluation was run on."""

    # presumably mirrors Hugging Face datasets terminology — verify with callers
    type: str
    name: str
    config: str
    split: str


@dataclass
class EvalSample:
    """A single (input, output) pair collected during evaluation."""

    input: Any
    output: Any


@dataclass
class Evaluation:
    """The full result of evaluating a target on one dataset/task."""

    # TODO: How to handle serialization of the
    # data structure (to yaml and json)
    task: str
    dataset: Dataset
    metrics: List[Metric]
    samples: List[EvalSample]


def evaluate(
    target: str,
    datasets: Union[str, List[str]],
    integration: str,
    # NOTE: DEEPSPARSE_ENGINE / ORT_ENGINE / TORCHSCRIPT_ENGINE are string
    # constants, not classes, so the previous annotation
    # Union[None, DEEPSPARSE_ENGINE, ...] was not a valid type expression;
    # the engine type is simply an optional string taking one of those values
    engine_type: Optional[str] = None,
    batch_size: int = 1,
    target_args: Optional[Dict] = None,
    engine_args: Optional[Dict] = None,
    splits=None,
    metrics=None,
    **kwargs,
) -> List[Evaluation]:
    """
    The main entrypoint for evaluating a target on the requested datasets
    using the chosen evaluation integration.

    :param target: The target to evaluate. Can be a path to
        a sparsezoo stub, hugging face path, or a path to a
        local directory containing a model file
    :param datasets: The datasets to evaluate on. Can be a string
        for a single dataset or a list of strings for multiple datasets.
    :param integration: The name of the evaluation integration to use.
        Must be a valid integration name from the EVAL_REGISTRY.
    :param engine_type: The engine to use for the evaluation. One of
        DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, or None.
    :param batch_size: The batch size to use for the evaluation.
    :param target_args: The arguments to alter the
        behavior of the evaluated target.
    :param engine_args: The arguments to pass to the engine.
    :param splits: The dataset splits to evaluate on
        (type TBD — see TODO below).
    :param metrics: The metrics to compute
        (type TBD — see TODO below).
    :param kwargs: Additional arguments to pass to the evaluation integration.
    :return: A list of Evaluation objects containing the results of the evaluation.
    """

    # TODO: Implement a function that checks for valid target
    # TODO: Implement EVAL_REGISTRY
    # TODO: Implement a function that checks for valid engine_type
    # TODO: Clarify the type of missing arguments (splits, metrics)

    # Resolve the integration callable appropriate for this target/dataset pair
    eval_integration = EVAL_REGISTRY.get(target, integration, datasets)

    return eval_integration(
        target=target,
        target_args=target_args,
        datasets=datasets,
        splits=splits,
        metrics=metrics,
        batch_size=batch_size,
        engine_type=engine_type,
        engine_args=engine_args,
        **kwargs,
    )
13 changes: 13 additions & 0 deletions src/deepsparse/eval/integrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
105 changes: 105 additions & 0 deletions src/deepsparse/eval/integrations/llm_evaluation_harness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

from transformers import AutoModelForCausalLM

from deepsparse import Pipeline
from lm_eval import evaluator
from lm_eval.base import BaseLM


def integration_eval(
    target,
    target_args,
    datasets,
    batch_size,
    splits=None,
    metrics=None,
    engine_type=None,
    engine_args=None,
    **kwargs,
):
    """
    Run the lm-evaluation-harness over ``target`` on the given datasets.

    :param target: model identifier/path, forwarded to initialize_model
    :param target_args: arguments for the evaluated target; used as the
        default ``model_args`` for the harness
    :param datasets: dataset/task names; used as the default ``tasks``
    :param batch_size: batch size forwarded to the harness
    :param splits: currently unused — TODO wire into the harness
    :param metrics: currently unused — TODO wire into the harness
    :param engine_type: currently unused — TODO wire into model creation
    :param engine_args: currently unused — TODO wire into model creation
    :param kwargs: extra keyword arguments forwarded to
        ``evaluator.simple_evaluate`` (may override model_args/tasks)
    :return: True on completion
    """
    model = initialize_model(target, target_args)

    # pop() instead of get(): if "model_args" or "tasks" were left inside
    # kwargs while also being passed explicitly below, the **kwargs expansion
    # would raise a duplicate-keyword-argument TypeError
    model_args = kwargs.pop("model_args", target_args)
    tasks = kwargs.pop("tasks", datasets)

    evaluator.simple_evaluate(
        model=model,
        model_args=model_args,
        tasks=tasks,
        batch_size=batch_size,
        **kwargs,
    )

    return True


class DeepSparseLM(BaseLM):
    """
    lm-evaluation-harness adapter around a DeepSparse text-generation Pipeline.

    NOTE(review): several BaseLM hooks below are unimplemented stubs
    (eot_token, eot_token_id, max_gen_toks, _loglikelihood_tokens) — they
    currently return None; confirm the harness code paths used never need them.
    """

    def __init__(self, stub, max_length: Optional[int] = None):
        # Create a text-generation pipeline from a model stub/path
        self.model = Pipeline.create(task="text_generation", model_path=stub)

        self.default_max_length = 1024
        self._max_length = max_length

    @classmethod
    def from_pipeline(cls, pipeline: Pipeline, max_length: Optional[int] = None):
        """
        Wrap an already-constructed Pipeline.

        Fix: the previous implementation passed the pipeline to ``__init__``
        as the ``stub`` argument, which would have attempted to create a new
        pipeline from it. Bypass __init__ and attach the pipeline directly.
        """
        lm = cls.__new__(cls)
        lm.model = pipeline
        lm.default_max_length = 1024
        lm._max_length = max_length
        return lm

    @property
    def batch_size(self):
        # NOTE(review): relies on the pipeline's private _batch_size attribute
        # — confirm a public accessor exists
        return self.model._batch_size

    @property
    def eot_token(self) -> str:
        # TODO: unimplemented stub — returns None
        pass

    def _model_call(self, inps):
        # Isn't used because we override _loglikelihood_tokens
        raise NotImplementedError()

    def _model_generate(self, context, max_length, eos_token_id):
        # Isn't used because we override greedy_until
        raise NotImplementedError()

    def _loglikelihood_tokens(self, requests, **kwargs):
        # TODO: unimplemented stub — returns None
        pass

    @property
    def device(self):
        # DeepSparse executes on CPU
        return "cpu"

    @property
    def eot_token_id(self) -> int:
        # TODO: unimplemented stub — returns None
        pass

    @property
    def max_gen_toks(self):
        # TODO: unimplemented stub — returns None
        pass

    @property
    def max_length(self):
        # User-supplied limit, falling back to the 1024-token default
        return self._max_length or self.default_max_length

    def tok_encode(self, string: str):
        return self.model.tokenizer.encode(string)

    def tok_decode(self, tokens):
        # Fix: was self.tokenizer, an attribute this class never defines
        # (AttributeError); the tokenizer lives on the wrapped pipeline
        return self.model.tokenizer.decode(tokens)


def initialize_model(target, target_args):
    """
    Load the model to be evaluated for ``target``.

    Fix: ``target_args`` was previously accepted but silently ignored; it is
    now forwarded to ``from_pretrained`` as keyword overrides (a None value
    still behaves exactly as before).

    :param target: model identifier or local path understood by transformers
    :param target_args: optional dict of keyword arguments for from_pretrained
    :return: the loaded model
    """
    # TODO: intended to create Union[DeepSparseLM, torch.nn.Module] depending
    # on the target; currently always loads a Hugging Face causal LM
    return AutoModelForCausalLM.from_pretrained(target, **(target_args or {}))
Binary file not shown.
13 changes: 13 additions & 0 deletions tests/deepsparse/eval/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
13 changes: 13 additions & 0 deletions tests/deepsparse/eval/integrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
26 changes: 26 additions & 0 deletions tests/deepsparse/eval/integrations/test_llm_evaluation_harness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from src.deepsparse.eval.integrations.llm_evaluation_harness import integration_eval


@pytest.mark.parametrize(
    "target,datasets, batch_size",
    [("roneneldan/TinyStories-1M", ["hellaswag"], 1)],
)
def test_integration_eval(target, datasets, batch_size):
    """Smoke-test integration_eval on a tiny model over a small task sample."""
    # Fix: batch_size was hard-coded to 1, silently ignoring the parametrized
    # value; pass the fixture through so new parametrize cases take effect
    assert integration_eval(
        target=target,
        datasets=datasets,
        batch_size=batch_size,
        target_args=None,
        limit=5,  # evaluate only 5 samples to keep the test fast
    )
Loading