From 7a8be8b39d35ba6e71f45ba6f3e0a546bad61e57 Mon Sep 17 00:00:00 2001 From: Damian Date: Fri, 3 Nov 2023 07:52:22 +0000 Subject: [PATCH 1/3] initial commit --- src/deepsparse/eval/__init__.py | 13 ++ src/deepsparse/eval/evaluator.py | 81 ++++++++++++ src/deepsparse/eval/integrations/__init__.py | 13 ++ .../lm_cache/roneneldan/TinyStories-1M_.db | Bin 0 -> 12288 bytes .../integrations/lm_evaluation_harness.py | 118 ++++++++++++++++++ 5 files changed, 225 insertions(+) create mode 100644 src/deepsparse/eval/__init__.py create mode 100644 src/deepsparse/eval/evaluator.py create mode 100644 src/deepsparse/eval/integrations/__init__.py create mode 100644 src/deepsparse/eval/integrations/lm_cache/roneneldan/TinyStories-1M_.db create mode 100644 src/deepsparse/eval/integrations/lm_evaluation_harness.py diff --git a/src/deepsparse/eval/__init__.py b/src/deepsparse/eval/__init__.py new file mode 100644 index 0000000000..0c44f887a4 --- /dev/null +++ b/src/deepsparse/eval/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/src/deepsparse/eval/evaluator.py b/src/deepsparse/eval/evaluator.py new file mode 100644 index 0000000000..ee486a1abe --- /dev/null +++ b/src/deepsparse/eval/evaluator.py @@ -0,0 +1,81 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +The main entrypoint for evaluating a model +or a Pipeline on a requested dataset +""" + +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Union + +from pipeline import DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, Pipeline +from src.deepsparse.eval.registry import EVAL_REGISTRY + + +@dataclass +class Metric: + type: str + value: float + + +@dataclass +class Dataset: + type: str + name: str + config: str + split: str + + +@dataclass +class EvalSample: + input: Any + output: Any + + +@dataclass +class Evaluation: + task: str + dataset: Dataset + metrics: List[Metric] + samples: List[EvalSample] + + +def eval( + target: Union["Module", Pipeline], + datasets: Union[str, List[str]], + integration: str, + batch_size: int = 1, + target_args: Optional[Dict] = None, + engine: Union[None, DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE] = None, + engine_args: Optional[Dict] = None, + splits=None, + metrics=None, + **kwargs, +) -> List[Evaluation]: + # TODO: Decide on the final types of arguments later + + # TODO: Implement registry + eval_integration = EVAL_REGISTRY.resolve(target, integration, datasets) + + return eval_integration( + target=target, + target_args=target_args, + datasets=datasets, + splits=splits, + metrics=metrics, + batch_size=batch_size, + engine=engine, + engine_args=engine_args, + **kwargs, + ) diff --git a/src/deepsparse/eval/integrations/__init__.py b/src/deepsparse/eval/integrations/__init__.py new file mode 100644 index 0000000000..0c44f887a4 --- /dev/null +++ b/src/deepsparse/eval/integrations/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/src/deepsparse/eval/integrations/lm_cache/roneneldan/TinyStories-1M_.db b/src/deepsparse/eval/integrations/lm_cache/roneneldan/TinyStories-1M_.db new file mode 100644 index 0000000000000000000000000000000000000000..64ce058ea2f912dd57df07a2f112eac67bd0a4ff GIT binary patch literal 12288 zcmeI#&r8EF6u|MM$`E06w;g(U?4+QA_y=e!Ifxx<*MmEi$|4NA4mA}$`SlUfZ_e^Zjkojy~TNTAf|~ z4#PhcgieJn{MmJYeFP9d009ILKmY**5I_I{1o|Q{(}Qso>2Fs)Rko??a%C6Y5L{;_ z&W+@8k{X$Gs}nhTv0sv#$6W5Sn`xZQ<<`tk str: + pass + + def _model_call(self, inps): + # Isn't used because we override _loglikelihood_tokens + raise NotImplementedError() + + def _model_generate(self, context, max_length, eos_token_id): + # Isn't used because we override greedy_until + raise NotImplementedError() + + def _loglikelihood_tokens(self, requests, **kwargs): + pass + + @property + def device(self): + return "cpu" + + @property + def eot_token_id(self) -> int: + pass + + @property + def max_gen_toks(self): + pass + + @property + def max_length(self): + return self._max_length or self.default_max_length + + def tok_encode(self, string: str): + return self.model.tokenizer.encode(string) + + def tok_decode(self, tokens): + return self.tokenizer.decode(tokens) + + +def integration_eval( + target: Union[str, "Module"], + target_args, + datasets, + splits, + metrics, + batch_size, + engine, + engine_args, + **kwargs, +): + + if isinstance(target, str): + target = DeepSparseLM(stub=target) + else: + pass # model is a torch.Module + + results = evaluator.simple_evaluate( + model=kwargs.get("model", target), + model_args=kwargs.get("model_args", target_args), + tasks=kwargs.get("tasks", datasets), + # num_fewshot=num_fewshot, + # batch_size=batch_size, + # max_batch_size=max_batch_size, + # device=device, + # no_cache=no_cache, + # limit=limit, + # description_dict=description_dict, + # decontamination_ngrams_path=decontamination_ngrams_path, + # check_integrity=check_integrity, + # write_out=write_out, + # output_base_path=output_base_path, + **kwargs, + ) + + +if __name__ == "__main__": + target = "hf:mgoin/TinyStories-1M-deepsparse" + datasets = ["hellaswag"] + target_args = "" + limit = 2 # testing purposes + integration_eval(target=target, datasets=datasets, target_args=target_args, limit=limit, splits=None, metrics=None, batch_size=1, engine=None, engine_args=None) From edead87766b35a35781cb7aa1354011eaf80e838 Mon Sep 17 00:00:00 2001 From: Damian Date: Fri, 3 Nov 2023 09:53:57 +0000 Subject: [PATCH 2/3] add first unittest --- src/deepsparse/eval/evaluator.py | 41 ++++++++--- ...n_harness.py => llm_evaluation_harness.py} | 73 ++++++++----------- tests/deepsparse/eval/__init__.py | 13 ++++ .../deepsparse/eval/integrations/__init__.py | 13 ++++ .../test_llm_evaluation_harness.py | 26 +++++++ 5 files changed, 114 insertions(+), 52 deletions(-) rename src/deepsparse/eval/integrations/{lm_evaluation_harness.py => llm_evaluation_harness.py} (62%) create mode 100644 tests/deepsparse/eval/__init__.py create mode 100644 tests/deepsparse/eval/integrations/__init__.py create mode 100644 tests/deepsparse/eval/integrations/test_llm_evaluation_harness.py diff --git a/src/deepsparse/eval/evaluator.py b/src/deepsparse/eval/evaluator.py index ee486a1abe..98c8596771 100644 --- a/src/deepsparse/eval/evaluator.py +++ b/src/deepsparse/eval/evaluator.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -The main entrypoint for evaluating a model -or a Pipeline on a requested dataset +The main entrypoint for evaluating a target +on a requested dataset """ from dataclasses import dataclass @@ -45,28 +45,51 @@ class EvalSample: @dataclass class Evaluation: + # TODO: How to handle serialization of the + # data structure (to yaml and json) task: str dataset: Dataset metrics: List[Metric] samples: List[EvalSample] -def eval( - target: Union["Module", Pipeline], +def evaluate( + target: str, datasets: Union[str, List[str]], integration: str, + engine_type: Union[None, DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE] = None, batch_size: int = 1, target_args: Optional[Dict] = None, - engine: Union[None, DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE] = None, engine_args: Optional[Dict] = None, splits=None, metrics=None, **kwargs, ) -> List[Evaluation]: - # TODO: Decide on the final types of arguments later + """ + :param target: The target to evaluate. Can be a path to + a sparsezoo stub, hugging face path, or a path to a + local directory containing a model file + :param datasets: The datasets to evaluate on. Can be a string + for a single dataset or a list of strings for multiple datasets. + :param integration: The name of the evaluation integration to use. + Must be a valid integration name from the EVAL_REGISTRY. + :param engine_type: The engine to use for the evaluation. + :param batch_size: The batch size to use for the evaluation. + :param target_args: The arguments to alter the + behavior of the evaluated target. + :param engine_args: The arguments to pass to the engine. + :param splits: ... + :param metrics: ... + :param kwargs: Additional arguments to pass to the evaluation integration. + :return: A list of Evaluation objects containing the results of the evaluation. + """ - # TODO: Implement registry - eval_integration = EVAL_REGISTRY.resolve(target, integration, datasets) + # TODO: Implement a function that checks for valid target + # TODO: Implement EVAL_REGISTRY + # TODO: Implement a function that checks for valid engine_type + # TODO: Clarify the type of missing arguments + + eval_integration = EVAL_REGISTRY.get(target, integration, datasets) return eval_integration( target=target, @@ -75,7 +98,7 @@ def eval( splits=splits, metrics=metrics, batch_size=batch_size, - engine=engine, + engine_type=engine_type, engine_args=engine_args, **kwargs, ) diff --git a/src/deepsparse/eval/integrations/lm_evaluation_harness.py b/src/deepsparse/eval/integrations/llm_evaluation_harness.py similarity index 62% rename from src/deepsparse/eval/integrations/lm_evaluation_harness.py rename to src/deepsparse/eval/integrations/llm_evaluation_harness.py index d8cc4a146e..719c9235f9 100644 --- a/src/deepsparse/eval/integrations/lm_evaluation_harness.py +++ b/src/deepsparse/eval/integrations/llm_evaluation_harness.py @@ -14,10 +14,35 @@ from typing import Optional +from transformers import AutoModelForCausalLM + from deepsparse import Pipeline from lm_eval import evaluator from lm_eval.base import BaseLM -from typing import Union + + +def integration_eval( + target, + target_args, + datasets, + batch_size, + splits=None, + metrics=None, + engine_type=None, + engine_args=None, + **kwargs, +): + model = initialize_model(target, target_args) + + evaluator.simple_evaluate( + model=model, + model_args=kwargs.get("model_args", target_args), + tasks=kwargs.get("tasks", datasets), + batch_size=batch_size, + **kwargs, + ) + + return True class DeepSparseLM(BaseLM): @@ -74,45 +99,7 @@ def tok_decode(self, tokens): return self.tokenizer.decode(tokens) -def integration_eval( - target: Union[str, "Module"], - target_args, - datasets, - splits, - metrics, - batch_size, - engine, - engine_args, - **kwargs, -): - - if isinstance(target, str): - target = DeepSparseLM(stub=target) - else: - pass # model is a torch.Module - - results = evaluator.simple_evaluate( - model=kwargs.get("model", target), - model_args=kwargs.get("model_args", target_args), - tasks=kwargs.get("tasks", datasets), - # num_fewshot=num_fewshot, - # batch_size=batch_size, - # max_batch_size=max_batch_size, - # device=device, - # no_cache=no_cache, - # limit=limit, - # description_dict=description_dict, - # decontamination_ngrams_path=decontamination_ngrams_path, - # check_integrity=check_integrity, - # write_out=write_out, - # output_base_path=output_base_path, - **kwargs, - ) - - -if __name__ == "__main__": - target = "hf:mgoin/TinyStories-1M-deepsparse" - datasets = ["hellaswag"] - target_args = "" - limit = 2 # testing purposes - integration_eval(target=target, datasets=datasets, target_args=target_args, limit=limit, splits=None, metrics=None, batch_size=1, engine=None, engine_args=None) +def initialize_model(target, target_args): + # creates model: Union[DeepSparseLM, Module] + # given the target + return AutoModelForCausalLM.from_pretrained(target) diff --git a/tests/deepsparse/eval/__init__.py b/tests/deepsparse/eval/__init__.py new file mode 100644 index 0000000000..0c44f887a4 --- /dev/null +++ b/tests/deepsparse/eval/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/deepsparse/eval/integrations/__init__.py b/tests/deepsparse/eval/integrations/__init__.py new file mode 100644 index 0000000000..0c44f887a4 --- /dev/null +++ b/tests/deepsparse/eval/integrations/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/deepsparse/eval/integrations/test_llm_evaluation_harness.py b/tests/deepsparse/eval/integrations/test_llm_evaluation_harness.py new file mode 100644 index 0000000000..7f452859f3 --- /dev/null +++ b/tests/deepsparse/eval/integrations/test_llm_evaluation_harness.py @@ -0,0 +1,26 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from src.deepsparse.eval.integrations.llm_evaluation_harness import integration_eval + + +@pytest.mark.parametrize( + "target,datasets, batch_size", + [("roneneldan/TinyStories-1M", ["hellaswag"], 1)], +) +def test_integration_eval(target, datasets, batch_size): + out = integration_eval( + target=target, datasets=datasets, batch_size=1, target_args=None, limit=5 + ) From 627d4244257f5df8cfec782cb73912cc0f7e3117 Mon Sep 17 00:00:00 2001 From: dbogunowicz <97082108+dbogunowicz@users.noreply.github.com> Date: Fri, 3 Nov 2023 10:56:36 +0100 Subject: [PATCH 3/3] Update tests/deepsparse/eval/integrations/test_llm_evaluation_harness.py --- .../deepsparse/eval/integrations/test_llm_evaluation_harness.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/deepsparse/eval/integrations/test_llm_evaluation_harness.py b/tests/deepsparse/eval/integrations/test_llm_evaluation_harness.py index 7f452859f3..faef9ca921 100644 --- a/tests/deepsparse/eval/integrations/test_llm_evaluation_harness.py +++ b/tests/deepsparse/eval/integrations/test_llm_evaluation_harness.py @@ -21,6 +21,6 @@ [("roneneldan/TinyStories-1M", ["hellaswag"], 1)], ) def test_integration_eval(target, datasets, batch_size): - out = integration_eval( + assert integration_eval( target=target, datasets=datasets, batch_size=1, target_args=None, limit=5 )