Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Evaluator] Blueprint #1382

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/deepsparse/eval/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
104 changes: 104 additions & 0 deletions src/deepsparse/eval/evaluator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The main entrypoint for evaluating a target
on a requested dataset
"""

from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Union

from pipeline import DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, Pipeline
from src.deepsparse.eval.registry import EVAL_REGISTRY


@dataclass
class Metric:
    """A single evaluation metric: its identifier and scalar value."""

    # e.g. "accuracy" — TODO confirm naming convention with integrations
    type: str
    value: float


@dataclass
class Dataset:
    """Identifies the dataset an evaluation was run on."""

    # presumably mirrors Hugging Face datasets terminology — verify with callers
    type: str
    name: str
    config: str
    split: str


@dataclass
class EvalSample:
    """A single (input, output) pair collected during evaluation."""

    input: Any
    output: Any


@dataclass
class Evaluation:
    """The full result of evaluating a target on one dataset/task."""

    # TODO: How to handle serialization of the
    # data structure (to yaml and json)
    task: str
    dataset: Dataset
    metrics: List[Metric]
    samples: List[EvalSample]


def evaluate(
    target: str,
    datasets: Union[str, List[str]],
    integration: str,
    # NOTE: DEEPSPARSE_ENGINE / ORT_ENGINE / TORCHSCRIPT_ENGINE are string
    # constants, not classes, so the previous annotation
    # Union[None, DEEPSPARSE_ENGINE, ...] was not a valid type expression;
    # the engine type is simply an optional string taking one of those values
    engine_type: Optional[str] = None,
    batch_size: int = 1,
    target_args: Optional[Dict] = None,
    engine_args: Optional[Dict] = None,
    splits=None,
    metrics=None,
    **kwargs,
) -> List[Evaluation]:
    """
    The main entrypoint for evaluating a target on the requested datasets
    using the chosen evaluation integration.

    :param target: The target to evaluate. Can be a path to
        a sparsezoo stub, hugging face path, or a path to a
        local directory containing a model file
    :param datasets: The datasets to evaluate on. Can be a string
        for a single dataset or a list of strings for multiple datasets.
    :param integration: The name of the evaluation integration to use.
        Must be a valid integration name from the EVAL_REGISTRY.
    :param engine_type: The engine to use for the evaluation. One of
        DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE, or None.
    :param batch_size: The batch size to use for the evaluation.
    :param target_args: The arguments to alter the
        behavior of the evaluated target.
    :param engine_args: The arguments to pass to the engine.
    :param splits: The dataset splits to evaluate on
        (type TBD — see TODO below).
    :param metrics: The metrics to compute
        (type TBD — see TODO below).
    :param kwargs: Additional arguments to pass to the evaluation integration.
    :return: A list of Evaluation objects containing the results of the evaluation.
    """

    # TODO: Implement a function that checks for valid target
    # TODO: Implement EVAL_REGISTRY
    # TODO: Implement a function that checks for valid engine_type
    # TODO: Clarify the type of missing arguments (splits, metrics)

    # Resolve the integration callable appropriate for this target/dataset pair
    eval_integration = EVAL_REGISTRY.get(target, integration, datasets)

    return eval_integration(
        target=target,
        target_args=target_args,
        datasets=datasets,
        splits=splits,
        metrics=metrics,
        batch_size=batch_size,
        engine_type=engine_type,
        engine_args=engine_args,
        **kwargs,
    )
13 changes: 13 additions & 0 deletions src/deepsparse/eval/integrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
105 changes: 105 additions & 0 deletions src/deepsparse/eval/integrations/llm_evaluation_harness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

from transformers import AutoModelForCausalLM

from deepsparse import Pipeline
from lm_eval import evaluator
from lm_eval.base import BaseLM


def integration_eval(
    target,
    target_args,
    datasets,
    batch_size,
    splits=None,
    metrics=None,
    engine_type=None,
    engine_args=None,
    **kwargs,
):
    """
    Run the lm-evaluation-harness over ``target`` on the given datasets.

    :param target: model identifier/path, forwarded to initialize_model
    :param target_args: arguments for the evaluated target; used as the
        default ``model_args`` for the harness
    :param datasets: dataset/task names; used as the default ``tasks``
    :param batch_size: batch size forwarded to the harness
    :param splits: currently unused — TODO wire into the harness
    :param metrics: currently unused — TODO wire into the harness
    :param engine_type: currently unused — TODO wire into model creation
    :param engine_args: currently unused — TODO wire into model creation
    :param kwargs: extra keyword arguments forwarded to
        ``evaluator.simple_evaluate`` (may override model_args/tasks)
    :return: True on completion
    """
    model = initialize_model(target, target_args)

    # pop() instead of get(): if "model_args" or "tasks" were left inside
    # kwargs while also being passed explicitly below, the **kwargs expansion
    # would raise a duplicate-keyword-argument TypeError
    model_args = kwargs.pop("model_args", target_args)
    tasks = kwargs.pop("tasks", datasets)

    evaluator.simple_evaluate(
        model=model,
        model_args=model_args,
        tasks=tasks,
        batch_size=batch_size,
        **kwargs,
    )

    return True


class DeepSparseLM(BaseLM):
    """
    lm-evaluation-harness adapter around a DeepSparse text-generation Pipeline.

    NOTE(review): several BaseLM hooks below are unimplemented stubs
    (eot_token, eot_token_id, max_gen_toks, _loglikelihood_tokens) — they
    currently return None; confirm the harness code paths used never need them.
    """

    def __init__(self, stub, max_length: Optional[int] = None):
        # Create a text-generation pipeline from a model stub/path
        self.model = Pipeline.create(task="text_generation", model_path=stub)

        self.default_max_length = 1024
        self._max_length = max_length

    @classmethod
    def from_pipeline(cls, pipeline: Pipeline, max_length: Optional[int] = None):
        """
        Wrap an already-constructed Pipeline.

        Fix: the previous implementation passed the pipeline to ``__init__``
        as the ``stub`` argument, which would have attempted to create a new
        pipeline from it. Bypass __init__ and attach the pipeline directly.
        """
        lm = cls.__new__(cls)
        lm.model = pipeline
        lm.default_max_length = 1024
        lm._max_length = max_length
        return lm

    @property
    def batch_size(self):
        # NOTE(review): relies on the pipeline's private _batch_size attribute
        # — confirm a public accessor exists
        return self.model._batch_size

    @property
    def eot_token(self) -> str:
        # TODO: unimplemented stub — returns None
        pass

    def _model_call(self, inps):
        # Isn't used because we override _loglikelihood_tokens
        raise NotImplementedError()

    def _model_generate(self, context, max_length, eos_token_id):
        # Isn't used because we override greedy_until
        raise NotImplementedError()

    def _loglikelihood_tokens(self, requests, **kwargs):
        # TODO: unimplemented stub — returns None
        pass

    @property
    def device(self):
        # DeepSparse executes on CPU
        return "cpu"

    @property
    def eot_token_id(self) -> int:
        # TODO: unimplemented stub — returns None
        pass

    @property
    def max_gen_toks(self):
        # TODO: unimplemented stub — returns None
        pass

    @property
    def max_length(self):
        # User-supplied limit, falling back to the 1024-token default
        return self._max_length or self.default_max_length

    def tok_encode(self, string: str):
        return self.model.tokenizer.encode(string)

    def tok_decode(self, tokens):
        # Fix: was self.tokenizer, an attribute this class never defines
        # (AttributeError); the tokenizer lives on the wrapped pipeline
        return self.model.tokenizer.decode(tokens)


def initialize_model(target, target_args):
    """
    Load the model to be evaluated for ``target``.

    Fix: ``target_args`` was previously accepted but silently ignored; it is
    now forwarded to ``from_pretrained`` as keyword overrides (a None value
    still behaves exactly as before).

    :param target: model identifier or local path understood by transformers
    :param target_args: optional dict of keyword arguments for from_pretrained
    :return: the loaded model
    """
    # TODO: intended to create Union[DeepSparseLM, torch.nn.Module] depending
    # on the target; currently always loads a Hugging Face causal LM
    return AutoModelForCausalLM.from_pretrained(target, **(target_args or {}))
Binary file not shown.
13 changes: 13 additions & 0 deletions tests/deepsparse/eval/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
13 changes: 13 additions & 0 deletions tests/deepsparse/eval/integrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
26 changes: 26 additions & 0 deletions tests/deepsparse/eval/integrations/test_llm_evaluation_harness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from src.deepsparse.eval.integrations.llm_evaluation_harness import integration_eval


@pytest.mark.parametrize(
    "target,datasets, batch_size",
    [("roneneldan/TinyStories-1M", ["hellaswag"], 1)],
)
def test_integration_eval(target, datasets, batch_size):
    """Smoke-test integration_eval on a tiny model over a small task sample."""
    # Fix: batch_size was hard-coded to 1, silently ignoring the parametrized
    # value; pass the fixture through so new parametrize cases take effect
    assert integration_eval(
        target=target,
        datasets=datasets,
        batch_size=batch_size,
        target_args=None,
        limit=5,  # evaluate only 5 samples to keep the test fast
    )
Loading