diff --git a/py/autoevals/string.py b/py/autoevals/string.py
index 5428890..e078969 100644
--- a/py/autoevals/string.py
+++ b/py/autoevals/string.py
@@ -4,6 +4,7 @@
 from Levenshtein import distance
 
 from autoevals.partial import ScorerWithPartial
+from autoevals.value import normalize_value
 
 from .oai import arun_cached_request, run_cached_request
 
@@ -59,6 +60,7 @@ def __init__(self, prefix="", model=MODEL, expected_min=0.7, api_key=None, base_
             self.extra_args["base_url"] = base_url
 
     async def _a_embed(self, value):
+        value = normalize_value(value, maybe_object=False)
         with self._CACHE_LOCK:
             if value in self._CACHE:
                 return self._CACHE[value]
@@ -71,6 +73,7 @@ async def _a_embed(self, value):
         return result
 
     def _embed(self, value):
+        value = normalize_value(value, maybe_object=False)
         with self._CACHE_LOCK:
             if value in self._CACHE:
                 return self._CACHE[value]
diff --git a/py/autoevals/test_embeddings.py b/py/autoevals/test_embeddings.py
index 882318e..08c67fe 100644
--- a/py/autoevals/test_embeddings.py
+++ b/py/autoevals/test_embeddings.py
@@ -1,4 +1,7 @@
+import asyncio
+
 from autoevals import EmbeddingSimilarity
+from autoevals.value import normalize_value
 
 SYNONYMS = [
     ("water", ["water", "H2O", "agua"]),
@@ -27,3 +30,21 @@ def test_embeddings():
             result = evaluator(word1, word2)
             print(f"[{word1}]", f"[{word2}]", result)
             assert result.score < 0.5
+
+
+VALUES = [
+    ("water", "wind"),
+    (["cold", "water"], ["cold", "wind"]),
+    ({"water": "wet"}, {"wind": "dry"}),
+]
+
+
+def test_embedding_values():
+    for run_async in [False, True]:
+        evaluator = EmbeddingSimilarity()
+        for (word1, word2) in VALUES:
+            if run_async:
+                result = asyncio.run(evaluator.eval_async(word1, word2))
+            else:
+                result = evaluator(word1, word2)
+            print(f"[{word1}]", f"[{word2}]", f"run_async={run_async}", result)
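
Note on the new normalize_value(value, maybe_object=False) calls: the function is imported from autoevals.value, whose implementation is not part of this diff. The sketch below is only an assumption about what it does for the inputs exercised by test_embedding_values (serialize lists/dicts to a JSON string, stringify everything else), not the library's actual code; the real behavior lives in py/autoevals/value.py.

import json

def normalize_value(value, maybe_object=True):
    # Hypothetical stand-in: maybe_object presumably controls whether string
    # inputs that look like JSON get re-parsed; the diff passes False, so this
    # sketch ignores it and only flattens non-string inputs.
    if isinstance(value, (list, dict)):
        return json.dumps(value)
    return str(value)

# How the patched _embed / _a_embed would see the test inputs:
normalize_value(["cold", "water"], maybe_object=False)  # '["cold", "water"]'
normalize_value({"water": "wet"}, maybe_object=False)   # '{"water": "wet"}'
normalize_value("water", maybe_object=False)            # 'water'

Normalizing before the cache lookup also means a list and its JSON-string form share one cache entry and one embedding request, under the same assumption.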