Skip to content

Commit

Permalink
Add ExactMatch scorer (#79)
Browse files Browse the repository at this point in the history
By popular request -- this scorer simply compares two values and tells
you whether they're equal or not. Of course, things get a little tricky
if one thing is an object and the other is a string (not an uncommon
scenario when generating JSON).
  • Loading branch information
ankrgyl authored Jul 19, 2024
1 parent a7c7135 commit 67d0bf8
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ npx braintrust run example.eval.js
### Heuristic

- Levenshtein distance
- Exact match
- Numeric difference
- JSON diff
- Jaccard distance
Expand Down
6 changes: 6 additions & 0 deletions js/manifest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {
import { ListContains } from "./list";
import { ScorerWithPartial } from "./partial";
import { Moderation } from "./moderation";
import { ExactMatch } from "./value";

interface AutoevalMethod {
method: ScorerWithPartial<any, any>;
Expand Down Expand Up @@ -164,6 +165,11 @@ export const Evaluators: {
method: Levenshtein,
description: "Uses the Levenshtein distance to compare two strings.",
},
{
method: ExactMatch,
description:
"Compares two values for exact equality. If the values are objects, they are converted to JSON strings before comparison.",
},
{
method: NumericDiff,
description: "Compares numbers by normalizing their difference.",
Expand Down
30 changes: 30 additions & 0 deletions js/value.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { ListContains } from "./list";
import { NumericDiff } from "./number";
import { LevenshteinScorer } from "./string";
import { ExactMatch } from "./value";

test("Levenshtein Test", async () => {
const cases = [
Expand Down Expand Up @@ -89,3 +90,32 @@ test("ListContains Test", async () => {
).score,
).toBe(1);
});

test("ExactMatch", async () => {
  // Each row is [output, expected, expectedScore].
  const scenarios: Array<[unknown, unknown, number]> = [
    ["hello", "hello", 1],
    ["hello", "world", 0],
    [123, 123, 1],
    [123, "123", 1],
    [{ a: 1, b: 2 }, { a: 1, b: 2 }, 1],
    [{ a: 1, b: 2 }, { a: 1, b: 3 }, 0],
    [[1, 2, 3], [1, 2, 3], 1],
    [[1, 2, 3], [3, 2, 1], 0],
    [{ a: 1, b: 2 }, { b: 2, a: 1 }, 0], // Key order matters
    [{ a: 1, b: 2 }, '{"a": 1, "b": 2}', 1], // JSON string matches the object
    [{ a: 1, b: 2 }, '{"a":1, "b":2}', 1], // Whitespace in the JSON string is irrelevant
    [{ a: 1, b: 2 }, '{"b":2, "a":1}', 0],
    [{ a: 1, b: 2 }, { b: 2, a: 1, c: 3 }, 0], // Extra key, not equal
    [null, null, 1],
    [null, undefined, 1],
  ];

  for (const [output, expected, expectedScore] of scenarios) {
    const result = await ExactMatch({ output, expected });
    expect(result.score).toBeCloseTo(expectedScore, 4);
  }
});
41 changes: 41 additions & 0 deletions js/value.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { makePartial, ScorerWithPartial } from "./partial";

/**
 * Scorer that reports 1 when `output` and `expected` normalize to the same
 * string and 0 otherwise.
 *
 * If either side is an object (arrays and `null` included), both sides are
 * normalized through JSON so that, for example, an object can be compared
 * against its JSON string form. A missing (`undefined`) side is normalized as
 * `null`.
 */
export const ExactMatch: ScorerWithPartial<unknown, {}> = makePartial(
  (args) => {
    // JSON normalization applies to both sides as soon as one of them is an object.
    const maybeObject = needsJSON(args.output) || needsJSON(args.expected);

    const normalizedOutput = normalizeValue(args.output ?? null, maybeObject);
    const normalizedExpected = normalizeValue(
      args.expected ?? null,
      maybeObject,
    );

    return {
      name: "ExactMatch",
      score: normalizedOutput === normalizedExpected ? 1 : 0,
    };
  },
  "ExactMatch",
);

/**
 * Reports whether `value` must be JSON-serialized before it can be compared
 * as a string: true for arrays and for anything whose `typeof` is "object".
 */
function needsJSON(value: unknown): boolean {
  if (Array.isArray(value)) {
    return true;
  }
  // `typeof null` is "object" as well, so null takes the JSON path too.
  return typeof value === "object";
}

/**
 * Converts `value` into the string used for the equality comparison.
 *
 * @param value - The raw value to normalize.
 * @param maybeObject - When true, a string `value` is first tried as JSON and,
 *   if it parses, re-serialized so that formatting differences disappear.
 * @returns The JSON serialization for objects/arrays, the re-serialized JSON
 *   for parsable strings (only when `maybeObject` is set), and the plain
 *   template-string conversion of `value` in every other case.
 */
export function normalizeValue(value: unknown, maybeObject: boolean): string {
  if (needsJSON(value)) {
    return JSON.stringify(value);
  }

  // Only strings are candidates for JSON re-serialization, and only on request.
  if (typeof value !== "string" || !maybeObject) {
    return `${value}`;
  }

  try {
    return JSON.stringify(JSON.parse(value));
  } catch {
    // Not valid JSON: fall back to the string itself.
    return value;
  }
}
29 changes: 29 additions & 0 deletions py/autoevals/test_values.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import pytest
from pytest import approx

from autoevals.list import ListContains
from autoevals.number import NumericDiff
from autoevals.string import LevenshteinScorer
from autoevals.value import ExactMatch


def test_levenshtein():
Expand Down Expand Up @@ -77,3 +79,30 @@ def test_list_contains():
assert (
ListContains(pairwise_evaluator=LevenshteinScorer(), allow_extra_entities=True)(["a", "b"], ["a"]).score == 1
)


def test_exact_match():
    # Each row is (output, expected, expected_score).
    scenarios = (
        ("hello", "hello", 1),
        ("hello", "world", 0),
        (123, 123, 1),
        (123, "123", 1),
        ({"a": 1, "b": 2}, {"a": 1, "b": 2}, 1),
        ({"a": 1, "b": 2}, {"a": 1, "b": 3}, 0),
        ([1, 2, 3], [1, 2, 3], 1),
        ([1, 2, 3], [3, 2, 1], 0),
        ({"a": 1, "b": 2}, {"b": 2, "a": 1}, 0),  # Key order matters
        ({"a": 1, "b": 2}, '{"a": 1, "b": 2}', 1),  # JSON string matches the dict
        ({"a": 1, "b": 2}, '{"a":1, "b":2}', 1),  # Whitespace in the JSON string is irrelevant
        ({"a": 1, "b": 2}, '{"b": 2, "a": 1}', 0),
        ({"a": 1, "b": 2}, {"b": 2, "a": 1, "c": 3}, 0),  # Extra key, not equal
        (None, None, 1),
        (None, "None", 1),
    )

    for output, expected, expected_score in scenarios:
        result = ExactMatch()(output, expected)
        # The tuple is the assertion message, so a failure names the offending case.
        assert result.score == approx(expected_score, abs=1e-4), (
            output,
            expected,
            expected_score,
        )
37 changes: 37 additions & 0 deletions py/autoevals/value.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import json
from typing import Any

from braintrust_core.score import Score

from autoevals.partial import ScorerWithPartial


class ExactMatch(ScorerWithPartial):
    """
    Scorer that checks two values for equality after normalizing both to strings.

    When either value is a dict or list, both sides are normalized through JSON, so a
    dict can be compared against its JSON string form. The score is 1 on a match and
    0 otherwise.
    """

    def _run_eval_sync(self, output, expected=None, **kwargs):
        # JSON normalization applies to both sides as soon as one of them is a dict/list.
        maybe_object = needs_json(output) or needs_json(expected)

        normalized_output = normalize_value(output, maybe_object)
        normalized_expected = normalize_value(expected, maybe_object)
        matched = normalized_output == normalized_expected

        return Score(name=self._name(), score=1 if matched else 0)


def needs_json(value: Any) -> bool:
    """Return True when ``value`` is a dict or list and so must be JSON-serialized to be compared."""
    return isinstance(value, (dict, list))


def normalize_value(value: Any, maybe_object: bool) -> str:
    """
    Convert ``value`` into the string used for the equality comparison.

    Args:
        value: The raw value to normalize.
        maybe_object: When true, textual values (``str``/``bytes``/``bytearray``) are first
            tried as JSON and, if they parse, re-serialized so formatting differences vanish.

    Returns:
        ``json.dumps(value)`` for dicts and lists, the re-serialized JSON for parsable
        text (only when ``maybe_object`` is set), and ``str(value)`` in every other case.
    """
    if needs_json(value):
        return json.dumps(value)

    # json.loads only accepts str/bytes/bytearray; anything else (int, float, bool, None)
    # raises TypeError, so those values must skip the JSON attempt and go to str().
    if maybe_object and isinstance(value, (str, bytes, bytearray)):
        try:
            return json.dumps(json.loads(value))
        except ValueError:
            # Covers json.JSONDecodeError and undecodable bytes: not JSON, fall back to str().
            pass

    return str(value)

0 comments on commit 67d0bf8

Please sign in to comment.