diff --git a/js/llm.ts b/js/llm.ts
index a32b513..a20e161 100644
--- a/js/llm.ts
+++ b/js/llm.ts
@@ -9,6 +9,7 @@ import {
   ChatCompletionTool,
 } from "openai/resources";
 import { makePartial, ScorerWithPartial } from "./partial";
+import { renderMessages } from "./render-messages";
 
 const NO_COT_SUFFIX =
   "Answer the question by calling `select_choice` with a single choice from {{__choices}}.";
@@ -118,15 +119,7 @@ export async function OpenAIClassifier(
     ...remainingRenderArgs,
   };
 
-  const messages: ChatCompletionMessageParam[] = messagesArg.map((m) => ({
-    ...m,
-    content: m.content
-      ? mustache.render(m.content as string, renderArgs, undefined, {
-          escape: (v: unknown) =>
-            typeof v === "string" ? v : JSON.stringify(v),
-        })
-      : "",
-  }));
+  const messages = renderMessages(messagesArg, renderArgs);
 
   const resp = await cachedChatCompletion(
     {
diff --git a/js/render-messages.test.ts b/js/render-messages.test.ts
new file mode 100644
index 0000000..667af2c
--- /dev/null
+++ b/js/render-messages.test.ts
@@ -0,0 +1,43 @@
+import { renderMessages } from "./render-messages";
+import { ChatCompletionMessageParam } from "openai/resources";
+
+describe("renderMessages", () => {
+  it("should never HTML-escape values, regardless of mustache syntax", () => {
+    const messages: ChatCompletionMessageParam[] = [
+      { role: "user", content: "{{value}} and {{{value}}}" },
+    ];
+    const rendered = renderMessages(messages, { value: "<b>bold</b>" });
+    expect(rendered[0].content).toBe("<b>bold</b> and <b>bold</b>");
+  });
+
+  it("should stringify objects when using either {{...}} or {{{...}}}", () => {
+    const messages: ChatCompletionMessageParam[] = [
+      {
+        role: "user",
+        content: "Double braces: {{data}}, Triple braces: {{{data}}}",
+      },
+    ];
+    const data = { foo: "bar", num: 42 };
+    const rendered = renderMessages(messages, { data });
+    const stringified = JSON.stringify(data);
+    expect(rendered[0].content).toBe(
+      `Double braces: ${stringified}, Triple braces: ${stringified}`,
+    );
+  });
+
+  it("should keep literal braces that surround a tag", () => {
+    const messages: ChatCompletionMessageParam[] = [
+      { role: "user", content: '{"count": {{count}}}' },
+    ];
+    const rendered = renderMessages(messages, { count: 3 });
+    expect(rendered[0].content).toBe('{"count": 3}');
+  });
+
+  it("should handle empty content", () => {
+    const messages: ChatCompletionMessageParam[] = [
+      { role: "user", content: "" },
+    ];
+    const rendered = renderMessages(messages, {});
+    expect(rendered[0].content).toBe("");
+  });
+});
diff --git a/js/render-messages.ts b/js/render-messages.ts
new file mode 100644
index 0000000..e0f0bbe
--- /dev/null
+++ b/js/render-messages.ts
@@ -0,0 +1,40 @@
+import mustache from "mustache";
+import { ChatCompletionMessageParam } from "openai/resources";
+
+// Matches one complete triple-mustache tag such as `{{{ name }}}` and captures
+// the tag body. A bare run of braces does not match, so the literal `}` that
+// closes `{"a": {{x}}}` is preserved instead of being swallowed.
+const TRIPLE_MUSTACHE_TAG = /\{\{\{([^{}]*)\}\}\}/g;
+
+/**
+ * Renders the `content` of every message as a mustache template.
+ *
+ * Values are never HTML-escaped: strings are inserted verbatim and every other
+ * value is inserted as `JSON.stringify(value)`. Triple-mustache tags are
+ * rewritten to double-mustache tags first so that they go through the same
+ * `escape` hook as `{{...}}` tags.
+ *
+ * @param messages - Messages whose `content` is used as the template.
+ * @param renderArgs - Variables made available to the templates.
+ * @returns A copy of each message with rendered `content`; falsy content
+ *   is rendered as the empty string.
+ */
+export function renderMessages(
+  messages: ChatCompletionMessageParam[],
+  renderArgs: Record<string, unknown>,
+): ChatCompletionMessageParam[] {
+  return messages.map((m) => ({
+    ...m,
+    content: m.content
+      ? mustache.render(
+          (m.content as string).replace(TRIPLE_MUSTACHE_TAG, "{{$1}}"),
+          renderArgs,
+          undefined,
+          {
+            escape: (v: unknown) =>
+              typeof v === "string" ? v : JSON.stringify(v),
+          },
+        )
+      : "",
+  }));
+}
diff --git a/py/autoevals/test_llm.py b/py/autoevals/test_llm.py
index 12ec823..5af71c2 100644
--- a/py/autoevals/test_llm.py
+++ b/py/autoevals/test_llm.py
@@ -5,12 +5,54 @@
 import chevron
 import pytest
 import respx
+from pydantic import BaseModel
 
 from autoevals import init
 from autoevals.llm import *
 from autoevals.llm import build_classification_tools
 
 
+class TestModel(BaseModel):
+    foo: str
+    num: int
+
+
+def test_render_messages():
+    classifier = OpenAILLMClassifier(
+        "test",
+        messages=[
+            {"role": "user", "content": "{{value}} and {{{value}}}"},
+            {"role": "user", "content": "Dict double braces: {{data}}"},
+            {"role": "user", "content": "Dict triple braces: {{{data}}}"},
+            {"role": "user", "content": "Model double braces: {{model}}"},
+            {"role": "user", "content": "Model triple braces: {{{model}}}"},
+            {"role": "user", "content": ""},  # test empty content
+        ],
+        model="gpt-4",
+        choice_scores={"A": 1},
+        classification_tools=[],
+    )
+
+    test_dict = {"foo": "bar", "num": 42}
+    test_model = TestModel(foo="bar", num=42)
+
+    rendered = classifier._render_messages(value="<b>bold</b>", data=test_dict, model=test_model)
+
+    # Test HTML escaping - double braces escape, triple braces don't.
+    assert rendered[0]["content"] == "&lt;b&gt;bold&lt;/b&gt; and <b>bold</b>"
+
+    # Test dict rendering - both use str() but double braces escape HTML chars if present.
+    assert rendered[1]["content"] == "Dict double braces: {'foo': 'bar', 'num': 42}"
+    assert rendered[2]["content"] == "Dict triple braces: {'foo': 'bar', 'num': 42}"
+
+    # Test model rendering - both use str() but double braces escape HTML chars if present.
+    assert rendered[3]["content"] == "Model double braces: foo='bar' num=42"
+    assert rendered[4]["content"] == "Model triple braces: foo='bar' num=42"
+
+    # Test empty content.
+    assert rendered[5]["content"] == ""
+
+
 def test_template_html():
     template_double = "{{output}}"
     template_triple = "{{{output}}}"