Skip to content

Commit

Permalink
Remove validity score (#34)
Browse files (browse the repository at this point in the history)
Fixes BRA-622

We think function calling works well enough that it shouldn't be
necessary to track validity as a separate score.
  • Loading branch information
manugoyal authored Nov 30, 2023
1 parent a4707b3 commit c2efd85
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 15 deletions.
9 changes: 2 additions & 7 deletions js/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,6 @@ export async function OpenAIClassifier<RenderArgs, Output>(
content: m.content && mustache.render(m.content as string, renderArgs),
}));

let ret = null;
let validityScore = 1;
try {
const resp = await cachedChatCompletion(
{
Expand All @@ -145,23 +143,20 @@ export async function OpenAIClassifier<RenderArgs, Output>(
);

if (resp.choices.length > 0) {
ret = {
return {
name,
...parseResponse(resp.choices[0].message!, choiceScores),
};
} else {
throw new Error("Empty response from OpenAI");
}
} catch (error) {
validityScore = 0;
ret = {
return {
name,
score: 0,
error: `${error}`,
};
}

return ret;
}

function parseResponse(
Expand Down
8 changes: 0 additions & 8 deletions py/autoevals/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,26 +153,18 @@ def _postprocess_response(self, resp):
raise ValueError("Empty response from OpenAI")

async def _run_eval_async(self, output, expected, **kwargs):
validity_score = 1
try:
return self._postprocess_response(
await arun_cached_request(**self._request_args(output, expected, **kwargs))
)
except Exception as e:
validity_score = 0
return Score(name=self.name, score=0, error=e)
finally:
current_span().log(scores={f"{self._name()} parsed": validity_score})

def _run_eval_sync(self, output, expected, **kwargs):
validity_score = 1
try:
return self._postprocess_response(run_cached_request(**self._request_args(output, expected, **kwargs)))
except Exception as e:
validity_score = 0
return Score(name=self.name, score=0, error=e)
finally:
current_span().log(scores={f"{self._name()} parsed": validity_score})


@dataclass
Expand Down

0 comments on commit c2efd85

Please sign in to comment.