From 2b4f208b1e9729669c21927e4f50dafa8f8bbd3a Mon Sep 17 00:00:00 2001
From: Ankur Goyal
Date: Thu, 9 Nov 2023 11:03:54 -0800
Subject: [PATCH 1/6] Support multiple openai versions

---
 py/autoevals/llm.py  | 13 ++++++--
 py/autoevals/oai.py  | 76 ++++++++++++++++++++------------------
 py/autoevals/util.py | 47 +++++++++++++++++++++++++++
 setup.py             |  2 +-
 4 files changed, 92 insertions(+), 46 deletions(-)

diff --git a/py/autoevals/llm.py b/py/autoevals/llm.py
index 9e97284..c92513f 100644
--- a/py/autoevals/llm.py
+++ b/py/autoevals/llm.py
@@ -129,16 +129,21 @@ def _render_messages(self, **kwargs):
         ]

     def _request_args(self, output, expected, **kwargs):
-        return dict(
-            Completion=openai.ChatCompletion,
+        ret = dict(
             model=self.model,
-            engine=self.engine,
             messages=self._render_messages(output=output, expected=expected, **kwargs),
             functions=self.classification_functions,
             function_call={"name": "select_choice"},
             **self.extra_args,
         )

+        if self.engine is not None:
+            # This parameter has been deprecated (https://help.openai.com/en/articles/6283125-what-happened-to-engines)
+            # and is unsupported in OpenAI v1, so only set it if the user has specified it
+            ret['engine'] = self.engine
+
+        return ret
+
     def _postprocess_response(self, resp):
         if len(resp["choices"]) > 0:
             return self._process_response(resp["choices"][0]["message"])
@@ -163,6 +168,8 @@ def _run_eval_sync(self, output, expected, **kwargs):
             return self._postprocess_response(run_cached_request(**self._request_args(output, expected, **kwargs)))
         except Exception as e:
             validity_score = 0
+            import traceback
+            traceback.print_exc()
             return Score(name=self.name, score=0, error=e)
         finally:
             current_span().log(scores={f"{self._name()} parsed": validity_score})
diff --git a/py/autoevals/oai.py b/py/autoevals/oai.py
index bc91e54..014e0aa 100644
--- a/py/autoevals/oai.py
+++ b/py/autoevals/oai.py
@@ -6,7 +6,7 @@
 import time
 from pathlib import Path

-from .util import current_span, traced
+from .util import current_span, prepare_openai_complete

 _CACHE_DIR = None
 _CONN = None
@@ -30,52 +30,53 @@ def open_cache():
     return _CONN


-def log_openai_request(input_args, response, **kwargs):
-    span = current_span()
-    if not span:
-        return
+CACHE_LOCK = threading.Lock()

-    input = input_args.pop("messages")
-    span.log(
-        metrics={
-            "tokens": response["usage"]["total_tokens"],
-            "prompt_tokens": response["usage"]["prompt_tokens"],
-            "completion_tokens": response["usage"]["completion_tokens"],
-        },
-        metadata={**input_args, **kwargs},
-        input=input,
-        output=response["choices"][0],
-    )

+def post_process_response(resp):
+    # This normalizes against craziness in OpenAI v0 vs. v1
+    if hasattr(resp, "to_dict"):
+        # v0
+        return resp.to_dict()
+    else:
+        # v1
+        return resp.dict()

-CACHE_LOCK = threading.Lock()

+def log_cached_response(params, resp):
+    with current_span().start_span(name="OpenAI Completion") as span:
+        messages = params.pop("messages", None)
+        span.log(
+            metrics={
+                "tokens": resp["usage"]["total_tokens"],
+                "prompt_tokens": resp["usage"]["prompt_tokens"],
+                "completion_tokens": resp["usage"]["completion_tokens"],
+            },
+            input=messages,
+            output=resp["choices"],
+        )

-@traced(name="OpenAI Completion")
-def run_cached_request(Completion=None, **kwargs):
-    # OpenAI is very slow to import, so we only do it if we need it
-    import openai

-    if Completion is None:
-        Completion = openai.Completion
+def run_cached_request(api_key=None, **kwargs):
+    # OpenAI is very slow to import, so we only do it if we need it
+    complete, RateLimitError = prepare_openai_complete(is_async=False, api_key=api_key)

     param_key = json.dumps(kwargs)
     conn = open_cache()
     with CACHE_LOCK:
         cursor = conn.cursor()
         resp = cursor.execute("""SELECT response FROM "cache" WHERE params=?""", [param_key]).fetchone()
-    cached = False
     retries = 0
     if resp:
-        cached = True
         resp = json.loads(resp[0])
+        log_cached_response(kwargs, resp)
     else:
         sleep_time = 0.1
         while retries < 20:
             try:
-                resp = Completion.create(**kwargs).to_dict()
+                resp = post_process_response(complete(**kwargs))
                 break
-            except openai.error.RateLimitError:
+            except RateLimitError:
                 sleep_time *= 1.5
                 time.sleep(sleep_time)
                 retries += 1
@@ -85,36 +86,29 @@ def run_cached_request(Completion=None, **kwargs):
             cursor.execute("""INSERT INTO "cache" VALUES (?, ?)""", [param_key, json.dumps(resp)])
             conn.commit()

-    log_openai_request(kwargs, resp, cached=cached)
-
     return resp


-@traced(name="OpenAI Completion")
-async def arun_cached_request(Completion=None, **kwargs):
-    # OpenAI is very slow to import, so we only do it if we need it
-    import openai
-
-    if Completion is None:
-        Completion = openai.Completion
+async def arun_cached_request(api_key=None, **kwargs):
+    complete, RateLimitError = prepare_openai_complete(is_async=True, api_key=api_key)

     param_key = json.dumps(kwargs)
     conn = open_cache()
     with CACHE_LOCK:
         cursor = conn.cursor()
         resp = cursor.execute("""SELECT response FROM "cache" WHERE params=?""", [param_key]).fetchone()
-    cached = False
     retries = 0
     if resp:
         resp = json.loads(resp[0])
-        cached = True
+        log_cached_response(kwargs, resp)
     else:
         sleep_time = 0.1
         while retries < 100:
             try:
-                resp = (await Completion.acreate(**kwargs)).to_dict()
+                resp = post_process_response(await complete(**kwargs))
                 break
-            except openai.error.RateLimitError:
+            except RateLimitError:
+                # Just assume it's a rate limit error
                 sleep_time *= 1.5
                 await asyncio.sleep(sleep_time)
                 retries += 1
@@ -124,6 +118,4 @@ async def arun_cached_request(Completion=None, **kwargs):
             cursor.execute("""INSERT INTO "cache" VALUES (?, ?)""", [param_key, json.dumps(resp)])
             conn.commit()

-    log_openai_request(kwargs, resp, cached=cached, retries=retries)
-
     return resp
diff --git a/py/autoevals/util.py b/py/autoevals/util.py
index 20a51a3..27ab891 100644
--- a/py/autoevals/util.py
+++ b/py/autoevals/util.py
@@ -1,5 +1,7 @@
 import dataclasses
 import json
+import sys
+import textwrap


 class SerializableDataClass:
@@ -48,3 +50,48 @@ def traced(*span_args, **span_kwargs):
         return span_args[0]
     else:
         return lambda f: f
+
+
+def prepare_openai_complete(is_async=False, api_key=None):
+    try:
+        import openai
+    except Exception as e:
+        print(textwrap.dedent(
+            f"""\
+            Unable to import openai. Please install it, e.g. with
+
+            pip install 'openai'
+
+            {e}
+            """
+        ), file=sys.stderr)
+        raise
+
+    openai_obj = openai
+    is_v1 = False
+    if hasattr(openai, "chat") and hasattr(openai.chat, "completions"):
+        # This is the new v1 API
+        is_v1 = True
+        if is_async:
+            openai_obj = openai.AsyncOpenAI(api_key=api_key)
+        else:
+            openai_obj = openai.OpenAI(api_key=api_key)
+
+    try:
+        from braintrust.oai import wrap_openai
+        openai_obj = wrap_openai(openai_obj)
+    except ImportError:
+        pass
+
+    complete_fn = None
+    rate_limit_error = None
+    if is_v1:
+        rate_limit_error = openai.RateLimitError
+        complete_fn = openai_obj.chat.completions.create
+    else:
+        rate_limit_error = openai.error.RateLimitError
+        if is_async:
+            complete_fn = openai_obj.ChatCompletion.acreate
+        else:
+            complete_fn = openai_obj.ChatCompletion.create
+
+    return complete_fn, rate_limit_error
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 6143ce4..2200f29 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
 with open(os.path.join(dir_name, "README.md"), "r", encoding="utf-8") as f:
     long_description = f.read()

-install_requires = ["chevron", "openai==0.28.1", "levenshtein", "pyyaml"]
+install_requires = ["chevron", "levenshtein", "pyyaml"]

 extras_require = {
     "dev": [
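A note on the design in patch 1: `prepare_openai_complete` is the compatibility seam. It probes the installed `openai` module, instantiates a v1 client (wrapped by braintrust when available) and hands back a `(complete_fn, rate_limit_error)` pair, so callers never branch on the library version themselves. A minimal caller sketch, assuming the function is importable and an API key is configured; the model and messages values are invented for illustration and are not part of the patch:

    # Resolve the right completion entry point once, then call it the
    # same way whether openai v0 or v1 is installed.
    complete, RateLimitError = prepare_openai_complete(is_async=False)

    try:
        resp = complete(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Say hello."}],
        )
    except RateLimitError:
        pass  # run_cached_request retries this case with backoff

`run_cached_request` then layers a sqlite cache on top: the request kwargs serialized with `json.dumps` form the cache key, so a repeated request skips the network call entirely and is logged through `log_cached_response` instead.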
From 56dd228715f8cb912ee1f531f07495a6e627e0a5 Mon Sep 17 00:00:00 2001
From: Ankur Goyal
Date: Thu, 9 Nov 2023 11:15:37 -0800
Subject: [PATCH 2/6] Cleanup

---
 py/autoevals/llm.py  |  7 ++-----
 py/autoevals/util.py | 19 ++++++++++++-------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/py/autoevals/llm.py b/py/autoevals/llm.py
index c92513f..c85a8a3 100644
--- a/py/autoevals/llm.py
+++ b/py/autoevals/llm.py
@@ -5,7 +5,6 @@
 from typing import List, Optional

 import chevron
-import openai
 import yaml

 from .base import Score, Scorer
@@ -140,8 +139,8 @@ def _request_args(self, output, expected, **kwargs):
         if self.engine is not None:
             # This parameter has been deprecated (https://help.openai.com/en/articles/6283125-what-happened-to-engines)
             # and is unsupported in OpenAI v1, so only set it if the user has specified it
-            ret['engine'] = self.engine
-
+            ret["engine"] = self.engine
+
         return ret

     def _postprocess_response(self, resp):
@@ -168,8 +167,6 @@ def _run_eval_sync(self, output, expected, **kwargs):
             return self._postprocess_response(run_cached_request(**self._request_args(output, expected, **kwargs)))
         except Exception as e:
             validity_score = 0
-            import traceback
-            traceback.print_exc()
             return Score(name=self.name, score=0, error=e)
         finally:
             current_span().log(scores={f"{self._name()} parsed": validity_score})
diff --git a/py/autoevals/util.py b/py/autoevals/util.py
index 27ab891..279f00f 100644
--- a/py/autoevals/util.py
+++ b/py/autoevals/util.py
@@ -51,19 +51,23 @@ def traced(*span_args, **span_kwargs):
     else:
         return lambda f: f

+
 def prepare_openai_complete(is_async=False, api_key=None):
     try:
         import openai
     except Exception as e:
-        print(textwrap.dedent(
-            f"""\
-            Unable to import openai. Please install it, e.g. with
+        print(
+            textwrap.dedent(
+                f"""\
+                Unable to import openai: {e}

-            pip install 'openai'
+                Please install it, e.g. with

-            {e}
+                pip install 'openai'
             """
-        ), file=sys.stderr)
+            ),
+            file=sys.stderr,
+        )
         raise

     openai_obj = openai
@@ -78,6 +82,7 @@ def prepare_openai_complete(is_async=False, api_key=None):

     try:
         from braintrust.oai import wrap_openai
+
         openai_obj = wrap_openai(openai_obj)
     except ImportError:
         pass
@@ -94,4 +99,4 @@ def prepare_openai_complete(is_async=False, api_key=None):
         else:
             complete_fn = openai_obj.ChatCompletion.create

-    return complete_fn, rate_limit_error
\ No newline at end of file
+    return complete_fn, rate_limit_error

From d3817a3e3be1414c6f9b5a1d1c5bbe7acf2c46bf Mon Sep 17 00:00:00 2001
From: Ankur Goyal
Date: Thu, 9 Nov 2023 11:16:00 -0800
Subject: [PATCH 3/6] Manually install openai in tests

---
 .github/workflows/python.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index add3903..d837e76 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -18,7 +18,7 @@ jobs:
           python-version: "3.11"
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip setuptools build twine
+          python -m pip install --upgrade pip setuptools build twine openai
          python -m pip install -e .[dev]
      - name: Test with pytest
        run: |

From 9bd4c23ef5e9fa9dea6adda2e0cdae988ea0e42d Mon Sep 17 00:00:00 2001
From: Ankur Goyal
Date: Thu, 9 Nov 2023 11:21:33 -0800
Subject: [PATCH 4/6] Add dummy API key

---
 .github/workflows/python.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index d837e76..5660109 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -6,6 +6,9 @@ on:
   push:
     branches: [main]

+env:
+  OPENAI_API_KEY: sk-dummy
+
 jobs:
   build:
     runs-on: ubuntu-latest

From 84e94361d652d12406e36dad90ca1069fe2d22ed Mon Sep 17 00:00:00 2001
From: Ankur Goyal
Date: Thu, 9 Nov 2023 11:35:46 -0800
Subject: [PATCH 5/6] Fix no-op span

---
 py/autoevals/util.py | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/py/autoevals/util.py b/py/autoevals/util.py
index 279f00f..845141c 100644
--- a/py/autoevals/util.py
+++ b/py/autoevals/util.py
@@ -2,6 +2,7 @@
 import json
 import sys
 import textwrap
+import time


 class SerializableDataClass:
@@ -14,21 +15,40 @@ def as_json(self, **kwargs):
         return json.dumps(self.as_dict(), **kwargs)


-class NoOpSpan:
-    def log(self, **kwargs):
+# DEVNOTE: This is copied from braintrust-sdk/py/src/braintrust/logger.py
+class _NoopSpan:
+    def __init__(self, *args, **kwargs):
         pass

-    def start_span(self, *args, **kwargs):
-        return self
+    @property
+    def id(self):
+        return ""
+
+    @property
+    def span_id(self):
+        return ""
+
+    @property
+    def root_span_id(self):
+        return ""

-    def end(self, *args, **kwargs):
+    def log(self, **event):
         pass

+    def start_span(self, name, span_attributes={}, start_time=None, set_current=None, **event):
+        return self
+
+    def end(self, end_time=None):
+        return end_time or time.time()
+
+    def close(self, end_time=None):
+        return self.end(end_time)
+
     def __enter__(self):
-        pass
+        return self

-    def __exit__(self, exc_type, exc_val, exc_tb):
-        pass
+    def __exit__(self, type, value, callback):
+        del type, value, callback


 def current_span():
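Why patch 5 is needed: the old `NoOpSpan.__enter__` body was just `pass`, so it returned `None`, and patch 1's `with current_span().start_span(name="OpenAI Completion") as span:` in `log_cached_response` would bind `span` to `None` and crash on `span.log(...)` whenever braintrust was not installed. The replacement `_NoopSpan` returns `self` from both `start_span` and `__enter__`, which makes the logging call sites safe unconditionally. A small sketch of the resulting pattern, assuming braintrust is absent (the logged values are invented for illustration):

    # current_span() falls back to _NoopSpan when braintrust cannot be
    # imported; every method is a harmless no-op, so callers never need
    # to check which implementation they received.
    with current_span().start_span(name="OpenAI Completion") as span:
        span.log(input=[{"role": "user", "content": "hi"}], output="hello")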
From e8dfc2bddada552caec6d7f18aa4817279865ad6 Mon Sep 17 00:00:00 2001
From: Ankur Goyal
Date: Thu, 9 Nov 2023 16:36:59 -0800
Subject: [PATCH 6/6] Comments

---
 py/autoevals/oai.py  | 54 +++++++++++++++++++++++++++++++++++++++++++-
 py/autoevals/util.py | 52 ------------------------------------------
 2 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/py/autoevals/oai.py b/py/autoevals/oai.py
index 014e0aa..a2789ef 100644
--- a/py/autoevals/oai.py
+++ b/py/autoevals/oai.py
@@ -2,11 +2,13 @@
 import json
 import os
 import sqlite3
+import sys
+import textwrap
 import threading
 import time
 from pathlib import Path

-from .util import current_span, prepare_openai_complete
+from .util import current_span

 _CACHE_DIR = None
 _CONN = None
@@ -33,6 +35,56 @@ def open_cache():
 CACHE_LOCK = threading.Lock()


+def prepare_openai_complete(is_async=False, api_key=None):
+    try:
+        import openai
+    except Exception as e:
+        print(
+            textwrap.dedent(
+                f"""\
+                Unable to import openai: {e}
+
+                Please install it, e.g. with
+
+                pip install 'openai'
+                """
+            ),
+            file=sys.stderr,
+        )
+        raise
+
+    openai_obj = openai
+    is_v1 = False
+    if hasattr(openai, "OpenAI"):
+        # This is the new v1 API
+        is_v1 = True
+        if is_async:
+            openai_obj = openai.AsyncOpenAI(api_key=api_key)
+        else:
+            openai_obj = openai.OpenAI(api_key=api_key)
+
+    try:
+        from braintrust.oai import wrap_openai
+
+        openai_obj = wrap_openai(openai_obj)
+    except ImportError:
+        pass
+
+    complete_fn = None
+    rate_limit_error = None
+    if is_v1:
+        rate_limit_error = openai.RateLimitError
+        complete_fn = openai_obj.chat.completions.create
+    else:
+        rate_limit_error = openai.error.RateLimitError
+        if is_async:
+            complete_fn = openai_obj.ChatCompletion.acreate
+        else:
+            complete_fn = openai_obj.ChatCompletion.create
+
+    return complete_fn, rate_limit_error
+
+
 def post_process_response(resp):
     # This normalizes against craziness in OpenAI v0 vs. v1
     if hasattr(resp, "to_dict"):
diff --git a/py/autoevals/util.py b/py/autoevals/util.py
index 845141c..018479d 100644
--- a/py/autoevals/util.py
+++ b/py/autoevals/util.py
@@ -1,7 +1,5 @@
 import dataclasses
 import json
-import sys
-import textwrap
 import time


 class SerializableDataClass:
@@ -70,53 +68,3 @@ def traced(*span_args, **span_kwargs):
         return span_args[0]
     else:
         return lambda f: f
-
-
-def prepare_openai_complete(is_async=False, api_key=None):
-    try:
-        import openai
-    except Exception as e:
-        print(
-            textwrap.dedent(
-                f"""\
-                Unable to import openai: {e}
-
-                Please install it, e.g. with
-
-                pip install 'openai'
-                """
-            ),
-            file=sys.stderr,
-        )
-        raise
-
-    openai_obj = openai
-    is_v1 = False
-    if hasattr(openai, "chat") and hasattr(openai.chat, "completions"):
-        # This is the new v1 API
-        is_v1 = True
-        if is_async:
-            openai_obj = openai.AsyncOpenAI(api_key=api_key)
-        else:
-            openai_obj = openai.OpenAI(api_key=api_key)
-
-    try:
-        from braintrust.oai import wrap_openai
-
-        openai_obj = wrap_openai(openai_obj)
-    except ImportError:
-        pass
-
-    complete_fn = None
-    rate_limit_error = None
-    if is_v1:
-        rate_limit_error = openai.RateLimitError
-        complete_fn = openai_obj.chat.completions.create
-    else:
-        rate_limit_error = openai.error.RateLimitError
-        if is_async:
-            complete_fn = openai_obj.ChatCompletion.acreate
-        else:
-            complete_fn = openai_obj.ChatCompletion.create
-
-    return complete_fn, rate_limit_error
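A closing note on the retry strategy shared by `run_cached_request` and `arun_cached_request`: both back off exponentially from 0.1s with a 1.5x multiplier, capped at 20 attempts on the sync path and 100 on the async one. Factored out, the loop looks roughly like the sketch below; `with_backoff` is a hypothetical helper, not part of these patches:

    import time

    def with_backoff(complete, rate_limit_error, max_retries=20, **kwargs):
        # Mirror the patches' loop: on every rate-limit hit, grow the
        # sleep 1.5x (0.15s, 0.225s, ...) before trying again.
        sleep_time = 0.1
        for _ in range(max_retries):
            try:
                return complete(**kwargs)
            except rate_limit_error:
                sleep_time *= 1.5
                time.sleep(sleep_time)
        raise RuntimeError(f"still rate limited after {max_retries} attempts")

One difference worth flagging: the loops in the patches fall through silently when retries are exhausted, leaving `resp` as `None`, which is then cached and returned; the sketch raises instead so the failure is visible.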