braintrustdata · ankrgyl · Sep 30, 2024 · Sep 30, 2024 · Sep 30, 2024 · Sep 30, 2024
diff --git a/.github/workflows/eval-py.yml b/.github/workflows/eval-py.yml
@@ -0,0 +1,46 @@
+name: Run Python evals
+# Runs the action from this repository against test-eval-py on every push.
+on:
+  push:
+    # paths:
+    #   - 'test-eval-py/**'
+
+permissions:
+  pull-requests: write # needed for the action to leave comments on the PR
+  contents: read
+
+jobs:
+  eval:
+    name: Run Python evals
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # fetch full history rather than a shallow clone
+          submodules: "recursive"
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.12" # TODO: Matrix test different versions
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r test-eval-py/requirements.txt
+
+      - name: Run Evals
+        uses: ./
+        with:
+          api_key: ${{ secrets.BRAINTRUST_API_KEY }}
+          root: test-eval-py
+          runtime: python # selects the `braintrust eval` (non-npx) command
+
+      # Debugging aid: uncomment to start a tmate terminal session on the runner.
+      # - name: Start terminal session
+      #   uses: mxschmitt/action-tmate@v3
+      #   with:
+      #     limit-access-to-actor: true
diff --git a/.gitignore b/.gitignore
@@ -103,3 +103,4 @@ __tests__/runner/*
 *.code-workspace
 
 .turbo
+__pycache__
diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/README.md b/README.md
@@ -75,14 +75,14 @@ jobs:
           root: my_eval_dir
 ```
 
-> [!IMPORTANT]
-> You must specify `permissions` for the action to leave comments on your PR.
-> Without these permissions, you'll see Github API errors.
+> [!IMPORTANT]
+> You must specify `permissions` for the action to leave comments on your PR. Without these permissions, you'll see GitHub API errors.
 
 To see examples of fully configured templates, see the `examples` directory:
 
 - [`node with npm`](examples/npm.yml)
 - [`node with pnpm`](examples/pnpm.yml)
+- [`python`](examples/python.yml)
 
 ## How it works
 

diff --git a/eval/dist/index.js b/eval/dist/index.js
diff --git a/eval/dist/index.js.map b/eval/dist/index.js.map
diff --git a/eval/src/braintrust.ts b/eval/src/braintrust.ts
@@ -64,6 +64,16 @@ export async function runEval(args: Params, onSummary: OnSummaryFn) {
   // Change working directory
   process.chdir(path.resolve(root));
 
-  const command = `npx braintrust eval --jsonl ${paths}`;
+  let command: string;
+  switch (args.runtime) {
+    case "node":
+      command = `npx braintrust eval --jsonl ${paths}`;
+      break;
+    case "python":
+      command = `braintrust eval --jsonl ${paths}`;
+      break;
+    default:
+      throw new Error(`Unsupported runtime: ${args.runtime}`);
+  }
   await runCommand(command, onSummary);
 }
diff --git a/eval/src/main.ts b/eval/src/main.ts
@@ -38,9 +38,6 @@ async function main(): Promise<void> {
       `Invalid arguments: ${args.error.errors.map(e => e.message).join("\n")}`,
     );
   }
-  if (args.data.runtime !== "node") {
-    throw new Error("Only Node.js runtime is supported");
-  }
 
   await upsertComment(`${TITLE}Evals in progress... ⌛`);
 

diff --git a/examples/python.yml b/examples/python.yml
@@ -0,0 +1,41 @@
+name: Run Python evals
+# Example workflow: install Python dependencies, then run Braintrust evals.
+on:
+  push:
+    # Uncomment to run only when files in the 'evals' directory change
+    # paths:
+    #   - "evals/**"
+
+permissions:
+  pull-requests: write # required for the action to leave comments on your PR
+  contents: read
+
+jobs:
+  eval:
+    name: Run evals
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.12" # Replace with your Python version
+
+      # Tweak this to a dependency manager of your choice; keep the path in sync with `root` below
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r my_eval_dir/requirements.txt
+
+      - name: Run Evals
+        uses: ./ # NOTE(review): local path only resolves in this action's repo; copied workflows likely need the published action ref — confirm
+        with:
+          api_key: ${{ secrets.BRAINTRUST_API_KEY }}
+          runtime: python
+          root: my_eval_dir
diff --git a/test-eval-py/.python-version b/test-eval-py/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/test-eval-py/eval_trivial.py b/test-eval-py/eval_trivial.py
diff --git a/test-eval-py/eval_tutorial.py b/test-eval-py/eval_tutorial.py
@@ -0,0 +1,19 @@
+from braintrust import Eval
+
+from autoevals import Levenshtein
+# Minimal eval: a greeting task scored against the expected strings below.
+Eval(
+    "Say Hi Bot Python",  # Replace with your project name
+    data=lambda: [
+        {
+            "input": "Foo",
+            "expected": "Hi Foo",
+        },
+        {
+            "input": "Bar",
+            "expected": "Hello Bar",  # differs from the task's "Hi Bar" output
+        },
+    ],  # Replace with your eval dataset
+    task=lambda input: "Hi " + input,  # Replace with your LLM call
+    scores=[Levenshtein],  # scorer imported from autoevals
+)
diff --git a/test-eval-py/pyproject.toml b/test-eval-py/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "test-eval-py"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "autoevals>=0.0.92",
+    "braintrust>=0.0.160",
+]
diff --git a/test-eval-py/requirements.txt b/test-eval-py/requirements.txt
@@ -0,0 +1,24 @@
+attrs==24.2.0
+autoevals==0.0.92
+braintrust==0.0.160
+braintrust-core==0.0.54
+certifi==2024.8.30
+charset-normalizer==3.3.2
+chevron==0.14.0
+exceptiongroup==1.2.0
+gitdb==4.0.11
+gitpython==3.1.43
+idna==3.10
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+levenshtein==0.26.0
+python-dotenv==1.0.1
+pyyaml==6.0.2
+rapidfuzz==3.10.0
+referencing==0.35.1
+requests==2.32.3
+rpds-py==0.20.0
+smmap==5.0.1
+sseclient-py==1.8.0
+tqdm==4.66.5
+urllib3==2.2.3
Original file line number	Diff line number	Diff line change
Expand Up		@@ -103,3 +103,4 @@ __tests__/runner/*
		*.code-workspace

		.turbo
		__pycache__