Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into vllm_token_ids
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexPiche committed Jan 6, 2025
2 parents 2dabb13 + c626df1 commit c3e39db
Show file tree
Hide file tree
Showing 117 changed files with 131 additions and 56 deletions.
5 changes: 2 additions & 3 deletions examples/form_filler/scripts/prepare_test_assets.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import random
from pathlib import Path
import sys
from pathlib import Path

import hydra
import yaml
Expand All @@ -14,8 +14,7 @@
from ..user_simulator_agent import UserSimulatorTape
from .run_formfiller_agent import run_formfiller_agent

assets_folder = Path(__file__).parent.parent.parent.parent / "tests" / "res" / "form_filler"

assets_folder = Path(__file__).parent.parent.parent.parent / "tests" / "examples" / "res" / "form_filler"

input_tapes_for_teacher_path = assets_folder / "input_tapes_for_teacher.yaml"
input_tapes_for_user_path = assets_folder / "input_tapes_for_user.yaml"
Expand Down
33 changes: 33 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,36 @@ markers = [
"gpu: requires gpu",
"multi_gpu: requires more than 1 gpu",
]

[tool.tox]
min_version = "4"
requires = ["tox-uv>=1"]
env_list = ["type", "3.10", "3.11", "3.12", "3.13"]
skip_missing_interpreters = false

[tool.tox.env_run_base]
description = "Run test under {base_python}"
deps = [
"pytest>=8"
]
commands = [
[
"pytest",
"-s",
"--color=yes",
"-m",
"not slow",
"tests/",
"--ignore=tests/finetune/",
"--ignore=tests/examples/",
],
]

[tool.tox.env.type]
description = "run type check on code base"
deps = [
"mypy==1.11.2",
"types-cachetools>=5.5.0.20240820",
"types-chardet>=5.0.4.6",
]
commands = [["mypy", "tapeagents"]]
1 change: 1 addition & 0 deletions requirements.dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
pytest==8.3.3
flake8==6.0.0
ruff==0.6.6
tox==4.15.1
# notebook
ipykernel==6.29.5
testbook==0.4.2
Expand Down
1 change: 0 additions & 1 deletion requirements.finetune.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,5 @@ deepspeed==0.15.1
numpy==1.26.4
peft==0.12.0
tokenizers==0.20.1
transformers==4.45.2
wandb==0.19.1
vllm==0.6.1
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
60 changes: 11 additions & 49 deletions tests/test_examples.py → tests/examples/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,23 @@
import tempfile
from pathlib import Path

import transformers
import yaml
from omegaconf import DictConfig

from tapeagents.finetune.data import load_samples
from tapeagents.io import load_tapes
from tests.make_test_data import run_test_in_tmp_dir

sys.path.append(str(Path(__file__).parent.parent.resolve())) # allow to import from examples
sys.path.append(str(Path(__file__).parent.parent.parent.resolve())) # allow to import from examples

from examples.data_science import data_science
from examples.delegate import ExampleTape, FindIrregularVerbs
from examples.delegate_stack import ExampleTape as ExampleTapeStack
from examples.delegate_stack import Linguist, make_analyze_text_chain
from examples.delegate_stack import (
ExampleTape as ExampleTapeStack,
)
from examples.delegate_stack import (
Linguist,
make_analyze_text_chain,
)
from examples.form_filler.environment import FormFillerEnvironment
from examples.form_filler.scripts.prepare_test_assets import (
get_teacher_agent,
Expand All @@ -34,16 +37,13 @@
from examples.gaia_agent.agent import GaiaAgent
from examples.gaia_agent.environment import GaiaEnvironment
from examples.gaia_agent.tape import GaiaTape
from examples.gsm8k_tuning.finetune_student import get_training_samples_from_tapes
from examples.gsm8k_tuning.math_agent import MathAgent, MathTape
from examples.llama_agent import LLAMAChatBot
from examples.optimize.optimize import make_agentic_rag_agent, make_env
from examples.rl_gsm8k.orchestrate_rl import CoTMathAgent, RLMathTape, extract_tape_training_samples
from examples.tape_improver import tape_improver
from examples.workarena.agent import WorkArenaAgent
from examples.workarena.steps import WorkArenaTape
from tapeagents.config import DB_DEFAULT_FILENAME
from tapeagents.core import AgentStep, LLMCall, TrainingText
from tapeagents.core import AgentStep, TrainingText
from tapeagents.dialog_tape import DialogTape
from tapeagents.environment import EmptyEnvironment
from tapeagents.llms import LLM, ReplayLLM, TrainableLLM
Expand Down Expand Up @@ -224,7 +224,7 @@ def test_form_filler():
os.environ["TAPEAGENTS_MOCK_DATE"] = "2024-12-09"
assets_dir = str(Path(__file__).parent / "res" / "form_filler")
forms_path = str(
Path(__file__).parent.parent / "examples" / "form_filler" / "assets" / "forms" / "train" / "FlyCorp"
Path(__file__).parent.parent.parent / "examples" / "form_filler" / "assets" / "forms" / "train" / "FlyCorp"
)
env = FormFillerEnvironment.from_spec(forms_path)

Expand Down Expand Up @@ -273,7 +273,7 @@ def test_tape_improver():


def test_optimize():
with run_test_in_tmp_dir("optimize"):
with run_test_in_tmp_dir("tests/examples/res/optimize"):
with open("config.yaml") as f:
cfg = DictConfig(yaml.safe_load(f))
agent = make_agentic_rag_agent(cfg)
Expand All @@ -283,41 +283,6 @@ def test_optimize():
assert replay_success, "Failed to replay tape"


def test_gsm8k_tuning_tapes_generation():
    """Replay the stored gsm8k_tuning tapes with a MathAgent and expect zero failures."""
    run_dir = f"{res_path}/gsm8k_tuning"
    tapes_dir = os.path.join(run_dir, "tapes")
    agent = MathAgent.create(mock_llm(run_dir))
    tapes = load_tapes(MathTape, tapes_dir, file_extension=".json")
    logger.info(f"Validate {len(tapes)} tapes")
    failed_count = replay_tapes(agent, tapes, reuse_observations=True)
    assert failed_count == 0, f"{failed_count} failed tapes"


def test_gsm8k_tuning_samples_prep():
    """Check that samples rebuilt from the tapes equal the stored training_samples.jsonl."""
    run_dir = f"{res_path}/gsm8k_tuning"
    stored_samples = load_samples(f"{run_dir}/training_samples.jsonl")
    rebuilt_samples = get_training_samples_from_tapes(f"{run_dir}/tapes/")
    assert stored_samples == rebuilt_samples


def test_rl_gsm8k_data():
    """Check that training samples extracted from the rl_gsm8k tapes equal the stored ones.

    Each step's ``llm_call`` metadata entry, when present, is turned into an
    ``LLMCall`` object before the tape is handed to
    ``extract_tape_training_samples``; the first extracted sample of every
    tape is collected and the collection is compared with
    ``training_samples.jsonl``.
    """
    run_dir = f"{res_path}/rl_gsm8k"
    tapes = load_tapes(RLMathTape, run_dir, file_extension=".json")
    llm = mock_llm(run_dir)
    llm.tokenizer = transformers.AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
    agent = CoTMathAgent.create(llm)
    cfg = DictConfig({"dataset_name": "math", "finetune": {"seq_length": 1024}})

    extracted_samples = []
    for tape in tapes:
        for step in tape:
            step_extras = step.metadata.other
            raw_llm_call = step_extras.get("llm_call")
            if raw_llm_call:
                step_extras["llm_call"] = LLMCall(**raw_llm_call)
        # Only the middle element of the returned triple is used here.
        _, tape_samples, _ = extract_tape_training_samples(tape, agent, "train", cfg)
        extracted_samples.append(tape_samples[0])

    stored_samples = load_samples(f"{run_dir}/training_samples.jsonl")
    assert extracted_samples == stored_samples


if __name__ == "__main__":
test_llama_agent()
test_llama_agent_traces()
Expand All @@ -329,6 +294,3 @@ def test_rl_gsm8k_data():
test_data_science()
test_form_filler()
test_tape_improver()
test_gsm8k_tuning_tapes_generation()
test_gsm8k_tuning_samples_prep()
test_rl_gsm8k_data()
6 changes: 4 additions & 2 deletions tests/test_notebooks.py → tests/examples/test_notebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from pathlib import Path

import testbook
from make_test_data import run_test_in_tmp_dir

from tests.make_test_data import run_test_in_tmp_dir

res_dir = f"{pathlib.Path(__file__).parent.resolve()}/res"

Expand All @@ -12,7 +13,7 @@ def test_intro_notebook():
intro_notebook_path = Path("intro.ipynb").resolve()
assets_path = Path("assets").resolve()
with testbook.testbook(intro_notebook_path) as tb:
with run_test_in_tmp_dir("intro_notebook") as test_data_dir:
with run_test_in_tmp_dir("tests/examples/res/intro_notebook") as test_data_dir:
shutil.copytree(assets_path, Path("assets"))
sqlite_path = Path(test_data_dir) / "tapedata.sqlite"
tb.inject(
Expand All @@ -31,5 +32,6 @@ def test_intro_notebook():
)
tb.execute()


if __name__ == "__main__":
test_intro_notebook()
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
79 changes: 79 additions & 0 deletions tests/finetune/test_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import logging
import os
import sys
from pathlib import Path

import transformers
from omegaconf import DictConfig

from tapeagents.io import load_tapes

# Put the repository root on sys.path so the top-level ``examples`` package can be imported.
# This file lives in tests/finetune/, so the root is three levels above __file__
# (two levels up would only reach tests/).
sys.path.append(str(Path(__file__).parent.parent.parent.resolve()))

from examples.gsm8k_tuning.finetune_student import get_training_samples_from_tapes
from examples.gsm8k_tuning.math_agent import MathAgent, MathTape
from examples.rl_gsm8k.orchestrate_rl import (
CoTMathAgent,
RLMathTape,
extract_tape_training_samples,
)
from tapeagents.core import LLMCall
from tapeagents.finetune.data import load_samples
from tapeagents.llms import LLM, ReplayLLM, TrainableLLM
from tapeagents.orchestrator import replay_tapes

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

res_path = Path(__file__).parent.resolve() / "res"


def mock_llm(run_dir: str) -> LLM:
    """Return a ``ReplayLLM`` created from a ``TrainableLLM`` template and ``run_dir``.

    The template points at ``https://api.together.xyz`` with the Llama 3 70B
    Instruct Turbo model name; it is only used as the argument to
    ``ReplayLLM.from_llm``.
    """
    sampling_parameters = {"temperature": 0.7, "max_tokens": 512}
    template_llm = TrainableLLM(
        base_url="https://api.together.xyz",
        model_name="meta-llama/Meta-Llama-3-70B-Instruct-Turbo",
        tokenizer_name="meta-llama/Meta-Llama-3-70B-Instruct",
        parameters=sampling_parameters,
    )
    return ReplayLLM.from_llm(template_llm, run_dir)


def test_gsm8k_tuning_tapes_generation():
    """Replay the stored gsm8k_tuning tapes with a MathAgent and expect zero failures."""
    run_dir = f"{res_path}/gsm8k_tuning"
    tapes_dir = os.path.join(run_dir, "tapes")
    agent = MathAgent.create(mock_llm(run_dir))
    tapes = load_tapes(MathTape, tapes_dir, file_extension=".json")
    logger.info(f"Validate {len(tapes)} tapes")
    failed_count = replay_tapes(agent, tapes, reuse_observations=True)
    assert failed_count == 0, f"{failed_count} failed tapes"


def test_gsm8k_tuning_samples_prep():
    """Check that samples rebuilt from the tapes equal the stored training_samples.jsonl."""
    run_dir = f"{res_path}/gsm8k_tuning"
    stored_samples = load_samples(f"{run_dir}/training_samples.jsonl")
    rebuilt_samples = get_training_samples_from_tapes(f"{run_dir}/tapes/")
    assert stored_samples == rebuilt_samples


def test_rl_gsm8k_data():
    """Check that training samples extracted from the rl_gsm8k tapes equal the stored ones.

    Each step's ``llm_call`` metadata entry, when present, is turned into an
    ``LLMCall`` object before the tape is handed to
    ``extract_tape_training_samples``; the first extracted sample of every
    tape is collected and the collection is compared with
    ``training_samples.jsonl``.
    """
    run_dir = f"{res_path}/rl_gsm8k"
    tapes = load_tapes(RLMathTape, run_dir, file_extension=".json")
    llm = mock_llm(run_dir)
    llm.tokenizer = transformers.AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
    agent = CoTMathAgent.create(llm)
    cfg = DictConfig({"dataset_name": "math", "finetune": {"seq_length": 1024}})

    extracted_samples = []
    for tape in tapes:
        for step in tape:
            step_extras = step.metadata.other
            raw_llm_call = step_extras.get("llm_call")
            if raw_llm_call:
                step_extras["llm_call"] = LLMCall(**raw_llm_call)
        # Only the middle element of the returned triple is used here.
        _, tape_samples, _ = extract_tape_training_samples(tape, agent, "train", cfg)
        extracted_samples.append(tape_samples[0])

    stored_samples = load_samples(f"{run_dir}/training_samples.jsonl")
    assert extracted_samples == stored_samples


if __name__ == "__main__":
    # Allow running this module directly: call each test in declaration order.
    for run_test in (
        test_gsm8k_tuning_tapes_generation,
        test_gsm8k_tuning_samples_prep,
        test_rl_gsm8k_data,
    ):
        run_test()
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/make_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def run_test_in_tmp_dir(test_name: str):
"""Copy test resources to a temporary directory and run the test there"""
cur_dir = os.getcwd()
tmpdir = tempfile.mkdtemp()
test_data_dir = Path(f"tests/res/{test_name}").resolve()
test_data_dir = Path(f"{test_name}").resolve()
os.chdir(tmpdir)
shutil.copytree(test_data_dir, tmpdir, dirs_exist_ok=True)
# force creation of SQLite tables
Expand Down

0 comments on commit c3e39db

Please sign in to comment.