Merge branch 'main' into media-tests

ServiceNow · Jan 8, 2025 · 0a11ff4 · 0a11ff4
2 parents e20f061 + 504b50f
commit 0a11ff4
Show file tree

Hide file tree

Showing 149 changed files with 4,108 additions and 1,302 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,9 +2,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
-
-# C extensions
-*.so
+*.pyc
 
 # Distribution / packaging
 .Python
@@ -26,12 +24,6 @@ share/python-wheels/
 *.egg
 MANIFEST
 
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
@@ -51,75 +43,13 @@ coverage.xml
 .pytest_cache/
 cover/
 
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-
 # Jupyter Notebook
 .ipynb_checkpoints
 
 # IPython
 profile_default/
 ipython_config.py
 
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
-
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-
-# poetry
-#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-
-# pdm
-#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-#   in version control.
-#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
-.pdm.toml
-.pdm-python
-.pdm-build/
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
 
 # Environments
 .env
@@ -130,9 +60,6 @@ ENV/
 env.bak/
 venv.bak/
 
-# Spyder project settings
-.spyderproject
-.spyproject
 
 # Rope project settings
 .ropeproject
@@ -154,29 +81,33 @@ dmypy.json
 # Cython debug symbols
 cython_debug/
 
+# macOS
 .DS_Store
+
+# IDE
 .idea/
-.local/
 .vscode/launch.json
 .vscode/settings.json
 .vscode/tasks.json
 *.code-workspace
+
+# data
+/outputs/
+/examples/**/outputs/
+/data/
+
 *cache*.json
-!/tests/res/*/*.json
+# Re-include test fixtures at any depth under tests/. A "**" that is not
+# bounded by slashes behaves like a single "*" and does not cross directories,
+# so the negations below must be written as "tests/**/*.ext".
+!/tests/**/*.json
+
 *.jsonl
-!tests/res/*/*.jsonl
+!tests/**/*.jsonl
+
+*.zip
+
 *.log
-*.pyc
+
 *.sqlite
-!tests/res/*/*.sqlite
-*.zip
-**/__pycache__/
-**/.hypothesis/
-**/*.egg-info/
-/dist/*
+!tests/**/*.sqlite
+
+# Weights & Biases
 /wandb/
-config_tree
-outputs/
-data/
-package-lock.json
-package.json
diff --git a/CITATION.cff b/CITATION.cff
@@ -0,0 +1,42 @@
+# Citation metadata (Citation File Format 1.2.0) for the TapeAgents paper and code.
+cff-version: 1.2.0
+title: >-
+  TapeAgents: a Holistic Framework for Agent Development and
+  Optimization
+message: 'If you want to refer to our paper or code, please cite it as below.'
+type: software
+authors:
+  - family-names: Bahdanau
+    given-names: Dzmitry
+  - family-names: Gontier
+    given-names: Nicolas
+  - family-names: Huang
+    given-names: Gabriel
+  - family-names: Kamalloo
+    given-names: Ehsan
+  - family-names: Pardinas
+    given-names: Rafael
+  - family-names: Piché
+    given-names: Alex
+  - family-names: Scholak
+    given-names: Torsten
+  - family-names: Shliazhko
+    given-names: Oleh
+  - family-names: Tremblay  # full name is "Jordan Prince Tremblay" (see README contacts)
+    given-names: Jordan Prince
+  - family-names: Ghanem
+    given-names: Karam
+  - family-names: Parikh
+    given-names: Soham
+  - family-names: Tiwari
+    given-names: Mitul
+  - family-names: Vohra
+    given-names: Quaizar
+identifiers:
+  - type: url
+    value: 'https://arxiv.org/abs/2412.08445'
+repository-code: 'https://github.com/ServiceNow/TapeAgents'
+abstract: >-
+  TapeAgents is a framework that facilitates all stages of
+  the LLM Agent development lifecycle.
+license: Apache-2.0
+date-released: '2024-10-16'
diff --git a/README.md b/README.md
@@ -1,54 +1,65 @@
+
 # TapeAgents
 
 [![GitHub Release](https://img.shields.io/github/v/release/ServiceNow/TapeAgents?logo=bookstack&logoColor=white)](https://github.com/ServiceNow/TapeAgents/releases)
 [![PyPI - Version](https://img.shields.io/pypi/v/TapeAgents?logo=pypi&logoColor=white)](https://pypi.org/project/TapeAgents/)
+[![Paper](https://img.shields.io/badge/Arxiv-Paper-B31B1B?logo=arxiv)](https://arxiv.org/abs/2412.08445)
 [![Documentation](https://img.shields.io/badge/MkDocs-Documentation-blue?logo=materialformkdocs&logoColor=white)](https://servicenow.github.io/TapeAgents/)
 ![Build Status](https://github.com/ServiceNow/TapeAgents/actions/workflows/build.yml/badge.svg)
 ![Tests Status](https://github.com/ServiceNow/TapeAgents/actions/workflows/python-tests.yml/badge.svg)
 
-
+---
+<img src="assets/logo.jpg" width="250" align="left" alt="TapeAgents Logo" style="margin-right: 30px; margin-bottom: 30px" />
 
 **TapeAgents** is a framework that leverages a structured, replayable log (**Tape**) of the agent session to facilitate all stages of the LLM Agent development lifecycle. In TapeAgents, the agent reasons by processing the tape and the LLM output to produce new thoughts, actions, control flow steps and append them to the tape. The environment then reacts to the agent’s actions by likewise appending observation steps to the tape.
 
-![image](https://github.com/ServiceNow/TapeAgents/raw/main/assets/overview.png)
-
-
 Key features:
+
 - Build your agent as a low-level state machine, as a high-level multi-agent team configuration, or as a mono-agent guided by multiple prompts
 - Debug your agent with TapeAgent studio or TapeBrowser apps
 - Serve your agent with response streaming
 - Optimize your agent's configuration using successful tapes; finetune the LLM using revised tapes.
 
+![TapeAgents Overview](assets/overview.jpg)
+
 The Tape-centric design of TapeAgents will help you at all stages of your project:
+
 - Build with ultimate flexibility of having access to tape for making prompts and generating next steps
 - Change your prompts or team structure and resume the debug session as long as the new agent can continue from the older tape
 - Fully control the Agent's tape and the Agent's acting when you use a TapeAgent in an app
 - Optimize tapes and agents using the carefully crafted metadata structure that links together tapes, steps, llm calls and agent configurations
 
 # Get Started
 
-We highly recommend starting with the [introductory Jupyter notebook](https://github.com/ServiceNow/TapeAgents/blob/main/intro.ipynb). The notebook will introduce you to all the core concepts of framework. 
+We highly recommend starting with the [introductory Jupyter notebook](https://github.com/ServiceNow/TapeAgents/blob/main/intro.ipynb). The notebook will introduce you to all the core concepts of the framework.
 
 # Installation
+
 Install the latest release:
-```
+
+```zsh
 pip install TapeAgents
 ```
 
 If you want to install the version from the sources:
+
 1. Clone the repository:
-```
+
+```zsh
 git clone https://github.com/ServiceNow/TapeAgents.git
 cd TapeAgents
 ```
 
 2. Create conda environment `tapeagents` and install the package in editable mode inside the environment:
-```
+
+```zsh
 make setup
 ```
 
 # Examples
+
 The simplest agent just to show the basic structure of the agent:
+
 ```python
 from tapeagents.agent import Agent, Node
 from tapeagents.core import Prompt
@@ -81,26 +92,26 @@ The [examples/](https://github.com/ServiceNow/TapeAgents/tree/main/examples) dir
 - How to build [a team of TapeAgents](https://github.com/ServiceNow/TapeAgents/tree/main/examples/data_science) with [AutoGen](https://github.com/microsoft/autogen)-style low-code programming paradigm
 - How to [finetune a TapeAgent](https://github.com/ServiceNow/TapeAgents/tree/main/examples/gsm8k_tuning) with a small LLM to be better at math problem solving on GSM-8k dataset.
 
-
 Other notable examples that demonstrate the main aspects of the framework:
+
 - [workarena](https://github.com/ServiceNow/TapeAgents/tree/main/examples/workarena) - custom agent that solves [WorkArena](https://github.com/ServiceNow/WorkArena) benchmark using [BrowserGym](https://github.com/ServiceNow/BrowserGym) environment.
 - [tape_improver.py](https://github.com/ServiceNow/TapeAgents/tree/main/examples/tape_improver) - the agent that revisits and improves the tapes produced by another agent.
 
+# Learn more
 
-# Learn more 
+See our full TapeAgents [documentation](https://servicenow.github.io/TapeAgents/).
 
-See our [technical report](https://www.servicenow.com/research/TapeAgentsFramework.pdf) on TapeAgents.
+For an in-depth understanding of the design principles, architecture, and research behind TapeAgents, see our [technical report](https://arxiv.org/abs/2412.08445).
 
 # Contacts
 
 Feel free to reach out to the team:
-- Dzmitry Bahdanau, [email protected]
-- Oleh Shliazhko, [email protected]
-- Jordan Prince Tremblay, [email protected]
-- Alexandre Piché, [email protected]
+
+- Dzmitry Bahdanau, <[email protected]>
+- Oleh Shliazhko, <[email protected]>
+- Jordan Prince Tremblay, <[email protected]>
+- Alexandre Piché, <[email protected]>
 
 # Acknowledgements
 
 We acknowledge the inspiration we took from prior frameworks, in particular [LangGraph](https://github.com/langchain-ai/langgraph), [AutoGen](https://github.com/microsoft/autogen), [AIWaves Agents](https://github.com/aiwaves-cn/agents) and [DSPy](https://github.com/stanfordnlp/dspy).
-
-
diff --git a/assets/logo.jpg b/assets/logo.jpg
diff --git a/assets/overview.jpg b/assets/overview.jpg
diff --git a/assets/overview.png b/assets/overview.png
diff --git a/conf/finetune/rl_llama31_8b.yaml b/conf/finetune/rl_llama31_8b.yaml
@@ -24,7 +24,7 @@ seq_length: 4096
 load_as_bf16: True
 max_train_steps: 100000
 save_checkpoint_steps: ???
-optim: adafactor # adamw_torch runs OOM with accelerate
+optim: adamw_torch # NOTE(review): adamw_torch previously ran OOM with accelerate (the reason adafactor was used); confirm it now fits in memory
 objective: rl
 log_each_n_steps: 1
 resume_dataloader: false

diff --git a/conf/rl_deepseek.yaml b/conf/rl_deepseek.yaml
@@ -0,0 +1,23 @@
+defaults:  # Hydra defaults list: start from rl_gsm8k, then apply this file's overrides
+  - rl_gsm8k
+  - _self_  # listed last so the values below take precedence over rl_gsm8k
+
+finetune:
+  rl:
+    algo: reinforce
+    kl_coef: 0.0
+    reward_minus_kl_coef: 0.0
+    use_advantages: false
+    relu_log_p_weights: true
+  train_batch_size: 4
+  gradient_accumulation_passes: 1
+  learning_rate: 0.000001  # 1e-6
+force_restart: true
+max_agent_forks: 5000
+model_path: /mnt/llmd/base_models/deepseek-math-7b-instruct  # absolute path on a local mount; presumably site-specific, adjust for your environment
+n_workers_per_gpu: 32
+get_logprobs_workers_per_gpu: 1
+use_rejection_sampling: true
+test_every_n_iterations: 10
+attempts: 8
+dataset_name: math
diff --git a/conf/rl_gemma2b.yaml b/conf/rl_gemma2b.yaml
@@ -0,0 +1,34 @@
+defaults:  # Hydra defaults list: start from rl_gsm8k, then apply this file's overrides
+  - rl_gsm8k
+  - _self_
+
+dataset_name: math
+max_agent_forks: 5000
+attempts: 8
+
+finetune:
+  train_batch_size: 4
+  gradient_accumulation_passes: 8
+  gradient_clipping_threshold: 1.0
+  learning_rate: 0.000001  # 1e-6
+  weight_decay: 0.1 # NOTE(review): rationale undocumented; same value as in the reference command at the bottom of this file
+  rl:
+    algo: reinforce
+    reward_minus_kl_coef: 0.0
+    kl_coef: 0.0
+    use_advantages: false
+  save_checkpoint_steps: 156 # ~= 5000 / (8 * 4), presumably max_agent_forks / (gradient_accumulation_passes * train_batch_size); better than 8 in Alex's original run
+
+use_rejection_sampling: true
+test_every_n_iterations: 10
+model_path: /mnt/llmd/base_models/gemma-2-2b-it
+
+
+# Reference run, followed by what appears to be the command line it was launched with.
+# Note that the command used finetune.save_checkpoint_steps=8 and
+# model_path=google/gemma-2-2b-it, which differ from the values set above.
+# https://wandb.ai/apiche/tapeagents/runs/dec13_math_gemma_faster_lr_0_000001_attempts_8_pass_8_checkpoint_8_reinforce/overview
+#
+# --config-name rl_math finetune.rl.algo=reinforce finetune.train_batch_size=4
+# finetune.gradient_accumulation_passes=8 finetune.rl.reward_minus_kl_coef=0.0
+# finetune.rl.kl_coef=0.0 test_every_n_iterations=10 finetune.learning_rate=0.000001
+#  finetune.gradient_clipping_threshold=1.0 finetune.save_checkpoint_steps=8 finetune.weight_decay=0.1
+#   finetune.rl.use_advantages=false max_agent_forks=5000 attempts=8 use_rejection_sampling=true
+#   model_path=google/gemma-2-2b-it output_dir=/mnt/llmd/results/exps/alex/gsm8k/dec13_math_gemma_faster_lr_0_000001_attempts_8_pass_8_checkpoint_8_reinforce
diff --git a/conf/rl_gsm8k.yaml b/conf/rl_gsm8k.yaml
@@ -41,9 +41,11 @@ vllm_config:
     --gpu-memory-utilization: 0.9
     # VLLM get log probs OOM https://github.com/vllm-project/vllm/issues/5907
     --enable-chunked-prefill: ""
+    --max-num-batched-tokens: 256
 
-output_dir: outputs/rl_gsm8k
+output_dir: outputs/rl_gsm8k_deepspeed
 accelerate_cfg_path: conf/accelerate/accelerate_base.yaml
+use_deepspeed: false
 
 hydra:
   run: