From 228fa249d883fd38bd1ad3d26986c57bd5039bef Mon Sep 17 00:00:00 2001 From: Samion Suwito <79575349+samionsuwito@users.noreply.github.com> Date: Sat, 12 Oct 2024 21:26:26 -0700 Subject: [PATCH 1/5] Migrate openai to v1 for eval --- eval/get_llm_responses.py | 10 ++++++---- eval/get_llm_responses_retriever.py | 10 ++++++---- eval/retrievers/gpt.py | 8 ++++---- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/eval/get_llm_responses.py b/eval/get_llm_responses.py index b02f3ef08..aef3d1636 100644 --- a/eval/get_llm_responses.py +++ b/eval/get_llm_responses.py @@ -58,14 +58,16 @@ def get_response(get_response_input, api_key): try: if "gpt" in model: - openai.api_key = api_key - responses = openai.ChatCompletion.create( + client = openai.OpenAI( + api_key=api_key, + ) + responses = client.chat.completions.create( model=model, messages=question, - n=1, + n = 1, temperature=0, ) - response = responses['choices'][0]['message']['content'] + response = responses.choices[0].message.content elif "claude" in model: client = anthropic.Anthropic(api_key=api_key) responses = client.completions.create( diff --git a/eval/get_llm_responses_retriever.py b/eval/get_llm_responses_retriever.py index 6667e3527..c9107790c 100644 --- a/eval/get_llm_responses_retriever.py +++ b/eval/get_llm_responses_retriever.py @@ -59,14 +59,16 @@ def get_response(get_response_input, api_key): try: if "gpt" in model: - openai.api_key = api_key - responses = openai.ChatCompletion.create( + client = openai.OpenAI( + api_key=api_key, + ) + responses = client.chat.completions.create( model=model, messages=question, - n=1, + n = 1, temperature=0, ) - response = responses['choices'][0]['message']['content'] + response = responses.choices[0].message.content elif "claude" in model: client = anthropic.Anthropic(api_key=api_key) responses = client.completions.create( diff --git a/eval/retrievers/gpt.py b/eval/retrievers/gpt.py index e3518cfeb..1c498cfb0 100644 --- a/eval/retrievers/gpt.py +++ b/eval/retrievers/gpt.py @@ -36,10 +36,10 @@ def get_embeddings( ) -> List[List[float]]: assert len(list_of_text) <= 2048, "The number of docs should be <= 2048" list_of_text = [text.replace("\n", " ") for text in list_of_text] - openai.api_key = os.environ["OPENAI_API_KEY"] - data = openai.Embedding.create(input=list_of_text, engine="text-embedding-ada-002").data - data = sorted(data, key=lambda x: x["index"]) # maintain the same order as input. - return [d["embedding"] for d in data] + client = openai.OpenAI() # os.environ["OPENAI_API_KEY"] is default + data = client.embeddings.create(input=list_of_text,model="text-embedding-ada-002").data + data = sorted(data, key=lambda x: x.index) # maintain the same order as input. + return [d.embedding for d in data] def from_documents(self, documents: List): contents = [document.page_content for document in documents] From 24b29bfd546524d8c590604fddfed533f8d81c28 Mon Sep 17 00:00:00 2001 From: Samion Suwito <79575349+samionsuwito@users.noreply.github.com> Date: Sat, 12 Oct 2024 22:42:40 -0700 Subject: [PATCH 2/5] Migrate colab notebooks to openai v1 --- README.md | 6 +++--- inference/README.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ea251a0f7..32972d902 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ **🟢 Gorilla is Apache 2.0** With Gorilla being fine-tuned on MPT, and Falcon, you can use Gorilla commercially with no obligations! 
:golf: -**:rocket: Try Gorilla in 60s** [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1DEBPsccVLF_aUnmD0FwPeHFrtdC0QIUP?usp=sharing) +**:rocket: Try Gorilla in 60s** [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1q7T7g3wmRNEUwsWujLcgjHLK7YNPjQxh?usp=sharing) :computer: Use [Gorilla in your CLI](https://github.com/gorilla-llm/gorilla-cli) with `pip install gorilla-cli` @@ -36,12 +36,12 @@ - 🟢 [06/06] Released Commercially usable, Apache 2.0 licensed Gorilla models - :rocket: [05/30] Provided the [CLI interface](inference/README.md) to chat with Gorilla! - :rocket: [05/28] Released Torch Hub and TensorFlow Hub Models! -- :rocket: [05/27] Released the first Gorilla model! [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1DEBPsccVLF_aUnmD0FwPeHFrtdC0QIUP?usp=sharing) or [:hugs:](https://huggingface.co/gorilla-llm/gorilla-7b-hf-delta-v0)! +- :rocket: [05/27] Released the first Gorilla model! [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1q7T7g3wmRNEUwsWujLcgjHLK7YNPjQxh?usp=sharing) or [:hugs:](https://huggingface.co/gorilla-llm/gorilla-7b-hf-delta-v0)! - :fire: [05/27] We released the APIZoo contribution guide for community API contributions! - :fire: [05/25] We release the APIBench dataset and the evaluation code of Gorilla! ## Gorilla Gradio -**Try Gorilla LLM models in [HF Spaces](https://huggingface.co/spaces/gorilla-llm/gorilla-demo/) or [![Gradio Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1ktnVWPJOgqTC9hLW8lJPVZszuIddMy7y?usp=sharing)** +**Try Gorilla LLM models in [HF Spaces](https://huggingface.co/spaces/gorilla-llm/gorilla-demo/) or [![Gradio Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/18ru6QUVVegJGTTa9TzbLAXVok42KDkWx?usp=sharing)** ![gorilla_webUI_2](https://github.com/TanmayDoesAI/gorilla/assets/85993243/f30645bf-6798-4bd2-ac6e-6943840ae095) diff --git a/inference/README.md b/inference/README.md index 275160b33..7c4739ad3 100644 --- a/inference/README.md +++ b/inference/README.md @@ -4,7 +4,7 @@ ## Get Started -You can either run Gorilla through our hosted [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1DEBPsccVLF_aUnmD0FwPeHFrtdC0QIUP?usp=sharing) or [chat with it using cli](#inference-using-cli). We also provide instructions for [evaluating batched prompts](#optional-batch-inference-on-a-prompt-file). Here, are the instructions to run it locally. +You can either run Gorilla through our hosted [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1q7T7g3wmRNEUwsWujLcgjHLK7YNPjQxh?usp=sharing) or [chat with it using cli](#inference-using-cli). We also provide instructions for [evaluating batched prompts](#optional-batch-inference-on-a-prompt-file). Here, are the instructions to run it locally. New: We release `gorilla-mpt-7b-hf-v0` and `gorilla-falcon-7b-hf-v0` - two Apache 2.0 licensed models (commercially usable). 
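The first two patches replace the module-level `openai` v0 interface with the v1 client API. A minimal sketch of that before/after pattern, assuming `openai>=1.0.0` is installed and `OPENAI_API_KEY` is set (the model names below are illustrative, not taken from the eval scripts):

```python
# Minimal sketch of the v0 -> v1 migration applied in these patches.
# Assumes openai>=1.0.0 and OPENAI_API_KEY in the environment; the model
# names are illustrative and not taken from the eval scripts.
import os

import openai

client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# v0: openai.ChatCompletion.create(...) returned a dict-like object.
# v1: client.chat.completions.create(...) returns a typed response object.
chat = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Say hello."}],
    n=1,
    temperature=0,
)
print(chat.choices[0].message.content)  # was responses['choices'][0]['message']['content']

# v0: openai.Embedding.create(engine=...) with dict-style access.
# v1: client.embeddings.create(model=...) with attribute access.
emb = client.embeddings.create(
    input=["first document", "second document"],
    model="text-embedding-ada-002",
)
vectors = [d.embedding for d in sorted(emb.data, key=lambda d: d.index)]  # keep input order
print(len(vectors), len(vectors[0]))
```

The same attribute-style access (`choices[0].message.content`, `data[i].embedding`) replaces the dictionary indexing removed in `eval/get_llm_responses.py`, `eval/get_llm_responses_retriever.py`, and `eval/retrievers/gpt.py`.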
From 9754f57bf4c945773e7e3b7aff1e98785270eac3 Mon Sep 17 00:00:00 2001
From: Samion Suwito <79575349+samionsuwito@users.noreply.github.com>
Date: Sat, 12 Oct 2024 23:12:22 -0700
Subject: [PATCH 3/5] Migrate openfunctions-v1 readme and collab to openai v1

---
 openfunctions/openfunctions-v1/README.md | 28 ++++++++++++++----------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/openfunctions/openfunctions-v1/README.md b/openfunctions/openfunctions-v1/README.md
index 30235dfaf..76f5bedbc 100644
--- a/openfunctions/openfunctions-v1/README.md
+++ b/openfunctions/openfunctions-v1/README.md
@@ -1,7 +1,7 @@
 # Gorilla OpenFunctions
 💡 Comes with Parallel Function Calling!
-🚀 Try it out on [Colab](https://colab.research.google.com/drive/16M5J2H9F8YQora_W2PDnp120slZH-Mqd?usp=sharing)
+🚀 Try it out on [Colab](https://colab.research.google.com/drive/1qe3xecYw8xvS3JbaXUDcl5vmnFVQZMTx?usp=sharing)
 📣 Read more in our [OpenFunctions release blog](https://gorilla.cs.berkeley.edu/blogs/4_open_functions.html)
 
 ## Introduction
@@ -21,7 +21,7 @@ All of our models are hosted on our Huggingface UC Berkeley gorilla-llm org: [go
 1. OpenFunctions is compatible with OpenAI Functions
 
 ```bash
-!pip install openai==0.28.1
+!pip install openai
 ```
 
 2. Point to Gorilla hosted servers
@@ -30,10 +30,12 @@ All of our models are hosted on our Huggingface UC Berkeley gorilla-llm org: [go
 import openai
 
 def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley at zipcode 94704 in 10 minutes", model="gorilla-openfunctions-v0", functions=[]):
-    openai.api_key = "EMPTY"
-    openai.api_base = "http://luigi.millennium.berkeley.edu:8000/v1"
     try:
-        completion = openai.ChatCompletion.create(
+        client = openai.OpenAI(
+            api_key = "EMPTY",
+            base_url = "http://luigi.millennium.berkeley.edu:8000/v1"
+        )
+        completion = client.chat.completions.create(
             model="gorilla-openfunctions-v1",
             temperature=0.0,
             messages=[{"role": "user", "content": prompt}],
@@ -149,11 +151,11 @@
 
 ## Self-Hosting OpenFunctions
 
-This section provides a guide on how to self-host the OpenFunctions model on your local machine or serve it locally for your enterprise. The server deploys the OpenFunctions-v0 model with uvicorn, while the client interacts with this local server using the OpenAI package (0.28.xx).
+This section provides a guide on how to self-host the OpenFunctions model on your local machine or serve it locally for your enterprise. The server deploys the OpenFunctions-v0 model with uvicorn, while the client interacts with this local server using the OpenAI package.
 
 ### Setting Up Your Local Server
 
-The server API endpoint mirrors the interface of the API call executed by `openai.ChatCompletion.create`, ensuring compatibility with clients using the OpenAI package.
+The server API endpoint mirrors the interface of the API call executed by `client.chat.completions.create`, ensuring compatibility with clients using the OpenAI package.
 
 Ensure you have the required libraries:
 ```bash
@@ -246,10 +248,10 @@
 Ensure you have the required libraries:
 
 ```bash
-!pip install openai==0.28.1
+!pip install openai
 ```
 
-The example client below demonstrates how to interact with the locally hosted OpenFunctions model using `openai.ChatCompletion.create`, akin to using Gorilla hosted servers.
+The example client below demonstrates how to interact with the locally hosted OpenFunctions model using `client.chat.completions.create`, akin to using Gorilla hosted servers.
```python import openai @@ -272,10 +274,12 @@ def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley Raises: - Exception: If there's an issue with processing the request or communicating with the server. """ - openai.api_key = "EMPTY" - openai.api_base = "http://localhost:8000" # Point to the local server try: - completion = openai.ChatCompletion.create( + client = openai.OpenAI( + api_key = "EMPTY", + base_url = "http://localhost:8000" # Point to the local server + ) + completion = client.chat.completions.create( model="gorilla-openfunctions-v0", temperature=0.0, messages=[{"role": "user", "content": prompt}], From 068494e7110a586bf60a1a5aca2a4760242694f4 Mon Sep 17 00:00:00 2001 From: Samion Suwito <79575349+samionsuwito@users.noreply.github.com> Date: Sat, 12 Oct 2024 23:20:20 -0700 Subject: [PATCH 4/5] migrate openfunctions to openai v1 --- openfunctions/README.md | 10 ++++++---- openfunctions/inference_hosted.py | 9 +++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/openfunctions/README.md b/openfunctions/README.md index ba48cad9b..b757910f6 100644 --- a/openfunctions/README.md +++ b/openfunctions/README.md @@ -35,7 +35,7 @@ All of our models are hosted on our Huggingface UC Berkeley gorilla-llm org: [go 1. OpenFunctions is compatible with OpenAI Functions ```bash -!pip install openai==0.28.1 +!pip install openai ``` 2. Point to Gorilla hosted servers @@ -44,10 +44,12 @@ All of our models are hosted on our Huggingface UC Berkeley gorilla-llm org: [go import openai def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley at zipcode 94704 in 10 minutes", model="gorilla-openfunctions-v0", functions=[]): - openai.api_key = "EMPTY" - openai.api_base = "http://luigi.millennium.berkeley.edu:8000/v1" try: - completion = openai.ChatCompletion.create( + client = openai.OpenAI( + api_key = "EMPTY", + base_url = "http://luigi.millennium.berkeley.edu:8000/v1" + ) + completion = client.chat.completions.create( model="gorilla-openfunctions-v2", temperature=0.0, messages=[{"role": "user", "content": prompt}], diff --git a/openfunctions/inference_hosted.py b/openfunctions/inference_hosted.py index f51e84958..725951325 100644 --- a/openfunctions/inference_hosted.py +++ b/openfunctions/inference_hosted.py @@ -1,9 +1,6 @@ import openai import json -openai.api_key = "EMPTY" -openai.api_base = "http://luigi.millennium.berkeley.edu:8000/v1" - # Example dummy function hard coded to return the same weather # In production, this could be your backend API or an external API def get_current_weather(location, unit="fahrenheit"): @@ -36,7 +33,11 @@ def run_conversation(): }, } ] - completion = openai.ChatCompletion.create( + client = openai.OpenAI( + api_key = "EMPTY", + base_url = "http://luigi.millennium.berkeley.edu:8000/v1" + ) + completion = client.chat.completions.create( model='gorilla-openfunctions-v2', messages=messages, functions=functions, From b743404d84688f038f84db3ca28f2ceb337f4287 Mon Sep 17 00:00:00 2001 From: Samion Suwito <79575349+samionsuwito@users.noreply.github.com> Date: Sun, 13 Oct 2024 22:01:41 -0700 Subject: [PATCH 5/5] changed to tools from functions --- openfunctions/README.md | 27 +++--- openfunctions/inference_hosted.py | 29 +++--- openfunctions/inference_local.py | 44 ++++++--- openfunctions/openfunctions-v1/README.md | 110 +++++++++++++++-------- 4 files changed, 136 insertions(+), 74 deletions(-) diff --git a/openfunctions/README.md b/openfunctions/README.md index b757910f6..1c170520b 100644 --- 
a/openfunctions/README.md +++ b/openfunctions/README.md @@ -53,7 +53,7 @@ def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley model="gorilla-openfunctions-v2", temperature=0.0, messages=[{"role": "user", "content": prompt}], - functions=functions, + tools=functions, ) return completion.choices[0] except Exception as e: @@ -66,19 +66,22 @@ def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley query = "What's the weather like in the two cities of Boston and San Francisco?" functions = [ { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, }, - "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + "required": ["location"], }, - "required": ["location"], - }, + } } ] get_gorilla_response(query, functions=functions) diff --git a/openfunctions/inference_hosted.py b/openfunctions/inference_hosted.py index 725951325..fefcd5313 100644 --- a/openfunctions/inference_hosted.py +++ b/openfunctions/inference_hosted.py @@ -18,19 +18,22 @@ def run_conversation(): messages = [{"role": "user", "content": "What's the weather like in the two cities of Boston and San Francisco?"}] functions = [ { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, }, - "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + "required": ["location"], }, - "required": ["location"], - }, + } } ] client = openai.OpenAI( @@ -40,8 +43,8 @@ def run_conversation(): completion = client.chat.completions.create( model='gorilla-openfunctions-v2', messages=messages, - functions=functions, - function_call="auto", # auto is default, but we'll be explicit + tools=functions, + tool_choice="auto", # auto is default, but we'll be explicit ) print("--------------------") diff --git a/openfunctions/inference_local.py b/openfunctions/inference_local.py index 572fbd59e..33dd0d30f 100644 --- a/openfunctions/inference_local.py +++ b/openfunctions/inference_local.py @@ -79,26 +79,46 @@ def format_response(response: str): query_1: str = "What's the weather like in the two cities of Boston and San Francisco?" functions_1 = [ { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. 
San Francisco, CA", + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, }, - "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + "required": ["location"], }, - "required": ["location"], - }, + } } ] # Example usage 2 # This should return an error since the function cann't help with the prompt query_2: str = "What is the freezing point of water at a pressure of 10 kPa?" -functions_2 = [{"name": "thermodynamics.calculate_boiling_point", "description": "Calculate the boiling point of a given substance at a specific pressure.", "parameters": {"type": "object", "properties": {"substance": {"type": "string", "description": "The substance for which to calculate the boiling point."}, "pressure": {"type": "number", "description": "The pressure at which to calculate the boiling point."}, "unit": {"type": "string", "description": "The unit of the pressure. Default is 'kPa'."}}, "required": ["substance", "pressure"]}}] +functions_2 = [ + { + "type": "function", + "function": { + "name": "thermodynamics.calculate_boiling_point", + "description": "Calculate the boiling point of a given substance at a specific pressure.", + "parameters": { + "type": "object", + "properties": { + "substance": {"type": "string", "description": "The substance for which to calculate the boiling point."}, + "pressure": {"type": "number", "description": "The pressure at which to calculate the boiling point."}, + "unit": {"type": "string", "description": "The unit of the pressure. 
Default is 'kPa'."}
+                },
+                "required": ["substance", "pressure"]
+            }
+        }
+    }
+]
 
 # Generate prompt and obtain model output
 prompt_1 = get_prompt(query_1, functions=functions_1)
diff --git a/openfunctions/openfunctions-v1/README.md b/openfunctions/openfunctions-v1/README.md
index 76f5bedbc..29a48c650 100644
--- a/openfunctions/openfunctions-v1/README.md
+++ b/openfunctions/openfunctions-v1/README.md
@@ -39,7 +39,7 @@ def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley
             model="gorilla-openfunctions-v1",
             temperature=0.0,
             messages=[{"role": "user", "content": prompt}],
-            functions=functions,
+            tools=functions,
         )
         return completion.choices[0].message.content
     except Exception as e:
@@ -52,10 +52,29 @@ def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley
 query = "Call me an Uber ride type \"Plus\" in Berkeley at zipcode 94704 in 10 minutes"
 functions = [
     {
-        "name": "Uber Carpool",
-        "api_name": "uber.ride",
-        "description": "Find suitable ride for customers given the location, type of ride, and the amount of time the customer is willing to wait as parameters",
-        "parameters": [{"name": "loc", "description": "location of the starting place of the uber ride"}, {"name":"type", "enum": ["plus", "comfort", "black"], "description": "types of uber ride user is ordering"}, {"name": "time", "description": "the amount of time in minutes the customer is willing to wait"}]
+        "type": "function",
+        "function": {
+            "name": "uber.ride",
+            "description": "Find suitable ride for customers given the location, type of ride, and the amount of time the customer is willing to wait as parameters",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "loc": {
+                        "type": "string",
+                        "description": "location of the starting place of the uber ride"
+                    },
+                    "type": {
+                        "type": "string",
+                        "enum": ["plus", "comfort", "black"],
+                        "description": "types of uber ride user is ordering"
+                    },
+                    "time": {
+                        "type": "string",
+                        "description": "The amount of time in minutes the customer is willing to wait"
+                    }
+                }
+            }
+        }
     }
 ]
 get_gorilla_response(query, functions=functions)
@@ -69,7 +88,7 @@ uber.ride(loc="berkeley", type="plus", time=10)
 
 ## Running OpenFunctions Locally
 
-You can try this out on our [Local OpenFunctions Colab](https://colab.research.google.com/drive/1I9UJoKh9sngE2MfPfQD5kbn2-twq2xvY?usp=sharing) to see how it works!
+You can try this out on our [Local OpenFunctions Colab](https://colab.research.google.com/drive/1h9r2PZWXkAm3xmIn0dw6CrQDpXbNsgOj?usp=sharing) to see how it works!
If you want to Run OpenFunctions locally, here is the prompt format that we used:
@@ -109,7 +128,7 @@
 device : str = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
 # Model and tokenizer setup
-model_id : str = "gorilla-llm/gorilla-openfunctions-v0"
+model_id : str = "gorilla-llm/gorilla-openfunctions-v1"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True)
 
@@ -131,14 +150,29 @@ pipe = pipeline(
 query: str = "Call me an Uber ride type \"Plus\" in Berkeley at zipcode 94704 in 10 minutes"
 functions = [
     {
-        "name": "Uber Carpool",
-        "api_name": "uber.ride",
-        "description": "Find suitable ride for customers given the location, type of ride, and the amount of time the customer is willing to wait as parameters",
-        "parameters": [
-            {"name": "loc", "description": "Location of the starting place of the Uber ride"},
-            {"name": "type", "enum": ["plus", "comfort", "black"], "description": "Types of Uber ride user is ordering"},
-            {"name": "time", "description": "The amount of time in minutes the customer is willing to wait"}
-        ]
+        "type": "function",
+        "function": {
+            "name": "uber.ride",
+            "description": "Find suitable ride for customers given the location, type of ride, and the amount of time the customer is willing to wait as parameters",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "loc": {
+                        "type": "string",
+                        "description": "location of the starting place of the uber ride"
+                    },
+                    "type": {
+                        "type": "string",
+                        "enum": ["plus", "comfort", "black"],
+                        "description": "types of uber ride user is ordering"
+                    },
+                    "time": {
+                        "type": "string",
+                        "description": "The amount of time in minutes the customer is willing to wait"
+                    }
+                }
+            }
+        }
     }
 ]
 
@@ -151,7 +185,7 @@ print(output)
 
 ## Self-Hosting OpenFunctions
 
-This section provides a guide on how to self-host the OpenFunctions model on your local machine or serve it locally for your enterprise. The server deploys the OpenFunctions-v0 model with uvicorn, while the client interacts with this local server using the OpenAI package.
+This section provides a guide on how to self-host the OpenFunctions model on your local machine or serve it locally for your enterprise. The server deploys the OpenFunctions-v1 model with uvicorn, while the client interacts with this local server using the OpenAI package.
### Setting Up Your Local Server @@ -177,7 +211,7 @@ class ChatCompletionRequest(BaseModel): model: str temperature: float messages: list - functions: list = [] + tools: list = [] # Initialize the FastAPI app app = FastAPI() @@ -187,7 +221,7 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu" torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 # Model and tokenizer setup -model_id = "gorilla-llm/gorilla-openfunctions-v0" +model_id = "gorilla-llm/gorilla-openfunctions-v1" tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True) @@ -226,7 +260,7 @@ def get_prompt(user_query: str, functions: list) -> str: @app.post("/chat/completions") async def chat_completion(request: ChatCompletionRequest): user_query = request.messages[0]['content'] - prompt = get_prompt(user_query, request.functions) + prompt = get_prompt(user_query, request.tools) output = pipe(prompt) generated_text = output[0]['generated_text'] @@ -256,7 +290,7 @@ The example client below demonstrates how to interact with the locally hosted Op ```python import openai -def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley at zipcode 94704 in 10 minutes", model="gorilla-openfunctions-v0", functions=[]): +def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley at zipcode 94704 in 10 minutes", model="gorilla-openfunctions-v1", functions=[]): """ Sends a request to the self-hosted OpenFunctions model and retrieves the response. @@ -280,10 +314,10 @@ def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley base_url = "http://localhost:8000" # Point to the local server ) completion = client.chat.completions.create( - model="gorilla-openfunctions-v0", + model="gorilla-openfunctions-v1", temperature=0.0, messages=[{"role": "user", "content": prompt}], - functions=functions, + tools=functions, ) return completion.choices[0].text except Exception as e: @@ -293,20 +327,22 @@ def get_gorilla_response(prompt="Call me an Uber ride type \"Plus\" in Berkeley query: str = "Get the latest news headlines from CNN." functions = [ { - "name": "News Headlines", - "api_call": "news.get_headlines", - "description": "Retrieve the latest news headlines from a specific news source.", - "parameters": { - "type": "object", - "properties": { - "source": { - "type": "string", - "description": "The news source, e.g. CNN" - } - }, - "required": [ - "source" - ] + "type": "function", + "function": { + "name": "news.get_headlines", + "description": "Retrieve the latest news headlines from a specific news source.", + "parameters": { + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "The news source, e.g. CNN" + } + }, + "required": [ + "source" + ] + } } } ] @@ -316,7 +352,7 @@ print(resp) ``` ### Try Out Self-Hosting OpenFunctions on Colab -You can try out setting up a local server to self-host the OpenFunctions model using this [OpenFunctions Self-Hosted Colab notebook](https://colab.research.google.com/drive/1aBxYJ9VncxDRN1-DyMT3J-ozmCRRvSje?usp=sharing). +You can try out setting up a local server to self-host the OpenFunctions model using this [OpenFunctions Self-Hosted Colab notebook](https://colab.research.google.com/drive/1_12w6fBxMcS8SqoYFEBaZ45Xfy0rdfIR?usp=sharing). Make sure you select an A100/V100 instance to run the notebook – smaller instances like T-4 do not suffice due to memory constraints. 
To remotely access the server running on the Colab instance from a local client, ngrok is used to tunnel the server ports from the Colab instance to public URLs. Instructions for setting up ngrok are provided in the notebook.
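The final patch moves every call from the deprecated `functions`/`function_call` arguments to `tools`/`tool_choice` and wraps each schema in a `{"type": "function", "function": {...}}` envelope. A small helper along these lines (hypothetical, not part of the patch series) illustrates how a flat legacy schema maps onto the wrapped format:

```python
# Hypothetical helper, not part of this patch series: wraps legacy OpenAI
# function schemas in the tools envelope that the final patch adopts.
from typing import Dict, List


def to_tools_format(legacy_functions: List[Dict]) -> List[Dict]:
    """Wrap flat {"name", "description", "parameters"} entries as tools."""
    return [{"type": "function", "function": fn} for fn in legacy_functions]


legacy = [
    {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["location"],
        },
    }
]

tools = to_tools_format(legacy)
# The wrapped list is what the updated calls pass as `tools=...`,
# typically together with `tool_choice="auto"`.
print(tools[0]["function"]["name"])
```

On the response side, the upstream OpenAI v1 client surfaces such calls as `completion.choices[0].message.tool_calls` rather than the older `message.function_call`; whether the Gorilla-hosted endpoints populate that field is not shown in these patches.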