openai[minor]: release 0.3 (#29100)

## Goal

Solve the following problems with `langchain-openai`:

- Structured output with `o1` [breaks out of the box](https://langchain.slack.com/archives/C050X0VTN56/p1735232400232099).
- `with_structured_output` by default does not use OpenAI’s [structured output feature](https://platform.openai.com/docs/guides/structured-outputs).
- We override API defaults for temperature and other parameters.

## Breaking changes:

- Default method for structured output is changing to OpenAI’s dedicated [structured output feature](https://platform.openai.com/docs/guides/structured-outputs). For schemas specified via TypedDict or JSON schema, strict schema validation is disabled by default but can be enabled by specifying `strict=True`.
  - To recover the previous default, pass `method="function_calling"` into `with_structured_output`.
- Models that don’t support `method="json_schema"` (e.g., `gpt-4` and `gpt-3.5-turbo`, currently the default model for ChatOpenAI) will raise an error unless `method` is explicitly specified.
  - To recover the previous default, pass `method="function_calling"` into `with_structured_output`.
- Schemas specified via Pydantic `BaseModel` that have fields with non-null defaults or metadata (like min/max constraints) will raise an error.
  - To recover the previous default, pass `method="function_calling"` into `with_structured_output`.
- `strict` now defaults to False for `method="json_schema"` when schemas are specified via TypedDict or JSON schema.
  - To recover the previous behavior, use `with_structured_output(schema, strict=True)`.
- Schemas specified via Pydantic V1 will raise a warning (and use `method="function_calling"`) unless `method` is explicitly specified.
  - To remove the warning, pass `method="function_calling"` into `with_structured_output`.
- Streaming with the default structured output method / Pydantic schema no longer generates intermediate streamed chunks.
  - To recover the previous behavior, pass `method="function_calling"` into `with_structured_output`.
- We no longer override the default temperature (was 0.7 in LangChain, now will follow OpenAI, currently 1.0).
  - To recover the previous behavior, initialize `ChatOpenAI` or `AzureChatOpenAI` with `temperature=0.7`.
- Note: conceptually there is a difference between forcing a tool call and forcing a response format. Tool calls may have more concise arguments vs. generating content adhering to a schema. Prompts may need to be adjusted to recover desired behavior.

---------

Co-authored-by: Jacob Lee <[email protected]>
Co-authored-by: Bagatur <[email protected]>
langchain-ai · Jan 10, 2025 · 6e63ccb · 6e63ccb
1 parent facfd42
commit 6e63ccb
Show file tree

Hide file tree

Showing 14 changed files with 912 additions and 295 deletions.
diff --git a/libs/partners/openai/langchain_openai/chat_models/azure.py b/libs/partners/openai/langchain_openai/chat_models/azure.py
diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
diff --git a/libs/partners/openai/poetry.lock b/libs/partners/openai/poetry.lock
diff --git a/libs/partners/openai/pyproject.toml b/libs/partners/openai/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "langchain-openai"
-version = "0.2.14"
+version = "0.3.0"
 description = "An integration package connecting OpenAI and LangChain"
 authors = []
 readme = "README.md"
@@ -23,7 +23,7 @@ ignore_missing_imports = true
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
-langchain-core = "^0.3.27"
+langchain-core = "^0.3.29"
 openai = "^1.58.1"
 tiktoken = ">=0.7,<1"
 

diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py
@@ -55,6 +55,10 @@ def chat_model_params(self) -> dict:
             "azure_endpoint": OPENAI_API_BASE,
         }
 
+    @property
+    def structured_output_kwargs(self) -> dict:
+        return {"method": "function_calling"}
+
     @pytest.mark.xfail(reason="Not yet supported.")
     def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
         super().test_usage_metadata_streaming(model)
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
@@ -630,20 +630,39 @@ def test_bind_tools_tool_choice() -> None:
     assert not msg.tool_calls
 
 
-def test_openai_structured_output() -> None:
+@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1"])
+def test_openai_structured_output(model: str) -> None:
     class MyModel(BaseModel):
         """A Person"""
 
         name: str
         age: int
 
-    llm = ChatOpenAI().with_structured_output(MyModel)
+    llm = ChatOpenAI(model=model).with_structured_output(MyModel)
     result = llm.invoke("I'm a 27 year old named Erick")
     assert isinstance(result, MyModel)
     assert result.name == "Erick"
     assert result.age == 27
 
 
+def test_structured_output_errors_with_legacy_models() -> None:
+    class MyModel(BaseModel):
+        """A Person"""
+
+        name: str
+        age: int
+
+    llm = ChatOpenAI(model="gpt-4").with_structured_output(MyModel)
+
+    with pytest.warns(UserWarning, match="with_structured_output"):
+        with pytest.raises(openai.BadRequestError):
+            _ = llm.invoke("I'm a 27 year old named Erick")
+
+    with pytest.warns(UserWarning, match="with_structured_output"):
+        with pytest.raises(openai.BadRequestError):
+            _ = list(llm.stream("I'm a 27 year old named Erick"))
+
+
 def test_openai_proxy() -> None:
     """Test ChatOpenAI with proxy."""
     chat_openai = ChatOpenAI(openai_proxy="http://localhost:8080")
@@ -820,20 +839,18 @@ class magic_function(BaseModel):
 
 
 @pytest.mark.parametrize(
-    ("model", "method", "strict"),
-    [("gpt-4o", "function_calling", True), ("gpt-4o-2024-08-06", "json_schema", None)],
+    ("model", "method"),
+    [("gpt-4o", "function_calling"), ("gpt-4o-2024-08-06", "json_schema")],
 )
 def test_structured_output_strict(
-    model: str,
-    method: Literal["function_calling", "json_schema"],
-    strict: Optional[bool],
+    model: str, method: Literal["function_calling", "json_schema"]
 ) -> None:
     """Test to verify structured output with strict=True."""
 
     from pydantic import BaseModel as BaseModelProper
     from pydantic import Field as FieldProper
 
-    llm = ChatOpenAI(model=model, temperature=0)
+    llm = ChatOpenAI(model=model)
 
     class Joke(BaseModelProper):
         """Joke to tell user."""
@@ -842,10 +859,7 @@ class Joke(BaseModelProper):
         punchline: str = FieldProper(description="answer to resolve the joke")
 
     # Pydantic class
-    # Type ignoring since the interface only officially supports pydantic 1
-    # or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2.
-    # We'll need to do a pass updating the type signatures.
-    chat = llm.with_structured_output(Joke, method=method, strict=strict)
+    chat = llm.with_structured_output(Joke, method=method, strict=True)
     result = chat.invoke("Tell me a joke about cats.")
     assert isinstance(result, Joke)
 
@@ -854,7 +868,7 @@ class Joke(BaseModelProper):
 
     # Schema
     chat = llm.with_structured_output(
-        Joke.model_json_schema(), method=method, strict=strict
+        Joke.model_json_schema(), method=method, strict=True
     )
     result = chat.invoke("Tell me a joke about cats.")
     assert isinstance(result, dict)
@@ -875,26 +889,24 @@ class InvalidJoke(BaseModelProper):
             default="foo", description="answer to resolve the joke"
         )
 
-    chat = llm.with_structured_output(InvalidJoke, method=method, strict=strict)
+    chat = llm.with_structured_output(InvalidJoke, method=method, strict=True)
     with pytest.raises(openai.BadRequestError):
         chat.invoke("Tell me a joke about cats.")
     with pytest.raises(openai.BadRequestError):
         next(chat.stream("Tell me a joke about cats."))
 
     chat = llm.with_structured_output(
-        InvalidJoke.model_json_schema(), method=method, strict=strict
+        InvalidJoke.model_json_schema(), method=method, strict=True
     )
     with pytest.raises(openai.BadRequestError):
         chat.invoke("Tell me a joke about cats.")
     with pytest.raises(openai.BadRequestError):
         next(chat.stream("Tell me a joke about cats."))
 
 
-@pytest.mark.parametrize(
-    ("model", "method", "strict"), [("gpt-4o-2024-08-06", "json_schema", None)]
-)
+@pytest.mark.parametrize(("model", "method"), [("gpt-4o-2024-08-06", "json_schema")])
 def test_nested_structured_output_strict(
-    model: str, method: Literal["json_schema"], strict: Optional[bool]
+    model: str, method: Literal["json_schema"]
 ) -> None:
     """Test to verify structured output with strict=True for nested object."""
 
@@ -914,7 +926,7 @@ class JokeWithEvaluation(TypedDict):
         self_evaluation: SelfEvaluation
 
     # Schema
-    chat = llm.with_structured_output(JokeWithEvaluation, method=method, strict=strict)
+    chat = llm.with_structured_output(JokeWithEvaluation, method=method, strict=True)
     result = chat.invoke("Tell me a joke about cats.")
     assert isinstance(result, dict)
     assert set(result.keys()) == {"setup", "punchline", "self_evaluation"}
@@ -927,6 +939,46 @@ class JokeWithEvaluation(TypedDict):
     assert set(chunk["self_evaluation"].keys()) == {"score", "text"}
 
 
+@pytest.mark.parametrize(
+    ("strict", "method"),
+    [
+        (True, "json_schema"),
+        (False, "json_schema"),
+        (True, "function_calling"),
+        (False, "function_calling"),
+    ],
+)
+def test_json_schema_openai_format(
+    strict: bool, method: Literal["json_schema", "function_calling"]
+) -> None:
+    """Test we can pass in OpenAI schema format specifying strict."""
+    llm = ChatOpenAI(model="gpt-4o-mini")
+    schema = {
+        "name": "get_weather",
+        "description": "Fetches the weather in the given location",
+        "strict": strict,
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "location": {
+                    "type": "string",
+                    "description": "The location to get the weather for",
+                },
+                "unit": {
+                    "type": "string",
+                    "description": "The unit to return the temperature in",
+                    "enum": ["F", "C"],
+                },
+            },
+            "additionalProperties": False,
+            "required": ["location", "unit"],
+        },
+    }
+    chat = llm.with_structured_output(schema, method=method)
+    result = chat.invoke("What is the weather in New York?")
+    assert isinstance(result, dict)
+
+
 def test_json_mode() -> None:
     llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
     response = llm.invoke(

diff --git a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr
@@ -15,7 +15,6 @@
       }),
       'max_retries': 2,
       'max_tokens': 100,
-      'n': 1,
       'openai_api_key': dict({
         'id': list([
           'AZURE_OPENAI_API_KEY',

diff --git a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr
@@ -11,7 +11,6 @@
       'max_retries': 2,
       'max_tokens': 100,
       'model_name': 'gpt-3.5-turbo',
-      'n': 1,
       'openai_api_key': dict({
         'id': list([
           'OPENAI_API_KEY',

diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
@@ -877,8 +877,6 @@ def test__get_request_payload() -> None:
         ],
         "model": "gpt-4o-2024-08-06",
         "stream": False,
-        "n": 1,
-        "temperature": 0.7,
     }
     payload = llm._get_request_payload(messages)
     assert payload == expected

diff --git a/libs/partners/xai/Makefile b/libs/partners/xai/Makefile
@@ -11,6 +11,9 @@ integration_test integration_tests: TEST_FILE=tests/integration_tests/
 test tests:
 	poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE)
 
+test_watch:
+	poetry run ptw --snapshot-update --now . -- -vv $(TEST_FILE)
+
 integration_test integration_tests:
 	poetry run pytest $(TEST_FILE)
 

diff --git a/libs/partners/xai/langchain_xai/chat_models.py b/libs/partners/xai/langchain_xai/chat_models.py
@@ -320,9 +320,9 @@ def _get_ls_params(
     @model_validator(mode="after")
     def validate_environment(self) -> Self:
         """Validate that api key and python package exists in environment."""
-        if self.n < 1:
+        if self.n is not None and self.n < 1:
             raise ValueError("n must be at least 1.")
-        if self.n > 1 and self.streaming:
+        if self.n is not None and self.n > 1 and self.streaming:
             raise ValueError("n must be 1 when streaming.")
 
         client_params: dict = {
@@ -331,10 +331,11 @@ def validate_environment(self) -> Self:
             ),
             "base_url": self.xai_api_base,
             "timeout": self.request_timeout,
-            "max_retries": self.max_retries,
             "default_headers": self.default_headers,
             "default_query": self.default_query,
         }
+        if self.max_retries is not None:
+            client_params["max_retries"] = self.max_retries
 
         if client_params["api_key"] is None:
             raise ValueError(

diff --git a/libs/partners/xai/tests/unit_tests/__snapshots__/test_chat_models_standard.ambr b/libs/partners/xai/tests/unit_tests/__snapshots__/test_chat_models_standard.ambr
@@ -10,7 +10,6 @@
       'max_retries': 2,
       'max_tokens': 100,
       'model_name': 'grok-beta',
-      'n': 1,
       'request_timeout': 60.0,
       'stop': list([
       ]),