From 6e63ccba8429e55ece4d40b6e8f4a069579ba290 Mon Sep 17 00:00:00 2001 From: ccurme Date: Fri, 10 Jan 2025 10:50:32 -0500 Subject: [PATCH] openai[minor]: release 0.3 (#29100) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Goal Solve the following problems with `langchain-openai`: - Structured output with `o1` [breaks out of the box](https://langchain.slack.com/archives/C050X0VTN56/p1735232400232099). - `with_structured_output` by default does not use OpenAI’s [structured output feature](https://platform.openai.com/docs/guides/structured-outputs). - We override API defaults for temperature and other parameters. ## Breaking changes: - Default method for structured output is changing to OpenAI’s dedicated [structured output feature](https://platform.openai.com/docs/guides/structured-outputs). For schemas specified via TypedDict or JSON schema, strict schema validation is disabled by default but can be enabled by specifying `strict=True`. - To recover previous default, pass `method="function_calling"` into `with_structured_output`. - Models that don’t support `method="json_schema"` (e.g., `gpt-4` and `gpt-3.5-turbo`, the latter currently being the default model for ChatOpenAI) will raise an error unless `method` is explicitly specified. - To recover previous default, pass `method="function_calling"` into `with_structured_output`. - Schemas specified via Pydantic `BaseModel` that have fields with non-null defaults or metadata (like min/max constraints) will raise an error. - To recover previous default, pass `method="function_calling"` into `with_structured_output`. - `strict` now defaults to False for `method="json_schema"` when schemas are specified via TypedDict or JSON schema. - To recover previous behavior, use `with_structured_output(schema, strict=True)`. - Schemas specified via Pydantic V1 will emit a warning (and use `method="function_calling"`) unless `method` is explicitly specified. 
- To remove the warning, pass `method="function_calling"` into `with_structured_output`. - Streaming with default structured output method / Pydantic schema no longer generates intermediate streamed chunks. - To recover previous behavior, pass `method="function_calling"` into `with_structured_output`. - We no longer override default temperature (was 0.7 in LangChain, now will follow OpenAI, currently 1.0). - To recover previous behavior, initialize `ChatOpenAI` or `AzureChatOpenAI` with `temperature=0.7`. - Note: conceptually there is a difference between forcing a tool call and forcing a response format. Tool calls may have more concise arguments vs. generating content adhering to a schema. Prompts may need to be adjusted to recover desired behavior. --------- Co-authored-by: Jacob Lee Co-authored-by: Bagatur --- .../langchain_openai/chat_models/azure.py | 334 ++++++++- .../langchain_openai/chat_models/base.py | 675 +++++++++++------- libs/partners/openai/poetry.lock | 6 +- libs/partners/openai/pyproject.toml | 4 +- .../chat_models/test_azure_standard.py | 4 + .../chat_models/test_base.py | 92 ++- .../__snapshots__/test_azure_standard.ambr | 1 - .../__snapshots__/test_base_standard.ambr | 1 - .../tests/unit_tests/chat_models/test_base.py | 2 - libs/partners/xai/Makefile | 3 + .../partners/xai/langchain_xai/chat_models.py | 7 +- .../test_chat_models_standard.ambr | 1 - .../integration_tests/chat_models.py | 59 +- .../langchain_tests/unit_tests/chat_models.py | 18 + 14 files changed, 912 insertions(+), 295 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/azure.py b/libs/partners/openai/langchain_openai/chat_models/azure.py index 2e1e5f8abfe03..c2cc41100b6cf 100644 --- a/libs/partners/openai/langchain_openai/chat_models/azure.py +++ b/libs/partners/openai/langchain_openai/chat_models/azure.py @@ -18,13 +18,15 @@ ) import openai +from langchain_core.language_models import LanguageModelInput from langchain_core.language_models.chat_models 
import LangSmithParams from langchain_core.messages import BaseMessage from langchain_core.outputs import ChatResult +from langchain_core.runnables import Runnable from langchain_core.utils import from_env, secret_from_env from langchain_core.utils.pydantic import is_basemodel_subclass from pydantic import BaseModel, Field, SecretStr, model_validator -from typing_extensions import Self +from typing_extensions import Literal, Self from langchain_openai.chat_models.base import BaseChatOpenAI @@ -79,7 +81,7 @@ class AzureChatOpenAI(BaseChatOpenAI): https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning timeout: Union[float, Tuple[float, float], Any, None] Timeout for requests. - max_retries: int + max_retries: Optional[int] Max number of retries. organization: Optional[str] OpenAI organization ID. If not passed in will be read from env @@ -586,9 +588,9 @@ def is_lc_serializable(cls) -> bool: @model_validator(mode="after") def validate_environment(self) -> Self: """Validate that api key and python package exists in environment.""" - if self.n < 1: + if self.n is not None and self.n < 1: raise ValueError("n must be at least 1.") - if self.n > 1 and self.streaming: + elif self.n is not None and self.n > 1 and self.streaming: raise ValueError("n must be 1 when streaming.") if self.disabled_params is None: @@ -641,10 +643,12 @@ def validate_environment(self) -> Self: "organization": self.openai_organization, "base_url": self.openai_api_base, "timeout": self.request_timeout, - "max_retries": self.max_retries, "default_headers": self.default_headers, "default_query": self.default_query, } + if self.max_retries is not None: + client_params["max_retries"] = self.max_retries + if not self.client: sync_specific = {"http_client": self.http_client} self.root_client = openai.AzureOpenAI(**client_params, **sync_specific) # type: ignore[arg-type] @@ -737,3 +741,323 @@ def _create_chat_result( ) return chat_result + + def with_structured_output( + 
self, + schema: Optional[_DictOrPydanticClass] = None, + *, + method: Literal["function_calling", "json_mode", "json_schema"] = "json_schema", + include_raw: bool = False, + strict: Optional[bool] = None, + **kwargs: Any, + ) -> Runnable[LanguageModelInput, _DictOrPydantic]: + """Model wrapper that returns outputs formatted to match the given schema. + + Args: + schema: + The output schema. Can be passed in as: + + - a JSON Schema, + - a TypedDict class, + - or a Pydantic class, + - an OpenAI function/tool schema. + + If ``schema`` is a Pydantic class then the model output will be a + Pydantic instance of that class, and the model-generated fields will be + validated by the Pydantic class. Otherwise the model output will be a + dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool` + for more on how to properly specify types and descriptions of + schema fields when specifying a Pydantic or TypedDict class. + + method: The method for steering model generation, one of: + + - "json_schema": + Uses OpenAI's Structured Output API: + https://platform.openai.com/docs/guides/structured-outputs + Supported for "gpt-4o-mini", "gpt-4o-2024-08-06", "o1", and later + models. + - "function_calling": + Uses OpenAI's tool-calling (formerly called function calling) + API: https://platform.openai.com/docs/guides/function-calling + - "json_mode": + Uses OpenAI's JSON mode. 
Note that if using JSON mode then you + must include instructions for formatting the output into the + desired schema into the model call: + https://platform.openai.com/docs/guides/structured-outputs/json-mode + + Learn more about the differences between the methods and which models + support which methods here: + + - https://platform.openai.com/docs/guides/structured-outputs/structured-outputs-vs-json-mode + - https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format + + include_raw: + If False then only the parsed structured output is returned. If + an error occurs during model output parsing it will be raised. If True + then both the raw model response (a BaseMessage) and the parsed model + response will be returned. If an error occurs during output parsing it + will be caught and returned as well. The final output is always a dict + with keys "raw", "parsed", and "parsing_error". + strict: + + - True: + Model output is guaranteed to exactly match the schema. + The input schema will also be validated according to + https://platform.openai.com/docs/guides/structured-outputs/supported-schemas + - False: + Input schema will not be validated and model output will not be + validated. + - None: + ``strict`` argument will not be passed to the model. + + If schema is specified via TypedDict or JSON schema, ``strict`` is not + enabled by default. Pass ``strict=True`` to enable it. + + Note: ``strict`` can only be non-null if ``method`` is + ``"json_schema"`` or ``"function_calling"``. + + kwargs: Additional keyword args aren't supported. + + Returns: + A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`. + + | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict. 
+ + | If ``include_raw`` is True, then Runnable outputs a dict with keys: + + - "raw": BaseMessage + - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. + - "parsing_error": Optional[BaseException] + + .. versionchanged:: 0.1.20 + + Added support for TypedDict class ``schema``. + + .. versionchanged:: 0.1.21 + + Support for ``strict`` argument added. + Support for ``method="json_schema"`` added. + + .. versionchanged:: 0.3.0 + + ``method`` default changed from "function_calling" to "json_schema". + + .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=False, strict=True + + Note, OpenAI has a number of restrictions on what types of schemas can be + provided if ``strict`` = True. When using Pydantic, our model cannot + specify any Field metadata (like min/max constraints) and fields cannot + have default values. + + See all constraints here: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas + + .. code-block:: python + + from typing import Optional + + from langchain_openai import AzureChatOpenAI + from pydantic import BaseModel, Field + + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: Optional[str] = Field( + default=..., description="A justification for the answer." + ) + + + llm = AzureChatOpenAI(azure_deployment="...", model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output(AnswerWithJustification) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + + # -> AnswerWithJustification( + # answer='They weigh the same', + # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' + # ) + + .. 
dropdown:: Example: schema=Pydantic class, method="function_calling", include_raw=False, strict=False + + .. code-block:: python + + from typing import Optional + + from langchain_openai import AzureChatOpenAI + from pydantic import BaseModel, Field + + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: Optional[str] = Field( + default=..., description="A justification for the answer." + ) + + + llm = AzureChatOpenAI(azure_deployment="...", model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output( + AnswerWithJustification, method="function_calling" + ) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + + # -> AnswerWithJustification( + # answer='They weigh the same', + # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' + # ) + + .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=True + + .. code-block:: python + + from langchain_openai import AzureChatOpenAI + from pydantic import BaseModel + + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: str + + + llm = AzureChatOpenAI(azure_deployment="...", model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output( + AnswerWithJustification, include_raw=True + ) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> { + # 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. 
The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}), + # 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'), + # 'parsing_error': None + # } + + .. dropdown:: Example: schema=TypedDict class, method="json_schema", include_raw=False, strict=False + + .. code-block:: python + + from typing_extensions import Annotated, TypedDict + + from langchain_openai import AzureChatOpenAI + + + class AnswerWithJustification(TypedDict): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: Annotated[ + Optional[str], None, "A justification for the answer." + ] + + + llm = AzureChatOpenAI(azure_deployment="...", model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output(AnswerWithJustification) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> { + # 'answer': 'They weigh the same', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' + # } + + .. dropdown:: Example: schema=OpenAI function schema, method="json_schema", include_raw=False + + .. 
code-block:: python + + from langchain_openai import AzureChatOpenAI + + oai_schema = { + 'name': 'AnswerWithJustification', + 'description': 'An answer to the user question along with justification for the answer.', + 'parameters': { + 'type': 'object', + 'properties': { + 'answer': {'type': 'string'}, + 'justification': {'description': 'A justification for the answer.', 'type': 'string'} + }, + 'required': ['answer'] + } + } + + llm = AzureChatOpenAI( + azure_deployment="...", + model="gpt-4o", + temperature=0, + ) + structured_llm = llm.with_structured_output(oai_schema) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> { + # 'answer': 'They weigh the same', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' + # } + + .. dropdown:: Example: schema=Pydantic class, method="json_mode", include_raw=True + + .. code-block:: + + from langchain_openai import AzureChatOpenAI + from pydantic import BaseModel + + class AnswerWithJustification(BaseModel): + answer: str + justification: str + + llm = AzureChatOpenAI( + azure_deployment="...", + model="gpt-4o", + temperature=0, + ) + structured_llm = llm.with_structured_output( + AnswerWithJustification, + method="json_mode", + include_raw=True + ) + + structured_llm.invoke( + "Answer the following question. " + "Make sure to return a JSON blob with keys 'answer' and 'justification'.\\n\\n" + "What's heavier a pound of bricks or a pound of feathers?" + ) + # -> { + # 'raw': AIMessage(content='{\\n "answer": "They are both the same weight.",\\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." 
\\n}'), + # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'), + # 'parsing_error': None + # } + + .. dropdown:: Example: schema=None, method="json_mode", include_raw=True + + .. code-block:: + + structured_llm = llm.with_structured_output(method="json_mode", include_raw=True) + + structured_llm.invoke( + "Answer the following question. " + "Make sure to return a JSON blob with keys 'answer' and 'justification'.\\n\\n" + "What's heavier a pound of bricks or a pound of feathers?" + ) + # -> { + # 'raw': AIMessage(content='{\\n "answer": "They are both the same weight.",\\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \\n}'), + # 'parsed': { + # 'answer': 'They are both the same weight.', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.' 
+ # }, + # 'parsing_error': None + # } + """ # noqa: E501 + return super().with_structured_output( + schema, method=method, include_raw=include_raw, strict=strict, **kwargs + ) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index f4e26253484e5..624e465ded9c5 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -92,6 +92,7 @@ ) from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_from_env from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator +from pydantic.v1 import BaseModel as BaseModelV1 from typing_extensions import Self logger = logging.getLogger(__name__) @@ -393,6 +394,32 @@ def _update_token_usage( return new_usage +def _handle_openai_bad_request(e: openai.BadRequestError) -> None: + if ( + "'response_format' of type 'json_schema' is not supported with this model" + ) in e.message: + message = ( + "This model does not support OpenAI's structured output feature, which " + "is the default method for `with_structured_output` as of " + "langchain-openai==0.3. To use `with_structured_output` with this model, " + 'specify `method="function_calling"`.' + ) + warnings.warn(message) + raise e + elif "Invalid schema for response_format" in e.message: + message = ( + "Invalid schema for OpenAI's structured output feature, which is the " + "default method for `with_structured_output` as of langchain-openai==0.3. " + 'Specify `method="function_calling"` instead or update your schema. 
' + "See supported schemas: " + "https://platform.openai.com/docs/guides/structured-outputs#supported-schemas" # noqa: E501 + ) + warnings.warn(message) + raise e + else: + raise + + class _FunctionCall(TypedDict): name: str @@ -415,7 +442,7 @@ class BaseChatOpenAI(BaseChatModel): root_async_client: Any = Field(default=None, exclude=True) #: :meta private: model_name: str = Field(default="gpt-3.5-turbo", alias="model") """Model name to use.""" - temperature: float = 0.7 + temperature: Optional[float] = None """What sampling temperature to use.""" model_kwargs: Dict[str, Any] = Field(default_factory=dict) """Holds any model parameters valid for `create` call not explicitly specified.""" @@ -436,7 +463,7 @@ class BaseChatOpenAI(BaseChatModel): ) """Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or None.""" - max_retries: int = 2 + max_retries: Optional[int] = None """Maximum number of retries to make when generating.""" presence_penalty: Optional[float] = None """Penalizes repeated tokens.""" @@ -454,7 +481,7 @@ class BaseChatOpenAI(BaseChatModel): """Modify the likelihood of specified tokens appearing in the completion.""" streaming: bool = False """Whether to stream the results or not.""" - n: int = 1 + n: Optional[int] = None """Number of chat completions to generate for each prompt.""" top_p: Optional[float] = None """Total probability mass of tokens to consider at each step.""" @@ -538,9 +565,9 @@ def validate_temperature(cls, values: Dict[str, Any]) -> Any: @model_validator(mode="after") def validate_environment(self) -> Self: """Validate that api key and python package exists in environment.""" - if self.n < 1: + if self.n is not None and self.n < 1: raise ValueError("n must be at least 1.") - if self.n > 1 and self.streaming: + elif self.n is not None and self.n > 1 and self.streaming: raise ValueError("n must be 1 when streaming.") # Check OPENAI_ORGANIZATION for backwards compatibility. 
@@ -557,10 +584,12 @@ def validate_environment(self) -> Self: "organization": self.openai_organization, "base_url": self.openai_api_base, "timeout": self.request_timeout, - "max_retries": self.max_retries, "default_headers": self.default_headers, "default_query": self.default_query, } + if self.max_retries is not None: + client_params["max_retries"] = self.max_retries + if self.openai_proxy and (self.http_client or self.http_async_client): openai_proxy = self.openai_proxy http_client = self.http_client @@ -615,14 +644,14 @@ def _default_params(self) -> Dict[str, Any]: "stop": self.stop or None, # also exclude empty list for this "max_tokens": self.max_tokens, "extra_body": self.extra_body, + "n": self.n, + "temperature": self.temperature, "reasoning_effort": self.reasoning_effort, } params = { "model": self.model_name, "stream": self.streaming, - "n": self.n, - "temperature": self.temperature, **{k: v for k, v in exclude_if_none.items() if v is not None}, **self.model_kwargs, } @@ -683,26 +712,31 @@ def _stream( else: response = self.client.create(**payload) context_manager = response - with context_manager as response: - is_first_chunk = True - for chunk in response: - if not isinstance(chunk, dict): - chunk = chunk.model_dump() - generation_chunk = _convert_chunk_to_generation_chunk( - chunk, - default_chunk_class, - base_generation_info if is_first_chunk else {}, - ) - if generation_chunk is None: - continue - default_chunk_class = generation_chunk.message.__class__ - logprobs = (generation_chunk.generation_info or {}).get("logprobs") - if run_manager: - run_manager.on_llm_new_token( - generation_chunk.text, chunk=generation_chunk, logprobs=logprobs + try: + with context_manager as response: + is_first_chunk = True + for chunk in response: + if not isinstance(chunk, dict): + chunk = chunk.model_dump() + generation_chunk = _convert_chunk_to_generation_chunk( + chunk, + default_chunk_class, + base_generation_info if is_first_chunk else {}, ) - is_first_chunk = 
False - yield generation_chunk + if generation_chunk is None: + continue + default_chunk_class = generation_chunk.message.__class__ + logprobs = (generation_chunk.generation_info or {}).get("logprobs") + if run_manager: + run_manager.on_llm_new_token( + generation_chunk.text, + chunk=generation_chunk, + logprobs=logprobs, + ) + is_first_chunk = False + yield generation_chunk + except openai.BadRequestError as e: + _handle_openai_bad_request(e) if hasattr(response, "get_final_completion") and "response_format" in payload: final_completion = response.get_final_completion() generation_chunk = self._get_generation_chunk_from_completion( @@ -735,7 +769,10 @@ def _generate( "specified." ) payload.pop("stream") - response = self.root_client.beta.chat.completions.parse(**payload) + try: + response = self.root_client.beta.chat.completions.parse(**payload) + except openai.BadRequestError as e: + _handle_openai_bad_request(e) elif self.include_response_headers: raw_response = self.client.with_raw_response.create(**payload) response = raw_response.parse() @@ -843,26 +880,31 @@ async def _astream( else: response = await self.async_client.create(**payload) context_manager = response - async with context_manager as response: - is_first_chunk = True - async for chunk in response: - if not isinstance(chunk, dict): - chunk = chunk.model_dump() - generation_chunk = _convert_chunk_to_generation_chunk( - chunk, - default_chunk_class, - base_generation_info if is_first_chunk else {}, - ) - if generation_chunk is None: - continue - default_chunk_class = generation_chunk.message.__class__ - logprobs = (generation_chunk.generation_info or {}).get("logprobs") - if run_manager: - await run_manager.on_llm_new_token( - generation_chunk.text, chunk=generation_chunk, logprobs=logprobs + try: + async with context_manager as response: + is_first_chunk = True + async for chunk in response: + if not isinstance(chunk, dict): + chunk = chunk.model_dump() + generation_chunk = 
_convert_chunk_to_generation_chunk( + chunk, + default_chunk_class, + base_generation_info if is_first_chunk else {}, ) - is_first_chunk = False - yield generation_chunk + if generation_chunk is None: + continue + default_chunk_class = generation_chunk.message.__class__ + logprobs = (generation_chunk.generation_info or {}).get("logprobs") + if run_manager: + await run_manager.on_llm_new_token( + generation_chunk.text, + chunk=generation_chunk, + logprobs=logprobs, + ) + is_first_chunk = False + yield generation_chunk + except openai.BadRequestError as e: + _handle_openai_bad_request(e) if hasattr(response, "get_final_completion") and "response_format" in payload: final_completion = await response.get_final_completion() generation_chunk = self._get_generation_chunk_from_completion( @@ -895,9 +937,12 @@ async def _agenerate( "specified." ) payload.pop("stream") - response = await self.root_async_client.beta.chat.completions.parse( - **payload - ) + try: + response = await self.root_async_client.beta.chat.completions.parse( + **payload + ) + except openai.BadRequestError as e: + _handle_openai_bad_request(e) elif self.include_response_headers: raw_response = await self.async_client.with_raw_response.create(**payload) response = raw_response.parse() @@ -1237,7 +1282,7 @@ def with_structured_output( API: https://platform.openai.com/docs/guides/function-calling - "json_schema": Uses OpenAI's Structured Output API: https://platform.openai.com/docs/guides/structured-outputs - Supported for "gpt-4o-mini", "gpt-4o-2024-08-06", and later + Supported for "gpt-4o-mini", "gpt-4o-2024-08-06", "o1", and later models. - "json_mode": Uses OpenAI's JSON mode. Note that if using JSON mode then you @@ -1270,10 +1315,6 @@ def with_structured_output( - None: ``strict`` argument will not be passed to the model. - If ``method`` is "json_schema" defaults to True. If ``method`` is - "function_calling" or "json_mode" defaults to None. 
Can only be - non-null if ``method`` is "function_calling" or "json_schema". - kwargs: Additional keyword args aren't supported. Returns: @@ -1295,193 +1336,6 @@ def with_structured_output( Support for ``strict`` argument added. Support for ``method`` = "json_schema" added. - - .. note:: Planned breaking changes in version `0.3.0` - - - ``method`` default will be changed to "json_schema" from - "function_calling". - - ``strict`` will default to True when ``method`` is - "function_calling" as of version `0.3.0`. - - - .. dropdown:: Example: schema=Pydantic class, method="function_calling", include_raw=False, strict=True - - Note, OpenAI has a number of restrictions on what types of schemas can be - provided if ``strict`` = True. When using Pydantic, our model cannot - specify any Field metadata (like min/max constraints) and fields cannot - have default values. - - See all constraints here: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas - - .. code-block:: python - - from typing import Optional - - from langchain_openai import ChatOpenAI - from pydantic import BaseModel, Field - - - class AnswerWithJustification(BaseModel): - '''An answer to the user question along with justification for the answer.''' - - answer: str - justification: Optional[str] = Field( - default=..., description="A justification for the answer." - ) - - - llm = ChatOpenAI(model="gpt-4o", temperature=0) - structured_llm = llm.with_structured_output( - AnswerWithJustification, strict=True - ) - - structured_llm.invoke( - "What weighs more a pound of bricks or a pound of feathers" - ) - - # -> AnswerWithJustification( - # answer='They weigh the same', - # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' - # ) - - .. dropdown:: Example: schema=Pydantic class, method="function_calling", include_raw=True - - .. 
code-block:: python - - from langchain_openai import ChatOpenAI - from pydantic import BaseModel - - - class AnswerWithJustification(BaseModel): - '''An answer to the user question along with justification for the answer.''' - - answer: str - justification: str - - - llm = ChatOpenAI(model="gpt-4o", temperature=0) - structured_llm = llm.with_structured_output( - AnswerWithJustification, include_raw=True - ) - - structured_llm.invoke( - "What weighs more a pound of bricks or a pound of feathers" - ) - # -> { - # 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}), - # 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'), - # 'parsing_error': None - # } - - .. dropdown:: Example: schema=TypedDict class, method="function_calling", include_raw=False - - .. code-block:: python - - # IMPORTANT: If you are using Python <=3.8, you need to import Annotated - # from typing_extensions, not from typing. - from typing_extensions import Annotated, TypedDict - - from langchain_openai import ChatOpenAI - - - class AnswerWithJustification(TypedDict): - '''An answer to the user question along with justification for the answer.''' - - answer: str - justification: Annotated[ - Optional[str], None, "A justification for the answer." 
- ] - - - llm = ChatOpenAI(model="gpt-4o", temperature=0) - structured_llm = llm.with_structured_output(AnswerWithJustification) - - structured_llm.invoke( - "What weighs more a pound of bricks or a pound of feathers" - ) - # -> { - # 'answer': 'They weigh the same', - # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' - # } - - .. dropdown:: Example: schema=OpenAI function schema, method="function_calling", include_raw=False - - .. code-block:: python - - from langchain_openai import ChatOpenAI - - oai_schema = { - 'name': 'AnswerWithJustification', - 'description': 'An answer to the user question along with justification for the answer.', - 'parameters': { - 'type': 'object', - 'properties': { - 'answer': {'type': 'string'}, - 'justification': {'description': 'A justification for the answer.', 'type': 'string'} - }, - 'required': ['answer'] - } - } - - llm = ChatOpenAI(model="gpt-4o", temperature=0) - structured_llm = llm.with_structured_output(oai_schema) - - structured_llm.invoke( - "What weighs more a pound of bricks or a pound of feathers" - ) - # -> { - # 'answer': 'They weigh the same', - # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' - # } - - .. dropdown:: Example: schema=Pydantic class, method="json_mode", include_raw=True - - .. code-block:: - - from langchain_openai import ChatOpenAI - from pydantic import BaseModel - - class AnswerWithJustification(BaseModel): - answer: str - justification: str - - llm = ChatOpenAI(model="gpt-4o", temperature=0) - structured_llm = llm.with_structured_output( - AnswerWithJustification, - method="json_mode", - include_raw=True - ) - - structured_llm.invoke( - "Answer the following question. 
" - "Make sure to return a JSON blob with keys 'answer' and 'justification'.\\n\\n" - "What's heavier a pound of bricks or a pound of feathers?" - ) - # -> { - # 'raw': AIMessage(content='{\\n "answer": "They are both the same weight.",\\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \\n}'), - # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'), - # 'parsing_error': None - # } - - .. dropdown:: Example: schema=None, method="json_mode", include_raw=True - - .. code-block:: - - structured_llm = llm.with_structured_output(method="json_mode", include_raw=True) - - structured_llm.invoke( - "Answer the following question. " - "Make sure to return a JSON blob with keys 'answer' and 'justification'.\\n\\n" - "What's heavier a pound of bricks or a pound of feathers?" - ) - # -> { - # 'raw': AIMessage(content='{\\n "answer": "They are both the same weight.",\\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \\n}'), - # 'parsed': { - # 'answer': 'They are both the same weight.', - # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.' 
- # }, - # 'parsing_error': None - # } """ # noqa: E501 if kwargs: raise ValueError(f"Received unsupported arguments {kwargs}") @@ -1490,6 +1344,21 @@ class AnswerWithJustification(BaseModel): "Argument `strict` is not supported with `method`='json_mode'" ) is_pydantic_schema = _is_pydantic_class(schema) + + # Check for Pydantic BaseModel V1 + if ( + method == "json_schema" + and is_pydantic_schema + and issubclass(schema, BaseModelV1) # type: ignore[arg-type] + ): + warnings.warn( + "Received a Pydantic BaseModel V1 schema. This is not supported by " + 'method="json_schema". Please use method="function_calling" ' + "or specify schema via JSON Schema or Pydantic V2 BaseModel. " + 'Overriding to method="function_calling".' + ) + method = "function_calling" + if method == "function_calling": if schema is None: raise ValueError( @@ -1618,7 +1487,7 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] timeout: Union[float, Tuple[float, float], Any, None] Timeout for requests. - max_retries: int + max_retries: Optional[int] Max number of retries. api_key: Optional[str] OpenAI API key. If not passed in will be read from env var OPENAI_API_KEY. @@ -2147,6 +2016,320 @@ async def _astream( async for chunk in super()._astream(*args, **kwargs): yield chunk + def with_structured_output( + self, + schema: Optional[_DictOrPydanticClass] = None, + *, + method: Literal["function_calling", "json_mode", "json_schema"] = "json_schema", + include_raw: bool = False, + strict: Optional[bool] = None, + **kwargs: Any, + ) -> Runnable[LanguageModelInput, _DictOrPydantic]: + """Model wrapper that returns outputs formatted to match the given schema. + + Args: + schema: + The output schema. Can be passed in as: + + - a JSON Schema, + - a TypedDict class, + - or a Pydantic class, + - an OpenAI function/tool schema. 
+ + If ``schema`` is a Pydantic class then the model output will be a + Pydantic instance of that class, and the model-generated fields will be + validated by the Pydantic class. Otherwise the model output will be a + dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool` + for more on how to properly specify types and descriptions of + schema fields when specifying a Pydantic or TypedDict class. + + method: The method for steering model generation, one of: + + - "json_schema": + Uses OpenAI's Structured Output API: + https://platform.openai.com/docs/guides/structured-outputs + Supported for "gpt-4o-mini", "gpt-4o-2024-08-06", "o1", and later + models. + - "function_calling": + Uses OpenAI's tool-calling (formerly called function calling) + API: https://platform.openai.com/docs/guides/function-calling + - "json_mode": + Uses OpenAI's JSON mode. Note that if using JSON mode then you + must include instructions for formatting the output into the + desired schema in the model call: + https://platform.openai.com/docs/guides/structured-outputs/json-mode + + Learn more about the differences between the methods and which models + support which methods here: + + - https://platform.openai.com/docs/guides/structured-outputs/structured-outputs-vs-json-mode + - https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format + + include_raw: + If False then only the parsed structured output is returned. If + an error occurs during model output parsing it will be raised. If True + then both the raw model response (a BaseMessage) and the parsed model + response will be returned. If an error occurs during output parsing it + will be caught and returned as well. In that case the final output is always a dict + with keys "raw", "parsed", and "parsing_error". + strict: + + - True: + Model output is guaranteed to exactly match the schema.
+ The input schema will also be validated according to + https://platform.openai.com/docs/guides/structured-outputs/supported-schemas + - False: + Input schema will not be validated and model output will not be + validated. + - None: + ``strict`` argument will not be passed to the model. + + If schema is specified via TypedDict or JSON schema, ``strict`` is not + enabled by default. Pass ``strict=True`` to enable it. + + Note: ``strict`` can only be non-null if ``method`` is + ``"json_schema"`` or ``"function_calling"``. + + kwargs: Additional keyword args aren't supported. + + Returns: + A Runnable that takes the same inputs as a :class:`langchain_core.language_models.chat_models.BaseChatModel`. + + | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict. + + | If ``include_raw`` is True, then Runnable outputs a dict with keys: + + - "raw": BaseMessage + - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. + - "parsing_error": Optional[BaseException] + + .. versionchanged:: 0.1.20 + + Added support for TypedDict class ``schema``. + + .. versionchanged:: 0.1.21 + + Support for ``strict`` argument added. + Support for ``method="json_schema"`` added. + + .. versionchanged:: 0.3.0 + + ``method`` default changed from "function_calling" to "json_schema". + + .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=False, strict=True + + Note, OpenAI has a number of restrictions on what types of schemas can be + provided if ``strict`` = True. When using Pydantic, our model cannot + specify any Field metadata (like min/max constraints) and fields cannot + have default values. + + See all constraints here: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas + + ..
code-block:: python + + from typing import Optional + + from langchain_openai import ChatOpenAI + from pydantic import BaseModel, Field + + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: Optional[str] = Field( + default=..., description="A justification for the answer." + ) + + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output(AnswerWithJustification) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + + # -> AnswerWithJustification( + # answer='They weigh the same', + # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' + # ) + + .. dropdown:: Example: schema=Pydantic class, method="function_calling", include_raw=False, strict=False + + .. code-block:: python + + from typing import Optional + + from langchain_openai import ChatOpenAI + from pydantic import BaseModel, Field + + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: Optional[str] = Field( + default=..., description="A justification for the answer." + ) + + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output( + AnswerWithJustification, method="function_calling" + ) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + + # -> AnswerWithJustification( + # answer='They weigh the same', + # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' + # ) + + .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=True + + .. 
code-block:: python + + from langchain_openai import ChatOpenAI + from pydantic import BaseModel + + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: str + + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output( + AnswerWithJustification, include_raw=True + ) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> { + # 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}), + # 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'), + # 'parsing_error': None + # } + + .. dropdown:: Example: schema=TypedDict class, method="json_schema", include_raw=False, strict=False + + .. code-block:: python + + # IMPORTANT: If you are using Python <=3.8, you need to import Annotated + # from typing_extensions, not from typing. + from typing_extensions import Annotated, TypedDict + + from langchain_openai import ChatOpenAI + + + class AnswerWithJustification(TypedDict): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: Annotated[ + Optional[str], None, "A justification for the answer." 
+ ] + + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output(AnswerWithJustification) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> { + # 'answer': 'They weigh the same', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' + # } + + .. dropdown:: Example: schema=OpenAI function schema, method="json_schema", include_raw=False + + .. code-block:: python + + from langchain_openai import ChatOpenAI + + oai_schema = { + 'name': 'AnswerWithJustification', + 'description': 'An answer to the user question along with justification for the answer.', + 'parameters': { + 'type': 'object', + 'properties': { + 'answer': {'type': 'string'}, + 'justification': {'description': 'A justification for the answer.', 'type': 'string'} + }, + 'required': ['answer'] + } + } + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output(oai_schema) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> { + # 'answer': 'They weigh the same', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' + # } + + .. dropdown:: Example: schema=Pydantic class, method="json_mode", include_raw=True + + .. code-block:: + + from langchain_openai import ChatOpenAI + from pydantic import BaseModel + + class AnswerWithJustification(BaseModel): + answer: str + justification: str + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output( + AnswerWithJustification, + method="json_mode", + include_raw=True + ) + + structured_llm.invoke( + "Answer the following question. 
" + "Make sure to return a JSON blob with keys 'answer' and 'justification'.\\n\\n" + "What's heavier a pound of bricks or a pound of feathers?" + ) + # -> { + # 'raw': AIMessage(content='{\\n "answer": "They are both the same weight.",\\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \\n}'), + # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'), + # 'parsing_error': None + # } + + .. dropdown:: Example: schema=None, method="json_mode", include_raw=True + + .. code-block:: + + structured_llm = llm.with_structured_output(method="json_mode", include_raw=True) + + structured_llm.invoke( + "Answer the following question. " + "Make sure to return a JSON blob with keys 'answer' and 'justification'.\\n\\n" + "What's heavier a pound of bricks or a pound of feathers?" + ) + # -> { + # 'raw': AIMessage(content='{\\n "answer": "They are both the same weight.",\\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \\n}'), + # 'parsed': { + # 'answer': 'They are both the same weight.', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.' 
+ # }, + # 'parsing_error': None + # } + """ # noqa: E501 + return super().with_structured_output( + schema, method=method, include_raw=include_raw, strict=strict, **kwargs + ) + def _is_pydantic_class(obj: Any) -> bool: return isinstance(obj, type) and is_basemodel_subclass(obj) @@ -2263,7 +2446,11 @@ def _convert_to_openai_response_format( elif isinstance(schema, dict) and "name" in schema and "schema" in schema: response_format = {"type": "json_schema", "json_schema": schema} else: - strict = strict if strict is not None else True + if strict is None: + if isinstance(schema, dict) and isinstance(schema.get("strict"), bool): + strict = schema["strict"] + else: + strict = False function = convert_to_openai_function(schema, strict=strict) function["schema"] = function.pop("parameters") response_format = {"type": "json_schema", "json_schema": function} diff --git a/libs/partners/openai/poetry.lock b/libs/partners/openai/poetry.lock index d7610271ad57f..85b65dbf801a6 100644 --- a/libs/partners/openai/poetry.lock +++ b/libs/partners/openai/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "annotated-types" @@ -496,7 +496,7 @@ files = [ [[package]] name = "langchain-core" -version = "0.3.27" +version = "0.3.29" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.9,<4.0" @@ -1647,4 +1647,4 @@ watchmedo = ["PyYAML (>=3.10)"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "71de53990a6cfb9cd6a25249b40eeef52e089840a9a06b54ac556fe7fa60504c" +content-hash = "0bc715ae349e68aa13cce7541210fb9596a6a66a9a5479fdc5c891c79ca11688" diff --git a/libs/partners/openai/pyproject.toml b/libs/partners/openai/pyproject.toml index 693e34eda37d1..77ef40180e498 100644 --- a/libs/partners/openai/pyproject.toml +++ b/libs/partners/openai/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "langchain-openai" -version = "0.2.14" +version = "0.3.0" description = "An integration package connecting OpenAI and LangChain" authors = [] readme = "README.md" @@ -23,7 +23,7 @@ ignore_missing_imports = true [tool.poetry.dependencies] python = ">=3.9,<4.0" -langchain-core = "^0.3.27" +langchain-core = "^0.3.29" openai = "^1.58.1" tiktoken = ">=0.7,<1" diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py index acf44a5ac0b3a..f069750934729 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py @@ -55,6 +55,10 @@ def chat_model_params(self) -> dict: "azure_endpoint": OPENAI_API_BASE, } + @property + def structured_output_kwargs(self) -> dict: + return {"method": "function_calling"} + @pytest.mark.xfail(reason="Not yet supported.") def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: super().test_usage_metadata_streaming(model) diff --git 
a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py index 506799aef4b59..d116688ebef61 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py @@ -630,20 +630,39 @@ def test_bind_tools_tool_choice() -> None: assert not msg.tool_calls -def test_openai_structured_output() -> None: +@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1"]) +def test_openai_structured_output(model: str) -> None: class MyModel(BaseModel): """A Person""" name: str age: int - llm = ChatOpenAI().with_structured_output(MyModel) + llm = ChatOpenAI(model=model).with_structured_output(MyModel) result = llm.invoke("I'm a 27 year old named Erick") assert isinstance(result, MyModel) assert result.name == "Erick" assert result.age == 27 +def test_structured_output_errors_with_legacy_models() -> None: + class MyModel(BaseModel): + """A Person""" + + name: str + age: int + + llm = ChatOpenAI(model="gpt-4").with_structured_output(MyModel) + + with pytest.warns(UserWarning, match="with_structured_output"): + with pytest.raises(openai.BadRequestError): + _ = llm.invoke("I'm a 27 year old named Erick") + + with pytest.warns(UserWarning, match="with_structured_output"): + with pytest.raises(openai.BadRequestError): + _ = list(llm.stream("I'm a 27 year old named Erick")) + + def test_openai_proxy() -> None: """Test ChatOpenAI with proxy.""" chat_openai = ChatOpenAI(openai_proxy="http://localhost:8080") @@ -820,20 +839,18 @@ class magic_function(BaseModel): @pytest.mark.parametrize( - ("model", "method", "strict"), - [("gpt-4o", "function_calling", True), ("gpt-4o-2024-08-06", "json_schema", None)], + ("model", "method"), + [("gpt-4o", "function_calling"), ("gpt-4o-2024-08-06", "json_schema")], ) def test_structured_output_strict( - model: str, - method: Literal["function_calling", "json_schema"], - strict: 
Optional[bool], + model: str, method: Literal["function_calling", "json_schema"] ) -> None: """Test to verify structured output with strict=True.""" from pydantic import BaseModel as BaseModelProper from pydantic import Field as FieldProper - llm = ChatOpenAI(model=model, temperature=0) + llm = ChatOpenAI(model=model) class Joke(BaseModelProper): """Joke to tell user.""" @@ -842,10 +859,7 @@ class Joke(BaseModelProper): punchline: str = FieldProper(description="answer to resolve the joke") # Pydantic class - # Type ignoring since the interface only officially supports pydantic 1 - # or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2. - # We'll need to do a pass updating the type signatures. - chat = llm.with_structured_output(Joke, method=method, strict=strict) + chat = llm.with_structured_output(Joke, method=method, strict=True) result = chat.invoke("Tell me a joke about cats.") assert isinstance(result, Joke) @@ -854,7 +868,7 @@ class Joke(BaseModelProper): # Schema chat = llm.with_structured_output( - Joke.model_json_schema(), method=method, strict=strict + Joke.model_json_schema(), method=method, strict=True ) result = chat.invoke("Tell me a joke about cats.") assert isinstance(result, dict) @@ -875,14 +889,14 @@ class InvalidJoke(BaseModelProper): default="foo", description="answer to resolve the joke" ) - chat = llm.with_structured_output(InvalidJoke, method=method, strict=strict) + chat = llm.with_structured_output(InvalidJoke, method=method, strict=True) with pytest.raises(openai.BadRequestError): chat.invoke("Tell me a joke about cats.") with pytest.raises(openai.BadRequestError): next(chat.stream("Tell me a joke about cats.")) chat = llm.with_structured_output( - InvalidJoke.model_json_schema(), method=method, strict=strict + InvalidJoke.model_json_schema(), method=method, strict=True ) with pytest.raises(openai.BadRequestError): chat.invoke("Tell me a joke about cats.") @@ -890,11 +904,9 @@ class InvalidJoke(BaseModelProper): 
next(chat.stream("Tell me a joke about cats.")) -@pytest.mark.parametrize( - ("model", "method", "strict"), [("gpt-4o-2024-08-06", "json_schema", None)] -) +@pytest.mark.parametrize(("model", "method"), [("gpt-4o-2024-08-06", "json_schema")]) def test_nested_structured_output_strict( - model: str, method: Literal["json_schema"], strict: Optional[bool] + model: str, method: Literal["json_schema"] ) -> None: """Test to verify structured output with strict=True for nested object.""" @@ -914,7 +926,7 @@ class JokeWithEvaluation(TypedDict): self_evaluation: SelfEvaluation # Schema - chat = llm.with_structured_output(JokeWithEvaluation, method=method, strict=strict) + chat = llm.with_structured_output(JokeWithEvaluation, method=method, strict=True) result = chat.invoke("Tell me a joke about cats.") assert isinstance(result, dict) assert set(result.keys()) == {"setup", "punchline", "self_evaluation"} @@ -927,6 +939,46 @@ class JokeWithEvaluation(TypedDict): assert set(chunk["self_evaluation"].keys()) == {"score", "text"} +@pytest.mark.parametrize( + ("strict", "method"), + [ + (True, "json_schema"), + (False, "json_schema"), + (True, "function_calling"), + (False, "function_calling"), + ], +) +def test_json_schema_openai_format( + strict: bool, method: Literal["json_schema", "function_calling"] +) -> None: + """Test we can pass in OpenAI schema format specifying strict.""" + llm = ChatOpenAI(model="gpt-4o-mini") + schema = { + "name": "get_weather", + "description": "Fetches the weather in the given location", + "strict": strict, + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The location to get the weather for", + }, + "unit": { + "type": "string", + "description": "The unit to return the temperature in", + "enum": ["F", "C"], + }, + }, + "additionalProperties": False, + "required": ["location", "unit"], + }, + } + chat = llm.with_structured_output(schema, method=method) + result = chat.invoke("What is the 
weather in New York?") + assert isinstance(result, dict) + + def test_json_mode() -> None: llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) response = llm.invoke( diff --git a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr index 2b8c3563b9443..2060512958a9f 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr +++ b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr @@ -15,7 +15,6 @@ }), 'max_retries': 2, 'max_tokens': 100, - 'n': 1, 'openai_api_key': dict({ 'id': list([ 'AZURE_OPENAI_API_KEY', diff --git a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr index b7ab1ce9c072c..e7307c6158fbc 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr +++ b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr @@ -11,7 +11,6 @@ 'max_retries': 2, 'max_tokens': 100, 'model_name': 'gpt-3.5-turbo', - 'n': 1, 'openai_api_key': dict({ 'id': list([ 'OPENAI_API_KEY', diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 2e6cca0cd2d96..5eac32c0447dd 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -877,8 +877,6 @@ def test__get_request_payload() -> None: ], "model": "gpt-4o-2024-08-06", "stream": False, - "n": 1, - "temperature": 0.7, } payload = llm._get_request_payload(messages) assert payload == expected diff --git a/libs/partners/xai/Makefile b/libs/partners/xai/Makefile index 963563e2d7099..6859cc789a179 100644 --- a/libs/partners/xai/Makefile +++ 
b/libs/partners/xai/Makefile @@ -11,6 +11,9 @@ integration_test integration_tests: TEST_FILE=tests/integration_tests/ test tests: poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE) +test_watch: + poetry run ptw --snapshot-update --now . -- -vv $(TEST_FILE) + integration_test integration_tests: poetry run pytest $(TEST_FILE) diff --git a/libs/partners/xai/langchain_xai/chat_models.py b/libs/partners/xai/langchain_xai/chat_models.py index 775d22740cd4e..a854be5487d4c 100644 --- a/libs/partners/xai/langchain_xai/chat_models.py +++ b/libs/partners/xai/langchain_xai/chat_models.py @@ -320,9 +320,9 @@ def _get_ls_params( @model_validator(mode="after") def validate_environment(self) -> Self: """Validate that api key and python package exists in environment.""" - if self.n < 1: + if self.n is not None and self.n < 1: raise ValueError("n must be at least 1.") - if self.n > 1 and self.streaming: + if self.n is not None and self.n > 1 and self.streaming: raise ValueError("n must be 1 when streaming.") client_params: dict = { @@ -331,10 +331,11 @@ def validate_environment(self) -> Self: ), "base_url": self.xai_api_base, "timeout": self.request_timeout, - "max_retries": self.max_retries, "default_headers": self.default_headers, "default_query": self.default_query, } + if self.max_retries is not None: + client_params["max_retries"] = self.max_retries if client_params["api_key"] is None: raise ValueError( diff --git a/libs/partners/xai/tests/unit_tests/__snapshots__/test_chat_models_standard.ambr b/libs/partners/xai/tests/unit_tests/__snapshots__/test_chat_models_standard.ambr index 5c6f113f2174a..4cd1261555c90 100644 --- a/libs/partners/xai/tests/unit_tests/__snapshots__/test_chat_models_standard.ambr +++ b/libs/partners/xai/tests/unit_tests/__snapshots__/test_chat_models_standard.ambr @@ -10,7 +10,6 @@ 'max_retries': 2, 'max_tokens': 100, 'model_name': 'grok-beta', - 'n': 1, 'request_timeout': 60.0, 'stop': list([ ]), diff --git 
a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py index f569135004497..69116b6cfb7a9 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py @@ -21,6 +21,7 @@ from pydantic import BaseModel, Field from pydantic.v1 import BaseModel as BaseModelV1 from pydantic.v1 import Field as FieldV1 +from typing_extensions import Annotated, TypedDict from langchain_tests.unit_tests.chat_models import ( ChatModelTests, @@ -191,6 +192,19 @@ def tool_choice_value(self) -> Optional[str]: def has_structured_output(self) -> bool: return True + .. dropdown:: structured_output_kwargs + + Dict property that can be used to specify additional kwargs for + ``with_structured_output``. Useful for testing different models. + + Example: + + .. code-block:: python + + @property + def structured_output_kwargs(self) -> dict: + return {"method": "function_calling"} + .. dropdown:: supports_json_mode Boolean property indicating whether the chat model supports JSON mode in @@ -1128,10 +1142,7 @@ def has_tool_calling(self) -> bool: Joke = _get_joke_class() # Pydantic class - # Type ignoring since the interface only officially supports pydantic 1 - # or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2. - # We'll need to do a pass updating the type signatures. 
- chat = model.with_structured_output(Joke) # type: ignore[arg-type] + chat = model.with_structured_output(Joke, **self.structured_output_kwargs) result = chat.invoke("Tell me a joke about cats.") assert isinstance(result, Joke) @@ -1139,7 +1150,9 @@ def has_tool_calling(self) -> bool: assert isinstance(chunk, Joke) # Schema - chat = model.with_structured_output(Joke.model_json_schema()) + chat = model.with_structured_output( + Joke.model_json_schema(), **self.structured_output_kwargs + ) result = chat.invoke("Tell me a joke about cats.") assert isinstance(result, dict) assert set(result.keys()) == {"setup", "punchline"} @@ -1182,10 +1195,7 @@ def has_tool_calling(self) -> bool: Joke = _get_joke_class() # Pydantic class - # Type ignoring since the interface only officially supports pydantic 1 - # or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2. - # We'll need to do a pass updating the type signatures. - chat = model.with_structured_output(Joke) # type: ignore[arg-type] + chat = model.with_structured_output(Joke, **self.structured_output_kwargs) result = await chat.ainvoke("Tell me a joke about cats.") assert isinstance(result, Joke) @@ -1193,7 +1203,9 @@ def has_tool_calling(self) -> bool: assert isinstance(chunk, Joke) # Schema - chat = model.with_structured_output(Joke.model_json_schema()) + chat = model.with_structured_output( + Joke.model_json_schema(), **self.structured_output_kwargs + ) result = await chat.ainvoke("Tell me a joke about cats.") assert isinstance(result, dict) assert set(result.keys()) == {"setup", "punchline"} @@ -1244,7 +1256,7 @@ class Joke(BaseModelV1): # Uses langchain_core.pydantic_v1.BaseModel punchline: str = FieldV1(description="answer to resolve the joke") # Pydantic class - chat = model.with_structured_output(Joke) + chat = model.with_structured_output(Joke, **self.structured_output_kwargs) result = chat.invoke("Tell me a joke about cats.") assert isinstance(result, Joke) @@ -1252,7 +1264,9 @@ class 
Joke(BaseModelV1): # Uses langchain_core.pydantic_v1.BaseModel assert isinstance(chunk, Joke) # Schema - chat = model.with_structured_output(Joke.schema()) + chat = model.with_structured_output( + Joke.schema(), **self.structured_output_kwargs + ) result = chat.invoke("Tell me a joke about cats.") assert isinstance(result, dict) assert set(result.keys()) == {"setup", "punchline"} @@ -1293,6 +1307,7 @@ def has_tool_calling(self) -> bool: if not self.has_tool_calling: pytest.skip("Test requires tool calling.") + # Pydantic class Joke(BaseModel): """Joke to tell user.""" @@ -1301,7 +1316,7 @@ class Joke(BaseModel): default=None, description="answer to resolve the joke" ) - chat = model.with_structured_output(Joke) # type: ignore[arg-type] + chat = model.with_structured_output(Joke, **self.structured_output_kwargs) setup_result = chat.invoke( "Give me the setup to a joke about cats, no punchline." ) @@ -1310,6 +1325,24 @@ class Joke(BaseModel): joke_result = chat.invoke("Give me a joke about cats, include the punchline.") assert isinstance(joke_result, Joke) + # Schema + chat = model.with_structured_output( + Joke.model_json_schema(), **self.structured_output_kwargs + ) + result = chat.invoke("Tell me a joke about cats.") + assert isinstance(result, dict) + + # TypedDict + class JokeDict(TypedDict): + """Joke to tell user.""" + + setup: Annotated[str, ..., "question to set up a joke"] + punchline: Annotated[Optional[str], None, "answer to resolve the joke"] + + chat = model.with_structured_output(JokeDict, **self.structured_output_kwargs) + result = chat.invoke("Tell me a joke about cats.") + assert isinstance(result, dict) + def test_json_mode(self, model: BaseChatModel) -> None: """Test structured output via `JSON mode. 
`_ diff --git a/libs/standard-tests/langchain_tests/unit_tests/chat_models.py b/libs/standard-tests/langchain_tests/unit_tests/chat_models.py index 766367f7359c2..84a51385b6d05 100644 --- a/libs/standard-tests/langchain_tests/unit_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/unit_tests/chat_models.py @@ -132,6 +132,11 @@ def has_structured_output(self) -> bool: is not BaseChatModel.with_structured_output ) + @property + def structured_output_kwargs(self) -> dict: + """If specified, additional kwargs for with_structured_output.""" + return {} + @property def supports_json_mode(self) -> bool: """(bool) whether the chat model supports JSON mode.""" @@ -299,6 +304,19 @@ def tool_choice_value(self) -> Optional[str]: def has_structured_output(self) -> bool: return True + .. dropdown:: structured_output_kwargs + + Dict property that can be used to specify additional kwargs for + ``with_structured_output``. Useful for testing different models. + + Example: + + .. code-block:: python + + @property + def structured_output_kwargs(self) -> dict: + return {"method": "function_calling"} + .. dropdown:: supports_json_mode Boolean property indicating whether the chat model supports JSON mode in