From 8ad7adad8784d01ad1cbbb8b4c5f8102dbf11a63 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Wed, 30 Oct 2024 21:31:01 -0700 Subject: [PATCH] infra: build api docs from package listing (#27774) --- .github/scripts/check_diff.py | 2 + .github/scripts/prep_api_docs_build.py | 81 ++++++++++++ .github/workflows/api_doc_build.yml | 166 ++++++------------------- libs/packages.yml | 149 ++++++++++++++++++++++ 4 files changed, 272 insertions(+), 126 deletions(-) create mode 100644 .github/scripts/prep_api_docs_build.py create mode 100644 libs/packages.yml diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py index 17f08fe0bbf56..068c75ef01782 100644 --- a/.github/scripts/check_diff.py +++ b/.github/scripts/check_diff.py @@ -300,6 +300,8 @@ def _get_configs_for_multi_dirs( ] != ["README.md"]: dirs_to_run["test"].add(f"libs/partners/{partner_dir}") # Skip if the directory was deleted or is just a tombstone readme + elif file == "libs/packages.yml": + continue elif file.startswith("libs/"): raise ValueError( f"Unknown lib: {file}. check_diff.py likely needs " diff --git a/.github/scripts/prep_api_docs_build.py b/.github/scripts/prep_api_docs_build.py new file mode 100644 index 0000000000000..b48a6343f6fd4 --- /dev/null +++ b/.github/scripts/prep_api_docs_build.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +"""Script to sync libraries from various repositories into the main langchain repository.""" + +import os +import shutil +import yaml +from pathlib import Path +from typing import Dict, Any + + +def load_packages_yaml() -> Dict[str, Any]: + """Load and parse the packages.yml file.""" + with open("langchain/libs/packages.yml", "r") as f: + return yaml.safe_load(f) + + +def clean_target_directories(packages: Dict[str, Any]) -> None: + """Remove old directories that will be replaced.""" + base_path = Path("langchain/libs/partners") + for package in packages["packages"]: + if package["repo"] != "langchain-ai/langchain": + package_name = package["name"].replace("langchain-", "") + target_dir = base_path / package_name + if target_dir.exists(): + print(f"Removing {target_dir}") + shutil.rmtree(target_dir) + + +def move_libraries(packages: Dict[str, Any]) -> None: + """Move libraries from their source locations to the target directories.""" + for package in packages["packages"]: + # Skip if it's the main langchain repo or disabled + if package["repo"] == "langchain-ai/langchain" or package.get( + "disabled", False + ): + continue + + repo_name = package["repo"].split("/")[1] + package_name = package["name"].replace("langchain-", "") + source_path = package["path"] + target_dir = f"langchain/libs/partners/{package_name}" + + # Handle root path case + if source_path == ".": + source_dir = repo_name + else: + source_dir = f"{repo_name}/{source_path}" + + print(f"Moving {source_dir} to {target_dir}") + + # Ensure target directory exists + os.makedirs(os.path.dirname(target_dir), exist_ok=True) + + try: + # Move the directory + shutil.move(source_dir, target_dir) + except Exception as e: + print(f"Error moving {source_dir} to {target_dir}: {e}") + + +def main(): + """Main function to orchestrate the library sync process.""" + try: + # Load packages configuration + packages = load_packages_yaml() + + # Clean target directories + clean_target_directories(packages) + + # Move libraries to their new locations + move_libraries(packages) + + print("Library sync completed successfully!") + + except Exception as e: + print(f"Error during library sync: {e}") + raise + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/api_doc_build.yml b/.github/workflows/api_doc_build.yml index 442a5ab18ed80..0825400669db1 100644 --- a/.github/workflows/api_doc_build.yml +++ b/.github/workflows/api_doc_build.yml @@ -22,133 +22,27 @@ jobs: repository: langchain-ai/langchain-api-docs-html path: langchain-api-docs-html token: ${{ secrets.TOKEN_GITHUB_API_DOCS_HTML }} - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-google - path: langchain-google - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-datastax - path: langchain-datastax - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-nvidia - path: langchain-nvidia - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-cohere - path: langchain-cohere - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-elastic - path: langchain-elastic - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-postgres - path: langchain-postgres - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-aws - path: langchain-aws - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-weaviate - path: langchain-weaviate - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-ai21 - path: langchain-ai21 - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-together - path: langchain-together - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-experimental - path: langchain-experimental - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-milvus - path: langchain-milvus - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-unstructured - path: langchain-unstructured - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-databricks - path: langchain-databricks - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-ibm - path: langchain-ibm - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-azure - path: langchain-azure - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-mongodb - path: langchain-mongodb - - uses: actions/checkout@v4 - with: - repository: langchain-ai/langchain-redis - path: langchain-redis - + - name: Install yq + run: | + sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 + sudo chmod a+x /usr/local/bin/yq - - name: Set Git config - working-directory: langchain + - name: Parse YAML and checkout repos run: | - git config --local user.email "actions@github.com" - git config --local user.name "Github Actions" + # Get unique repositories + REPOS=$(yq '.packages[].repo' langchain/libs/packages.yml | sort -u) - - name: Move libs - run: | - rm -rf \ - langchain/libs/partners/google-genai \ - langchain/libs/partners/google-vertexai \ - langchain/libs/partners/astradb \ - langchain/libs/partners/nvidia-trt \ - langchain/libs/partners/nvidia-ai-endpoints \ - langchain/libs/partners/cohere \ - langchain/libs/partners/elasticsearch \ - langchain/libs/partners/upstage \ - langchain/libs/partners/ai21 \ - langchain/libs/partners/together \ - langchain/libs/standard-tests \ - langchain/libs/experimental \ - langchain/libs/partners/milvus \ - langchain/libs/partners/unstructured \ - langchain/libs/partners/databricks \ - langchain/libs/partners/ibm \ - langchain/libs/partners/azure-dynamic-sessions \ - langchain/libs/partners/mongodb \ - langchain/libs/partners/redis - mv langchain-google/libs/genai langchain/libs/partners/google-genai - mv langchain-google/libs/vertexai langchain/libs/partners/google-vertexai - mv langchain-google/libs/community langchain/libs/partners/google-community - mv langchain-datastax/libs/astradb langchain/libs/partners/astradb - mv langchain-nvidia/libs/ai-endpoints langchain/libs/partners/nvidia-ai-endpoints - mv langchain-cohere/libs/cohere langchain/libs/partners/cohere - mv langchain-elastic/libs/elasticsearch langchain/libs/partners/elasticsearch - mv langchain-postgres langchain/libs/partners/postgres - mv langchain-aws/libs/aws langchain/libs/partners/aws - mv langchain-weaviate/libs/weaviate langchain/libs/partners/weaviate - mv langchain-ai21/libs/ai21 langchain/libs/partners/ai21 - mv langchain-together/libs/together langchain/libs/partners/together - mv langchain-experimental/libs/experimental langchain/libs/experimental - mv langchain-milvus/libs/milvus langchain/libs/partners/milvus - mv langchain-unstructured/libs/unstructured langchain/libs/partners/unstructured - mv langchain-databricks/libs/databricks langchain/libs/partners/databricks - mv langchain-ibm/libs/ibm langchain/libs/partners/ibm - mv langchain-azure/libs/azure-dynamic-sessions langchain/libs/partners/azure-dynamic-sessions - mv langchain-mongodb/libs/mongodb langchain/libs/partners/mongodb - mv langchain-redis/libs/redis langchain/libs/partners/redis - - - name: Rm old html - run: - rm -rf langchain-api-docs-html/api_reference_build/html + # Checkout each unique repository + for repo in $REPOS; do + if [ "$repo" != "langchain-ai/langchain" ]; then + REPO_NAME=$(echo $repo | cut -d'/' -f2) + echo "Checking out $repo to $REPO_NAME" + git clone --depth 1 https://github.com/$repo.git $REPO_NAME + fi + done + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Set up Python ${{ env.PYTHON_VERSION }} + Poetry ${{ env.POETRY_VERSION }} uses: "./langchain/.github/actions/poetry_setup" @@ -158,16 +52,36 @@ jobs: cache-key: api-docs working-directory: langchain - - name: Install dependencies + - name: Install initial py deps working-directory: langchain run: | python -m pip install -U uv - python -m uv pip install --upgrade --no-cache-dir pip setuptools + python -m uv pip install --upgrade --no-cache-dir pip setuptools pyyaml + + - name: Move libs with script + run: python langchain/.github/scripts/prep_api_docs_build.py + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Rm old html + run: + rm -rf langchain-api-docs-html/api_reference_build/html + + - name: Install dependencies + working-directory: langchain + run: | + # skip airbyte due to pandas dependency issue python -m uv pip install $(ls ./libs/partners | grep -vE "airbyte" | xargs -I {} echo "./libs/partners/{}") python -m uv pip install libs/core libs/langchain libs/text-splitters libs/community libs/experimental python -m uv pip install -r docs/api_reference/requirements.txt + - name: Set Git config + working-directory: langchain + run: | + git config --local user.email "actions@github.com" + git config --local user.name "Github Actions" + - name: Build docs working-directory: langchain run: | @@ -182,4 +96,4 @@ jobs: - uses: EndBug/add-and-commit@v9 with: cwd: langchain-api-docs-html - message: 'Update API docs build' \ No newline at end of file + message: 'Update API docs build' diff --git a/libs/packages.yml b/libs/packages.yml new file mode 100644 index 0000000000000..994bb3a718637 --- /dev/null +++ b/libs/packages.yml @@ -0,0 +1,149 @@ +# this file is used to define the packages that are used in the project +# it is EXPERIMENTAL and may be removed in the future + +packages: + - name: langchain-core + repo: langchain-ai/langchain + path: libs/core + - name: langchain-text-splitters + repo: langchain-ai/langchain + path: libs/text-splitters + - name: langchain + repo: langchain-ai/langchain + path: libs/langchain + - name: langchain-community + repo: langchain-ai/langchain + path: libs/community + - name: langchain-experimental + repo: langchain-ai/langchain-experimental + path: libs/experimental + - name: langchain-cli + repo: langchain-ai/langchain + path: libs/cli + - name: langchain-ai21 + repo: langchain-ai/langchain-ai21 + path: libs/ai21 + - name: langchain-airbyte + repo: langchain-ai/langchain + path: libs/partners/airbyte + disabled: true # dependency issues / stale + - name: langchain-anthropic + repo: langchain-ai/langchain + path: libs/partners/anthropic + - name: langchain-chroma + repo: langchain-ai/langchain + path: libs/partners/chroma + - name: langchain-exa + repo: langchain-ai/langchain + path: libs/partners/exa + - name: langchain-fireworks + repo: langchain-ai/langchain + path: libs/partners/fireworks + - name: langchain-groq + repo: langchain-ai/langchain + path: libs/partners/groq + - name: langchain-huggingface + repo: langchain-ai/langchain + path: libs/partners/huggingface + - name: langchain-ibm + repo: langchain-ai/langchain + path: libs/partners/ibm + - name: langchain-milvus + repo: langchain-ai/langchain-milvus + path: libs/milvus + - name: langchain-mistralai + repo: langchain-ai/langchain + path: libs/partners/mistralai + - name: langchain-mongodb + repo: langchain-ai/langchain-mongodb + path: libs/mongodb + - name: langchain-nomic + repo: langchain-ai/langchain + path: libs/partners/nomic + - name: langchain-openai + repo: langchain-ai/langchain + path: libs/partners/openai + - name: langchain-pinecone + repo: langchain-ai/langchain + path: libs/partners/pinecone + - name: langchain-prompty + repo: langchain-ai/langchain + path: libs/partners/prompty + - name: langchain-qdrant + repo: langchain-ai/langchain + path: libs/partners/qdrant + - name: langchain-sema4 + repo: langchain-ai/langchain-sema4 + path: libs/sema4 + - name: langchain-together + repo: langchain-ai/langchain-together + path: libs/together + - name: langchain-upstage + repo: langchain-ai/langchain-upstage + path: libs/upstage + - name: langchain-voyageai + repo: langchain-ai/langchain + path: libs/partners/voyageai + - name: langchain-aws + repo: langchain-ai/langchain-aws + path: libs/aws + - name: langchain-astradb + repo: langchain-ai/langchain-datastax + path: libs/astradb + - name: langchain-google-genai + repo: langchain-ai/langchain-google + path: libs/genai + - name: langchain-google-vertexai + repo: langchain-ai/langchain-google + path: libs/vertexai + - name: langchain-google-community + repo: langchain-ai/langchain-google + path: libs/community + - name: langchain-weaviate + repo: langchain-ai/langchain-weaviate + path: libs/weaviate + - name: langchain-cohere + repo: langchain-ai/langchain-cohere + path: libs/cohere + - name: langchain-elasticsearch + repo: langchain-ai/langchain-elastic + path: libs/elasticsearch + - name: langchain-nvidia-ai-endpoints + repo: langchain-ai/langchain-nvidia + path: libs/ai-endpoints + - name: langchain-postgres + repo: langchain-ai/langchain-postgres + path: . + - name: langchain-redis + repo: langchain-ai/langchain-redis + path: libs/redis + - name: langchain-unstructured + repo: langchain-ai/langchain-unstructured + path: libs/unstructured + - name: langchain-azure-dynamic-sessions + repo: langchain-ai/langchain-azure + path: libs/azure-dynamic-sessions + - name: langchain-sqlserver + repo: langchain-ai/langchain-azure + path: libs/sqlserver + - name: langchain-cerebras + repo: langchain-ai/langchain-cerebras + path: libs/cerebras + - name: langchain-snowflake + repo: langchain-ai/langchain-snowflake + path: libs/snowflake + - name: langchain-databricks + repo: langchain-ai/langchain-databricks + path: libs/databricks + - name: langchain-ibm + repo: langchain-ai/langchain-ibm + path: libs/ibm + - name: langchain-couchbase + repo: langchain-ai/langchain + path: libs/partners/couchbase + - name: langchain-ollama + repo: langchain-ai/langchain + path: libs/partners/ollama + - name: langchain-box + repo: langchain-ai/langchain + path: libs/partners/box