diff --git a/examples/001-getting-started.ipynb b/examples/001-getting-started.ipynb deleted file mode 100644 index 5bf27ef..0000000 --- a/examples/001-getting-started.ipynb +++ /dev/null @@ -1,131 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "929301f6-5982-4662-b68f-9e72269cace7", - "metadata": {}, - "outputs": [], - "source": [ - "%reload_ext jupyter_ai " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "f7f8325c-18b8-4943-b7e8-14a839060958", - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "Here's the information about Datalab in markdown format:\n", - "\n", - "# Datalab\n", - "\n", - "Datalab is a tool developed by Google Cloud Platform that provides an interactive environment for data exploration, analysis, and machine learning. Key features include:\n", - "\n", - "## Features\n", - "\n", - "- **Jupyter Notebooks**: Uses Jupyter notebooks for interactive coding and visualization\n", - "- **Cloud Integration**: Seamlessly integrates with Google Cloud services\n", - "- **Big Data Support**: Designed to work with large datasets using BigQuery and other Google Cloud data services\n", - "- **Pre-installed Libraries**: Comes with popular data science libraries like pandas, numpy, and scikit-learn\n", - "- **Collaborative**: Allows for easy sharing and collaboration on data projects\n", - "\n", - "## Use Cases\n", - "\n", - "- Data exploration and visualization\n", - "- Machine learning model development\n", - "- Big data analysis\n", - "- Prototyping data pipelines\n", - "\n", - "## Advantages\n", - "\n", - "- Easy setup and configuration\n", - "- Cost-effective (pay only for resources used)\n", - "- Scalable to handle large datasets\n", - "- Integrates well with other Google Cloud services\n", - "\n", - "Datalab is particularly useful for data scientists and analysts who work with Google Cloud Platform and need a powerful, cloud-based environment for their data projects." - ], - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": { - "text/markdown": { - "jupyter_ai": { - "model_id": "claude-3-5-sonnet-20240620", - "provider_id": "anthropic-chat" - } - } - }, - "output_type": "execute_result" - } - ], - "source": [ - "%%ai anthropic-chat:claude-3-5-sonnet-20240620\n", - "Do you know what datalab is?" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "12aaa0b1-6ccd-4645-b578-d1fde5cd2614", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "UsageError: Line magic function `%%yellowhammer` not found.\n" - ] - } - ], - "source": [ - "\"\"\" (mock up ) \"\"\"\n", - "%%yellowhammer\n", - "Do you know what datalab is?" - ] - }, - { - "cell_type": "markdown", - "id": "b943d5eb-a83a-4ed5-b531-e69a44d6d701", - "metadata": {}, - "source": [ - "Yes, *datalab* is ACTUALLY an open source research data management tool for chemistry and materials science, and here is the functionality I know about..." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d93f30b-3dde-457a-8aa9-9d08cc19f3ad", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/Demo_anthropic.ipynb b/examples/Demo_anthropic.ipynb new file mode 100644 index 0000000..b56cf27 --- /dev/null +++ b/examples/Demo_anthropic.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "2671fc38-c9ca-49ef-948a-abb7815ca2b9", + "metadata": {}, + "outputs": [], + "source": [ + "%reload_ext yellowhammer" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "32c5ffc5-ea83-458c-8e5a-75bb15da8a2d", + "metadata": {}, + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Enter your LLM API key ········\n" + ] + } + ], + "source": [ + "import os\n", + "import getpass\n", + "\n", + "os.environ[\"LLM_PROVIDER\"] = \"ANTHROPIC\"\n", + "os.environ[\"LLM_API_KEY\"] = getpass.getpass(\"Enter your LLM API key\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2f61e78f-b8a3-4f0c-b5e4-503af30019f3", + "metadata": {}, + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Enter your Datalab API key ········\n" + ] + } + ], + "source": [ + "os.environ[\"DATALAB_API_KEY\"] = getpass.getpass(\"Enter your Datalab API key\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f44cabeb-58ef-4ccd-86d7-5cb04a3f8643", + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "Datalab is a data management platform designed to help scientists manage their experimental data, plan experiments, analyze data, and plot results. It provides a structured way to store, organize, and interact with scientific data, particularly in the context of materials science and chemistry experiments. \n", + "\n", + "Some key features of Datalab include:\n", + "\n", + "1. Sample management: You can create, store, and retrieve information about experimental samples, including their chemical composition, synthesis methods, and related metadata.\n", + "\n", + "2. File attachments: Datalab allows you to attach files (such as raw data or images) to sample entries, making it easy to keep all relevant information together.\n", + "\n", + "3. Data blocks: These are used to parse attached files according to scientific schemas and generate plots, facilitating data analysis and visualization.\n", + "\n", + "4. Search functionality: You can search for items (samples, materials, etc.) across the Datalab instance.\n", + "\n", + "5. Relationship tracking: Datalab can track relationships between different items, helping to maintain the context of experiments and materials.\n", + "\n", + "6. API access: Datalab provides a Python API that allows programmatic interaction with the platform, enabling integration with other tools and scripts.\n", + "\n", + "Datalab is particularly useful for maintaining experimental workflows, ensuring data provenance, and facilitating collaboration among scientists. 
It helps in organizing complex scientific data in a structured manner, making it easier to retrieve, analyze, and share research findings." + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%llm\n", + "What is datalab?" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d10442e5-7c5c-40cd-becf-9c2e9a79fd45", + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "To create a sample with the specified details using the Datalab API, we'll use the `create_item` method of the `DatalabClient`. Here's what the code will do:\n", + "\n", + "1. Import the necessary module\n", + "2. Create a DatalabClient instance\n", + "3. Prepare the sample data as a dictionary\n", + "4. Use the create_item method to create the sample\n", + "5. Print the response to confirm the sample creation" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%llm\n", + "Create a sample with ID llm-test4, sample name \"virtual sample (Claude)\", formula FrCl" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6ef8a1c2-50c8-4807-b84b-008c0bfb4efa", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/yue/Library/CloudStorage/OneDrive-Personal/code/yellowhammer/.venv/lib/python3.10/site-packages/datalab_api/_base.py:165: UserWarning: Found API URL https://demo-api.datalab-org.io in HTML meta tag. Creating client with this URL instead.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample creation response:\n", + "{'collections': [], 'creator_ids': [{'$oid': '66abc00dcb992f4b299aa60a'}], 'creators': [{'contact_email': None, 'display_name': 'Yue Wu'}], 'date': '2024-10-22T20:06:55.888755', 'item_id': 'llm-test4', 'name': 'virtual sample (Claude)', 'nblocks': 0, 'refcode': 'demo:YASTWQ', 'type': 'samples'}\n" + ] + } + ], + "source": [ + "from datalab_api import DatalabClient\n", + "\n", + "# Create a DatalabClient instance\n", + "with DatalabClient(\"https://demo.datalab-org.io\") as client:\n", + " # Prepare the sample data\n", + " sample_data = {\n", + " \"item_id\": \"llm-test4\",\n", + " \"name\": \"virtual sample (Claude)\",\n", + " \"chemform\": \"FrCl\",\n", + " \"type\": \"samples\",\n", + " }\n", + "\n", + " # Create the sample\n", + " response = client.create_item(item_id=\"llm-test4\", item_type=\"samples\", item_data=sample_data)\n", + "\n", + " # Print the response\n", + " print(\"Sample creation response:\")\n", + " print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f3241bc-ba1f-4807-babb-3a928baad6ea", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "yellowhammer", + "language": "python", + "name": "yellowhammer" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/Demo_openai.ipynb b/examples/Demo_openai.ipynb new file mode 100644 index 0000000..59cfd16 --- /dev/null +++ b/examples/Demo_openai.ipynb @@ -0,0 +1,133 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": 
"2671fc38-c9ca-49ef-948a-abb7815ca2b9", + "metadata": {}, + "outputs": [], + "source": [ + "%reload_ext yellowhammer" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "32c5ffc5-ea83-458c-8e5a-75bb15da8a2d", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import getpass\n", + "\n", + "os.environ[\"LLM_PROVIDER\"] = \"OPENAI\"\n", + "os.environ[\"LLM_API_KEY\"] = getpass.getpass(\"Enter your LLM API key\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2f61e78f-b8a3-4f0c-b5e4-503af30019f3", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"DATALAB_API_KEY\"] = getpass.getpass(\"Enter your Datalab API key\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f44cabeb-58ef-4ccd-86d7-5cb04a3f8643", + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "Datalab is a data management platform designed for scientists and researchers to manage their experimental data efficiently. It allows users to organize, analyze, and visualize their data through a structured interface. Datalab uses the concept of \"data blocks\" to handle files associated with samples, enabling users to parse data according to scientific schemas and create plots for better data interpretation. The platform also provides an API for programmatic access, allowing users to automate tasks such as data entry, file uploads, and data analysis." + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%llm\n", + "What is datalab?" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d10442e5-7c5c-40cd-becf-9c2e9a79fd45", + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "To create a new sample in Datalab with the specified ID, name, and formula, we will use the `create_item` method of the `DatalabClient`. This method requires the item ID, item type, and the data that matches the schema for samples. In this case, we will set the `item_id` to 'llm-test3', the `name` to 'virtual sample', and the `chemform` to 'RbCl'." + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%llm\n", + "Create a sample with ID llm-test3, sample name \"virtual sample\", formula RbCl" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "33c0e59e-586d-48f3-aa5f-f6fa743dcc96", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/yue/Library/CloudStorage/OneDrive-Personal/code/yellowhammer/.venv/lib/python3.10/site-packages/datalab_api/_base.py:165: UserWarning: Found API URL https://demo-api.datalab-org.io in HTML meta tag. 
Creating client with this URL instead.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from datalab_api import DatalabClient\n", + "import os\n", + "\n", + "with DatalabClient(\"https://demo.datalab-org.io\") as client:\n", + " json_data = {\"name\": \"virtual sample\", \"chemform\": \"RbCl\"}\n", + " client.create_item(item_id=\"llm-test3\", item_type=\"samples\", item_data=json_data)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "yellowhammer", + "language": "python", + "name": "yellowhammer" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/prompts/datalab-api-prompt.md b/prompts/datalab-api-prompt.md new file mode 100755 index 0000000..9beb219 --- /dev/null +++ b/prompts/datalab-api-prompt.md @@ -0,0 +1,447 @@ +Use the datalab Python API package to query entries on the datalab instance at https://demo.datalab-org.io/. +Each method of the DatalabClient class will return a dictionary constructed directly +from the JSON response of the Datalab API. + +Datalab uses "data blocks" to take a file attached to a sample, parse it +according to some scientific schema, and then make a plot. + +The rest of this prompt contains the README for the datalab python API module `datalab_api`, which you already have installed. + +Assume the `DATALAB_API_KEY` has been set an environment variable. + +Python API +This package implements basic functionality for displaying and manipulating entries: + +```python +from datalab_api import DatalabClient + +with DatalabClient("https://demo.datalab-org.io") as client: + + # Get the info about this datalab instance + client.get_info() + + # Get the current user's info + client.authenticate() + + # Search for items with the string + items = client.search_items("search-values") + + # List all items of a given type + # Types can be 'samples' or 'starting_materials' + items = client.get_items(item_type="samples") + + # Get more info on a particular item called 'test' + item = client.get_item(item_id="test") + + # Create a new item with some data that matches the corresponding `item_type` schema + json_data = {"chemform": "NaCl"} + client.create_item(item_id="test_new", item_type="samples", item_data=json_data) + + # Attach a file to an item and get the uploaded ID + file_response = client.upload_file(filepath="my_echem_data.mpr", item_id="test") + file_id = file_response["file_id"] + + # Create a data block for a sample, then show the plot + client.create_data_block(item_id="test", file_ids=file_id) + + # Download all files attached to a sample and return their paths + file_paths = client.get_item_files(item_id="test") + + # Get the item graph, useful for finding relationships + graph = client.get_item_graph() + +``` + +Here is an abridged JSONSchema for a sample, that also has some info about other +types. 
+ +```json +{ + "title": "Sample", + "description": "A model for representing an experimental sample.", + "type": "object", + "properties": { + "blocks_obj": { + "title": "Blocks Obj", + "default": {}, + "type": "object" + }, + "display_order": { + "title": "Display Order", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "collections": { + "title": "Collections", + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/Collection" + } + }, + "revision": { + "title": "Revision", + "default": 1, + "type": "integer" + }, + "revisions": { + "title": "Revisions", + "type": "object" + }, + "creator_ids": { + "title": "Creator Ids", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "creators": { + "title": "Creators", + "type": "array", + "items": { + "$ref": "#/definitions/Person" + } + }, + "type": { + "title": "Type", + "default": "samples", + "const": "samples", + "pattern": "^samples$", + "type": "string" + }, + "immutable_id": { + "title": "Immutable ID", + "type": "string" + }, + "last_modified": { + "title": "Last Modified", + "type": "date", + "format": "date-time" + }, + "relationships": { + "title": "Relationships", + "type": "array", + "items": { + "$ref": "#/definitions/TypedRelationship" + } + }, + "refcode": { + "title": "Refcode", + "minLength": 1, + "maxLength": 40, + "pattern": "^[a-z]{2,10}:(?:[a-zA-Z0-9]+|[a-zA-Z0-9][a-zA-Z0-9._-]+[a-zA-Z0-9])$", + "type": "string" + }, + "item_id": { + "title": "Item Id", + "minLength": 1, + "maxLength": 40, + "pattern": "^(?:[a-zA-Z0-9]+|[a-zA-Z0-9][a-zA-Z0-9._-]+[a-zA-Z0-9])$", + "type": "string" + }, + "description": { + "title": "Description", + "type": "string" + }, + "date": { + "title": "Date", + "type": "date", + "format": "date-time" + }, + "name": { + "title": "Name", + "type": "string" + }, + "files": { + "title": "Files", + "type": "array", + "items": { + "$ref": "#/definitions/File" + } + }, + "file_ObjectIds": { + "title": "File Objectids", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "chemform": { + "title": "Chemform", + "example": [ + "Na3P", + "LiNiO2@C" + ], + "type": "string" + }, + "synthesis_constituents": { + "title": "Synthesis Constituents", + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/Constituent" + } + }, + "synthesis_description": { + "title": "Synthesis Description", + "type": "string" + } + }, + "required": [ + "item_id" + ], + "definitions": { + "KnownType": { + "title": "KnownType", + "description": "An enumeration of the types of entry known by this implementation, should be made dynamic in the future.", + "enum": [ + "samples", + "starting_materials", + "blocks", + "files", + "people", + "collections" + ], + "type": "string" + }, + "File": { + "title": "File", + "description": "A model for representing a file that has been tracked or uploaded to datalab.", + "type": "object", + "properties": { + "revision": { + "title": "Revision", + "default": 1, + "type": "integer" + }, + "revisions": { + "title": "Revisions", + "type": "object" + }, + "creator_ids": { + "title": "Creator Ids", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "creators": { + "title": "Creators", + "type": "array", + "items": { + "$ref": "#/definitions/Person" + } + }, + "type": { + "title": "Type", + "default": "files", + "const": "files", + "pattern": "^files$", + "type": "string" + }, + "immutable_id": { + "title": "Immutable ID", + "type": "string" + }, + 
"last_modified": { + "title": "Last Modified", + "type": "date", + "format": "date-time" + }, + "relationships": { + "title": "Relationships", + "type": "array", + "items": { + "$ref": "#/definitions/TypedRelationship" + } + }, + "size": { + "title": "Size", + "description": "The size of the file on disk in bytes.", + "type": "integer" + }, + "last_modified_remote": { + "title": "Last Modified Remote", + "description": "The last date/time at which the remote file was modified.", + "type": "date", + "format": "date-time" + }, + "item_ids": { + "title": "Item Ids", + "description": "A list of item IDs associated with this file.", + "type": "array", + "items": { + "type": "string" + } + }, + "blocks": { + "title": "Blocks", + "description": "A list of block IDs associated with this file.", + "type": "array", + "items": { + "type": "string" + } + }, + "name": { + "title": "Name", + "description": "The filename on disk.", + "type": "string" + }, + "extension": { + "title": "Extension", + "description": "The file extension that the file was uploaded with.", + "type": "string" + }, + "original_name": { + "title": "Original Name", + "description": "The raw filename as uploaded.", + "type": "string" + }, + "location": { + "title": "Location", + "description": "The location of the file on disk.", + "type": "string" + }, + "url_path": { + "title": "Url Path", + "description": "The path to a remote file.", + "type": "string" + }, + "source": { + "title": "Source", + "description": "The source of the file, e.g. 'remote' or 'uploaded'.", + "type": "string" + }, + "time_added": { + "title": "Time Added", + "description": "The timestamp for the original file upload.", + "type": "string", + "format": "date-time" + }, + "metadata": { + "title": "Metadata", + "description": "Any additional metadata.", + "type": "object" + }, + "representation": { + "title": "Representation" + }, + "source_server_name": { + "title": "Source Server Name", + "description": "The server name at which the file is stored.", + "type": "string" + }, + "source_path": { + "title": "Source Path", + "description": "The path to the file on the remote resource.", + "type": "string" + }, + "is_live": { + "title": "Is Live", + "description": "Whether or not the file should be watched for future updates.", + "type": "boolean" + } + }, + "required": [ + "item_ids", + "blocks", + "name", + "extension", + "time_added", + "is_live" + ] + }, + "EntryReference": { + "title": "EntryReference", + "description": "A reference to a database entry by ID and type.\n\nCan include additional arbitarary metadata useful for\ninlining the item data.", + "type": "object", + "properties": { + "type": { + "title": "Type", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "immutable_id": { + "title": "Immutable Id", + "type": "string" + }, + "item_id": { + "title": "Item Id", + "minLength": 1, + "maxLength": 40, + "pattern": "^(?:[a-zA-Z0-9]+|[a-zA-Z0-9][a-zA-Z0-9._-]+[a-zA-Z0-9])$", + "type": "string" + }, + "refcode": { + "title": "Refcode", + "minLength": 1, + "maxLength": 40, + "pattern": "^[a-z]{2,10}:(?:[a-zA-Z0-9]+|[a-zA-Z0-9][a-zA-Z0-9._-]+[a-zA-Z0-9])$", + "type": "string" + } + }, + "required": [ + "type" + ] + }, + "InlineSubstance": { + "title": "InlineSubstance", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + }, + "chemform": { + "title": "Chemform", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Constituent": { + "title": "Constituent", + "description": "A 
constituent of a sample.", + "type": "object", + "properties": { + "item": { + "title": "Item", + "anyOf": [ + { + "$ref": "#/definitions/EntryReference" + }, + { + "$ref": "#/definitions/InlineSubstance" + } + ] + }, + "quantity": { + "title": "Quantity", + "minimum": 0, + "type": "number" + }, + "unit": { + "title": "Unit", + "default": "g", + "type": "string" + } + }, + "required": [ + "item", + "quantity" + ] + } + } +} +``` diff --git a/prompts/system-prompt.md b/prompts/system-prompt.md new file mode 100644 index 0000000..18e81c6 --- /dev/null +++ b/prompts/system-prompt.md @@ -0,0 +1 @@ +You are a virtual assistant that helps scientists use the data management platform datalab to manage their experimental data, plan experiments, analyse data and plot results. Here is the documentation of the datalab API: {context}. Answer user questions and use the provided documentation to interact with datalab. Your code responses will be output to a Jupyter notebook cell that has access to the datalab API and common python scientific libraries. Ensure any code you provide can be executed with all required imports and variables defined. Structure your answer with a description of the code solution. Then list the imports. Finally list the functioning code block. Here is the user question: diff --git a/pyproject.toml b/pyproject.toml index ed363dc..e4bff8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ dependencies = [ "datalab-api>=0.2.4", "jupyter-ai>=2.24.1", "jupyterlab>=4.2.5", + "langchain>=0.2.16", "langchain-anthropic>=0.1.23", "langchain-openai>=0.1.25", "notebook>=7.2.2", @@ -35,4 +36,8 @@ ignore_missing_imports = true follow_imports = "skip" [tool.uv] -dev-dependencies = ["ipykernel>=6.29.5", "pre-commit>=4.0.0", "pytest>=8.3.3"] +dev-dependencies = [ + "ipykernel>=6.29.5", + "pre-commit>=4.0.0", + "pytest>=8.3.3", +] diff --git a/src/yellowhammer/__init__.py b/src/yellowhammer/__init__.py index 77003b9..c883b66 100644 --- a/src/yellowhammer/__init__.py +++ b/src/yellowhammer/__init__.py @@ -1,4 +1,5 @@ from importlib.metadata import PackageNotFoundError, version +from .magics import DatalabMagics try: __version__ = version("yellowhammer") @@ -6,3 +7,12 @@ __version__ = "develop" __all__ = ("__version__",) + + +def load_ipython_extension(ipython): + """ + Any module file that define a function named `load_ipython_extension` + can be loaded via `%load_ext module.path` or be configured to be + autoloaded by IPython at startup time. + """ + ipython.register_magics(DatalabMagics) diff --git a/src/yellowhammer/llm.py b/src/yellowhammer/llm.py new file mode 100644 index 0000000..a2601ac --- /dev/null +++ b/src/yellowhammer/llm.py @@ -0,0 +1,68 @@ +from langchain_core.prompts import ChatPromptTemplate +from pydantic import BaseModel, Field +from .prompt import SYSTEM_PROMPT +from typing import Union + + +# Pydantic schema to use with .with_structured_output() +class code(BaseModel): + """Schema for code solutions""" + + prefix: str = Field(description="Description of the problem and approach") + imports: str = Field(description="Code block import statements") + code: str = Field(description="Code block not including import statements") + + +class ConversationalResponse(BaseModel): + """Respond in a conversational manner. 
Be kind and helpful.""" + + response: str = Field(description="A conversational response to the user's query") + + +class FinalResponse(BaseModel): + """The final response can be either a code solution or a conversational response""" + + final_output: Union[code, ConversationalResponse] + + +def get_chain( + api_provider, + api_key, + api_model=None, + api_temperature=0, +): + # API provider logic + if api_provider.lower() == "openai": + from langchain_openai import ChatOpenAI + + if api_model is None: + api_model = "gpt-4o-mini" + llm = ChatOpenAI(model=api_model, temperature=api_temperature, openai_api_key=api_key) + + elif api_provider.lower() == "anthropic": + from langchain_anthropic import ChatAnthropic + + if api_model is None: + api_model = "claude-3-5-sonnet-20240620" + llm = ChatAnthropic( + model=api_model, + temperature=api_temperature, + anthropic_api_key=api_key, + ) + + # Prompt + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + SYSTEM_PROMPT, + ), # datalab API info is passed via {context} to the system prompt + ("placeholder", "{messages}"), + ] + ) + + # Create a chain where the final output takes the FinalResponse schema + chain = prompt | llm.with_structured_output(FinalResponse, include_raw=False) + + # Returns a runnable chain which accepts datalab API documentation "context" and user question "messages" + return chain diff --git a/src/yellowhammer/magics.py b/src/yellowhammer/magics.py new file mode 100644 index 0000000..fef7f6b --- /dev/null +++ b/src/yellowhammer/magics.py @@ -0,0 +1,91 @@ +""" +Magics to support LLM interactions in IPython/Jupyter. +Adapted from fperez/jupytee and jan-janssen/LangSim. +""" + +import os + +from IPython import get_ipython +from IPython.core.magic import ( + Magics, + magics_class, + line_cell_magic, +) +from IPython.core.magic_arguments import ( + magic_arguments, + argument, + parse_argstring, +) +from IPython.display import Markdown +from .llm import get_chain, code, ConversationalResponse +from .prompt import API_PROMPT + + +def get_output(messages, temp=0.0): + env = os.environ + agent_executor = get_chain( + api_provider=env.get("LLM_PROVIDER", "OPENAI"), + api_key=env.get("LLM_API_KEY"), + api_model=env.get("LLM_MODEL", None), + api_temperature=env.get("LLM_TEMP", temp), + ) + + return agent_executor.invoke({"context": API_PROMPT, "messages": messages}) + + +# Class to manage state and expose the main magics +@magics_class +class DatalabMagics(Magics): + def __init__(self, shell): + super().__init__(shell) + self.messages = [] + + # A datalab magic that returns a code block + @magic_arguments() + @argument( + "prompt", + nargs="*", + help="""Prompt for code generation. When used as a line magic, + it runs to the end of the line. In cell mode, the entire cell + is considered the code generation prompt. + """, + ) + @argument( + "-T", + "--temp", + type=float, + default=0.0, + help="""Temperature, float in [0,1]. Higher values push the algorithm + to generate more aggressive/"creative" output. [default=0.0].""", + ) + @line_cell_magic + def llm(self, line, cell=None): + """ + Chat with the LLM. Return either conversation or code.
+ """ + args = parse_argstring(self.llm, line) # self.llm is a bound method + + if cell is None: + prompt = " ".join(args.prompt) + else: + prompt = cell + + self.messages.append(("human", prompt)) + response = get_output(self.messages).final_output + + if isinstance(response, ConversationalResponse): + output = response.response + self.messages.append(("ai", output)) + return Markdown(output) + + elif isinstance(response, code): + output = response + self.messages.append(("ai", output.prefix)) + cell_fill = output.imports + "\n" + output.code + get_ipython().set_next_input(cell_fill) + return Markdown(output.prefix) + + +# If testing interactively, it's convenient to %run as a script in Jupyter +if __name__ == "__main__": + get_ipython().register_magics(DatalabMagics) diff --git a/src/yellowhammer/prompt.py b/src/yellowhammer/prompt.py new file mode 100644 index 0000000..1658829 --- /dev/null +++ b/src/yellowhammer/prompt.py @@ -0,0 +1,19 @@ +from pathlib import Path + + +def load_file_content(file_path): + with open(file_path, encoding="utf-8") as file: + return file.read() + + +# Load API_PROMPT +api_prompt_path = Path(__file__).parent.parent.parent / "prompts" / "datalab-api-prompt.md" +API_PROMPT = load_file_content(api_prompt_path) + +# Load SYSTEM_PROMPT +system_prompt_path = Path(__file__).parent.parent.parent / "prompts" / "system-prompt.md" +SYSTEM_PROMPT = load_file_content(system_prompt_path) + +if __name__ == "__main__": + print(SYSTEM_PROMPT) + print(API_PROMPT) diff --git a/src/yellowhammer/test.ipynb b/src/yellowhammer/test.ipynb new file mode 100644 index 0000000..7ff7249 --- /dev/null +++ b/src/yellowhammer/test.ipynb @@ -0,0 +1,222 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from langchain_community.document_loaders import TextLoader\n", + "\n", + "path = \"datalab-api-prompt.md\"\n", + "loader = TextLoader(path)\n", + "API_PROMPT = loader.load()[0].page_content\n", + "\n", + "path = \"system-prompt.md\"\n", + "loader = TextLoader(path)\n", + "SYSTEM_PROMPT = loader.load()[0].page_content" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Use the datalab Python API package to query entries on the datalab instance at https://demo.datalab-org.io/.\\nEach method of the DatalabClient class will return a dictionary constructed directly\\nfrom the JSON response of the Datalab API.\\n\\nDatalab uses \"data blocks\" to take a file attached to a sample, parse it\\naccording to some scientific schema, and then make a plot.\\n\\nThe rest of this prompt contains the README for the datalab python API module `datalab_api`, which you already have installed.\\n\\nAssume the `DATALAB_API_KEY` has been set an environment variable.\\n\\nPython API\\nThis package implements basic functionality for displaying and manipulating entries:\\n\\n```python\\nfrom datalab_api import DatalabClient\\n\\nwith DatalabClient(\"https://demo.datalab-org.io\") as client:\\n\\n # Get the info about this datalab instance\\n client.get_info()\\n\\n # Get the current user\\'s info\\n client.authenticate()\\n\\n # Search for items with the string\\n items = client.search_items(\"search-values\")\\n\\n # List all items of a given type\\n # Types can be \\'samples\\' or \\'starting_materials\\'\\n items = client.get_items(item_type=\"samples\")\\n\\n # Get more info on a particular item called \\'test\\'\\n item = 
client.get_item(item_id=\"test\")\\n\\n # Create a new item with some data that matches the corresponding `item_type` schema\\n json_data = {\"chemform\": \"NaCl\"}\\n client.create_item(item_id=\"test_new\", item_type=\"samples\", item_data=json_data)\\n\\n # Attach a file to an item and get the uploaded ID\\n file_response = client.upload_file(filepath=\"my_echem_data.mpr\", item_id=\"test\")\\n file_id = file_response[\"file_id\"]\\n\\n # Create a data block for a sample, then show the plot\\n client.create_data_block(item_id=\"test\", file_ids=file_id)\\n\\n # Download all files attached to a sample and return their paths\\n file_paths = client.get_item_files(item_id=\"test\")\\n\\n # Get the item graph, useful for finding relationships\\n graph = client.get_item_graph()\\n\\n```\\n\\nHere is an abridged JSONSchema for a sample, that also has some info about other\\ntypes.\\n\\n```json \\n{\\n \"title\": \"Sample\",\\n \"description\": \"A model for representing an experimental sample.\",\\n \"type\": \"object\",\\n \"properties\": {\\n \"blocks_obj\": {\\n \"title\": \"Blocks Obj\",\\n \"default\": {},\\n \"type\": \"object\"\\n },\\n \"display_order\": {\\n \"title\": \"Display Order\",\\n \"default\": [],\\n \"type\": \"array\",\\n \"items\": {\\n \"type\": \"string\"\\n }\\n },\\n \"collections\": {\\n \"title\": \"Collections\",\\n \"default\": [],\\n \"type\": \"array\",\\n \"items\": {\\n \"$ref\": \"#/definitions/Collection\"\\n }\\n },\\n \"revision\": {\\n \"title\": \"Revision\",\\n \"default\": 1,\\n \"type\": \"integer\"\\n },\\n \"revisions\": {\\n \"title\": \"Revisions\",\\n \"type\": \"object\"\\n },\\n \"creator_ids\": {\\n \"title\": \"Creator Ids\",\\n \"default\": [],\\n \"type\": \"array\",\\n \"items\": {\\n \"type\": \"string\"\\n }\\n },\\n \"creators\": {\\n \"title\": \"Creators\",\\n \"type\": \"array\",\\n \"items\": {\\n \"$ref\": \"#/definitions/Person\"\\n }\\n },\\n \"type\": {\\n \"title\": \"Type\",\\n \"default\": \"samples\",\\n \"const\": \"samples\",\\n \"pattern\": \"^samples$\",\\n \"type\": \"string\"\\n },\\n \"immutable_id\": {\\n \"title\": \"Immutable ID\",\\n \"type\": \"string\"\\n },\\n \"last_modified\": {\\n \"title\": \"Last Modified\",\\n \"type\": \"date\",\\n \"format\": \"date-time\"\\n },\\n \"relationships\": {\\n \"title\": \"Relationships\",\\n \"type\": \"array\",\\n \"items\": {\\n \"$ref\": \"#/definitions/TypedRelationship\"\\n }\\n },\\n \"refcode\": {\\n \"title\": \"Refcode\",\\n \"minLength\": 1,\\n \"maxLength\": 40,\\n \"pattern\": \"^[a-z]{2,10}:(?:[a-zA-Z0-9]+|[a-zA-Z0-9][a-zA-Z0-9._-]+[a-zA-Z0-9])$\",\\n \"type\": \"string\"\\n },\\n \"item_id\": {\\n \"title\": \"Item Id\",\\n \"minLength\": 1,\\n \"maxLength\": 40,\\n \"pattern\": \"^(?:[a-zA-Z0-9]+|[a-zA-Z0-9][a-zA-Z0-9._-]+[a-zA-Z0-9])$\",\\n \"type\": \"string\"\\n },\\n \"description\": {\\n \"title\": \"Description\",\\n \"type\": \"string\"\\n },\\n \"date\": {\\n \"title\": \"Date\",\\n \"type\": \"date\",\\n \"format\": \"date-time\"\\n },\\n \"name\": {\\n \"title\": \"Name\",\\n \"type\": \"string\"\\n },\\n \"files\": {\\n \"title\": \"Files\",\\n \"type\": \"array\",\\n \"items\": {\\n \"$ref\": \"#/definitions/File\"\\n }\\n },\\n \"file_ObjectIds\": {\\n \"title\": \"File Objectids\",\\n \"default\": [],\\n \"type\": \"array\",\\n \"items\": {\\n \"type\": \"string\"\\n }\\n },\\n \"chemform\": {\\n \"title\": \"Chemform\",\\n \"example\": [\\n \"Na3P\",\\n \"LiNiO2@C\"\\n ],\\n \"type\": \"string\"\\n },\\n \"synthesis_constituents\": {\\n 
\"title\": \"Synthesis Constituents\",\\n \"default\": [],\\n \"type\": \"array\",\\n \"items\": {\\n \"$ref\": \"#/definitions/Constituent\"\\n }\\n },\\n \"synthesis_description\": {\\n \"title\": \"Synthesis Description\",\\n \"type\": \"string\"\\n }\\n },\\n \"required\": [\\n \"item_id\"\\n ],\\n \"definitions\": {\\n \"KnownType\": {\\n \"title\": \"KnownType\",\\n \"description\": \"An enumeration of the types of entry known by this implementation, should be made dynamic in the future.\",\\n \"enum\": [\\n \"samples\",\\n \"starting_materials\",\\n \"blocks\",\\n \"files\",\\n \"people\",\\n \"collections\"\\n ],\\n \"type\": \"string\"\\n },\\n \"File\": {\\n \"title\": \"File\",\\n \"description\": \"A model for representing a file that has been tracked or uploaded to datalab.\",\\n \"type\": \"object\",\\n \"properties\": {\\n \"revision\": {\\n \"title\": \"Revision\",\\n \"default\": 1,\\n \"type\": \"integer\"\\n },\\n \"revisions\": {\\n \"title\": \"Revisions\",\\n \"type\": \"object\"\\n },\\n \"creator_ids\": {\\n \"title\": \"Creator Ids\",\\n \"default\": [],\\n \"type\": \"array\",\\n \"items\": {\\n \"type\": \"string\"\\n }\\n },\\n \"creators\": {\\n \"title\": \"Creators\",\\n \"type\": \"array\",\\n \"items\": {\\n \"$ref\": \"#/definitions/Person\"\\n }\\n },\\n \"type\": {\\n \"title\": \"Type\",\\n \"default\": \"files\",\\n \"const\": \"files\",\\n \"pattern\": \"^files$\",\\n \"type\": \"string\"\\n },\\n \"immutable_id\": {\\n \"title\": \"Immutable ID\",\\n \"type\": \"string\"\\n },\\n \"last_modified\": {\\n \"title\": \"Last Modified\",\\n \"type\": \"date\",\\n \"format\": \"date-time\"\\n },\\n \"relationships\": {\\n \"title\": \"Relationships\",\\n \"type\": \"array\",\\n \"items\": {\\n \"$ref\": \"#/definitions/TypedRelationship\"\\n }\\n },\\n \"size\": {\\n \"title\": \"Size\",\\n \"description\": \"The size of the file on disk in bytes.\",\\n \"type\": \"integer\"\\n },\\n \"last_modified_remote\": {\\n \"title\": \"Last Modified Remote\",\\n \"description\": \"The last date/time at which the remote file was modified.\",\\n \"type\": \"date\",\\n \"format\": \"date-time\"\\n },\\n \"item_ids\": {\\n \"title\": \"Item Ids\",\\n \"description\": \"A list of item IDs associated with this file.\",\\n \"type\": \"array\",\\n \"items\": {\\n \"type\": \"string\"\\n }\\n },\\n \"blocks\": {\\n \"title\": \"Blocks\",\\n \"description\": \"A list of block IDs associated with this file.\",\\n \"type\": \"array\",\\n \"items\": {\\n \"type\": \"string\"\\n }\\n },\\n \"name\": {\\n \"title\": \"Name\",\\n \"description\": \"The filename on disk.\",\\n \"type\": \"string\"\\n },\\n \"extension\": {\\n \"title\": \"Extension\",\\n \"description\": \"The file extension that the file was uploaded with.\",\\n \"type\": \"string\"\\n },\\n \"original_name\": {\\n \"title\": \"Original Name\",\\n \"description\": \"The raw filename as uploaded.\",\\n \"type\": \"string\"\\n },\\n \"location\": {\\n \"title\": \"Location\",\\n \"description\": \"The location of the file on disk.\",\\n \"type\": \"string\"\\n },\\n \"url_path\": {\\n \"title\": \"Url Path\",\\n \"description\": \"The path to a remote file.\",\\n \"type\": \"string\"\\n },\\n \"source\": {\\n \"title\": \"Source\",\\n \"description\": \"The source of the file, e.g. 
\\'remote\\' or \\'uploaded\\'.\",\\n \"type\": \"string\"\\n },\\n \"time_added\": {\\n \"title\": \"Time Added\",\\n \"description\": \"The timestamp for the original file upload.\",\\n \"type\": \"string\",\\n \"format\": \"date-time\"\\n },\\n \"metadata\": {\\n \"title\": \"Metadata\",\\n \"description\": \"Any additional metadata.\",\\n \"type\": \"object\"\\n },\\n \"representation\": {\\n \"title\": \"Representation\"\\n },\\n \"source_server_name\": {\\n \"title\": \"Source Server Name\",\\n \"description\": \"The server name at which the file is stored.\",\\n \"type\": \"string\"\\n },\\n \"source_path\": {\\n \"title\": \"Source Path\",\\n \"description\": \"The path to the file on the remote resource.\",\\n \"type\": \"string\"\\n },\\n \"is_live\": {\\n \"title\": \"Is Live\",\\n \"description\": \"Whether or not the file should be watched for future updates.\",\\n \"type\": \"boolean\"\\n }\\n },\\n \"required\": [\\n \"item_ids\",\\n \"blocks\",\\n \"name\",\\n \"extension\",\\n \"time_added\",\\n \"is_live\"\\n ]\\n },\\n \"EntryReference\": {\\n \"title\": \"EntryReference\",\\n \"description\": \"A reference to a database entry by ID and type.\\\\n\\\\nCan include additional arbitarary metadata useful for\\\\ninlining the item data.\",\\n \"type\": \"object\",\\n \"properties\": {\\n \"type\": {\\n \"title\": \"Type\",\\n \"type\": \"string\"\\n },\\n \"name\": {\\n \"title\": \"Name\",\\n \"type\": \"string\"\\n },\\n \"immutable_id\": {\\n \"title\": \"Immutable Id\",\\n \"type\": \"string\"\\n },\\n \"item_id\": {\\n \"title\": \"Item Id\",\\n \"minLength\": 1,\\n \"maxLength\": 40,\\n \"pattern\": \"^(?:[a-zA-Z0-9]+|[a-zA-Z0-9][a-zA-Z0-9._-]+[a-zA-Z0-9])$\",\\n \"type\": \"string\"\\n },\\n \"refcode\": {\\n \"title\": \"Refcode\",\\n \"minLength\": 1,\\n \"maxLength\": 40,\\n \"pattern\": \"^[a-z]{2,10}:(?:[a-zA-Z0-9]+|[a-zA-Z0-9][a-zA-Z0-9._-]+[a-zA-Z0-9])$\",\\n \"type\": \"string\"\\n }\\n },\\n \"required\": [\\n \"type\"\\n ]\\n },\\n \"InlineSubstance\": {\\n \"title\": \"InlineSubstance\",\\n \"type\": \"object\",\\n \"properties\": {\\n \"name\": {\\n \"title\": \"Name\",\\n \"type\": \"string\"\\n },\\n \"chemform\": {\\n \"title\": \"Chemform\",\\n \"type\": \"string\"\\n }\\n },\\n \"required\": [\\n \"name\"\\n ]\\n },\\n \"Constituent\": {\\n \"title\": \"Constituent\",\\n \"description\": \"A constituent of a sample.\",\\n \"type\": \"object\",\\n \"properties\": {\\n \"item\": {\\n \"title\": \"Item\",\\n \"anyOf\": [\\n {\\n \"$ref\": \"#/definitions/EntryReference\"\\n },\\n {\\n \"$ref\": \"#/definitions/InlineSubstance\"\\n }\\n ]\\n },\\n \"quantity\": {\\n \"title\": \"Quantity\",\\n \"minimum\": 0,\\n \"type\": \"number\"\\n },\\n \"unit\": {\\n \"title\": \"Unit\",\\n \"default\": \"g\",\\n \"type\": \"string\"\\n }\\n },\\n \"required\": [\\n \"item\",\\n \"quantity\"\\n ]\\n }\\n }\\n}\\n```\\n'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "API_PROMPT" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def load_file_content(file_path):\n", + " with open(file_path, \"r\", encoding=\"utf-8\") as file:\n", + " return file.read()\n", + "\n", + "\n", + "# Load API_PROMPT\n", + "api_prompt_path = \"datalab-api-prompt.md\"\n", + "API_PROMPT_ = load_file_content(api_prompt_path)\n", + "\n", + "# Load SYSTEM_PROMPT\n", + "system_prompt_path = \"system-prompt.md\"\n", + "SYSTEM_PROMPT_ = load_file_content(system_prompt_path)" + 
] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11679" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(API_PROMPT_)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11679" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(API_PROMPT)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "698" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(SYSTEM_PROMPT_)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "698" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(SYSTEM_PROMPT)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'You are a virtual assistant that helps scientists use the data management platform datalab to manage their experimental data, plan experiments, analyse data and plot results. Here is the documentation of the datalab API: {context}. Answer user questions and use the provided documentation to interact with datalab. Your code responses will be output to a Jupyter notebook cell that has access to the datalab API and common python scientific libraries. Ensure any code you provide can be executed with all required imports and variables defined. Structure your answer with a description of the code solution. Then list the imports. Finally list the functioning code block. 
Here is the user question:'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "SYSTEM_PROMPT" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name '__file__' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m file\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Load API_PROMPT\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m api_prompt_path \u001b[38;5;241m=\u001b[39m Path(\u001b[38;5;18;43m__file__\u001b[39;49m)\u001b[38;5;241m.\u001b[39mparent \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprompts\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdatalab-api-prompt.md\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 9\u001b[0m API_PROMPT \u001b[38;5;241m=\u001b[39m load_file_content(api_prompt_path)\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Load SYSTEM_PROMPT\u001b[39;00m\n", + "\u001b[0;31mNameError\u001b[0m: name '__file__' is not defined" + ] + } + ], + "source": [ + "def load_file_content(file_path):\n", + " with open(file_path, \"r\", encoding=\"utf-8\") as file:\n", + " return file.read()\n", + "\n", + "\n", + "# Load API_PROMPT\n", + "api_prompt_path = Path(__file__).parent / \"prompts\" / \"datalab-api-prompt.md\"\n", + "API_PROMPT = load_file_content(api_prompt_path)\n", + "\n", + "# Load SYSTEM_PROMPT\n", + "system_prompt_path = Path(__file__).parent / \"prompts\" / \"system-prompt.md\"\n", + "SYSTEM_PROMPT = load_file_content(system_prompt_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/yellowhammmer/__init__.py b/src/yellowhammmer/__init__.py deleted file mode 100644 index 9485887..0000000 --- a/src/yellowhammmer/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from importlib import metadata - -__version__ = metadata.version("yellowhammer") - -__all__ = ("__version__",) diff --git a/uv.lock b/uv.lock index 5939b9a..d6a0291 100644 --- a/uv.lock +++ b/uv.lock @@ -3053,12 +3053,13 @@ wheels = [ [[package]] name = "yellowhammer" -version = "0.1.dev8+g117550e.d20241008" +version = "0.1.dev12+g7b44707.d20241024" source = { editable = "." 
} dependencies = [ { name = "datalab-api" }, { name = "jupyter-ai" }, { name = "jupyterlab" }, + { name = "langchain" }, { name = "langchain-anthropic" }, { name = "langchain-openai" }, { name = "notebook" }, @@ -3076,6 +3077,7 @@ requires-dist = [ { name = "datalab-api", specifier = ">=0.2.4" }, { name = "jupyter-ai", specifier = ">=2.24.1" }, { name = "jupyterlab", specifier = ">=4.2.5" }, + { name = "langchain", specifier = ">=0.2.16" }, { name = "langchain-anthropic", specifier = ">=0.1.23" }, { name = "langchain-openai", specifier = ">=0.1.25" }, { name = "notebook", specifier = ">=7.2.2" },
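As a quick illustration of the new pieces, here is a minimal sketch of how the structured-output chain added in `src/yellowhammer/llm.py` could be driven directly, outside the `%%llm` magic. It assumes `LLM_PROVIDER` and `LLM_API_KEY` are set in the environment (as in the demo notebooks) and mirrors the dispatch that `DatalabMagics.llm` performs on `FinalResponse`; it is a usage sketch, not part of the changeset.

```python
import os

# Modules added in this change.
from yellowhammer.llm import get_chain, code, ConversationalResponse
from yellowhammer.prompt import API_PROMPT

# Build the chain the same way get_output() in magics.py does;
# LLM_PROVIDER / LLM_API_KEY are assumed to be set in the environment.
chain = get_chain(
    api_provider=os.environ.get("LLM_PROVIDER", "OPENAI"),
    api_key=os.environ["LLM_API_KEY"],
)

# The chain takes the datalab API documentation as "context" and the chat
# history as "messages", and returns a FinalResponse wrapper.
result = chain.invoke(
    {"context": API_PROMPT, "messages": [("human", "What is datalab?")]}
)

response = result.final_output
if isinstance(response, ConversationalResponse):
    # Conversational answers carry a single text field.
    print(response.response)
elif isinstance(response, code):
    # Code solutions arrive as prefix / imports / code, which the magic
    # joins and places into the next notebook cell.
    print(response.prefix)
    print(response.imports + "\n" + response.code)
```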