Skip to content

Commit

Permalink
Merge pull request #84 from STRIDES/updated_tutorials_zy
Browse files Browse the repository at this point in the history
Updated All Azure tutorials
  • Loading branch information
kyleoconnell-NIH authored Mar 22, 2024
2 parents aca5975 + 82ad076 commit 125a1cf
Show file tree
Hide file tree
Showing 16 changed files with 1,444 additions and 2,026 deletions.
1,165 changes: 587 additions & 578 deletions notebooks/GWAS/GWAS_coat_color.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def build_chain():

def run_chain(chain, prompt: str, history=None):
    """Send one question to the conversational chain and return its result.

    Args:
        chain: Object exposing an ``invoke`` method that accepts a dict.
        prompt: The user's question; it is echoed to stdout before the call.
        history: Prior chat turns, passed through as ``chat_history``.
            When omitted, a fresh empty list is used for each call.

    Returns:
        Whatever ``chain.invoke`` returns for the question/history payload.
    """
    # A literal ``[]`` default would be one list shared by every call.
    if history is None:
        history = []
    print(prompt)
    # Only the ``invoke`` form is kept; the earlier direct ``chain(...)``
    # return sat in front of it and made this call unreachable.
    return chain.invoke({"question": prompt, "chat_history": history})

if __name__ == "__main__":
chat_history = []
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from langchain.retrievers import PubMedRetriever
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain_openai import AzureChatOpenAI
import sys
import json
import os
Expand Down Expand Up @@ -66,7 +67,7 @@ def build_chain():

def run_chain(chain, prompt: str, history=None):
    """Forward a single question to the retrieval chain and return the answer.

    Args:
        chain: Object exposing an ``invoke`` method that accepts a dict.
        prompt: The user's question; printed to stdout before it is sent.
        history: Earlier chat turns, forwarded under ``chat_history``.
            A new empty list is created per call when this is omitted.

    Returns:
        The value returned by ``chain.invoke`` for the built payload.
    """
    # Avoid a mutable default: a literal ``[]`` would persist across calls.
    chat_history = [] if history is None else history
    print(prompt)
    # The direct-call form ``chain(...)`` that preceded this line returned
    # first, so the ``invoke`` call never ran; only ``invoke`` is kept.
    payload = {"question": prompt, "chat_history": chat_history}
    return chain.invoke(payload)

if __name__ == "__main__":
chat_history = []
Expand Down
26 changes: 18 additions & 8 deletions notebooks/GenAI/example_scripts/workshop_embedding.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity # must pip install openai[embeddings]
import pandas as pd
import numpy as np
import os
Expand All @@ -13,20 +12,31 @@


# set keys and configure Azure OpenAI
openai.api_type = 'azure'
openai.api_version = os.environ['AZURE_OPENAI_VERSION']
openai.api_base = os.environ['AZURE_OPENAI_ENDPOINT']
openai.api_key = os.environ['AZURE_OPENAI_KEY']
os.environ["AZURE_OPENAI_ENDPOINT"] = "<YOUR OPENAI ENDPOINT>"
os.environ["AZURE_OPENAI_KEY"] = "<YOUR OPENAI KEY>"

# create the Azure OpenAI client that the embedding requests go through

# The class is reached through the ``openai`` module because ``import openai``
# is the only OpenAI import visible at the top of this script.
client = openai.AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_KEY"),
    api_version="2023-05-15",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)


# read the data file to be embedded
df = pd.read_csv('microsoft-earnings.csv')
print(df)

#embeddings function
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding for ``text`` using the module-level ``client``.

    Args:
        text: String to embed; newline characters are replaced with spaces
            before the request is made.
        model: Value passed as ``model`` to the embeddings call.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    flattened = text.replace("\n", " ")
    response = client.embeddings.create(input=[flattened], model=model)
    return response.data[0].embedding


# calculate an embedding for every row of the 'text' column
# (the earlier call passed engine=..., a keyword get_embedding does not accept)
df['embedding'] = df['text'].apply(get_embedding)
# index=False keeps the DataFrame index out of the saved file
df.to_csv('microsoft-earnings_embeddings.csv', index=False)
# NOTE(review): the reason for this pause is not evident from the script — confirm
time.sleep(3)
print(df)

Expand Down
22 changes: 14 additions & 8 deletions notebooks/GenAI/example_scripts/workshop_search.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity # must pip install openai[embeddings]
import pandas as pd
import numpy as np
import os
Expand All @@ -10,14 +9,21 @@

load_dotenv()




# set keys and configure Azure OpenAI
openai.api_type = 'azure'
openai.api_version = os.environ['AZURE_OPENAI_VERSION']
openai.api_base = os.environ['AZURE_OPENAI_ENDPOINT']
openai.api_key = os.environ['AZURE_OPENAI_KEY']
os.environ["AZURE_OPENAI_ENDPOINT"] = "<YOUR OPENAI ENDPOINT>"
os.environ["AZURE_OPENAI_KEY"] = "<YOUR OPENAI KEY>"

# create the Azure OpenAI client; the key and endpoint are read from the environment variables assigned above
# NOTE(review): the import that brings AzureOpenAI into scope is not visible in this excerpt — confirm it exists

client = AzureOpenAI(
api_key=os.getenv("AZURE_OPENAI_KEY"),
api_version="2023-05-15",
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
)

#create cosine function
def cosine_similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    The value is the dot product of the two inputs divided by the product
    of their Euclidean norms.
    """
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# read in the embeddings .csv
# convert elements in 'embedding' column back to numpy array
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@
"id": "19a076aa-d2c1-4709-aec2-9532e6457e48",
"metadata": {},
"source": [
"Create your Azure AI Search service."
"Create your Azure AI Search service. We will be using the free tier that holds 50MB of memory and allows you to create up to 3 indexes."
]
},
{
Expand Down Expand Up @@ -759,9 +759,7 @@
" {\"role\": \"user\", \"content\": \"Context: \"+ context + \"\\n\\n Query: \" + query}\n",
" ],\n",
" #max_tokens=100,\n",
" temperature=1,\n",
" top_p=1,\n",
" n=1\n",
" temperature=0,\n",
")"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@
"metadata": {},
"source": [
"## Conclusion\n",
"And that is it! You successfully created a simple chat bot that runs queries against structured data! This is a complex problem and there are a lot of good blogs out there that describe more complex architectures. We recommend you do some investigation and see if you can come up with an even better solution for your use case! \n",
"And that is it! You successfully created a simple chat bot that runs queries against structured data! This is a complex problem and there are a lot of good blogs out there that describe more complex architectures. We recommend you do some investigating and see if you can come up with an even better solution for your use case! \n",
"\n",
"Key skills you learned were to: \n",
"+ Create embeddings and a vector store using Azure AI Search in the console\n",
Expand Down
36 changes: 26 additions & 10 deletions notebooks/GenAI/notebooks/AzureAIStudio_langchain.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
"outputs": [],
"source": [
"# pip install other packages as needed\n",
"! pip install 'langchain' 'langchain-core' 'langchain-community' 'langchain-openai'"
"! pip install 'langchain' 'langchain-core' 'langchain-community' 'langchain-openai' 'bs4'"
]
},
{
Expand Down Expand Up @@ -122,10 +122,28 @@
"source": [
"import os\n",
"from langchain_core.messages import HumanMessage\n",
"from langchain.chains.summarize import load_summarize_chain\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_community.document_loaders import WebBaseLoader\n",
"from langchain_openai import AzureChatOpenAI\n",
"from langchain_openai import AzureOpenAIEmbeddings\n"
"from langchain_openai import AzureOpenAIEmbeddings"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Tip:** If you receive an error that bs4 was not loaded, you can manually load it using the cell below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#uncomment if necessary\n",
"#from bs4 import BeautifulSoup"
]
},
{
Expand Down Expand Up @@ -161,10 +179,13 @@
},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"<Your_Azure_Endpoint>\"\n",
"os.environ[\"AZURE_OPENAI_API_KEY\"] = \"<Your_OpenAI_API_Key>\"\n",
"\n",
"llm = AzureChatOpenAI(\n",
" openai_api_version=\"<Your_OpenAI_API_Version>\",\n",
" azure_endpoint = \"<Your_Azure_Endpoint>\",\n",
" openai_api_key=\"<Your_OpenAI_API_Key>\",\n",
" openai_api_version=\"<Your_OpenAI_Model_Version>\",\n",
" azure_deployment=\"<Your_OpenAI_API_Name>\",\n",
")"
]
},
Expand Down Expand Up @@ -310,11 +331,6 @@
"## Clean up\n",
"Make sure to shut down your Azure ML compute and if desired you can delete your deployed model on Azure AI Studio."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
],
"metadata": {
Expand Down
116 changes: 81 additions & 35 deletions notebooks/GenAI/notebooks/AzureAIStudio_sql_chatbot.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -56,29 +56,6 @@
"- Creating a chatbot and utilizing langchains SQL agent to connect the bot to a database"
]
},
{
"cell_type": "markdown",
"id": "3d2aa60a-cf87-4083-80fa-e9dc9179dcc8",
"metadata": {},
"source": [
"## Table of Contents"
]
},
{
"cell_type": "markdown",
"id": "3bad1638-6fcd-4299-b714-48c7cfd865ff",
"metadata": {},
"source": [
"- [Summary](#summary)\n",
"- [Install Packages](#packages)\n",
"- [Create Azure SQL Database](#azure_db)\n",
"- [Create Azure SQL Table](#azure_table)\n",
"- [Submitting a Query](#query)\n",
"- [Setting up a Chatbot](#chatbot)\n",
"- [Conclusion](#conclusion)\n",
"- [Cleaning up Resources](#cleanup)"
]
},
{
"cell_type": "markdown",
"id": "3d98bdb4",
Expand Down Expand Up @@ -125,7 +102,76 @@
"id": "64e959a6-4515-49cd-bdf8-b0da0544c10a",
"metadata": {},
"source": [
"Follow the instructions [here](https://learn.microsoft.com/en-us/azure/azure-sql/database/single-database-create-quickstart?view=azuresql&tabs=azure-portal) to create a single database in Azure SQL Database. Note that for this tutorials database the field name **Use existing data** was set to **None**."
"Follow the instructions [here](https://learn.microsoft.com/en-us/azure/azure-sql/database/single-database-create-quickstart?view=azuresql&tabs=azure-portal) to create a single database in Azure SQL Database. \n",
"\n",
"**Note:** For this tutorials database the field name **'Use existing data'** was set to **None** and the **location** to **US East 2**."
]
},
{
"cell_type": "markdown",
"id": "4fdf2998-f598-4814-a580-62db432c2d61",
"metadata": {},
"source": [
"### Install SQL Driver <a id=\"sql_driver\"></a>"
]
},
{
"cell_type": "markdown",
"id": "e07ed164-d94a-4cac-bb37-b082b0fa9285",
"metadata": {},
"source": [
"Download the drivers package."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fce56601-fb8d-495f-b0fd-9e58cac7d0c9",
"metadata": {},
"outputs": [],
"source": [
"! curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list"
]
},
{
"cell_type": "markdown",
"id": "32e91b50-d685-4a9a-b593-fcd1f1eb2f74",
"metadata": {},
"source": [
"Run the following in your terminal. If prompted to accept the license terms of installing the driver use your arrow keys to select **'yes'** then press **enter**."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2318532f-b884-4cf2-a7f1-f9a83b2a4aa9",
"metadata": {},
"outputs": [],
"source": [
"#RUN IN TERMINAL\n",
"sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0677acc-d8a1-4683-ba34-d12d517369cb",
"metadata": {},
"outputs": [],
"source": [
"!echo 'export PATH=\"$PATH:/opt/mssql-tools18/bin\"' >> ~/.bashrc"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6114fcf3-af53-4a62-b9ac-cd017308ff7b",
"metadata": {},
"outputs": [],
"source": [
"!source ~/.bashrc"
]
},
{
Expand All @@ -144,6 +190,14 @@
"Now that we have our SQL database we will connect to it using the python package `pyodbc` which will allow us to commit changes to our database and query tables."
]
},
{
"cell_type": "markdown",
"id": "b13c9608-32a7-42ba-a2a9-6b249949d7c8",
"metadata": {},
"source": [
"**Tip:** If you run into firewall errors, go to **Azure SQL** in the console, click on your SQL server name, then on the left side of the screen go to **Networking** under Security. Make sure your IP address is saved under Firewall rules and that **'Allow Azure services and resources to access this server'** is checked."
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -154,12 +208,12 @@
"import pyodbc\n",
"\n",
"server_name = \"<ENTER_DB_SERVER_NAME>\"\n",
"user = \"<ENTER_DB_USERNAME>\"\n",
"username = \"<ENTER_DB_USERNAME>\"\n",
"password = \"<ENTER_DB_PASSWORD>\"\n",
"database = \"<ENTER_DB_NAME>\"\n",
"driver= '{ODBC Driver 18 for SQL Server}'\n",
"\n",
"conn = pyodbc.connect('DRIVER='+driver+';PORT=1433;SERVER='+server+'.database.windows.net/;PORT=1443;DATABASE='+database+';UID='+user+';PWD='+ password)"
"conn = pyodbc.connect(f'DRIVER={driver};PORT=1433;SERVER={server_name}.database.windows.net;DATABASE={database};UID={username};PWD={password}')"
]
},
{
Expand Down Expand Up @@ -418,7 +472,7 @@
},
"outputs": [],
"source": [
"py_connectionString=f\"mssql+pyodbc://{user}:{password}@{server_name}.database.windows.net/{database}?driver={driver}\"\n",
"py_connectionString=f\"mssql+pyodbc://{username}:{password}@{server_name}.database.windows.net/{database}?driver={driver}\"\n",
"db_engine = create_engine(py_connectionString)"
]
},
Expand Down Expand Up @@ -633,14 +687,6 @@
"subscription_id='<Enter_Subscription_ID>'\n",
"!az sql server delete --name {server_name} --resource-group {resource_group} --subscription {subscription_id} -y"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f91700b-bfe3-452c-b5a0-0b8fed115fd8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
Loading

0 comments on commit 125a1cf

Please sign in to comment.