-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #83 from STRIDES/add-extramural-signup
added back tutorials dir to fix broken links in ms email flows
- Loading branch information
Showing
38 changed files
with
8,999 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) Microsoft Corporation | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
79 changes: 79 additions & 0 deletions
79
tutorials/notebooks/GenAI/embedding_demos/acs_embeddings.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
from langchain.retrievers import AzureCognitiveSearchRetriever | ||
from langchain.embeddings import OpenAIEmbeddings | ||
from langchain.vectorstores import FAISS | ||
from langchain.chains import RetrievalQA | ||
from langchain.chat_models import AzureChatOpenAI | ||
from PIL import Image | ||
import os | ||
import streamlit as st | ||
from dotenv import load_dotenv | ||
|
||
# load in .env variables | ||
load_dotenv() | ||
|
||
def config_keys():
    """Export the Azure settings under the names the rest of the script uses.

    The Azure OpenAI values are copied to their ``OPENAI_*`` counterparts and
    the Azure Cognitive Search values are checked for presence (they already
    live under their final names).

    Raises:
        RuntimeError: if any required variable is unset. Assigning ``None``
            into ``os.environ`` would otherwise fail with an opaque
            ``TypeError`` that does not say which variable is missing.
    """
    # destination name -> source name (as loaded from .env / the environment)
    sources = {
        'OPENAI_API_VERSION': 'AZURE_OPENAI_VERSION',
        'OPENAI_API_KEY': 'AZURE_OPENAI_KEY',
        'OPENAI_API_BASE': 'AZURE_OPENAI_ENDPOINT',
        'OPENAI_EMBEDDING_DEPLOYMENT_NAME': 'AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME',
        'AZURE_COGNITIVE_SEARCH_SERVICE_NAME': 'AZURE_COGNITIVE_SEARCH_SERVICE_NAME',
        'AZURE_COGNITIVE_SEARCH_API_KEY': 'AZURE_COGNITIVE_SEARCH_API_KEY',
        'AZURE_COGNITIVE_SEARCH_INDEX_NAME': 'AZURE_COGNITIVE_SEARCH_INDEX_NAME',
    }

    # Validate everything first so a failure leaves os.environ untouched.
    missing = [name for name in sources.values() if os.getenv(name) is None]
    if missing:
        raise RuntimeError(
            'Missing required environment variables: ' + ', '.join(missing)
        )

    for target, source in sources.items():
        os.environ[target] = os.environ[source]
|
||
|
||
def main():
    """Render the demo page and answer the current query, if any.

    For a non-empty query the top document is fetched from Azure Cognitive
    Search, embedded into a throwaway FAISS index, and passed to the chat
    model through a "stuff" RetrievalQA chain. Past questions and answers are
    kept in ``st.session_state`` and listed newest first.
    """
    # Streamlit config
    st.title("Demo - Azure OpenAI & Cognitive Search Embeddings")
    image = Image.open('image_logo2.png')
    st.image(image, caption='')
    st.write(
        'This program is designed to chat over your files in Azure Cognitive Search. '
        'Be specific and clear with the questions you ask. '
        'Welcome to CHATGPT over your own data !!'
    )
    if 'generated' not in st.session_state:
        st.session_state.generated = []
    if 'past' not in st.session_state:
        st.session_state.past = []

    # create your LLM and embeddings, configuring 'azure' in the
    # openai_api_type parameter
    llm = AzureChatOpenAI(
        deployment_name="gpt-35-turbo",
        openai_api_type="azure",
        model="gpt-35-turbo",
        temperature=0.7,
        max_tokens=200,
    )
    embeddings = OpenAIEmbeddings(chunk_size=1, openai_api_type="azure")

    # ask for the user query
    query = st.text_input("Enter a search query: ", key='search_term', placeholder="")

    if query:
        # set up Azure Cognitive Search to retrieve documents
        # top_k = 1: we only want the first related doc
        retriever = AzureCognitiveSearchRetriever(content_key="content", top_k=1)
        docs = retriever.get_relevant_documents(query)

        if not docs:
            # Nothing to index or to answer from; say so instead of handing
            # an empty document list to FAISS.
            st.warning('No matching document was found in Azure Cognitive Search for this query.')
        else:
            # embed the retrieved document into a FAISS vector store and let
            # the chain feed it to the LLM
            db = FAISS.from_documents(documents=docs, embedding=embeddings)
            chain = RetrievalQA.from_chain_type(
                llm=llm, retriever=db.as_retriever(), chain_type="stuff"
            )
            response = chain.run(query)

            # Record question and answer together, and only after the chain
            # succeeded, so the two history lists can never drift apart.
            st.session_state.past.append(query)
            st.session_state.generated.append(response)

        with st.expander('Vector Search'):
            exchanges = list(zip(st.session_state.past, st.session_state.generated))
            for question, reply in reversed(exchanges):
                st.info(question)
                st.success(reply)
|
||
if __name__ == '__main__': | ||
config_keys() | ||
main() |
102 changes: 102 additions & 0 deletions
102
tutorials/notebooks/GenAI/embedding_demos/aoai_embeddings.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
import openai | ||
from openai.embeddings_utils import get_embedding, cosine_similarity # must pip install openai[embeddings] | ||
import pandas as pd | ||
import numpy as np | ||
import os | ||
import streamlit as st | ||
import time | ||
from PIL import Image | ||
from dotenv import load_dotenv | ||
|
||
# load in .env variables | ||
load_dotenv() | ||
|
||
# configure azure openai keys | ||
openai.api_type = 'azure' | ||
openai.api_version = os.environ['AZURE_OPENAI_VERSION'] | ||
openai.api_base = os.environ['AZURE_OPENAI_ENDPOINT'] | ||
openai.api_key = os.environ['AZURE_OPENAI_KEY'] | ||
|
||
def embedding_create():
    """Embed the ``text`` column of a user-chosen CSV and save the result.

    Reads the CSV, computes one ``text-embedding-ada-002`` embedding per row
    into a new ``embedding`` column, and writes the table to
    ``microsoft-earnings_embeddings.csv`` in the workshop directory. Problems
    with the input file are reported on the page instead of crashing the app.
    """
    # acquire the filename to be embedded
    st.subheader("Vector Creation")
    st.write(
        'This program is designed to embed your pre-chunked .csv file. '
        'By accomplishing this task, you will be able to chat over all content in your .csv via vector searching. '
        'Just enter the file and the program will take care of the rest (specify file path if not in this directory). '
        'Welcome to CHATGPT over your own data !!'
    )
    filename = st.text_input("Enter a file: ", key='filename', value="")

    # start the embeddings process only once a filename is provided
    if not filename:
        return

    # NOTE(review): the workshop directory is hard-coded; the search bot in
    # this file reads the output from the same place, so it is kept as is.
    data_dir = 'C:\\src\\AzureOpenAI_Gov_Workshop\\'

    # Honour an explicit absolute path, as the on-page text promises; bare
    # file names are still resolved against the workshop directory.
    source_path = filename if os.path.isabs(filename) else data_dir + filename

    try:
        df = pd.read_csv(source_path)
    except (OSError, ValueError) as exc:
        # OSError: missing/unreadable file; ValueError: empty or unparsable CSV
        st.error(f'Could not read "{source_path}": {exc}')
        return
    st.write(df)

    if 'text' not in df.columns:
        st.error('The file must contain a "text" column to embed.')
        return

    # calculate the embeddings, one API call per row
    df['embedding'] = df['text'].apply(
        lambda chunk: get_embedding(chunk, engine='text-embedding-ada-002')
    )
    df.to_csv(data_dir + 'microsoft-earnings_embeddings.csv')

    st.subheader("Post Embedding")
    st.success('Embeddings Created Successfully!!')
    st.write(df)
|
||
|
||
def embeddings_search():
    """Answer a query with the most similar row of the stored embeddings CSV.

    Loads ``microsoft-earnings_embeddings.csv``, embeds the query with
    ``text-embedding-ada-002``, ranks rows by cosine similarity and shows the
    best match with its score. Earlier queries, answers and scores are kept in
    ``st.session_state`` and listed newest first.
    """
    import ast  # local import: only needed to parse the stored vectors

    # Streamlit configuration
    st.subheader("Vector Search")
    st.write(
        'This program is designed to chat over your vector stored (embedding) .csv file. '
        'This Chat Bot works alongside the "Embeddings Bot" Chat Bot. '
        'Be specific with the information you want to obtain over your data. '
        'Welcome to CHATGPT over your own data !!'
    )
    for key in ('answer', 'score', 'past'):
        if key not in st.session_state:
            st.session_state[key] = []

    # read in the embeddings .csv and convert the 'embedding' column back to
    # numpy arrays
    embeddings_path = 'C:\\src\\AzureOpenAI_Gov_Workshop\\microsoft-earnings_embeddings.csv'
    try:
        df = pd.read_csv(embeddings_path)
        # SECURITY: this used to be eval(), which executes whatever the CSV
        # contains. literal_eval only accepts Python literals, which is all a
        # stored list of floats needs.
        df['embedding'] = df['embedding'].apply(ast.literal_eval).apply(np.array)
    except (OSError, KeyError, ValueError, SyntaxError) as exc:
        st.error(
            f'Could not load embeddings from "{embeddings_path}": {exc}. '
            'Run the Embeddings Bot first.'
        )
        return

    # calculate the user query embedding
    search_term = st.text_input("Enter a search query: ", key='search_term', placeholder="")
    if not search_term:
        return

    if df.empty:
        st.warning('The embeddings file contains no rows to search.')
    else:
        search_term_vector = get_embedding(search_term, engine='text-embedding-ada-002')

        # find similarity between the query and every stored vector
        df['similarities'] = df['embedding'].apply(
            lambda vector: cosine_similarity(vector, search_term_vector)
        )
        best = df.sort_values("similarities", ascending=False).iloc[0]

        # Record the three history entries together, after the lookup has
        # succeeded, so the lists stay aligned.
        st.session_state.past.append(search_term)
        st.session_state.answer.append(best['text'])
        st.session_state.score.append(best['similarities'])

    # output the responses, newest first
    with st.expander('Vector Search'):
        history = list(zip(
            st.session_state.past,
            st.session_state.answer,
            st.session_state.score,
        ))
        for question, answer, score in reversed(history):
            st.info(question)
            st.write(answer)
            st.write('Score: ', score)
|
||
|
||
def main():
    """Draw the page header and sidebar, then run the bot the user picked."""
    # Streamlit config
    st.title("Demo-Azure OpenAI Embeddings")
    logo = Image.open('image_logo2.png')
    st.image(logo, caption='')

    st.sidebar.title('Chat Bot Type Selection')
    bot_choices = ['Embeddings Bot', 'Search Bot']
    selected_bot = st.sidebar.selectbox(
        'Choose between Embeddings Bot or Search Bot', bot_choices
    )

    # The two labels are mutually exclusive, so the branch order is free.
    if selected_bot == 'Search Bot':
        embeddings_search()
    elif selected_bot == 'Embeddings Bot':
        embedding_create()
|
||
if __name__ == '__main__': | ||
main() |
99 changes: 99 additions & 0 deletions
99
tutorials/notebooks/GenAI/example_scripts/example_azureaisearch_openaichat_zeroshot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
from langchain.chains import ConversationalRetrievalChain | ||
from langchain.prompts import PromptTemplate | ||
from langchain_community.retrievers import AzureCognitiveSearchRetriever | ||
from langchain_openai import AzureChatOpenAI | ||
import sys | ||
import json | ||
import os | ||
|
||
|
||
class bcolors:
    """ANSI escape sequences used to colour the terminal output."""

    # reset: switches every attribute back off
    ENDC = '\033[0m'

    # text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # bright foreground colours, named after what they are used for
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'
    OKCYAN = '\033[96m'
|
||
MAX_HISTORY_LENGTH = 1 | ||
|
||
def build_chain():
    """Build the conversational retrieval chain over Azure AI Search.

    Returns:
        A ``ConversationalRetrievalChain`` that retrieves the top two
        documents, answers with the Azure chat model, and returns the source
        documents alongside the answer.

    Raises:
        KeyError: if ``AZURE_OPENAI_DEPLOYMENT_NAME`` is not set.
    """
    import warnings

    # The original code called os.getenv() on these names and threw the
    # results away, which checks nothing. No credentials are passed to the
    # constructors below, so they are presumably picked up from the
    # environment; warn (rather than fail) so alternative auth setups still
    # work, while a forgotten variable is easy to spot.
    expected = (
        "AZURE_OPENAI_API_KEY",
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_COGNITIVE_SEARCH_SERVICE_NAME",
        "AZURE_COGNITIVE_SEARCH_INDEX_NAME",
        "AZURE_COGNITIVE_SEARCH_API_KEY",
    )
    unset = [name for name in expected if not os.getenv(name)]
    if unset:
        warnings.warn(
            "Environment variables not set: " + ", ".join(unset),
            RuntimeWarning,
            stacklevel=2,
        )

    deployment_name = os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"]

    llm = AzureChatOpenAI(
        openai_api_version="2023-05-15",
        azure_deployment=deployment_name,
        #max_tokens = 3000
    )

    retriever = AzureCognitiveSearchRetriever(content_key="content", top_k=2)

    # Prompt text is sent to the model verbatim; it is left untouched.
    prompt_template = """
Instructions:
I will provide you question and scientific documents you will answer my question with information from documents in English, and you will create a cumulative summary that should be concise and should accurately.
You should not include any personal opinions or interpretations in your summary, but rather focus on objectively presenting the information from the papers.
Your summary should be written in your own words and ensure that your summary is clear, and concise.
{question} Answer "don't know" if not present in the documents.
{context}
Solution:"""

    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"],
    )

    # Prompt used to fold the chat history and the new question into one
    # standalone question before retrieval.
    condense_qa_template = """
Chat History:
{chat_history}
Here is a new question for you: {question}
Standalone question:"""
    standalone_question_prompt = PromptTemplate.from_template(condense_qa_template)

    qa = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        condense_question_prompt=standalone_question_prompt,
        return_source_documents=True,
        combine_docs_chain_kwargs={"prompt": PROMPT},
    )
    return qa
|
||
def run_chain(chain, prompt: str, history=None):
    """Echo the prompt, then query the chain with the given chat history.

    Args:
        chain: callable taking a dict with ``question`` and ``chat_history``.
        prompt: the user's question.
        history: prior ``(question, answer)`` pairs; a fresh empty list is
            used when omitted, so calls never share one default list.

    Returns:
        Whatever the chain returns for this input.
    """
    if history is None:
        history = []
    print(prompt)
    return chain({"question": prompt, "chat_history": history})
|
||
if __name__ == "__main__":
    # Interactive loop: one question per stdin line until EOF (CTRL-D).
    NEW_SEARCH_PREFIX = "new search:"
    prompt_hint = bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC

    chat_history = []
    qa = build_chain()
    print(bcolors.OKBLUE + "Hello! How can I help you?" + bcolors.ENDC)
    print(prompt_hint)
    print(">", end=" ", flush=True)
    for line in sys.stdin:
        # Drop the trailing newline before the text reaches the chain.
        query = line.strip()
        if query.lower().startswith(NEW_SEARCH_PREFIX):
            # Remove only the leading prefix and keep the user's original
            # casing (the old code lowercased the whole question and removed
            # every occurrence of the prefix).
            query = query[len(NEW_SEARCH_PREFIX):].strip()
            chat_history = []

        if not query:
            # Nothing to ask (blank line or a bare "new search:"): re-prompt.
            print(prompt_hint)
            print(">", end=" ", flush=True)
            continue

        # Keep at most MAX_HISTORY_LENGTH exchanges, dropping the oldest.
        if len(chat_history) >= MAX_HISTORY_LENGTH:
            chat_history.pop(0)

        result = run_chain(qa, query, chat_history)
        chat_history.append((query, result["answer"]))
        print(bcolors.OKGREEN + result['answer'] + bcolors.ENDC)
        if 'source_documents' in result:
            print(bcolors.OKGREEN + 'Sources:')
            for d in result['source_documents']:
                # The index is expected to store a JSON string under
                # 'metadata' with a 'source' field; tolerate documents that
                # do not follow that layout instead of ending the session.
                try:
                    dict_meta = json.loads(d.metadata['metadata'])
                    print(dict_meta['source'])
                except (KeyError, TypeError, ValueError):
                    print('(source unavailable)')
            print(bcolors.ENDC)
        print(prompt_hint)
        print(">", end=" ", flush=True)
    print(bcolors.OKBLUE + "Bye" + bcolors.ENDC)
93 changes: 93 additions & 0 deletions
93
tutorials/notebooks/GenAI/example_scripts/example_langchain_openaichat_zeroshot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
from langchain.retrievers import PubMedRetriever | ||
from langchain.chains import ConversationalRetrievalChain | ||
from langchain.prompts import PromptTemplate | ||
import sys | ||
import json | ||
import os | ||
|
||
|
||
class bcolors:
    """Terminal colour and style codes (ANSI escape sequences)."""

    # bright foreground colours
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'
    OKCYAN = '\033[96m'

    # text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # reset back to the terminal default
    ENDC = '\033[0m'
|
||
MAX_HISTORY_LENGTH = 1 | ||
|
||
def build_chain():
    """Build the conversational retrieval chain over PubMed.

    Returns:
        A ``ConversationalRetrievalChain`` that retrieves papers with
        ``PubMedRetriever``, summarises them with the Azure chat model, and
        returns the source documents alongside the answer.

    Raises:
        KeyError: if ``AZURE_OPENAI_DEPLOYMENT_NAME`` is not set.
    """
    import warnings

    # The original code called os.getenv() on these names and threw the
    # results away, which checks nothing. No credentials are passed to the
    # model constructor below, so they are presumably picked up from the
    # environment; warn (rather than fail) so alternative auth setups still
    # work, while a forgotten variable is easy to spot.
    expected = ("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT")
    unset = [name for name in expected if not os.getenv(name)]
    if unset:
        warnings.warn(
            "Environment variables not set: " + ", ".join(unset),
            RuntimeWarning,
            stacklevel=2,
        )

    deployment_name = os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"]

    # This script never imported AzureChatOpenAI, so the call below raised
    # NameError. Imported here, after the configuration checks, from the same
    # package the sibling Azure AI Search example uses.
    from langchain_openai import AzureChatOpenAI

    llm = AzureChatOpenAI(
        openai_api_version="2023-05-15",
        azure_deployment=deployment_name,
        #max_tokens = 3000
    )

    retriever = PubMedRetriever()

    # Prompt text is sent to the model verbatim; it is left untouched.
    prompt_template = """
Ignore everything before.
Instructions:
I will provide you with research papers on a specific topic in English, and you will create a cumulative summary.
The summary should be concise and should accurately and objectively communicate the takeaway of the papers related to the topic.
You should not include any personal opinions or interpretations in your summary, but rather focus on objectively presenting the information from the papers.
Your summary should be written in your own words and ensure that your summary is clear, concise, and accurately reflects the content of the original papers. First, provide a concise summary then citations at the end.
{question} Answer "don't know" if not present in the document.
{context}
Solution:"""

    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"],
    )

    # Prompt used to fold the chat history and the new question into one
    # standalone question before retrieval.
    condense_qa_template = """
Chat History:
{chat_history}
Here is a new question for you: {question}
Standalone question:"""
    standalone_question_prompt = PromptTemplate.from_template(condense_qa_template)

    qa = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        condense_question_prompt=standalone_question_prompt,
        return_source_documents=True,
        combine_docs_chain_kwargs={"prompt": PROMPT},
    )
    return qa
|
||
def run_chain(chain, prompt: str, history=None):
    """Echo the prompt, then query the chain with the given chat history.

    Args:
        chain: callable taking a dict with ``question`` and ``chat_history``.
        prompt: the user's question.
        history: prior ``(question, answer)`` pairs; a fresh empty list is
            used when omitted, so calls never share one default list.

    Returns:
        Whatever the chain returns for this input.
    """
    if history is None:
        history = []
    print(prompt)
    return chain({"question": prompt, "chat_history": history})
|
||
if __name__ == "__main__":
    # Interactive loop: one question per stdin line until EOF (CTRL-D).
    NEW_SEARCH_PREFIX = "new search:"
    prompt_hint = bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC

    chat_history = []
    qa = build_chain()
    print(bcolors.OKBLUE + "Hello! How can I help you?" + bcolors.ENDC)
    print(prompt_hint)
    print(">", end=" ", flush=True)
    for line in sys.stdin:
        # Drop the trailing newline before the text reaches the chain.
        query = line.strip()
        if query.lower().startswith(NEW_SEARCH_PREFIX):
            # Remove only the leading prefix and keep the user's original
            # casing (the old code lowercased the whole question and removed
            # every occurrence of the prefix).
            query = query[len(NEW_SEARCH_PREFIX):].strip()
            chat_history = []

        if not query:
            # Nothing to ask (blank line or a bare "new search:"): re-prompt.
            print(prompt_hint)
            print(">", end=" ", flush=True)
            continue

        # Keep at most MAX_HISTORY_LENGTH exchanges, dropping the oldest.
        if len(chat_history) >= MAX_HISTORY_LENGTH:
            chat_history.pop(0)

        result = run_chain(qa, query, chat_history)
        chat_history.append((query, result["answer"]))
        print(bcolors.OKGREEN + result['answer'] + bcolors.ENDC)
        if 'source_documents' in result:
            print(bcolors.OKGREEN + 'Sources:')
            for ref in result["source_documents"]:
                # Tolerate a document without a 'uid' entry instead of
                # ending the session with a KeyError.
                uid = ref.metadata.get("uid")
                print("PubMed UID: " + (str(uid) if uid is not None else "(unavailable)"))
            print(bcolors.ENDC)
        print(prompt_hint)
        print(">", end=" ", flush=True)
    print(bcolors.OKBLUE + "Bye" + bcolors.ENDC)
Oops, something went wrong.