Skip to content

Commit

Permalink
Updated streamlit embedding demo
Browse files Browse the repository at this point in the history
  • Loading branch information
cjackson202 committed Nov 5, 2024
1 parent 39c233e commit cc4b935
Show file tree
Hide file tree
Showing 9 changed files with 422 additions and 188 deletions.
4 changes: 4 additions & 0 deletions notebooks/GenAI/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
__pycache__
.venv
.env
microsoft-earnings_embeddings.csv
61 changes: 61 additions & 0 deletions notebooks/GenAI/embedding_demos/Demo_Suite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import streamlit as st
from styling import global_page_style

def main():
    """Render the landing page of the Azure OpenAI RAG Demo Suite.

    Draws a centered logo, the page title, and one overview section per
    demo, with horizontal rules between the sections.
    """
    # The logo is emitted as raw HTML so it can be centered on the page.
    logo_url = "https://upload.wikimedia.org/wikipedia/commons/4/44/Microsoft_logo.svg"
    logo_width = 60
    logo_html = (
        '<div style="text-align: center;">'
        f'<img src="{logo_url}" width="{logo_width}">'
        '</div>'
    )
    st.markdown(logo_html, unsafe_allow_html=True)

    # Page heading and introduction.
    st.title("Azure OpenAI RAG Demo Suite")
    st.markdown("### Demo Overviews")
    st.write(
        "\n"
        "Welcome to the Azure OpenAI RAG Demo Suite. On the left side-panel, you will find various demonstrations that showcase the capabilities of Azure OpenAI with a Streamlit frontend. Each demonstration is described in detail below, highlighting their unique features and functionalities.\n"
    )

    divider = "---"
    st.markdown(divider)

    # Overview of the "AI Search Query" demo.
    st.markdown("### Chat with Your Data using Azure OpenAI API and AI Search Index (AI Search Query)")
    st.write(
        "\n"
        "This demo allows users to interact with data stored in their Azure AI Search Index using a combination of semantic and vector search methods.\n"
    )
    st.write(
        "\n"
        "- **Semantic Search**: Understands the meaning and context of your queries to deliver more relevant results.\n"
        "- **Vector Search**: Utilizes numerical representations of text to find similar content based on cosine similarity.\n"
    )
    # Prerequisite reminder: the search index has to exist before the demo is used.
    st.write(
        "\n"
        "**Note**: Users must have created the Azure AI search index already as shown here: [Upload your own data and query over it](https://github.com/STRIDES/NIHCloudLabAzure/blob/main/notebooks/GenAI/Azure_Open_AI_README.md)\n"
    )

    st.markdown(divider)

    # Overview of the "AOAI Embeddings" demo.
    st.markdown("### Generate & Search with Azure OpenAI Embeddings (AOAI Embeddings)")
    st.write(
        "\n"
        "This demo enables users to generate embeddings from a pre-chunked CSV file and perform searches over the content using vector search.\n"
    )
    st.write(
        "\n"
        '- **Vectorize**: Creates embeddings based on the "microsoft-earnings.csv" file provided in this directory. The embeddings are generated from the "text" column. The CSV file is pre-chunked, meaning the text has already been split and prepared for embedding generation. A new CSV file will be created to store all generated embeddings, forming your vector store.\n'
        "- **Retrieve**: Generates embeddings based on user queries. The query embedding is then used to search for the most similar document within the vector store using cosine similarity.\n"
    )
    st.write(
        "\n"
        "Example questions a user can ask about the microsoft-earnings.csv:\n"
        "- What was said about the budget?\n"
        "- How many people utilize GitHub to build software?\n"
        "- How many points did Microsoft Cloud gross margin percentage increase by?\n"
        "- What are the expectations for the Q2 cash flow?\n"
    )


# Script entry point: apply the styling imported from the local `styling`
# module, then render the landing page.
if __name__ == "__main__":
    global_page_style()
    main()
79 changes: 0 additions & 79 deletions notebooks/GenAI/embedding_demos/acs_embeddings.py

This file was deleted.

102 changes: 0 additions & 102 deletions notebooks/GenAI/embedding_demos/aoai_embeddings.py

This file was deleted.

106 changes: 106 additions & 0 deletions notebooks/GenAI/embedding_demos/pages/AI_Search_Query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
from openai import AzureOpenAI
import os
import streamlit as st
from dotenv import load_dotenv
from styling import global_page_style

# Load variables from a .env file (python-dotenv) so the os.getenv() calls
# below can see them.
load_dotenv()

# Configure Azure OpenAI params, using an Azure OpenAI account with a deployment of an embedding model.
# NOTE: os.getenv() returns None for any variable that is not set, so the `str`
# annotations below describe the expected type only; nothing here validates
# that a value is actually present.
azure_endpoint: str = os.getenv('AZURE_OPENAI_BASE')
azure_openai_api_key: str = os.getenv('AZURE_OPENAI_KEY')
azure_openai_api_version: str = os.getenv('AZURE_OPENAI_VERSION')
# Deployment name handed to the search data source as its embedding dependency.
azure_ada_deployment: str = os.getenv('AZURE_EMBEDDINGS_DEPLOYMENT')
# Deployment name used as the `model` of the chat completion request.
azure_gpt_deployment: str = os.getenv('AZURE_GPT_DEPLOYMENT')

# Configure Azure AI Search params
# Both values are forwarded in the `azure_search` data source of the request.
search_endpoint: str = os.getenv('AZURE_SEARCH_ENDPOINT')
search_key: str = os.getenv('AZURE_SEARCH_ADMIN_KEY')

def chat_on_your_data(query, search_index, messages):
    """Answer ``query`` from an Azure AI Search index and render the exchange.

    The query is appended to ``messages`` and echoed in a user chat bubble.
    A chat completion is then requested from the Azure OpenAI deployment with
    the search index attached as an ``azure_search`` data source (hybrid
    ``vector_simple_hybrid`` query), and the reply is appended to
    ``messages`` and rendered in an assistant chat bubble.

    Only the system prompt and the current query are sent to the model;
    earlier entries in ``messages`` are not included in the request.

    Args:
        query: Text the user typed into the chat input.
        search_index: Name of the Azure AI Search index to query.
        messages: Chat history as a list of ``{"role", "content"}`` dicts;
            mutated in place.

    Returns:
        None. If the Azure AI Search endpoint or key is not configured, an
        error is shown and ``messages`` is left untouched.
    """
    # os.getenv() yields None for unset variables. Formatting those into the
    # request would send the literal string "None" to the service, so report
    # the misconfiguration up front instead.
    missing = [
        env_name
        for env_name, value in (
            ("AZURE_SEARCH_ENDPOINT", search_endpoint),
            ("AZURE_SEARCH_ADMIN_KEY", search_key),
        )
        if not value
    ]
    if missing:
        st.error("Missing Azure AI Search configuration: " + ", ".join(missing))
        return

    messages.append({"role": "user", "content": query})
    with st.chat_message("user"):
        st.markdown(query)

    with st.spinner('Processing...'):
        client = AzureOpenAI(
            azure_endpoint=azure_endpoint,
            api_key=azure_openai_api_key,
            api_version=azure_openai_api_version,
        )
        completion = client.chat.completions.create(
            model=azure_gpt_deployment,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an AI assistant that helps people find information. "
                        "Ensure the Markdown responses are correctly formatted before responding."
                    ),
                },
                {"role": "user", "content": query},
            ],
            max_tokens=800,
            temperature=0.7,
            top_p=0.95,
            frequency_penalty=0,
            presence_penalty=0,
            stop=None,
            stream=False,
            # The search index is attached through the request body's
            # `data_sources` extension rather than a regular SDK argument.
            extra_body={
                "data_sources": [{
                    "type": "azure_search",
                    "parameters": {
                        "endpoint": search_endpoint,
                        "index_name": search_index,
                        "semantic_configuration": "default",
                        "query_type": "vector_simple_hybrid",
                        "fields_mapping": {},
                        "in_scope": True,
                        "role_information": "You are an AI assistant that helps people find information.",
                        "filter": None,
                        "strictness": 3,
                        "top_n_documents": 5,
                        "authentication": {
                            "type": "api_key",
                            "key": search_key,
                        },
                        "embedding_dependency": {
                            "type": "deployment_name",
                            "deployment_name": azure_ada_deployment,
                        },
                    },
                }],
            },
        )

    # Read the reply straight off the response object.
    ai_response = completion.choices[0].message.content
    messages.append({"role": "assistant", "content": ai_response})
    with st.chat_message("assistant"):
        st.markdown(ai_response)

def main():
    """Render the "Azure OpenAI & AI Search" chat demo page.

    Shows the logo, title and description, asks for an Azure AI Search index
    name, and, once a name is entered, replays the stored chat history and
    passes any newly submitted query to ``chat_on_your_data``.
    """
    # The logo is emitted as raw HTML so it can be centered on the page.
    logo_url = "https://upload.wikimedia.org/wikipedia/commons/4/44/Microsoft_logo.svg"
    logo_width = 60
    st.markdown(
        '<div style="text-align: center;">'
        f'<img src="{logo_url}" width="{logo_width}">'
        '</div>',
        unsafe_allow_html=True,
    )
    st.title("Demo - Azure OpenAI & AI Search")
    st.write(
        'This demo showcases an innovative way for users to engage with data housed in their Azure AI Search Index by leveraging both '
        'semantic and vector search techniques. Semantic search enhances the querying process by comprehending the meaning and context of '
        'user queries, thereby providing more pertinent results. Vector search, on the other hand, employs numerical representations of '
        'text to identify similar content using cosine similarity. ***For users to effectively utilize this demo, it is essential that they '
        'have previously created their Azure AI Search Index, following the necessary steps to upload and query their data as outlined [here](https://github.com/STRIDES/NIHCloudLabAzure/blob/main/notebooks/GenAI/Azure_Open_AI_README.md).***'
    )

    # The chat history is kept in the Streamlit session state.
    if 'messages' not in st.session_state:
        st.session_state.messages = []

    index_name = st.text_input(label="Azure AI Search index name:", value="")
    st.write('-' * 50)

    # No chat UI until an index name has been provided.
    if not index_name:
        return

    query = st.chat_input('Input search query here...')

    # Replay the stored conversation before handling the new query.
    for past_message in st.session_state.messages:
        with st.chat_message(past_message["role"]):
            st.markdown(past_message['content'])

    if query:
        chat_on_your_data(query, index_name, st.session_state.messages)


# Script entry point: apply the styling imported from the local `styling`
# module, then render the chat demo page.
if __name__ == "__main__":
    global_page_style()
    main()
Loading

0 comments on commit cc4b935

Please sign in to comment.