Skip to content

Commit

Permalink
feat: basic chat rag works
Browse files Browse the repository at this point in the history
  • Loading branch information
sshivaditya committed Oct 6, 2024
1 parent 51454d4 commit 0f82015
Show file tree
Hide file tree
Showing 13 changed files with 164 additions and 16 deletions.
5 changes: 4 additions & 1 deletion .dev.vars.example
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
OPENAI_API_KEY="MY_SECRET"
OPENAI_API_KEY=""
SUPABASE_URL=""
SUPABASE_KEY=""
VOYAGEAI_API_KEY=""
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"knip-ci": "knip --no-exit-code --reporter json --config .github/knip.ts",
"prepare": "husky install",
"test": "jest --setupFiles dotenv/config --coverage",
"worker": "wrangler dev --env dev --port 4000"
"worker": "wrangler dev --env dev --port 5000"
},
"keywords": [
"typescript",
Expand Down
2 changes: 1 addition & 1 deletion src/adapters/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { SuperSupabase } from "./supabase/helpers/supabase";
import { Embedding as VoyageEmbedding } from "./voyage/helpers/embedding";
import { SuperVoyage } from "./voyage/helpers/voyage";
import { VoyageAIClient } from "voyageai";
import { Issue } from "./supabase/helpers/issue";
import { Issue } from "./supabase/helpers/issues";
import { SuperOpenAi } from "./openai/helpers/openai";
import OpenAI from "openai";
import { Completions } from "./openai/helpers/completions";
Expand Down
4 changes: 2 additions & 2 deletions src/adapters/openai/helpers/completions.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import OpenAI from "openai";
import { Context } from "../../../types";
import { SuperOpenAi } from "./openai";
const MAX_TOKENS = 3072;
const MAX_TOKENS = 3000;

export interface CompletionsType {
answer: string;
Expand Down Expand Up @@ -46,7 +46,7 @@ export class Completions extends SuperOpenAi {
],
},
],
temperature: 0,
temperature: 0.2,
max_tokens: MAX_TOKENS,
top_p: 1,
frequency_penalty: 0,
Expand Down
14 changes: 12 additions & 2 deletions src/adapters/supabase/helpers/comment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ export interface CommentType {
embedding: number[];
}

/**
 * One row returned by the `find_similar_comments` Postgres RPC:
 * a comment matched against the query text by vector search and/or
 * full-text search.
 */
export interface CommentSimilaritySearchResult {
  /** ID of the matched comment. */
  comment_id: string;
  /** Plain-text body of the matched comment. */
  comment_plaintext: string;
  /** ID of the issue the comment belongs to. */
  comment_issue_id: string;
  /** Embedding-based similarity score computed by the RPC. */
  similarity: number;
  /** Full-text-search rank; 0 when the comment had no full-text match. */
  text_similarity: number;
}

export class Comment extends SuperSupabase {
constructor(supabase: SupabaseClient, context: Context) {
super(supabase, context);
Expand All @@ -24,12 +32,14 @@ export class Comment extends SuperSupabase {
return data;
}

async findSimilarComments(query: string, threshold: number, currentId: string): Promise<CommentType[] | null> {
async findSimilarComments(query: string, threshold: number, currentId: string): Promise<CommentSimilaritySearchResult[] | null> {
const embedding = await this.context.adapters.voyage.embedding.createEmbedding(query);
const { data, error } = await this.supabase.rpc("find_similar_comments_with_vector_search_ftse", {
const { data, error } = await this.supabase.rpc("find_similar_comments", {
current_id: currentId,
query_text: query,
query_embedding: embedding,
threshold: threshold,
max_results: 10,
});
if (error) {
this.context.logger.error("Error finding similar comments", error);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export interface IssueSimilaritySearchResult {
issue_id: string;
issue_plaintext: string;
similarity: number;
text_similarity: number;
}

export interface IssueType {
Expand Down Expand Up @@ -33,10 +34,12 @@ export class Issue extends SuperSupabase {
}
async findSimilarIssues(plaintext: string, threshold: number, currentId: string): Promise<IssueSimilaritySearchResult[] | null> {
const embedding = await this.context.adapters.voyage.embedding.createEmbedding(plaintext);
const { data, error } = await this.supabase.rpc("find_similar_issues_vector_search_ftse", {
const { data, error } = await this.supabase.rpc("find_similar_issue_ftse", {
current_id: currentId,
query_text: plaintext,
query_embedding: embedding,
threshold: threshold,
max_results: 10,
});
if (error) {
this.context.logger.error("Error finding similar issues", error);
Expand Down
2 changes: 1 addition & 1 deletion src/adapters/voyage/helpers/embedding.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ export class Embedding extends SuperVoyage {
} else {
const response = await this.client.embed({
input: text,
model: "voyage-large-3",
model: "voyage-large-2-instruct",
});
return (response.data && response.data[0]?.embedding) || [];
}
Expand Down
3 changes: 2 additions & 1 deletion src/adapters/voyage/helpers/rerankers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ export class Rerankers extends SuperVoyage {
const response = await this.client.rerank({
query,
documents: results,
model: "voyage-large-3",
model: "rerank-2",
returnDocuments: true,
topK: 5,
});
const rerankedResults = response.data || [];
return rerankedResults.map((result) => result.document).filter((document): document is string => document !== undefined);
Expand Down
18 changes: 15 additions & 3 deletions src/handlers/ask-gpt.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { Context } from "../types";
import { CompletionsType } from "../adapters/openai/helpers/completions";
import { CommentType } from "../adapters/supabase/helpers/comment";
import { CommentSimilaritySearchResult } from "../adapters/supabase/helpers/comment";
import { IssueSimilaritySearchResult } from "../adapters/supabase/helpers/issues";

export async function askQuestion(context: Context, question: string) {
if (!question) {
Expand All @@ -20,12 +21,23 @@ export async function askGpt(context: Context, question: string): Promise<Comple
const similarComments = (await context.adapters.supabase.comment.findSimilarComments(question, similarityThreshold, "")) || [];
const similarIssues = (await context.adapters.supabase.issue.findSimilarIssues(question, similarityThreshold, "")) || [];
//Create a new object with plain text from both the objects
const similarText = similarComments.map((comment: CommentType) => comment.plaintext);
similarText.push(...similarIssues.map((issue) => issue.issue_plaintext));
const similarText = similarComments.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext);
similarText.push(...similarIssues.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext));
//Rerank Similar Comments and Issues
const rerankedText = await context.adapters.voyage.reranker.reRankResults(similarText, question);
//Remove unwanted characters from the text
rerankedText.forEach((text) => removeUnwantedChars(text));
//TODO: Temporary workaround
//const chat = createChatHistory(formattedChat);
//logger.info(`Sending chat to OpenAI`, { chat });
return context.adapters.openai.completions.createCompletion(question, model, rerankedText);
}

/**
 * Removes unwanted characters from the text (emojis, punctuation,
 * markup symbols — anything outside ASCII letters, digits, and
 * whitespace).
 *
 * NOTE(review): this also strips accented/non-English letters
 * (e.g. "é", "ü"), which may be too aggressive for multilingual
 * comments — confirm this is intended.
 *
 * @param text - Raw text to sanitize.
 * @returns The text with every character outside `[a-zA-Z0-9\s]` removed.
 */
const removeUnwantedChars = (text: string): string => {
  return text.replace(/[^a-zA-Z0-9\s]/g, "");
};
3 changes: 2 additions & 1 deletion src/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ export async function plugin(inputs: PluginInputs, env: Env) {
const voyageClient = new VoyageAIClient({
apiKey: env.VOYAGEAI_API_KEY,
});

const openaiClient = new OpenAI({
apiKey: env.OPENAI_API_KEY,
baseURL: inputs.settings.openAiBaseUrl || "https://api.openai.com",
...(inputs.settings.openAiBaseUrl && { baseUrl: inputs.settings.openAiBaseUrl }),
});
const context: Context = {
eventName: inputs.eventName,
Expand Down
2 changes: 1 addition & 1 deletion src/types/plugin-inputs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export interface PluginInputs<T extends SupportedEventsU = SupportedEventsU, TU
export const pluginSettingsSchema = T.Object({
model: T.String({ default: "o1-mini" }),
openAiBaseUrl: T.Optional(T.String()),
similarityThreshold: T.Number({ default: 0.8 }),
similarityThreshold: T.Number({ default: 0.1 }),
});

export const pluginSettingsValidator = new StandardValidator(pluginSettingsSchema);
Expand Down
119 changes: 119 additions & 0 deletions supabase/migrations/20241005200943_comments_function.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
-- Hybrid similarity search over the issues table: blends pgvector
-- embedding similarity with Postgres full-text rank and returns up to
-- max_results rows (excluding the issue identified by current_id),
-- ordered by the sum of the two scores.
CREATE OR REPLACE FUNCTION find_similar_issue_ftse(
    current_id VARCHAR,
    query_text TEXT,
    query_embedding VECTOR(1024),
    threshold DOUBLE PRECISION,
    max_results INTEGER DEFAULT 10
)
RETURNS TABLE(
    issue_id VARCHAR,
    issue_plaintext TEXT,
    similarity DOUBLE PRECISION,       -- vector score: 1 - (embedding <-> query_embedding)
    text_similarity DOUBLE PRECISION   -- ts_rank score; 0 when no full-text match
) AS $$
DECLARE
    query_tokens TEXT[];
    query_tsquery TSQUERY;
BEGIN
    -- Generate query tokens: distinct, lowercased words longer than 2 chars.
    SELECT array_agg(DISTINCT lower(word))
    INTO query_tokens
    FROM unnest(regexp_split_to_array(query_text, '\s+')) AS word
    WHERE length(word) > 2;

    -- Create a prefix-matching, OR-combined tsquery from the tokens.
    -- NOTE(review): if every word is <= 2 chars, query_tokens is NULL, so
    -- query_tsquery ends up NULL and the text_similarity CTE matches
    -- nothing — confirm that silent fallback is intended.
    SELECT to_tsquery(string_agg(lexeme || ':*', ' | '))
    INTO query_tsquery
    FROM unnest(query_tokens) lexeme;

    RETURN QUERY
    WITH vector_similarity AS (
        -- NOTE(review): <-> is L2 distance in pgvector, so 1 - distance is
        -- not bounded to [0, 1] and can be negative for distant vectors;
        -- cosine distance (<=>) may be what was intended — confirm, and
        -- check the threshold default used by callers against this scale.
        SELECT
            id,
            plaintext,
            (1 - (embedding <-> query_embedding))::DOUBLE PRECISION AS vec_similarity
        FROM issues
        WHERE id <> current_id
        AND (1 - (embedding <-> query_embedding))::DOUBLE PRECISION > threshold
    ),
    text_similarity AS (
        -- Full-text rank over the same table; rows here need not have
        -- passed the vector threshold (they only contribute via the JOIN).
        SELECT
            id,
            plaintext,
            ts_rank(to_tsvector('english', plaintext), query_tsquery)::DOUBLE PRECISION AS text_sim
        FROM issues
        WHERE to_tsvector('english', plaintext) @@ query_tsquery
    )
    -- LEFT JOIN keeps vector matches without a full-text hit; their
    -- text_similarity is coalesced to 0 so ordering still works.
    SELECT
        vs.id AS issue_id,
        vs.plaintext AS issue_plaintext,
        vs.vec_similarity AS similarity,
        COALESCE(ts.text_sim, 0::DOUBLE PRECISION) AS text_similarity
    FROM vector_similarity vs
    LEFT JOIN text_similarity ts ON vs.id = ts.id
    ORDER BY (vs.vec_similarity + COALESCE(ts.text_sim, 0::DOUBLE PRECISION)) DESC
    LIMIT max_results;
END;
$$ LANGUAGE plpgsql;

-- Hybrid similarity search over issue_comments: blends pgvector embedding
-- similarity with Postgres full-text rank and returns up to max_results
-- rows (excluding the comment identified by current_id), ordered by the
-- sum of the two scores.
CREATE OR REPLACE FUNCTION find_similar_comments(
    current_id VARCHAR,
    query_text TEXT,
    query_embedding VECTOR(1024),
    threshold DOUBLE PRECISION,
    max_results INTEGER DEFAULT 10
)
RETURNS TABLE(
    comment_id VARCHAR,
    comment_plaintext TEXT,
    comment_issue_id VARCHAR,
    similarity DOUBLE PRECISION,       -- vector score: 1 - l2_distance(query, embedding)
    text_similarity DOUBLE PRECISION   -- ts_rank score; 0 when no full-text match
) AS $$
DECLARE
    query_tokens TEXT[];
    query_tsquery TSQUERY;
BEGIN
    -- Generate query tokens: distinct, lowercased words longer than 2 chars.
    SELECT array_agg(DISTINCT lower(word))
    INTO query_tokens
    FROM unnest(regexp_split_to_array(query_text, '\s+')) AS word
    WHERE length(word) > 2;

    -- Create a prefix-matching, OR-combined tsquery from the tokens.
    -- NOTE(review): if every word is <= 2 chars, query_tokens is NULL, so
    -- query_tsquery ends up NULL and the text_similarity CTE matches
    -- nothing — confirm that silent fallback is intended.
    SELECT to_tsquery(string_agg(lexeme || ':*', ' | '))
    INTO query_tsquery
    FROM unnest(query_tokens) lexeme;

    RETURN QUERY
    WITH vector_similarity AS (
        -- NOTE(review): l2_distance is unbounded, so 1 - distance is not
        -- confined to [0, 1] and can go negative; this also differs only
        -- in spelling from the <-> operator used by the sibling
        -- find_similar_issue_ftse function — consider unifying, and
        -- confirm cosine distance was not intended instead.
        SELECT
            id,
            plaintext,
            issue_id,
            1 - (l2_distance(query_embedding, embedding))::DOUBLE PRECISION AS vec_similarity
        FROM issue_comments
        WHERE id <> current_id
        AND 1 - (l2_distance(query_embedding, embedding))::DOUBLE PRECISION > threshold
    ),
    text_similarity AS (
        -- Full-text rank over the same table; rows here need not have
        -- passed the vector threshold (they only contribute via the JOIN).
        SELECT
            id,
            plaintext,
            issue_id,
            ts_rank(to_tsvector('english', plaintext), query_tsquery)::DOUBLE PRECISION AS text_sim
        FROM issue_comments
        WHERE to_tsvector('english', plaintext) @@ query_tsquery
    )
    -- LEFT JOIN keeps vector matches without a full-text hit; their
    -- text_similarity is coalesced to 0 so ordering still works.
    SELECT
        vs.id AS comment_id,
        vs.plaintext AS comment_plaintext,
        vs.issue_id AS comment_issue_id,
        vs.vec_similarity AS similarity,
        COALESCE(ts.text_sim, 0::DOUBLE PRECISION) AS text_similarity
    FROM vector_similarity vs
    LEFT JOIN text_similarity ts ON vs.id = ts.id
    ORDER BY (vs.vec_similarity + COALESCE(ts.text_sim, 0::DOUBLE PRECISION)) DESC
    LIMIT max_results;
END;
$$ LANGUAGE plpgsql;
1 change: 0 additions & 1 deletion tests/main.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ describe("Ask plugin tests", () => {

it("should ask GPT a question", async () => {
const ctx = createContext(TEST_SLASH_COMMAND);
console.log(ctx.adapters);
createComments([transformCommentTemplate(1, 1, TEST_QUESTION, "ubiquity", "test-repo", true)]);
const res = await askQuestion(ctx, TEST_QUESTION);

Expand Down

0 comments on commit 0f82015

Please sign in to comment.