Skip to content

Commit

Permalink
feat: add similar issues and comments to common struct
Browse files Browse the repository at this point in the history
  • Loading branch information
sshivaditya committed Jan 15, 2025
1 parent 4c14ea9 commit b6f9736
Show file tree
Hide file tree
Showing 8 changed files with 488 additions and 92 deletions.
82 changes: 49 additions & 33 deletions evals/handlers/setup-context.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
import { SupabaseClient } from "@supabase/supabase-js";
import { createAdapters } from "../../src/adapters";
import { CommentSimilaritySearchResult } from "../../src/adapters/supabase/helpers/comment";
import { IssueSimilaritySearchResult } from "../../src/adapters/supabase/helpers/issues";
import { fetchRepoLanguageStats, fetchRepoDependencies } from "../../src/handlers/ground-truths/chat-bot";
import { findGroundTruths } from "../../src/handlers/ground-truths/find-ground-truths";
import { logger } from "../../src/helpers/errors";
import { formatChatHistory } from "../../src/helpers/format-chat-history";
import { Context } from "../../src/types";
import { VoyageAIClient } from "voyageai";
import OpenAI from "openai";
import { fetchSimilarContent } from "../../src/helpers/issue-fetching";

// Divider inserted between sections of the formatted history output.
// NOTE(review): "SEPERATOR" is a misspelling of "SEPARATOR"; renaming would touch every usage, so it is left as-is here.
const SEPERATOR = "######################################################\n";

/**
 * Aggregated context for a single eval question, returned by fetchContext
 * and consumed when building the final LLM prompt.
 */
export interface FetchContext {
// Reranked similar-content text used as additional prompt context.
// NOTE(review): this commit appears to drop reranked text elsewhere — confirm this field is still populated.
rerankedText: string[];
// Chat history entries already formatted for prompt inclusion.
formattedChat: string[];
// Facts about the repository (languages/dependencies), possibly expanded via findGroundTruths.
groundTruths: string[];
}
Expand Down Expand Up @@ -41,45 +39,69 @@ export const initAdapters = (context: Context, clients: EvalClients): Context =>

export async function fetchContext(context: Context, question: string): Promise<FetchContext> {
const {
config: { similarityThreshold },
config: { similarityThreshold, model, maxDepth },
adapters: {
supabase: { comment, issue },
voyage: { reranker },
openai: { completions },
},
} = context;
let formattedChat = await formatChatHistory(context);
logger.info(`${formattedChat.join("")}`);
// using db functions to find similar comments and issues
const [similarComments, similarIssues] = await Promise.all([
// Calculate total available tokens
const modelMaxTokens = completions.getModelMaxTokenLimit(model);
const maxCompletionTokens = completions.getModelMaxOutputLimit(model);
let availableTokens = modelMaxTokens - maxCompletionTokens;

// Calculate base prompt tokens (system message + query template)
const basePromptTokens = await completions.getPromptTokens();
availableTokens -= basePromptTokens;
logger.debug(`Base prompt tokens: ${basePromptTokens}`);

// Find similar comments and issues from Supabase
const [similarCommentsSearch, similarIssuesSearch] = await Promise.all([
comment.findSimilarComments(question, 1 - similarityThreshold, ""),
issue.findSimilarIssues(question, 1 - similarityThreshold, ""),
]);
// combine the similar comments and issues into a single array

// Fetch full content for similar items using GitHub API
const { similarIssues, similarComments } = await fetchSimilarContent(context, similarIssuesSearch || [], similarCommentsSearch || []);

logger.debug(`Fetched similar comments: ${JSON.stringify(similarComments)}`);
logger.debug(`Fetched similar issues: ${JSON.stringify(similarIssues)}`);

// Rerank similar content
const { similarIssues: rerankedIssues, similarComments: rerankedComments } = await reranker.reRankSimilarContent(question, similarIssues, similarComments);

// Calculate token usage from reranked content
const similarText = [
...(similarComments?.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext) || []),
...(similarIssues?.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext) || []),
...rerankedComments.map((comment) => comment.body).filter((body): body is string => !!body),
...rerankedIssues.map((issue) => issue.body).filter((body): body is string => !!body),
];
// filter out any empty strings
formattedChat = formattedChat.filter((text) => text);
// rerank the similar text using voyageai
const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];
// gather structural data about the payload repository
const similarTextTokens = await completions.findTokenLength(similarText.join("\n"));
availableTokens -= similarTextTokens;
logger.debug(`Similar text tokens: ${similarTextTokens}`);

// Gather repository data and calculate ground truths
const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);

// Initialize ground truths
let groundTruths: string[] = [];
if (!languages.length) {
groundTruths.push("No languages found in the repository");
}
if (!Reflect.ownKeys(dependencies).length) {
groundTruths.push("No dependencies found in the repository");
}
if (!Reflect.ownKeys(devDependencies).length) {
groundTruths.push("No devDependencies found in the repository");
}
if (groundTruths.length > 3) {
if (!languages.length) groundTruths.push("No languages found in the repository");
if (!Reflect.ownKeys(dependencies).length) groundTruths.push("No dependencies found in the repository");
if (!Reflect.ownKeys(devDependencies).length) groundTruths.push("No devDependencies found in the repository");

// If not all empty, get full ground truths
if (groundTruths.length !== 3) {
groundTruths = await findGroundTruths(context, "chat-bot", { languages, dependencies, devDependencies });
}

// Calculate ground truths tokens
const groundTruthsTokens = await completions.findTokenLength(groundTruths.join("\n"));
availableTokens -= groundTruthsTokens;
logger.debug(`Ground truths tokens: ${groundTruthsTokens}`);

// Get formatted chat history with remaining tokens and reranked content
const formattedChat = await formatChatHistory(context, maxDepth, availableTokens, rerankedIssues, rerankedComments);
return {
rerankedText,
formattedChat,
groundTruths,
};
Expand All @@ -92,12 +114,6 @@ export function formattedHistory(fetchContext: FetchContext): string {
formattedChat += chat;
});
formattedChat += SEPERATOR;
//Iterate through the reranked text and add it to the final formatted chat
formattedChat += "#################### Reranked Text ####################\n";
fetchContext.rerankedText.forEach((reranked) => {
formattedChat += reranked;
});
formattedChat += SEPERATOR;
//Iterate through the ground truths and add it to the final formatted chat
formattedChat += "#################### Ground Truths ####################\n";
fetchContext.groundTruths.forEach((truth) => {
Expand Down
7 changes: 4 additions & 3 deletions evals/llm.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ import { writeFileSync } from "fs";
import { fetchContext, formattedHistory, initAdapters } from "./handlers/setup-context";
import { LOG_LEVEL, Logs } from "@ubiquity-os/ubiquity-os-logger";

import { config } from "dotenv";
config();

// Required environment variables with type assertion
const requiredEnvVars = {
OPENAI_API_KEY: process.env.OPENAI_API_KEY as string,
Expand Down Expand Up @@ -138,19 +141,17 @@ export async function main() {

initialContext = initAdapters(initialContext, clients);
const chatHistory = await fetchContext(initialContext, scenario.issue.question);
const formattedContextHistory = formattedHistory(chatHistory);
const result = await initialContext.adapters.openai.completions.createCompletion(
scenario.issue.question,
initialContext.config.model || "gpt-4o",
chatHistory.rerankedText,
chatHistory.formattedChat,
chatHistory.groundTruths,
initialContext.env.UBIQUITY_OS_APP_NAME
);

return {
output: result.answer,
context: formattedContextHistory,
context: formattedHistory(chatHistory),
expected: scenario.expectedResponse,
};
},
Expand Down
24 changes: 5 additions & 19 deletions src/adapters/openai/helpers/completions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,22 +59,15 @@ export class Completions extends SuperOpenAi {
return this.getModelMaxTokenLimit("o1-mini");
}

private _getSystemPromptTemplate(
groundTruths: string = "{groundTruths}",
botName: string = "{botName}",
localContext: string = "{localContext}",
additionalContext: string = "{additionalContext}"
): string {
/**
 * Builds the chat-completion system prompt.
 *
 * Each parameter defaults to a literal "{placeholder}" token so the template
 * can also be rendered unfilled — presumably so getPromptTokens() can measure
 * the base prompt size (TODO confirm against getPromptTokens).
 *
 * @param groundTruths - JSON-stringified ground-truth facts the model must obey.
 * @param botName - Name the assistant identifies itself with.
 * @param localContext - Formatted chat history / main context block.
 * @returns The assembled system prompt, sections joined by newlines.
 */
// NOTE(review): the span below interleaves removed and added diff lines from the
// commit ("Main Context (…)" / "Secondary Context: " / additionalContext are the
// pre-commit lines; `additionalContext` is no longer a parameter after this commit).
private _getSystemPromptTemplate(groundTruths: string = "{groundTruths}", botName: string = "{botName}", localContext: string = "{localContext}"): string {
return [
"You Must obey the following ground truths: ",
groundTruths + "\n",
"You are tasked with assisting as a GitHub bot by generating responses based on provided chat history and similar responses, focusing on using available knowledge within the provided corpus, which may contain code, documentation, or incomplete information. Your role is to interpret and use this knowledge effectively to answer user questions.\n\n# Steps\n\n1. **Understand Context**: Review the chat history and any similar provided responses to understand the context.\n2. **Extract Relevant Information**: Identify key pieces of information, even if they are incomplete, from the available corpus.\n3. **Apply Knowledge**: Use the extracted information and relevant documentation to construct an informed response.\n4. **Draft Response**: Compile the gathered insights into a coherent and concise response, ensuring it's clear and directly addresses the user's query.\n5. **Review and Refine**: Check for accuracy and completeness, filling any gaps with logical assumptions where necessary.\n\n# Output Format\n\n- Concise and coherent responses in paragraphs that directly address the user's question.\n- Incorporate inline code snippets or references from the documentation if relevant.\n\n# Examples\n\n**Example 1**\n\n*Input:*\n- Chat History: \"What was the original reason for moving the LP tokens?\"\n- Corpus Excerpts: \"It isn't clear to me if we redid the staking yet and if we should migrate. If so, perhaps we should make a new issue instead. We should investigate whether the missing LP tokens issue from the MasterChefV2.1 contract is critical to the decision of migrating or not.\"\n\n*Output:*\n\"It was due to missing LP tokens issue from the MasterChefV2.1 Contract.\n\n# Notes\n\n- Ensure the response is crafted from the corpus provided, without introducing information outside of what's available or relevant to the query.\n- Consider edge cases where the corpus might lack explicit answers, and justify responses with logical reasoning based on the existing information.",
`Your name is: ${botName}`,
"\n",
"Main Context (Provide additional precedence in terms of information): ",
"Main Context",
localContext,
"Secondary Context: ",
additionalContext,
].join("\n");
}

Expand All @@ -96,17 +89,10 @@ export class Completions extends SuperOpenAi {
return encode(messagesStr, { disallowedSpecial: new Set() }).length;
}

async createCompletion(
query: string,
model: string = "o1-mini",
additionalContext: string[],
localContext: string[],
groundTruths: string[],
botName: string
): Promise<CompletionsType> {
const numTokens = await this.findTokenLength(query, additionalContext, localContext, groundTruths);
async createCompletion(query: string, model: string = "o1-mini", localContext: string[], groundTruths: string[], botName: string): Promise<CompletionsType> {
const numTokens = await this.findTokenLength(query, localContext, groundTruths);
logger.debug(`Number of tokens: ${numTokens}`);
const sysMsg = this._getSystemPromptTemplate(JSON.stringify(groundTruths), botName, localContext.join("\n"), additionalContext.join("\n"));
const sysMsg = this._getSystemPromptTemplate(JSON.stringify(groundTruths), botName, localContext.join("\n"));
logger.info(`System message: ${sysMsg}`);

const res: OpenAI.Chat.Completions.ChatCompletion = await this.client.chat.completions.create({
Expand Down
67 changes: 67 additions & 0 deletions src/adapters/voyage/helpers/rerankers.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
import { VoyageAIClient } from "voyageai";
import { Context } from "../../../types";
import { SimilarIssue, SimilarComment } from "../../../types/github-types";
import { SuperVoyage } from "./voyage";

interface DocumentWithMetadata {
document: string;
metadata: {
type: "issue" | "comment";
originalData: SimilarIssue | SimilarComment;
};
}

export class Rerankers extends SuperVoyage {
protected context: Context;

Expand All @@ -27,4 +36,62 @@ export class Rerankers extends SuperVoyage {
const rerankedResults = response.data || [];
return rerankedResults.map((result) => result.document).filter((document): document is string => document !== undefined);
}

/**
 * Reranks similar issues and comments against the query using the Voyage
 * "rerank-2" model, returning the most relevant documents of each kind.
 *
 * Issues and comments are submitted in a single rerank request so the topK
 * cutoff reflects relevance across both collections. On any failure the
 * original (un-reranked) inputs are returned unchanged as a best-effort
 * fallback.
 *
 * @param query - The user question to rank documents against.
 * @param similarIssues - Candidate issues from the similarity search.
 * @param similarComments - Candidate comments from the similarity search.
 * @param topK - Maximum number of documents kept across both kinds (default 5).
 * @returns The reranked issues and comments, most relevant first.
 */
async reRankSimilarContent(
  query: string,
  similarIssues: SimilarIssue[],
  similarComments: SimilarComment[],
  topK: number = 5
): Promise<{ similarIssues: SimilarIssue[]; similarComments: SimilarComment[] }> {
  try {
    // Tag each document so reranked results can be routed back to their source type.
    const issueDocuments: DocumentWithMetadata[] = similarIssues.map((issue) => ({
      document: issue.body || "",
      metadata: { type: "issue", originalData: issue },
    }));

    const commentDocuments: DocumentWithMetadata[] = similarComments.map((comment) => ({
      document: comment.body || "",
      metadata: { type: "comment", originalData: comment },
    }));

    // Drop empty bodies — they carry no ranking signal.
    const allDocuments = [...issueDocuments, ...commentDocuments].filter((doc) => doc.document);

    if (allDocuments.length === 0) {
      return { similarIssues, similarComments };
    }

    // Rerank all documents together
    const response = await this.client.rerank({
      query,
      documents: allDocuments.map((doc) => doc.document),
      model: "rerank-2",
      returnDocuments: true,
      topK: Math.min(topK, allDocuments.length),
    });

    const rerankedResults = response.data || [];

    // Separate and reconstruct the reranked issues and comments
    const rerankedIssues: SimilarIssue[] = [];
    const rerankedComments: SimilarComment[] = [];

    for (const result of rerankedResults) {
      // BUG FIX: rerank results come back sorted by relevance, and each result's
      // `index` field points at the position of the document in the REQUEST.
      // The previous code indexed `allDocuments` by the forEach position instead,
      // which simply re-selected the first topK input documents in their original
      // order — discarding the reranking entirely.
      const originalIndex = result.index;
      if (originalIndex === undefined || originalIndex < 0 || originalIndex >= allDocuments.length) {
        continue; // defensive: skip malformed entries rather than misattribute a document
      }
      const originalDoc = allDocuments[originalIndex];
      if (originalDoc.metadata.type === "issue") {
        rerankedIssues.push(originalDoc.metadata.originalData as SimilarIssue);
      } else {
        rerankedComments.push(originalDoc.metadata.originalData as SimilarComment);
      }
    }

    return {
      similarIssues: rerankedIssues,
      similarComments: rerankedComments,
    };
  } catch (e: unknown) {
    this.context.logger.error("Reranking similar content failed!", { e });
    return { similarIssues, similarComments };
  }
}
}
33 changes: 17 additions & 16 deletions src/handlers/ask-llm.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import { Context } from "../types";
import { CompletionsType } from "../adapters/openai/helpers/completions";
import { CommentSimilaritySearchResult } from "../adapters/supabase/helpers/comment";
import { IssueSimilaritySearchResult } from "../adapters/supabase/helpers/issues";
import { formatChatHistory } from "../helpers/format-chat-history";
import { fetchRepoDependencies, fetchRepoLanguageStats } from "./ground-truths/chat-bot";
import { findGroundTruths } from "./ground-truths/find-ground-truths";
import { bubbleUpErrorComment, logger } from "../helpers/errors";
import { fetchSimilarContent } from "../helpers/issue-fetching";

export async function askQuestion(context: Context, question: string): Promise<CompletionsType> {
if (!question) {
Expand Down Expand Up @@ -34,28 +33,30 @@ export async function askQuestion(context: Context, question: string): Promise<C
availableTokens -= basePromptTokens;
logger.debug(`Base prompt tokens: ${basePromptTokens}`);

// Find similar comments and issues
const [similarComments, similarIssues] = await Promise.all([
// Find similar comments and issues from Supabase
const [similarCommentsSearch, similarIssuesSearch] = await Promise.all([
comment.findSimilarComments(question, 1 - similarityThreshold, ""),
issue.findSimilarIssues(question, 1 - similarityThreshold, ""),
]);

//Simialr comments and issues tokens
logger.debug(`Similar comments: ${JSON.stringify(similarComments)}`);
logger.debug(`Similar issues: ${JSON.stringify(similarIssues)}`);
// Fetch full content for similar items using GitHub API
const { similarIssues, similarComments } = await fetchSimilarContent(context, similarIssuesSearch || [], similarCommentsSearch || []);

// Combine and calculate similar text tokens
logger.debug(`Fetched similar comments: ${JSON.stringify(similarComments)}`);
logger.debug(`Fetched similar issues: ${JSON.stringify(similarIssues)}`);

// Rerank similar content
const { similarIssues: rerankedIssues, similarComments: rerankedComments } = await reranker.reRankSimilarContent(question, similarIssues, similarComments);

// Calculate token usage from reranked content
const similarText = [
...(similarComments?.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext) || []),
...(similarIssues?.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext) || []),
...rerankedComments.map((comment) => comment.body).filter((body): body is string => !!body),
...rerankedIssues.map((issue) => issue.body).filter((body): body is string => !!body),
];
const similarTextTokens = await completions.findTokenLength(similarText.join("\n"));
availableTokens -= similarTextTokens;
logger.debug(`Similar text tokens: ${similarTextTokens}`);

// Rerank similar text
const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];

// Gather repository data and calculate ground truths
const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);

Expand All @@ -75,12 +76,12 @@ export async function askQuestion(context: Context, question: string): Promise<C
availableTokens -= groundTruthsTokens;
logger.debug(`Ground truths tokens: ${groundTruthsTokens}`);

// Get formatted chat history with remaining tokens
const formattedChat = await formatChatHistory(context, maxDepth, availableTokens);
// Get formatted chat history with remaining tokens and reranked content
const formattedChat = await formatChatHistory(context, maxDepth, availableTokens, rerankedIssues, rerankedComments);
logger.debug("Formatted chat history: " + formattedChat.join("\n"));

// Create completion with all components
return await completions.createCompletion(question, model, rerankedText, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME);
return await completions.createCompletion(question, model, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME);
} catch (error) {
throw bubbleUpErrorComment(context, error, false);
}
Expand Down
Loading

0 comments on commit b6f9736

Please sign in to comment.