From b6f9736a6d32a369595411bc459961879d82fcaa Mon Sep 17 00:00:00 2001
From: Shivaditya Shivganesh <sshivaditya@gmail.com>
Date: Tue, 14 Jan 2025 19:07:35 -0500
Subject: [PATCH] feat: add similar issues and comments to common struct

---
 evals/handlers/setup-context.ts            |  82 +++++----
 evals/llm.eval.ts                          |   7 +-
 src/adapters/openai/helpers/completions.ts |  24 +--
 src/adapters/voyage/helpers/rerankers.ts   |  67 +++++++
 src/handlers/ask-llm.ts                    |  33 ++--
 src/helpers/format-chat-history.ts         | 103 ++++++++---
 src/helpers/issue-fetching.ts              | 192 ++++++++++++++++++++-
 src/types/github-types.ts                  |  72 ++++++++
 8 files changed, 488 insertions(+), 92 deletions(-)
diff --git a/evals/handlers/setup-context.ts b/evals/handlers/setup-context.ts
index 519768c..ddb5483 100644
--- a/evals/handlers/setup-context.ts
+++ b/evals/handlers/setup-context.ts
@@ -1,7 +1,5 @@
 import { SupabaseClient } from "@supabase/supabase-js";
 import { createAdapters } from "../../src/adapters";
-import { CommentSimilaritySearchResult } from "../../src/adapters/supabase/helpers/comment";
-import { IssueSimilaritySearchResult } from "../../src/adapters/supabase/helpers/issues";
 import { fetchRepoLanguageStats, fetchRepoDependencies } from "../../src/handlers/ground-truths/chat-bot";
 import { findGroundTruths } from "../../src/handlers/ground-truths/find-ground-truths";
 import { logger } from "../../src/helpers/errors";
@@ -9,11 +7,11 @@ import { formatChatHistory } from "../../src/helpers/format-chat-history";
 import { Context } from "../../src/types";
 import { VoyageAIClient } from "voyageai";
 import OpenAI from "openai";
+import { fetchSimilarContent } from "../../src/helpers/issue-fetching";
 
 const SEPERATOR = "######################################################\n";
 
 export interface FetchContext {
-  rerankedText: string[];
   formattedChat: string[];
   groundTruths: string[];
 }
@@ -41,45 +39,69 @@ export const initAdapters = (context: Context, clients: EvalClients): Context =>
 
 export async function fetchContext(context: Context, question: string): Promise<FetchContext> {
   const {
-    config: { similarityThreshold },
+    config: { similarityThreshold, model, maxDepth },
     adapters: {
       supabase: { comment, issue },
       voyage: { reranker },
+      openai: { completions },
     },
   } = context;
-  let formattedChat = await formatChatHistory(context);
-  logger.info(`${formattedChat.join("")}`);
-  // using db functions to find similar comments and issues
-  const [similarComments, similarIssues] = await Promise.all([
+  // Calculate total available tokens
+  const modelMaxTokens = completions.getModelMaxTokenLimit(model);
+  const maxCompletionTokens = completions.getModelMaxOutputLimit(model);
+  let availableTokens = modelMaxTokens - maxCompletionTokens;
+
+  // Calculate base prompt tokens (system message + query template)
+  const basePromptTokens = await completions.getPromptTokens();
+  availableTokens -= basePromptTokens;
+  logger.debug(`Base prompt tokens: ${basePromptTokens}`);
+
+  // Find similar comments and issues from Supabase
+  const [similarCommentsSearch, similarIssuesSearch] = await Promise.all([
     comment.findSimilarComments(question, 1 - similarityThreshold, ""),
     issue.findSimilarIssues(question, 1 - similarityThreshold, ""),
   ]);
-  // combine the similar comments and issues into a single array
+
+  // Fetch full content for similar items using GitHub API
+  const { similarIssues, similarComments } = await fetchSimilarContent(context, similarIssuesSearch || [], similarCommentsSearch || []);
+
+  logger.debug(`Fetched similar comments: ${JSON.stringify(similarComments)}`);
+  logger.debug(`Fetched similar issues: ${JSON.stringify(similarIssues)}`);
+
+  // Rerank similar content
+  const { similarIssues: rerankedIssues, similarComments: rerankedComments } = await reranker.reRankSimilarContent(question, similarIssues, similarComments);
+
+  // Calculate token usage from reranked content
   const similarText = [
-    ...(similarComments?.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext) || []),
-    ...(similarIssues?.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext) || []),
+    ...rerankedComments.map((comment) => comment.body).filter((body): body is string => !!body),
+    ...rerankedIssues.map((issue) => issue.body).filter((body): body is string => !!body),
   ];
-  // filter out any empty strings
-  formattedChat = formattedChat.filter((text) => text);
-  // rerank the similar text using voyageai
-  const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];
-  // gather structural data about the payload repository
+  const similarTextTokens = await completions.findTokenLength(similarText.join("\n"));
+  availableTokens -= similarTextTokens;
+  logger.debug(`Similar text tokens: ${similarTextTokens}`);
+
+  // Gather repository data and calculate ground truths
   const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);
+
+  // Initialize ground truths
   let groundTruths: string[] = [];
-  if (!languages.length) {
-    groundTruths.push("No languages found in the repository");
-  }
-  if (!Reflect.ownKeys(dependencies).length) {
-    groundTruths.push("No dependencies found in the repository");
-  }
-  if (!Reflect.ownKeys(devDependencies).length) {
-    groundTruths.push("No devDependencies found in the repository");
-  }
-  if (groundTruths.length > 3) {
+  if (!languages.length) groundTruths.push("No languages found in the repository");
+  if (!Reflect.ownKeys(dependencies).length) groundTruths.push("No dependencies found in the repository");
+  if (!Reflect.ownKeys(devDependencies).length) groundTruths.push("No devDependencies found in the repository");
+
+  // If not all empty, get full ground truths
+  if (groundTruths.length !== 3) {
     groundTruths = await findGroundTruths(context, "chat-bot", { languages, dependencies, devDependencies });
   }
+
+  // Calculate ground truths tokens
+  const groundTruthsTokens = await completions.findTokenLength(groundTruths.join("\n"));
+  availableTokens -= groundTruthsTokens;
+  logger.debug(`Ground truths tokens: ${groundTruthsTokens}`);
+
+  // Get formatted chat history with remaining tokens and reranked content
+  const formattedChat = await formatChatHistory(context, maxDepth, availableTokens, rerankedIssues, rerankedComments);
   return {
-    rerankedText,
     formattedChat,
     groundTruths,
   };
@@ -92,12 +114,6 @@ export function formattedHistory(fetchContext: FetchContext): string {
     formattedChat += chat;
   });
   formattedChat += SEPERATOR;
-  //Iterate through the reranked text and add it to the final formatted chat
-  formattedChat += "#################### Reranked Text ####################\n";
-  fetchContext.rerankedText.forEach((reranked) => {
-    formattedChat += reranked;
-  });
-  formattedChat += SEPERATOR;
   //Iterate through the ground truths and add it to the final formatted chat
   formattedChat += "#################### Ground Truths ####################\n";
   fetchContext.groundTruths.forEach((truth) => {
diff --git a/evals/llm.eval.ts b/evals/llm.eval.ts
index 924fc51..37a5201 100644
--- a/evals/llm.eval.ts
+++ b/evals/llm.eval.ts
@@ -12,6 +12,9 @@ import { writeFileSync } from "fs";
 import { fetchContext, formattedHistory, initAdapters } from "./handlers/setup-context";
 import { LOG_LEVEL, Logs } from "@ubiquity-os/ubiquity-os-logger";
 
+import { config } from "dotenv";
+config();
+
 // Required environment variables with type assertion
 const requiredEnvVars = {
   OPENAI_API_KEY: process.env.OPENAI_API_KEY as string,
@@ -138,11 +141,9 @@ export async function main() {
 
         initialContext = initAdapters(initialContext, clients);
         const chatHistory = await fetchContext(initialContext, scenario.issue.question);
-        const formattedContextHistory = formattedHistory(chatHistory);
         const result = await initialContext.adapters.openai.completions.createCompletion(
           scenario.issue.question,
           initialContext.config.model || "gpt-4o",
-          chatHistory.rerankedText,
           chatHistory.formattedChat,
           chatHistory.groundTruths,
           initialContext.env.UBIQUITY_OS_APP_NAME
@@ -150,7 +151,7 @@ export async function main() {
 
         return {
           output: result.answer,
-          context: formattedContextHistory,
+          context: formattedHistory(chatHistory),
           expected: scenario.expectedResponse,
         };
       },
diff --git a/src/adapters/openai/helpers/completions.ts b/src/adapters/openai/helpers/completions.ts
index 06cd44a..12a5760 100644
--- a/src/adapters/openai/helpers/completions.ts
+++ b/src/adapters/openai/helpers/completions.ts
@@ -59,22 +59,15 @@ export class Completions extends SuperOpenAi {
     return this.getModelMaxTokenLimit("o1-mini");
   }
 
-  private _getSystemPromptTemplate(
-    groundTruths: string = "{groundTruths}",
-    botName: string = "{botName}",
-    localContext: string = "{localContext}",
-    additionalContext: string = "{additionalContext}"
-  ): string {
+  private _getSystemPromptTemplate(groundTruths: string = "{groundTruths}", botName: string = "{botName}", localContext: string = "{localContext}"): string {
     return [
       "You Must obey the following ground truths: ",
       groundTruths + "\n",
       "You are tasked with assisting as a GitHub bot by generating responses based on provided chat history and similar responses, focusing on using available knowledge within the provided corpus, which may contain code, documentation, or incomplete information. Your role is to interpret and use this knowledge effectively to answer user questions.\n\n# Steps\n\n1. **Understand Context**: Review the chat history and any similar provided responses to understand the context.\n2. **Extract Relevant Information**: Identify key pieces of information, even if they are incomplete, from the available corpus.\n3. **Apply Knowledge**: Use the extracted information and relevant documentation to construct an informed response.\n4. **Draft Response**: Compile the gathered insights into a coherent and concise response, ensuring it's clear and directly addresses the user's query.\n5. **Review and Refine**: Check for accuracy and completeness, filling any gaps with logical assumptions where necessary.\n\n# Output Format\n\n- Concise and coherent responses in paragraphs that directly address the user's question.\n- Incorporate inline code snippets or references from the documentation if relevant.\n\n# Examples\n\n**Example 1**\n\n*Input:*\n- Chat History: \"What was the original reason for moving the LP tokens?\"\n- Corpus Excerpts: \"It isn't clear to me if we redid the staking yet and if we should migrate. If so, perhaps we should make a new issue instead. 
We should investigate whether the missing LP tokens issue from the MasterChefV2.1 contract is critical to the decision of migrating or not.\"\n\n*Output:*\n\"It was due to missing LP tokens issue from the MasterChefV2.1 Contract.\n\n# Notes\n\n- Ensure the response is crafted from the corpus provided, without introducing information outside of what's available or relevant to the query.\n- Consider edge cases where the corpus might lack explicit answers, and justify responses with logical reasoning based on the existing information.",
       `Your name is: ${botName}`,
       "\n",
-      "Main Context (Provide additional precedence in terms of information): ",
+      "Main Context",
       localContext,
-      "Secondary Context: ",
-      additionalContext,
     ].join("\n");
   }
 
@@ -96,17 +89,10 @@ export class Completions extends SuperOpenAi {
     return encode(messagesStr, { disallowedSpecial: new Set() }).length;
   }
 
-  async createCompletion(
-    query: string,
-    model: string = "o1-mini",
-    additionalContext: string[],
-    localContext: string[],
-    groundTruths: string[],
-    botName: string
-  ): Promise<CompletionsType> {
-    const numTokens = await this.findTokenLength(query, additionalContext, localContext, groundTruths);
+  async createCompletion(query: string, model: string = "o1-mini", localContext: string[], groundTruths: string[], botName: string): Promise<CompletionsType> {
+    const numTokens = await this.findTokenLength(query, localContext, groundTruths);
     logger.debug(`Number of tokens: ${numTokens}`);
-    const sysMsg = this._getSystemPromptTemplate(JSON.stringify(groundTruths), botName, localContext.join("\n"), additionalContext.join("\n"));
+    const sysMsg = this._getSystemPromptTemplate(JSON.stringify(groundTruths), botName, localContext.join("\n"));
     logger.info(`System message: ${sysMsg}`);
 
     const res: OpenAI.Chat.Completions.ChatCompletion = await this.client.chat.completions.create({
diff --git a/src/adapters/voyage/helpers/rerankers.ts b/src/adapters/voyage/helpers/rerankers.ts
index 9d68aee..6d418cd 100644
--- a/src/adapters/voyage/helpers/rerankers.ts
+++ b/src/adapters/voyage/helpers/rerankers.ts
@@ -1,7 +1,16 @@
 import { VoyageAIClient } from "voyageai";
 import { Context } from "../../../types";
+import { SimilarIssue, SimilarComment } from "../../../types/github-types";
 import { SuperVoyage } from "./voyage";
 
+interface DocumentWithMetadata {
+  document: string;
+  metadata: {
+    type: "issue" | "comment";
+    originalData: SimilarIssue | SimilarComment;
+  };
+}
+
 export class Rerankers extends SuperVoyage {
   protected context: Context;
 
@@ -27,4 +36,62 @@ export class Rerankers extends SuperVoyage {
     const rerankedResults = response.data || [];
     return rerankedResults.map((result) => result.document).filter((document): document is string => document !== undefined);
   }
+
+  /**
+   * Reranks the fetched issues and comments together against `query` and keeps at most `topK` items overall,
+   * each list ordered most-relevant first. Items with an empty body are never sent and are dropped from the result.
+   * If nothing has a body, or anything inside the try block throws, the inputs are returned unchanged.
+   */
+  async reRankSimilarContent(
+    query: string,
+    similarIssues: SimilarIssue[],
+    similarComments: SimilarComment[],
+    topK: number = 5
+  ): Promise<{ similarIssues: SimilarIssue[]; similarComments: SimilarComment[] }> {
+    try {
+      // Pair each body with its source item so reranked results can be mapped back to it
+      const issueDocuments: DocumentWithMetadata[] = similarIssues.map((issue) => ({
+        document: issue.body || "",
+        metadata: { type: "issue", originalData: issue },
+      }));
+
+      const commentDocuments: DocumentWithMetadata[] = similarComments.map((comment) => ({
+        document: comment.body || "",
+        metadata: { type: "comment", originalData: comment },
+      }));
+
+      const allDocuments = [...issueDocuments, ...commentDocuments].filter((doc) => doc.document);
+
+      if (allDocuments.length === 0) return { similarIssues, similarComments };
+
+      // Rerank all documents together
+      const response = await this.client.rerank({
+        query,
+        documents: allDocuments.map((doc) => doc.document),
+        model: "rerank-2",
+        returnDocuments: true,
+        topK: Math.min(topK, allDocuments.length),
+      });
+
+      const rerankedResults = response.data || [];
+
+      // Results arrive ordered by relevance; `result.index` is the position in the submitted documents, not the loop position
+      const rerankedIssues: SimilarIssue[] = [];
+      const rerankedComments: SimilarComment[] = [];
+
+      rerankedResults.forEach((result) => {
+        const originalDoc = result.index === undefined ? undefined : allDocuments[result.index];
+        if (originalDoc?.metadata.type === "issue") {
+          rerankedIssues.push(originalDoc.metadata.originalData as SimilarIssue);
+        } else if (originalDoc) {
+          rerankedComments.push(originalDoc.metadata.originalData as SimilarComment);
+        }
+      });
+
+      return { similarIssues: rerankedIssues, similarComments: rerankedComments };
+    } catch (e: unknown) {
+      this.context.logger.error("Reranking similar content failed!", { e });
+      return { similarIssues, similarComments };
+    }
+  }
 }
diff --git a/src/handlers/ask-llm.ts b/src/handlers/ask-llm.ts
index 244f261..7ec3f51 100644
--- a/src/handlers/ask-llm.ts
+++ b/src/handlers/ask-llm.ts
@@ -1,11 +1,10 @@
 import { Context } from "../types";
 import { CompletionsType } from "../adapters/openai/helpers/completions";
-import { CommentSimilaritySearchResult } from "../adapters/supabase/helpers/comment";
-import { IssueSimilaritySearchResult } from "../adapters/supabase/helpers/issues";
 import { formatChatHistory } from "../helpers/format-chat-history";
 import { fetchRepoDependencies, fetchRepoLanguageStats } from "./ground-truths/chat-bot";
 import { findGroundTruths } from "./ground-truths/find-ground-truths";
 import { bubbleUpErrorComment, logger } from "../helpers/errors";
+import { fetchSimilarContent } from "../helpers/issue-fetching";
 
 export async function askQuestion(context: Context, question: string): Promise<CompletionsType> {
   if (!question) {
@@ -34,28 +33,30 @@ export async function askQuestion(context: Context, question: string): Promise<C
     availableTokens -= basePromptTokens;
     logger.debug(`Base prompt tokens: ${basePromptTokens}`);
 
-    // Find similar comments and issues
-    const [similarComments, similarIssues] = await Promise.all([
+    // Find similar comments and issues from Supabase
+    const [similarCommentsSearch, similarIssuesSearch] = await Promise.all([
       comment.findSimilarComments(question, 1 - similarityThreshold, ""),
       issue.findSimilarIssues(question, 1 - similarityThreshold, ""),
     ]);
 
-    //Simialr comments and issues tokens
-    logger.debug(`Similar comments: ${JSON.stringify(similarComments)}`);
-    logger.debug(`Similar issues: ${JSON.stringify(similarIssues)}`);
+    // Fetch full content for similar items using GitHub API
+    const { similarIssues, similarComments } = await fetchSimilarContent(context, similarIssuesSearch || [], similarCommentsSearch || []);
 
-    // Combine and calculate similar text tokens
+    logger.debug(`Fetched similar comments: ${JSON.stringify(similarComments)}`);
+    logger.debug(`Fetched similar issues: ${JSON.stringify(similarIssues)}`);
+
+    // Rerank similar content
+    const { similarIssues: rerankedIssues, similarComments: rerankedComments } = await reranker.reRankSimilarContent(question, similarIssues, similarComments);
+
+    // Calculate token usage from reranked content
     const similarText = [
-      ...(similarComments?.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext) || []),
-      ...(similarIssues?.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext) || []),
+      ...rerankedComments.map((comment) => comment.body).filter((body): body is string => !!body),
+      ...rerankedIssues.map((issue) => issue.body).filter((body): body is string => !!body),
     ];
     const similarTextTokens = await completions.findTokenLength(similarText.join("\n"));
     availableTokens -= similarTextTokens;
     logger.debug(`Similar text tokens: ${similarTextTokens}`);
 
-    // Rerank similar text
-    const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];
-
     // Gather repository data and calculate ground truths
     const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);
 
@@ -75,12 +76,12 @@ export async function askQuestion(context: Context, question: string): Promise<C
     availableTokens -= groundTruthsTokens;
     logger.debug(`Ground truths tokens: ${groundTruthsTokens}`);
 
-    // Get formatted chat history with remaining tokens
-    const formattedChat = await formatChatHistory(context, maxDepth, availableTokens);
+    // Get formatted chat history with remaining tokens and reranked content
+    const formattedChat = await formatChatHistory(context, maxDepth, availableTokens, rerankedIssues, rerankedComments);
     logger.debug("Formatted chat history: " + formattedChat.join("\n"));
 
     // Create completion with all components
-    return await completions.createCompletion(question, model, rerankedText, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME);
+    return await completions.createCompletion(question, model, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME);
   } catch (error) {
     throw bubbleUpErrorComment(context, error, false);
   }
diff --git a/src/helpers/format-chat-history.ts b/src/helpers/format-chat-history.ts
index bda99e6..05f7e4f 100644
--- a/src/helpers/format-chat-history.ts
+++ b/src/helpers/format-chat-history.ts
@@ -6,6 +6,8 @@ import { logger } from "./errors";
 import { Issue } from "../types/github-types";
 import { updateTokenCount, createDefaultTokenLimits } from "./token-utils";
 
+import { SimilarIssue, SimilarComment } from "../types/github-types";
+
 interface TreeNode {
   key: string;
   issue: Issue;
@@ -14,7 +16,8 @@ interface TreeNode {
   depth: number;
   comments?: StreamlinedComment[];
   body?: string;
-  similarIssues?: Issue[];
+  similarIssues?: SimilarIssue[];
+  similarComments?: SimilarComment[];
   codeSnippets?: { body: string; path: string }[];
   readmeSection?: string;
 }
@@ -272,8 +275,52 @@ async function processTreeNode(node: TreeNode, prefix: string, output: string[],
   const childPrefix = prefix + (node.parent ? "    " : "");
   const contentPrefix = childPrefix + "    ";
 
-  // Process body if exists and within token limits
-  if (node.body?.trim()) {
+  // Process body and similar content for root node
+  if (!node.parent) {
+    // Process body if exists
+    if (node.body?.trim()) {
+      const bodyContent = formatContent("Body", node.body, childPrefix, contentPrefix, tokenLimits);
+      if (bodyContent.length > 0) {
+        output.push(...bodyContent);
+        output.push("");
+      }
+    }
+
+    // Process similar issues
+    if (node.similarIssues?.length) {
+      output.push(`${childPrefix}Similar Issues:`);
+      for (const issue of node.similarIssues) {
+        const line = `${contentPrefix}- Issue #${issue.issueNumber} (${issue.url}) - Similarity: ${(issue.similarity * 100).toFixed(2)}%`;
+        if (!updateTokenCount(line, tokenLimits)) break;
+        output.push(line);
+
+        if (issue.body) {
+          const bodyLine = `${contentPrefix}  ${issue.body.split("\n")[0]}...`;
+          if (!updateTokenCount(bodyLine, tokenLimits)) break;
+          output.push(bodyLine);
+        }
+      }
+      output.push("");
+    }
+
+    // Process similar comments
+    if (node.similarComments?.length) {
+      output.push(`${childPrefix}Similar Comments:`);
+      for (const comment of node.similarComments) {
+        const line = `${contentPrefix}- Comment by ${comment.user?.login} - Similarity: ${(comment.similarity * 100).toFixed(2)}%`;
+        if (!updateTokenCount(line, tokenLimits)) break;
+        output.push(line);
+
+        if (comment.body) {
+          const bodyLine = `${contentPrefix}  ${comment.body.split("\n")[0]}...`;
+          if (!updateTokenCount(bodyLine, tokenLimits)) break;
+          output.push(bodyLine);
+        }
+      }
+      output.push("");
+    }
+  } else if (node.body?.trim()) {
+    // Process body for non-root nodes
     const bodyContent = formatContent("Body", node.body, childPrefix, contentPrefix, tokenLimits);
     if (bodyContent.length > 0) {
       output.push(...bodyContent);
@@ -363,37 +410,53 @@ function formatContent(type: string, content: string, prefix: string, contentPre
   return output;
 }
 
-export async function formatChatHistory(context: Context, maxDepth: number = 2, availableTokens?: number): Promise<string[]> {
+export async function buildChatHistoryTree(context: Context, maxDepth: number = 2): Promise<{ tree: TreeNode | null; tokenLimits: TokenLimits }> {
   const specAndBodies: Record<string, string> = {};
-  const fetchTokenLimits = createDefaultTokenLimits(context);
+  const tokenLimits = createDefaultTokenLimits(context);
+  const { tree } = await buildTree(context, specAndBodies, maxDepth, tokenLimits);
 
-  // If availableTokens is provided, override the default tokensRemaining
-  if (availableTokens !== undefined) {
-    fetchTokenLimits.tokensRemaining = availableTokens;
+  if (tree && "pull_request" in context.payload) {
+    const { diff_hunk, position, original_position, path, body } = context.payload.comment || {};
+    if (diff_hunk) {
+      tree.body += `\nPrimary Context: ${body || ""}\nDiff: ${diff_hunk}\nPath: ${path || ""}\nLines: ${position || ""}-${original_position || ""}`;
+      tree.comments = tree.comments?.filter((comment) => comment.id !== context.payload.comment?.id);
+    }
   }
 
-  const { tree } = await buildTree(context, specAndBodies, maxDepth, fetchTokenLimits);
+  return { tree, tokenLimits };
+}
+
+export async function formatChatHistory(
+  context: Context,
+  maxDepth: number = 2,
+  availableTokens?: number,
+  similarIssues?: SimilarIssue[],
+  similarComments?: SimilarComment[]
+): Promise<string[]> {
+  const { tree, tokenLimits } = await buildChatHistoryTree(context, maxDepth);
+
   if (!tree) {
     return ["No main issue found."];
   }
 
-  logger.debug(`Tokens: ${fetchTokenLimits.runningTokenCount}/${fetchTokenLimits.tokensRemaining}`);
+  // If availableTokens is provided, override the default tokensRemaining
+  if (availableTokens !== undefined) {
+    tokenLimits.tokensRemaining = availableTokens;
+  }
 
-  if ("pull_request" in context.payload) {
-    const { diff_hunk, position, original_position, path, body } = context.payload.comment || {};
-    if (diff_hunk) {
-      tree.body += `\nPrimary Context: ${body || ""}\nDiff: ${diff_hunk}\nPath: ${path || ""}\nLines: ${position || ""}-${original_position || ""}`;
-      tree.comments = tree.comments?.filter((comment) => comment.id !== context.payload.comment?.id);
-    }
+  // Add similar issues and comments to the tree
+  if (similarIssues?.length) {
+    tree.similarIssues = similarIssues;
+  }
+  if (similarComments?.length) {
+    tree.similarComments = similarComments;
   }
 
   const treeOutput: string[] = [];
   const headerLine = "Issue Tree Structure:";
   treeOutput.push(headerLine, "");
 
-  // Create new token limits for formatting phase to avoid double counting
-  const formatTokenLimits = createDefaultTokenLimits(context);
-  await processTreeNode(tree, "", treeOutput, formatTokenLimits);
-  logger.debug(`Final tokens: ${formatTokenLimits.runningTokenCount}/${formatTokenLimits.tokensRemaining}`);
+  await processTreeNode(tree, "", treeOutput, tokenLimits);
+  logger.debug(`Final tokens: ${tokenLimits.runningTokenCount}/${tokenLimits.tokensRemaining}`);
   return treeOutput;
 }
diff --git a/src/helpers/issue-fetching.ts b/src/helpers/issue-fetching.ts
index 2b8775a..fde4b75 100644
--- a/src/helpers/issue-fetching.ts
+++ b/src/helpers/issue-fetching.ts
@@ -1,4 +1,14 @@
-import { FetchParams, Issue, LinkedIssues, SimplifiedComment } from "../types/github-types";
+import { Context } from "@ubiquity-os/plugin-sdk";
+import {
+  FetchParams,
+  Issue,
+  LinkedIssues,
+  SimplifiedComment,
+  SimilarIssue,
+  SimilarComment,
+  IssueSearchResult,
+  CommentIssueSearchResult,
+} from "../types/github-types";
 import { TokenLimits } from "../types/llm";
 import { logger } from "./errors";
 import { idIssueFromComment } from "./issue";
@@ -117,6 +127,186 @@ export async function fetchIssue(params: FetchParams, tokenLimits?: TokenLimits)
   }
 }
 
+export const GET_ISSUE_BY_ID = /* GraphQL */ `
+  query GetIssueById($id: ID!) {
+    node(id: $id) {
+      ... on Issue {
+        id
+        number
+        title
+        body
+        url
+        repository {
+          name
+          owner {
+            login
+          }
+        }
+        author {
+          login
+        }
+        comments(first: 100) {
+          nodes {
+            id
+            body
+            author {
+              login
+            }
+          }
+        }
+      }
+    }
+  }
+`;
+
+export const GET_COMMENT_BY_ID = /* GraphQL */ `
+  query GetCommentById($id: ID!) {
+    node(id: $id) {
+      ... on IssueComment {
+        id
+        body
+        author {
+          login
+        }
+        issue {
+          id
+          number
+          title
+          url
+          repository {
+            name
+            owner {
+              login
+            }
+          }
+        }
+      }
+      ... on PullRequestReviewComment {
+        id
+        body
+        author {
+          login
+        }
+        pullRequest {
+          id
+          number
+          title
+          url
+          repository {
+            name
+            owner {
+              login
+            }
+          }
+        }
+      }
+    }
+  }
+`;
+
+// Fetches an issue by its GitHub GraphQL node ID and maps it to the LinkedIssues shape; resolves to null if the node is missing or the lookup throws
+export async function fetchIssueFromId(context: Context, nodeId: string): Promise<LinkedIssues | null> {
+  try {
+    const { octokit } = context;
+    const response = await octokit.graphql<IssueSearchResult>(GET_ISSUE_BY_ID, { id: nodeId });
+    const issue = response.node;
+
+    if (!issue) return null;
+
+    return {
+      issueNumber: issue.number,
+      repo: issue.repository.name,
+      owner: issue.repository.owner.login,
+      url: issue.url,
+      body: issue.body,
+      comments: issue.comments.nodes.map((comment) => ({
+        id: comment.id,
+        body: comment.body,
+        user: { login: comment.author?.login },
+        org: issue.repository.owner.login,
+        repo: issue.repository.name,
+        issueUrl: issue.url,
+        commentType: "issue_comment",
+      })),
+    };
+  } catch (error: unknown) {
+    context.logger.error("Error fetching issue by ID", { error: error instanceof Error ? error : Error("Unknown Error"), nodeId });
+    return null;
+  }
+}
+
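+// Fetches a comment by its GitHub GraphQL node ID and maps it to SimplifiedComment; resolves to null if the node is missing, has no parent issue or PR, or the lookup throws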
+export async function fetchCommentFromId(context: Context, nodeId: string): Promise<SimplifiedComment | null> {
+  try {
+    const { octokit } = context;
+    const response = await octokit.graphql<CommentIssueSearchResult>(GET_COMMENT_BY_ID, { id: nodeId });
+    const comment = response.node;
+
+    if (!comment) return null;
+
+    const isIssueOrPr = comment.issue || comment.pullRequest;
+
+    if (!isIssueOrPr) {
+      context.logger.error("Comment has no associated issue or PR", { commentId: comment.id });
+      return null;
+    }
+
+    return {
+      id: comment.id,
+      body: comment.body,
+      user: { login: comment.author?.login },
+      org: isIssueOrPr.repository.owner.login,
+      repo: isIssueOrPr.repository.name,
+      issueUrl: isIssueOrPr.url,
+      commentType: comment.issue ? "issue_comment" : "pull_request_review_comment",
+    };
+  } catch (error) {
+    context.logger.error("Error fetching comment by ID", { error: error instanceof Error ? error : Error("Unknown Error"), nodeId });
+    return null;
+  }
+}
+
+export async function fetchSimilarContent(
+  context: Context,
+  similarIssues: Array<{ issue_id: string; similarity: number; text_similarity: number }>,
+  similarComments: Array<{ comment_id: string; similarity: number; text_similarity: number; comment_issue_id: string }>
+): Promise<{ similarIssues: SimilarIssue[]; similarComments: SimilarComment[] }> {
+  const fetchedIssues: SimilarIssue[] = [];
+  const fetchedComments: SimilarComment[] = [];
+
+  // Fetch similar issues
+  for (const issue of similarIssues) {
+    const fetchedIssue = await fetchIssueFromId(context, issue.issue_id);
+    if (fetchedIssue) {
+      fetchedIssues.push({
+        ...fetchedIssue,
+        similarity: issue.similarity,
+        text_similarity: issue.text_similarity,
+        issue_id: issue.issue_id,
+      });
+    }
+  }
+
+  // Fetch similar comments
+  for (const comment of similarComments) {
+    const fetchedComment = await fetchCommentFromId(context, comment.comment_id);
+    if (fetchedComment) {
+      fetchedComments.push({
+        ...fetchedComment,
+        similarity: comment.similarity,
+        text_similarity: comment.text_similarity,
+        comment_id: comment.comment_id,
+        comment_issue_id: comment.comment_issue_id,
+      });
+    }
+  }
+
+  return {
+    similarIssues: fetchedIssues,
+    similarComments: fetchedComments,
+  };
+}
+
 export async function fetchIssueComments(params: FetchParams, tokenLimits?: TokenLimits) {
   const { octokit, payload, logger } = params.context;
   const { issueNum, owner, repo } = params;
diff --git a/src/types/github-types.ts b/src/types/github-types.ts
index fb4e58f..9eda371 100644
--- a/src/types/github-types.ts
+++ b/src/types/github-types.ts
@@ -110,6 +110,19 @@ export type FetchedCodes = {
   issueNumber: number;
 };
 
+export interface SimilarIssue extends LinkedIssues {
+  similarity: number;
+  text_similarity: number;
+  issue_id: string;
+}
+
+export interface SimilarComment extends SimplifiedComment {
+  similarity: number;
+  text_similarity: number;
+  comment_id: string;
+  comment_issue_id: string;
+}
+
 export interface TreeNode {
   issue: LinkedIssues;
   children: string[];
@@ -123,6 +136,8 @@ export interface TreeNode {
     commentCount: number;
     linkedIssuesCount: number;
     hasCodeReferences: boolean;
+    similarIssues?: SimilarIssue[];
+    similarComments?: SimilarComment[];
   };
 }
 
@@ -132,3 +147,60 @@ export interface TreeProcessingQueue {
   parent?: string;
   priority: number;
 }
+
+export interface IssueSearchResult {
+  node: {
+    id: string;
+    number: number;
+    body: string;
+    repository: Repository;
+    title: string;
+    url: string;
+    author: {
+      login: string;
+    };
+    comments: {
+      nodes: Array<{
+        id: string;
+        body: string;
+        author: {
+          login: string;
+        };
+      }>;
+    };
+  };
+}
+
+export interface CommentIssueSearchResult {
+  node: {
+    id: string;
+    body: string;
+    author: {
+      login: string;
+    };
+    issue?: {
+      id: string;
+      number: number;
+      title: string;
+      url: string;
+      repository: {
+        name: string;
+        owner: {
+          login: string;
+        };
+      };
+    };
+    pullRequest?: {
+      id: string;
+      number: number;
+      title: string;
+      url: string;
+      repository: {
+        name: string;
+        owner: {
+          login: string;
+        };
+      };
+    };
+  };
+}