From e2e601e1bc03c75bb5814314dc3691b8f849cb5d Mon Sep 17 00:00:00 2001
From: Shivaditya Shivganesh
Date: Tue, 14 Jan 2025 12:15:14 -0500
Subject: [PATCH] fix: token limit should be fetched from the maxCompletionTokens function

---
 evals/llm.eval.ts                          |  4 +-
 src/adapters/openai/helpers/completions.ts | 53 +++++++++----
 src/handlers/ask-llm.ts                    | 91 ++++++++++++----------
 src/helpers/format-chat-history.ts         | 17 ++--
 src/helpers/token-utils.ts                 |  2 +-
 src/types/plugin-input.ts                  |  1 -
 6 files changed, 102 insertions(+), 66 deletions(-)

diff --git a/evals/llm.eval.ts b/evals/llm.eval.ts
index 52f85e2..924fc51 100644
--- a/evals/llm.eval.ts
+++ b/evals/llm.eval.ts
@@ -66,7 +66,6 @@ const inputs = {
   config: {
     model: "gpt-4o",
     similarityThreshold: 0.8,
-    maxTokens: 1000,
   },
   settings: {
     openAiBaseUrl: "https://openrouter.ai/api/v1",
@@ -146,8 +145,7 @@ export async function main() {
         chatHistory.rerankedText,
         chatHistory.formattedChat,
         chatHistory.groundTruths,
-        initialContext.env.UBIQUITY_OS_APP_NAME,
-        initialContext.config.maxTokens
+        initialContext.env.UBIQUITY_OS_APP_NAME
       );
 
       return {
diff --git a/src/adapters/openai/helpers/completions.ts b/src/adapters/openai/helpers/completions.ts
index 6c94f1a..06cd44a 100644
--- a/src/adapters/openai/helpers/completions.ts
+++ b/src/adapters/openai/helpers/completions.ts
@@ -59,28 +59,54 @@ export class Completions extends SuperOpenAi {
     return this.getModelMaxTokenLimit("o1-mini");
   }
 
+  private _getSystemPromptTemplate(
+    groundTruths: string = "{groundTruths}",
+    botName: string = "{botName}",
+    localContext: string = "{localContext}",
+    additionalContext: string = "{additionalContext}"
+  ): string {
+    return [
+      "You Must obey the following ground truths: ",
+      groundTruths + "\n",
+      "You are tasked with assisting as a GitHub bot by generating responses based on provided chat history and similar responses, focusing on using available knowledge within the provided corpus, which may contain code, documentation, or incomplete information. Your role is to interpret and use this knowledge effectively to answer user questions.\n\n# Steps\n\n1. **Understand Context**: Review the chat history and any similar provided responses to understand the context.\n2. **Extract Relevant Information**: Identify key pieces of information, even if they are incomplete, from the available corpus.\n3. **Apply Knowledge**: Use the extracted information and relevant documentation to construct an informed response.\n4. **Draft Response**: Compile the gathered insights into a coherent and concise response, ensuring it's clear and directly addresses the user's query.\n5. **Review and Refine**: Check for accuracy and completeness, filling any gaps with logical assumptions where necessary.\n\n# Output Format\n\n- Concise and coherent responses in paragraphs that directly address the user's question.\n- Incorporate inline code snippets or references from the documentation if relevant.\n\n# Examples\n\n**Example 1**\n\n*Input:*\n- Chat History: \"What was the original reason for moving the LP tokens?\"\n- Corpus Excerpts: \"It isn't clear to me if we redid the staking yet and if we should migrate. If so, perhaps we should make a new issue instead. We should investigate whether the missing LP tokens issue from the MasterChefV2.1 contract is critical to the decision of migrating or not.\"\n\n*Output:*\n\"It was due to missing LP tokens issue from the MasterChefV2.1 Contract.\n\n# Notes\n\n- Ensure the response is crafted from the corpus provided, without introducing information outside of what's available or relevant to the query.\n- Consider edge cases where the corpus might lack explicit answers, and justify responses with logical reasoning based on the existing information.",
+      `Your name is: ${botName}`,
+      "\n",
+      "Main Context (Provide additional precedence in terms of information): ",
+      localContext,
+      "Secondary Context: ",
+      additionalContext,
+    ].join("\n");
+  }
+
+  async getPromptTokens(query: string = "{query}"): Promise<number> {
+    const systemTemplate = this._getSystemPromptTemplate();
+    const messages = [
+      {
+        role: "system",
+        content: [{ type: "text", text: systemTemplate }],
+      },
+      {
+        role: "user",
+        content: [{ type: "text", text: query }],
+      },
+    ];
+
+    // Convert messages to string to count tokens
+    const messagesStr = JSON.stringify(messages);
+    return encode(messagesStr, { disallowedSpecial: new Set() }).length;
+  }
+
   async createCompletion(
     query: string,
     model: string = "o1-mini",
     additionalContext: string[],
     localContext: string[],
     groundTruths: string[],
-    botName: string,
-    maxTokens: number
+    botName: string
   ): Promise {
     const numTokens = await this.findTokenLength(query, additionalContext, localContext, groundTruths);
     logger.debug(`Number of tokens: ${numTokens}`);
-    const sysMsg = [
-      "You Must obey the following ground truths: ",
-      JSON.stringify(groundTruths) + "\n",
-      "You are tasked with assisting as a GitHub bot by generating responses based on provided chat history and similar responses, focusing on using available knowledge within the provided corpus, which may contain code, documentation, or incomplete information. Your role is to interpret and use this knowledge effectively to answer user questions.\n\n# Steps\n\n1. **Understand Context**: Review the chat history and any similar provided responses to understand the context.\n2. **Extract Relevant Information**: Identify key pieces of information, even if they are incomplete, from the available corpus.\n3. **Apply Knowledge**: Use the extracted information and relevant documentation to construct an informed response.\n4. **Draft Response**: Compile the gathered insights into a coherent and concise response, ensuring it's clear and directly addresses the user's query.\n5. **Review and Refine**: Check for accuracy and completeness, filling any gaps with logical assumptions where necessary.\n\n# Output Format\n\n- Concise and coherent responses in paragraphs that directly address the user's question.\n- Incorporate inline code snippets or references from the documentation if relevant.\n\n# Examples\n\n**Example 1**\n\n*Input:*\n- Chat History: \"What was the original reason for moving the LP tokens?\"\n- Corpus Excerpts: \"It isn't clear to me if we redid the staking yet and if we should migrate. If so, perhaps we should make a new issue instead. We should investigate whether the missing LP tokens issue from the MasterChefV2.1 contract is critical to the decision of migrating or not.\"\n\n*Output:*\n\"It was due to missing LP tokens issue from the MasterChefV2.1 Contract.\n\n# Notes\n\n- Ensure the response is crafted from the corpus provided, without introducing information outside of what's available or relevant to the query.\n- Consider edge cases where the corpus might lack explicit answers, and justify responses with logical reasoning based on the existing information.",
-      `Your name is: ${botName}`,
-      "\n",
-      "Main Context (Provide additional precedence in terms of information): ",
-      localContext.join("\n"),
-      "Secondary Context: ",
-      additionalContext.join("\n"),
-    ].join("\n");
+    const sysMsg = this._getSystemPromptTemplate(JSON.stringify(groundTruths), botName, localContext.join("\n"), additionalContext.join("\n"));
     logger.info(`System message: ${sysMsg}`);
 
     const res: OpenAI.Chat.Completions.ChatCompletion = await this.client.chat.completions.create({
@@ -106,7 +132,6 @@ export class Completions extends SuperOpenAi {
         },
       ],
       temperature: 0.2,
-      max_tokens: maxTokens,
       top_p: 0.5,
       frequency_penalty: 0,
       presence_penalty: 0,
diff --git a/src/handlers/ask-llm.ts b/src/handlers/ask-llm.ts
index 379b60c..244f261 100644
--- a/src/handlers/ask-llm.ts
+++ b/src/handlers/ask-llm.ts
@@ -7,73 +7,80 @@ import { fetchRepoDependencies, fetchRepoLanguageStats } from "./ground-truths/c
 import { findGroundTruths } from "./ground-truths/find-ground-truths";
 import { bubbleUpErrorComment, logger } from "../helpers/errors";
 
-export async function askQuestion(context: Context, question: string) {
-  const {
-    config: { maxDepth },
-  } = context;
+export async function askQuestion(context: Context, question: string): Promise {
   if (!question) {
     throw logger.error("No question provided");
   }
-  // build a nicely structure system message containing a streamlined chat history
-  // includes the current issue, any linked issues, and any linked PRs
-  const formattedChat = await formatChatHistory(context, maxDepth);
-  logger.debug("Formatted chat history " + formattedChat.join("\n"));
-  return await askLlm(context, question, formattedChat);
-}
-
-export async function askLlm(context: Context, question: string, formattedChat: string[]): Promise {
-  const {
-    env: { UBIQUITY_OS_APP_NAME },
-    config: { model, similarityThreshold, maxTokens },
-    adapters: {
-      supabase: { comment, issue },
-      voyage: { reranker },
-      openai: { completions },
-    },
-  } = context;
 
   context.logger.info("Asking LLM question: " + question);
 
   try {
-    // using db functions to find similar comments and issues
+    const {
+      env: { UBIQUITY_OS_APP_NAME },
+      config: { model, similarityThreshold, maxDepth },
+      adapters: {
+        supabase: { comment, issue },
+        voyage: { reranker },
+        openai: { completions },
+      },
+    } = context;
+
+    // Calculate total available tokens
+    const modelMaxTokens = completions.getModelMaxTokenLimit(model);
+    const maxCompletionTokens = completions.getModelMaxOutputLimit(model);
+    let availableTokens = modelMaxTokens - maxCompletionTokens;
+
+    // Calculate base prompt tokens (system message + query template)
+    const basePromptTokens = await completions.getPromptTokens();
+    availableTokens -= basePromptTokens;
+    logger.debug(`Base prompt tokens: ${basePromptTokens}`);
+
+    // Find similar comments and issues
     const [similarComments, similarIssues] = await Promise.all([
       comment.findSimilarComments(question, 1 - similarityThreshold, ""),
       issue.findSimilarIssues(question, 1 - similarityThreshold, ""),
     ]);
 
-    // combine the similar comments and issues into a single array
+    // Similar comments and issues tokens
+    logger.debug(`Similar comments: ${JSON.stringify(similarComments)}`);
+    logger.debug(`Similar issues: ${JSON.stringify(similarIssues)}`);
+
+    // Combine and calculate similar text tokens
     const similarText = [
       ...(similarComments?.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext) || []),
       ...(similarIssues?.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext) || []),
     ];
+    const similarTextTokens = await completions.findTokenLength(similarText.join("\n"));
+    availableTokens -= similarTextTokens;
+    logger.debug(`Similar text tokens: ${similarTextTokens}`);
 
-    context.logger.debug("Similar text: " + similarText.join("\n"));
-
-    // rerank the similar text using voyageai
+    // Rerank similar text
     const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];
+
+    // Gather repository data and calculate ground truths
     const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);
 
-    context.logger.debug("Languages: " + languages.join(", "));
+    // Initialize ground truths
     let groundTruths: string[] = [];
+    if (!languages.length) groundTruths.push("No languages found in the repository");
+    if (!Reflect.ownKeys(dependencies).length) groundTruths.push("No dependencies found in the repository");
+    if (!Reflect.ownKeys(devDependencies).length) groundTruths.push("No devDependencies found in the repository");
 
-    if (!languages.length) {
-      groundTruths.push("No languages found in the repository");
-    }
-
-    if (!Reflect.ownKeys(dependencies).length) {
-      groundTruths.push("No dependencies found in the repository");
+    // If not all three fallbacks were added, fetch the full ground truths
+    if (groundTruths.length !== 3) {
+      groundTruths = await findGroundTruths(context, "chat-bot", { languages, dependencies, devDependencies });
     }
 
-    if (!Reflect.ownKeys(devDependencies).length) {
-      groundTruths.push("No devDependencies found in the repository");
-    }
+    // Calculate ground truths tokens
+    const groundTruthsTokens = await completions.findTokenLength(groundTruths.join("\n"));
+    availableTokens -= groundTruthsTokens;
+    logger.debug(`Ground truths tokens: ${groundTruthsTokens}`);
 
-    if (groundTruths.length === 3) {
-      return await completions.createCompletion(question, model, rerankedText, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME, maxTokens);
-    }
+    // Get formatted chat history with remaining tokens
+    const formattedChat = await formatChatHistory(context, maxDepth, availableTokens);
+    logger.debug("Formatted chat history: " + formattedChat.join("\n"));
 
-    groundTruths = await findGroundTruths(context, "chat-bot", { languages, dependencies, devDependencies });
-    return await completions.createCompletion(question, model, rerankedText, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME, maxTokens);
+    // Create completion with all components
+    return await completions.createCompletion(question, model, rerankedText, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME);
   } catch (error) {
     throw bubbleUpErrorComment(context, error, false);
   }
diff --git a/src/helpers/format-chat-history.ts b/src/helpers/format-chat-history.ts
index c646d64..76e3445 100644
--- a/src/helpers/format-chat-history.ts
+++ b/src/helpers/format-chat-history.ts
@@ -363,11 +363,16 @@ function formatContent(type: string, content: string, prefix: string, contentPre
   return output;
 }
 
-export async function formatChatHistory(context: Context, maxDepth: number = 2): Promise<string[]> {
+export async function formatChatHistory(context: Context, maxDepth: number = 2, availableTokens?: number): Promise<string[]> {
   const specAndBodies: Record<string, string> = {};
-  const tokenLimits = createDefaultTokenLimits(context);
+  const fetchTokenLimits = createDefaultTokenLimits(context);
 
-  const { tree } = await buildTree(context, specAndBodies, maxDepth, tokenLimits);
+  // If availableTokens is provided, override the default tokensRemaining
+  if (availableTokens !== undefined) {
+    fetchTokenLimits.tokensRemaining = availableTokens;
+  }
+
+  const { tree } = await buildTree(context, specAndBodies, maxDepth, fetchTokenLimits);
   if (!tree) {
     return ["No main issue found."];
   }
@@ -384,8 +389,10 @@ export async function formatChatHistory(context: Context, maxDepth: number = 2):
   const headerLine = "Issue Tree Structure:";
   treeOutput.push(headerLine, "");
 
-  await processTreeNode(tree, "", treeOutput, tokenLimits);
-  logger.debug(`Final tokens: ${tokenLimits.runningTokenCount}/${tokenLimits.tokensRemaining}`);
+  // Create new token limits for formatting phase to avoid double counting
+  const formatTokenLimits = createDefaultTokenLimits(context);
+  await processTreeNode(tree, "", treeOutput, formatTokenLimits);
+  logger.debug(`Final tokens: ${formatTokenLimits.runningTokenCount}/${formatTokenLimits.tokensRemaining}`);
 
   return treeOutput;
 }
diff --git a/src/helpers/token-utils.ts b/src/helpers/token-utils.ts
index 2124195..d5fac51 100644
--- a/src/helpers/token-utils.ts
+++ b/src/helpers/token-utils.ts
@@ -4,7 +4,7 @@ import { encode } from "gpt-tokenizer";
 
 export function createDefaultTokenLimits(context: Context): TokenLimits {
   const modelMaxTokenLimit = context.adapters.openai.completions.getModelMaxTokenLimit(context.config.model);
-  const maxCompletionTokens = context.config.maxTokens || context.adapters.openai.completions.getModelMaxOutputLimit(context.config.model);
+  const maxCompletionTokens = context.adapters.openai.completions.getModelMaxOutputLimit(context.config.model);
 
   return {
     modelMaxTokenLimit,
diff --git a/src/types/plugin-input.ts b/src/types/plugin-input.ts
index 37f0060..b5d7535 100644
--- a/src/types/plugin-input.ts
+++ b/src/types/plugin-input.ts
@@ -13,7 +13,6 @@ export const pluginSettingsSchema = T.Object({
   openAiBaseUrl: T.Optional(T.String()),
   similarityThreshold: T.Number({ default: 0.9 }),
   maxDepth: T.Optional(T.Number({ default: 3 })), // max depth of the chat history to be fetched
-  maxTokens: T.Number({ default: 10000 }),
 });
 
 export type PluginSettings = StaticDecode<typeof pluginSettingsSchema>;
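
The core of this change is a running token budget in askQuestion: instead of the removed config.maxTokens setting, the handler derives the budget from the model limits and subtracts each prompt component as it is assembled. A minimal sketch of that arithmetic is below; the helper names (getModelMaxTokenLimit, getModelMaxOutputLimit, getPromptTokens, findTokenLength) come from the patch, while the standalone function and the concrete limit values are illustrative assumptions rather than code from the repository.

```typescript
import { encode } from "gpt-tokenizer";

// Illustrative stand-ins for what the completions adapter would report for the chosen model.
const MODEL_MAX_CONTEXT = 128_000; // assumed value of getModelMaxTokenLimit(model)
const MAX_COMPLETION_TOKENS = 16_384; // assumed value of getModelMaxOutputLimit(model)

// Mirrors findTokenLength: the token count is the length of the encoded text.
function countTokens(text: string): number {
  return encode(text).length;
}

// Same subtraction order as the new askQuestion: reserve the completion, then remove
// the prompt scaffold, the similar comments/issues, and the ground truths; whatever
// remains is handed to formatChatHistory as the chat-history budget.
function remainingForChatHistory(basePrompt: string, similarText: string[], groundTruths: string[]): number {
  let available = MODEL_MAX_CONTEXT - MAX_COMPLETION_TOKENS;
  available -= countTokens(basePrompt);
  available -= countTokens(similarText.join("\n"));
  available -= countTokens(groundTruths.join("\n"));
  return Math.max(available, 0); // the sketch clamps at zero so the tree builder never sees a negative budget
}
```

With that remainder seeded into fetchTokenLimits.tokensRemaining, buildTree truncates the issue tree against the space actually left in the model's context window rather than against a fixed configuration value.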