Skip to content

Commit

Permalink
fix: token counting and reranker
Browse files Browse the repository at this point in the history
  • Loading branch information
sshivaditya committed Jan 17, 2025
1 parent 2b97a15 commit 4fc3c30
Show file tree
Hide file tree
Showing 7 changed files with 334 additions and 219 deletions.
2 changes: 1 addition & 1 deletion evals/handlers/setup-context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ export async function fetchContext(context: Context, question: string): Promise<
logger.debug(`Ground truths tokens: ${groundTruthsTokens}`);

// Get formatted chat history with remaining tokens and reranked content
const formattedChat = await formatChatHistory(context, maxDepth, availableTokens, rerankedIssues, rerankedComments);
const formattedChat = await formatChatHistory(context, maxDepth, rerankedIssues, rerankedComments, availableTokens);
return {
formattedChat,
groundTruths,
Expand Down
82 changes: 82 additions & 0 deletions src/adapters/voyage/helpers/rerankers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { VoyageAIClient } from "voyageai";
import { Context } from "../../../types";
import { SimilarIssue, SimilarComment } from "../../../types/github-types";
import { SuperVoyage } from "./voyage";
import { TreeNode } from "../../../helpers/format-chat-history";

interface DocumentWithMetadata {
document: string;
Expand All @@ -19,6 +20,87 @@ export class Rerankers extends SuperVoyage {
this.context = context;
}

/**
 * Reranks a list of sibling tree nodes by their relevance to `query`.
 *
 * Each node is flattened into one text document (its body, comment bodies,
 * similar issue/comment bodies, code snippet bodies and README section,
 * joined by newlines) and submitted to the Voyage `rerank-2` model.
 *
 * @param nodes - Sibling nodes to rank. An empty array is returned as-is without calling the API.
 * @param query - Query the nodes are scored against.
 * @param topK - Maximum number of results requested from the reranker (capped at `nodes.length`).
 *   Only nodes present in the reranker response are returned, so nodes outside the top K are omitted.
 * @returns The original node objects (not copies), ordered by descending relevance score.
 */
private async _reRankNodesAtLevel(nodes: TreeNode[], query: string, topK: number = 100): Promise<TreeNode[]> {
  if (nodes.length === 0) return nodes;

  // Build one document per node; documents[i] always corresponds to nodes[i].
  const documents = nodes.map((node) =>
    [
      node.body || "",
      ...(node.comments?.map((comment) => comment.body || "") || []),
      ...(node.similarIssues?.map((issue) => issue.body || "") || []),
      ...(node.similarComments?.map((comment) => comment.body || "") || []),
      ...(node.codeSnippets?.map((snippet) => snippet.body || "") || []),
      node.readmeSection || "",
    ]
      .filter(Boolean)
      .join("\n")
  );

  const response = await this.client.rerank({
    query,
    documents,
    model: "rerank-2",
    returnDocuments: true,
    topK: Math.min(topK, documents.length),
  });

  const rerankedResults = response.data ?? [];

  return rerankedResults
    .map((result) => {
      // The response is ordered by relevance, NOT by input order: `result.index`
      // is the position of the document in the submitted list. Using the
      // position within `rerankedResults` would pair scores with the wrong nodes.
      const originalNode = typeof result.index === "number" ? nodes[result.index] : undefined;
      if (!originalNode) {
        // Missing or out-of-range index: the result cannot be mapped back to a node.
        return null;
      }
      return { node: originalNode, score: result.relevanceScore ?? 0 };
    })
    .filter((item): item is { node: TreeNode; score: number } => item !== null)
    .sort((a, b) => b.score - a.score) // Highest relevance first
    .map((item) => item.node);
}

/**
 * Produces a copy of the tree rooted at `rootNode` in which the children of
 * every node are reordered by relevance to `query`.
 *
 * Nodes are shallow-copied; each copy's `parent` points at the copied parent
 * (the copied root has no parent) and its `children` hold the reranked copies.
 *
 * @param rootNode - Root of the tree to rerank.
 * @param query - Query forwarded to the per-level reranker.
 * @param topK - Per-level result limit forwarded to the per-level reranker.
 * @returns The reranked copy, or the untouched `rootNode` if any step throws
 *   (the failure is logged through the context logger).
 */
async reRankTreeNodes(rootNode: TreeNode, query: string, topK: number = 100): Promise<TreeNode> {
  // Recursively rebuilds `current` with its children reranked and re-parented.
  const rebuild = async (current: TreeNode, parent?: TreeNode): Promise<TreeNode> => {
    const copy: TreeNode = { ...current, parent, children: [] };

    // Leaf node: nothing to rerank.
    if (current.children.length === 0) {
      return copy;
    }

    const orderedChildren = await this._reRankNodesAtLevel(current.children, query, topK);
    // Subtrees are rebuilt concurrently; each child receives the copy as its parent.
    const pendingChildren = orderedChildren.map((child) => rebuild(child, copy));
    copy.children = await Promise.all(pendingChildren);

    return copy;
  };

  try {
    return await rebuild(rootNode);
  } catch (e: unknown) {
    this.context.logger.error("Reranking tree nodes failed!", { e });
    return rootNode;
  }
}

async reRankResults(results: string[], query: string, topK: number = 5): Promise<string[]> {
let response;
try {
Expand Down
Loading

0 comments on commit 4fc3c30

Please sign in to comment.