diff --git a/packages/proxy/src/proxy.ts b/packages/proxy/src/proxy.ts index 4b5ddac..cdd9c2f 100644 --- a/packages/proxy/src/proxy.ts +++ b/packages/proxy/src/proxy.ts @@ -809,6 +809,7 @@ async function fetchModelLoop( const additionalHeaders = secret.metadata?.additionalHeaders || {}; let httpCode = undefined; + let httpHeaders = new Headers(); endpointCalls.add(1, loggableInfo); try { proxyResponse = await fetchModel( @@ -836,54 +837,63 @@ async function fetchModelLoop( proxyResponse.response.statusText, ); httpCode = proxyResponse.response.status; - - // If we hit a rate-limit error, and we're at the end of the - // loop, and we haven't waited the maximum allotted time, then - // sleep for a bit, and reset the loop. - if ( - httpCode === RATE_LIMIT_ERROR_CODE && - i === secrets.length - 1 && - totalWaitedTime < RATE_LIMIT_MAX_WAIT_MS - ) { - const limitReset = tryParseRateLimitReset( - proxyResponse.response.headers, - ); - delayMs = Math.max( - // Make sure we sleep at least 10ms. Sometimes the random backoff logic can get wonky. - Math.min( - // If we have a rate limit reset time, use that. Otherwise, use a random backoff. - // Sometimes, limitReset is 0 (errantly), so fall back to the random backoff in that case too. - // And never sleep longer than 10 seconds or the remaining budget. - limitReset || delayMs * (BACKOFF_EXPONENT - Math.random()), - 10 * 1000, - RATE_LIMIT_MAX_WAIT_MS - totalWaitedTime, - ), - 10, - ); - console.warn( - `Ran out of endpoints and hit rate limit errors, so sleeping for ${delayMs}ms`, - loopIndex, - ); - await new Promise((r) => setTimeout(r, delayMs)); - - totalWaitedTime += delayMs; - i = -1; // Reset the loop variable - } + httpHeaders = proxyResponse.response.headers; } } catch (e) { lastException = e; if (e instanceof TypeError) { - console.log( - "Failed to fetch (most likely an invalid URL", - secret.id, - e, - ); + if ("cause" in e && e.cause && isObject(e.cause)) { + if ("statusCode" in e.cause) { + httpCode = e.cause.statusCode; + } + if ("headers" in e.cause) { + httpHeaders = new Headers(e.cause.headers); + } + } + if (!httpCode) { + console.log( + "Failed to fetch with a generic error (could be an invalid URL or an unhandled network error)", + secret.id, + e, + ); + } } else { endpointFailures.add(1, loggableInfo); throw e; } } + // If we hit a rate-limit error, and we're at the end of the + // loop, and we haven't waited the maximum allotted time, then + // sleep for a bit, and reset the loop. + if ( + httpCode === RATE_LIMIT_ERROR_CODE && + i === secrets.length - 1 && + totalWaitedTime < RATE_LIMIT_MAX_WAIT_MS + ) { + const limitReset = tryParseRateLimitReset(httpHeaders); + delayMs = Math.max( + // Make sure we sleep at least 10ms. Sometimes the random backoff logic can get wonky. + Math.min( + // If we have a rate limit reset time, use that. Otherwise, use a random backoff. + // Sometimes, limitReset is 0 (errantly), so fall back to the random backoff in that case too. + // And never sleep longer than 10 seconds or the remaining budget. + limitReset || delayMs * (BACKOFF_EXPONENT - Math.random()), + 10 * 1000, + RATE_LIMIT_MAX_WAIT_MS - totalWaitedTime, + ), + 10, + ); + console.warn( + `Ran out of endpoints and hit rate limit errors, so sleeping for ${delayMs}ms`, + loopIndex, + ); + await new Promise((r) => setTimeout(r, delayMs)); + + totalWaitedTime += delayMs; + i = -1; // Reset the loop variable + } + endpointRetryableErrors.add(1, { ...loggableInfo, http_code: httpCode,