diff --git a/src/backlinks/handler.js b/src/backlinks/handler.js index 3cd165ae..0f66b277 100644 --- a/src/backlinks/handler.js +++ b/src/backlinks/handler.js @@ -18,6 +18,7 @@ import AhrefsAPIClient from '@adobe/spacecat-shared-ahrefs-client'; import { AbortController, AbortError } from '@adobe/fetch'; import { retrieveSiteBySiteId } from '../utils/data-access.js'; import { enhanceBacklinksWithFixes, fetch } from '../support/utils.js'; +import { obtainSitemapUrls } from '../sitemap/handler.js'; const TIMEOUT = 3000; @@ -64,6 +65,8 @@ export default async function auditBrokenBacklinks(message, context) { const { type, url: siteId, auditContext = {} } = message; const { dataAccess, log, sqs } = context; const { + AWS_REGION: region, + SPACECAT_STATISTICS_LAMBDA_ARN: statisticsServiceArn, AUDIT_RESULTS_QUEUE_URL: queueUrl, } = context.env; @@ -100,23 +103,7 @@ export default async function auditBrokenBacklinks(message, context) { const filteredBacklinks = result?.backlinks?.filter( (backlink) => !excludedURLs?.includes(backlink.url_to), ); - let brokenBacklinks = await filterOutValidBacklinks(filteredBacklinks, log); - - if (configuration.isHandlerEnabledForSite(`${type}-auto-suggest`, site)) { - try { - const topPages = await dataAccess.getTopPagesForSite(siteId, 'ahrefs', 'global'); - const keywords = topPages.map( - (page) => ({ - url: page.getURL(), - keyword: page.getTopKeyword(), - traffic: page.getTraffic(), - }), - ); - brokenBacklinks = enhanceBacklinksWithFixes(brokenBacklinks, keywords, log); - } catch (e) { - log.error(`Enhancing backlinks with fixes for siteId ${siteId} failed with error: ${e.message}`, e); - } - } + const brokenBacklinks = await filterOutValidBacklinks(filteredBacklinks, log); auditResult = { finalUrl: auditContext.finalUrl, @@ -146,8 +133,24 @@ export default async function auditBrokenBacklinks(message, context) { auditContext, auditResult, }; + await sqs.sendMessage(queueUrl, data); + const baseUrl = site.getBaseURL(); + const 
sitemaps = await obtainSitemapUrls(baseUrl, log); + if (sitemaps?.success && sitemaps?.paths) { + await enhanceBacklinksWithFixes( + siteId, + auditResult.brokenBacklinks, + Object.keys(sitemaps.paths), + { + region, + statisticsServiceArn, + log, + }, + ); + } + log.info(`Successfully audited ${siteId} for ${type} type audit`); return noContent(); } catch (e) { diff --git a/src/sitemap/handler.js b/src/sitemap/handler.js index 844c6aed..62119581 100644 --- a/src/sitemap/handler.js +++ b/src/sitemap/handler.js @@ -253,7 +253,7 @@ export async function getBaseUrlPagesFromSitemaps(baseUrl, urls) { * @param log * @returns {Promise<{success: boolean, reasons: Array<{value}>, paths?: any}>} result of sitemap */ -export async function findSitemap(inputUrl, log) { +export async function obtainSitemapUrls(inputUrl, log) { const logMessages = []; const parsedUrl = extractDomainAndProtocol(inputUrl); @@ -334,7 +334,7 @@ export async function sitemapAuditRunner(baseURL, context) { const { log } = context; log.info(`Received sitemap audit request for ${baseURL}`); const startTime = process.hrtime(); - const auditResult = await findSitemap(baseURL, log); + const auditResult = await obtainSitemapUrls(baseURL, log); const endTime = process.hrtime(startTime); const elapsedSeconds = endTime[0] + endTime[1] / 1e9; diff --git a/src/support/utils.js b/src/support/utils.js index 97c68c9e..537b02f3 100644 --- a/src/support/utils.js +++ b/src/support/utils.js @@ -15,6 +15,7 @@ import { hasText, prependSchema, resolveCustomerSecretsName } from '@adobe/space import URI from 'urijs'; import { JSDOM } from 'jsdom'; import { GetSecretValueCommand, SecretsManagerClient } from '@aws-sdk/client-secrets-manager'; +import { InvokeCommand, LambdaClient } from '@aws-sdk/client-lambda'; URI.preventInvalidHostname = true; @@ -216,69 +217,58 @@ export const extractKeywordsFromUrl = (url, log) => { }; /** - * Processes broken backlinks to find suggested URLs based on keywords. 
- * - * @param {Array} brokenBacklinks - The array of broken backlink objects to process. - * @param {Array} keywords - The array of keyword objects to match against. - * @param {Object} log - The logger object for logging messages. - * @returns {Array} A new array of backlink objects with suggested URLs added. + * Enhances the backlinks with fixes, triggers a Lambda function to calculate the fixes. + * @param siteId - The site ID. + * @param brokenBacklinks - The broken backlinks. + * @param sitemapPaths - Paths of all sitemaps of the site. + * @param config - The configuration object. + * @param config.region - The AWS region. + * @param config.statisticsServiceArn - The ARN of the statistics service Lambda function. + * @param config.log - The logger. + * @returns {Promise<{status: string}>} */ -export const enhanceBacklinksWithFixes = (brokenBacklinks, keywords, log) => { - const result = []; - - for (const backlink of brokenBacklinks) { - log.info(`trying to find redirect for: ${backlink.url_to}`); - const extractedKeywords = extractKeywordsFromUrl(backlink.url_to, log); - - const matchedData = []; - - // Match keywords and include rank in the matched data - keywords.forEach((entry) => { - const matchingKeyword = extractedKeywords.find( - (keywordObj) => { - const regex = new RegExp(`\\b${keywordObj.keyword}\\b`, 'i'); - return regex.test(entry.keyword); - }, - ); - if (matchingKeyword) { - matchedData.push({ ...entry, rank: matchingKeyword.rank }); - } +export async function enhanceBacklinksWithFixes(siteId, brokenBacklinks, sitemapPaths, config) { + const { + region, statisticsServiceArn, log, + } = config; + log.info(`Enhancing backlinks with fixes for site ${siteId}`); + + const client = new LambdaClient({ region }); + + const invokeLambdaForBatch = async (batch) => { + const payload = { + type: 'broken-backlinks', + payload: { + siteId, + brokenBacklinks: batch, + sitemapPaths, + }, + }; + + const command = new InvokeCommand({ + FunctionName: statisticsServiceArn,
+ Payload: JSON.stringify(payload), + InvocationType: 'Event', }); - // Try again with split keywords if no matches found - if (matchedData.length === 0) { - const splitKeywords = extractedKeywords - .map((keywordObj) => keywordObj.keyword.split(' ').map((k) => ({ keyword: k, rank: keywordObj.rank }))) - .flat(); - - splitKeywords.forEach((keywordObj) => { - keywords.forEach((entry) => { - const regex = new RegExp(`\\b${keywordObj.keyword}\\b`, 'i'); - if (regex.test(entry.keyword)) { - matchedData.push({ ...entry, rank: keywordObj.rank }); - } - }); - }); + try { + await client.send(command); + log.info(`Lambda function ${statisticsServiceArn} invoked successfully for batch.`); + } catch (error) { + log.error(`Error invoking Lambda function ${statisticsServiceArn} for batch:`, error); } + }; - // Sort by rank and then by traffic - matchedData.sort((a, b) => { - if (b.rank === a.rank) { - return b.traffic - a.traffic; // Higher traffic ranks first - } - return a.rank - b.rank; // Higher rank ranks first (1 is highest) - }); + // Invoke Lambda in batches of 10 + const batchSize = 10; + const promises = []; - const newBacklink = { ...backlink }; + for (let i = 0; i < brokenBacklinks.length; i += batchSize) { + const batch = brokenBacklinks.slice(i, i + batchSize); + promises.push(invokeLambdaForBatch(batch)); + } - if (matchedData.length > 0) { - log.info(`found ${matchedData.length} keywords for backlink ${backlink.url_to}`); - newBacklink.url_suggested = matchedData[0].url; - } else { - log.info(`could not find suggested URL for backlink ${backlink.url_to} with keywords ${extractedKeywords.map((k) => k.keyword).join(', ')}`); - } + await Promise.all(promises); - result.push(newBacklink); - } - return result; -}; + return { status: `Lambda function invoked for ${promises.length} batch(es)` }; +} diff --git a/test/audits/backlinks.test.js b/test/audits/backlinks.test.js index f7a5a766..5a7397e2 100644 --- a/test/audits/backlinks.test.js +++ 
b/test/audits/backlinks.test.js @@ -13,7 +13,6 @@ /* eslint-env mocha */ import { createSite } from '@adobe/spacecat-shared-data-access/src/models/site.js'; -import { createSiteTopPage } from '@adobe/spacecat-shared-data-access/src/models/site-top-page.js'; import { createConfiguration } from '@adobe/spacecat-shared-data-access/src/models/configuration.js'; import { createOrganization } from '@adobe/spacecat-shared-data-access/src/models/organization.js'; @@ -22,6 +21,7 @@ import chaiAsPromised from 'chai-as-promised'; import sinon from 'sinon'; import sinonChai from 'sinon-chai'; import nock from 'nock'; +import { LambdaClient } from '@aws-sdk/client-lambda'; import auditBrokenBacklinks from '../../src/backlinks/handler.js'; use(sinonChai); @@ -70,26 +70,6 @@ describe('Backlinks Tests', function () { const configuration = createConfiguration(configurationData); - const siteTopPage = createSiteTopPage({ - siteId: site.getId(), - url: `${site.getBaseURL()}/foo.html`, - traffic: 1000, - source: 'ahrefs', - geo: 'global', - importedAt: new Date('2024-06-18').toISOString(), - topKeyword: '404', - }); - - const siteTopPage2 = createSiteTopPage({ - siteId: site.getId(), - url: `${site.getBaseURL()}/bar.html`, - traffic: 500, - source: 'ahrefs', - geo: 'global', - importedAt: new Date('2024-06-18').toISOString(), - topKeyword: '429', - }); - const site2 = createSite({ id: 'site2', baseURL: 'https://foo.com', @@ -177,7 +157,6 @@ describe('Backlinks Tests', function () { url_from: 'https://from.com/from-3', url_to: 'https://foo.com/returns-429', domain_traffic: 1000, - url_suggested: 'https://bar.foo.com/bar.html', }, { title: 'backlink that is not excluded', @@ -257,7 +236,6 @@ describe('Backlinks Tests', function () { }); it('should filter out excluded URLs and include valid backlinks', async () => { - mockDataAccess.getTopPagesForSite.resolves([siteTopPage, siteTopPage2]); mockDataAccess.getSiteByID = sinon.stub().withArgs('site1').resolves(siteWithExcludedUrls); 
mockDataAccess.getConfiguration = sinon.stub().resolves(configuration); @@ -289,59 +267,46 @@ describe('Backlinks Tests', function () { ); }); - it('should successfully perform an audit to detect broken backlinks, save and send the proper audit result', async () => { + it('should successfully perform an audit to detect broken backlinks, save and send the proper audit result, then trigger suggested fix lambda', async () => { mockDataAccess.getSiteByID = sinon.stub().withArgs('site1').resolves(site); - mockDataAccess.getTopPagesForSite.resolves([]); mockDataAccess.getConfiguration = sinon.stub().resolves(configuration); + const invokeStub = sinon.stub(LambdaClient.prototype, 'send').resolves(); + const url = site.getBaseURL(); - nock(site.getBaseURL()) + const sampleSitemap = '\n' + + '\n' + + ` ${url}/foo\n` + + ` ${url}/bar\n` + + ''; + + nock(url) .get(/.*/) .reply(200); nock('https://ahrefs.com') .get(/.*/) .reply(200, auditResult); + nock(url) + .get('/robots.txt') + .reply(200, 'Allow: /'); - const expectedMessage = { - type: message.type, - url: site.getBaseURL(), - auditContext: { - finalUrl: 'bar.foo.com', - }, - auditResult: { - finalUrl: 'bar.foo.com', - brokenBacklinks: auditResult.backlinks, - fullAuditRef: 'https://ahrefs.com/site-explorer/broken-backlinks?select=title%2Curl_from%2Curl_to%2Ctraffic_domain&limit=50&mode=prefix&order_by=domain_rating_source%3Adesc%2Ctraffic_domain%3Adesc&target=bar.foo.com&output=json&where=%7B%22and%22%3A%5B%7B%22field%22%3A%22domain_rating_source%22%2C%22is%22%3A%5B%22gte%22%2C29.5%5D%7D%2C%7B%22field%22%3A%22traffic_domain%22%2C%22is%22%3A%5B%22gte%22%2C500%5D%7D%2C%7B%22field%22%3A%22links_external%22%2C%22is%22%3A%5B%22lte%22%2C300%5D%7D%5D%7D', - }, - }; - - const response = await auditBrokenBacklinks(message, context); - - expect(response.status).to.equal(204); - expect(mockDataAccess.addAudit).to.have.been.calledOnce; - expect(context.sqs.sendMessage).to.have.been.calledOnce; - 
expect(context.sqs.sendMessage).to.have.been - .calledWith(context.env.AUDIT_RESULTS_QUEUE_URL, expectedMessage); - expect(context.log.info).to.have.been.calledWith('Successfully audited site1 for broken-backlinks type audit'); - }); - - it('should successfully perform an audit to detect broken backlinks and suggest fixes based on keywords from top pages if auto-suggest' - + ' enabled', async () => { - mockDataAccess.getSiteByID = sinon.stub().withArgs('site1').resolves(site); - mockDataAccess.getTopPagesForSite.resolves([siteTopPage, siteTopPage2]); - mockDataAccess.getConfiguration = sinon.stub().resolves(configuration); - - nock(site.getBaseURL()) - .get(/.*/) + nock(url) + .head('/sitemap.xml') .reply(200); + nock(url) + .head('/sitemap_index.xml') + .reply(404); - nock('https://ahrefs.com') - .get(/.*/) - .reply(200, auditResult); + nock(url) + .get('/sitemap.xml') + .reply(200, sampleSitemap); - const expectedEnhancedBacklinks = auditResult.backlinks; - expectedEnhancedBacklinks[0].url_suggested = 'https://bar.foo.com/foo.html'; - expectedEnhancedBacklinks[2].url_suggested = 'https://bar.foo.com/bar.html'; + nock(url) + .head('/foo') + .reply(200); + nock(url) + .head('/bar') + .reply(200); const expectedMessage = { type: message.type, @@ -357,105 +322,26 @@ describe('Backlinks Tests', function () { }; const response = await auditBrokenBacklinks(message, context); + const [command] = invokeStub.getCall(0).args; + const payload = JSON.parse(command.input.Payload); expect(response.status).to.equal(204); expect(mockDataAccess.addAudit).to.have.been.calledOnce; expect(context.sqs.sendMessage).to.have.been.calledOnce; expect(context.sqs.sendMessage).to.have.been .calledWith(context.env.AUDIT_RESULTS_QUEUE_URL, expectedMessage); - }); - - it('should successfully perform an audit to detect broken backlinks and not suggest fixes if auto-suggest disabled', async () => { - mockDataAccess.getSiteByID = sinon.stub().withArgs('site2').resolves(site2); - 
configuration.disableHandlerForSite('broken-backlinks-auto-suggest', { getId: () => site2.getId(), getOrganizationId: () => org.getId() }); - mockDataAccess.getConfiguration = sinon.stub().resolves(configuration); - - nock(site2.getBaseURL()) - .get(/.*/) - .reply(301, undefined, { location: 'https://www.foo.com' }); - - nock('https://www.foo.com') - .get(/.*/) - .reply(200); - - nock('https://ahrefs.com') - .get(/.*/) - .reply(200, auditResult); - - const expectedMessage = { - type: message.type, - url: site2.getBaseURL(), - auditContext: { - finalUrl: 'www.foo.com', - }, - auditResult: { - finalUrl: 'www.foo.com', + expect(invokeStub.calledOnce).to.be.true; + expect(payload).to.deep.equal({ + type: 'broken-backlinks', + payload: { + siteId: 'site1', brokenBacklinks: auditResult.backlinks, - fullAuditRef: 'https://ahrefs.com/site-explorer/broken-backlinks?select=title%2Curl_from%2Curl_to%2Ctraffic_domain&limit=50&mode=prefix&order_by=domain_rating_source%3Adesc%2Ctraffic_domain%3Adesc&target=www.foo.com&output=json&where=%7B%22and%22%3A%5B%7B%22field%22%3A%22domain_rating_source%22%2C%22is%22%3A%5B%22gte%22%2C29.5%5D%7D%2C%7B%22field%22%3A%22traffic_domain%22%2C%22is%22%3A%5B%22gte%22%2C500%5D%7D%2C%7B%22field%22%3A%22links_external%22%2C%22is%22%3A%5B%22lte%22%2C300%5D%7D%5D%7D', - }, - }; - - const response = await auditBrokenBacklinks({ - url: site2.getId(), type: 'broken-backlinks', - }, context); - - expect(response.status).to.equal(204); - expect(mockDataAccess.getTopPagesForSite).to.not.have.been.called; - expect(mockDataAccess.addAudit).to.have.been.calledOnce; - expect(context.sqs.sendMessage).to.have.been.calledOnce; - expect(context.sqs.sendMessage).to.have.been - .calledWith(context.env.AUDIT_RESULTS_QUEUE_URL, expectedMessage); - expect(context.log.info).to.have.been.calledWith('Successfully audited site2 for broken-backlinks type audit'); - }); - - it('should detect broken backlinks and save the proper audit result, even if the suggested fix fails', 
async () => { - mockDataAccess.getSiteByID = sinon.stub().withArgs('site1').resolves(site); - mockDataAccess.getTopPagesForSite.resolves([createSiteTopPage({ - siteId: site.getId(), - url: `${site.getBaseURL()}/foo.html`, - traffic: 1000, - source: 'ahrefs', - geo: 'global', - importedAt: new Date('2024-06-18').toISOString(), - topKeyword: 'c++', - })]); - const brokenBacklink = { - backlinks: [ - { - title: 'backlink that has a faulty path', - url_from: 'https://from.com/from-1', - url_to: 'https://foo.com/c++', - domain_traffic: 4000, - }], - }; - mockDataAccess.getConfiguration = sinon.stub().resolves(configuration); - nock(site.getBaseURL()) - .get(/.*/) - .reply(200); - - nock('https://ahrefs.com') - .get(/.*/) - .reply(200, brokenBacklink); - - const expectedMessage = { - type: message.type, - url: site.getBaseURL(), - auditContext: { - finalUrl: 'bar.foo.com', + sitemapPaths: [ + 'https://bar.foo.com/sitemap.xml', + ], }, - auditResult: { - finalUrl: 'bar.foo.com', - brokenBacklinks: brokenBacklink.backlinks, - fullAuditRef: 'https://ahrefs.com/site-explorer/broken-backlinks?select=title%2Curl_from%2Curl_to%2Ctraffic_domain&limit=50&mode=prefix&order_by=domain_rating_source%3Adesc%2Ctraffic_domain%3Adesc&target=bar.foo.com&output=json&where=%7B%22and%22%3A%5B%7B%22field%22%3A%22domain_rating_source%22%2C%22is%22%3A%5B%22gte%22%2C29.5%5D%7D%2C%7B%22field%22%3A%22traffic_domain%22%2C%22is%22%3A%5B%22gte%22%2C500%5D%7D%2C%7B%22field%22%3A%22links_external%22%2C%22is%22%3A%5B%22lte%22%2C300%5D%7D%5D%7D', - }, - }; - const response = await auditBrokenBacklinks(message, context); - - expect(response.status).to.equal(204); - expect(mockDataAccess.addAudit).to.have.been.calledOnce; - expect(context.sqs.sendMessage).to.have.been.calledOnce; - expect(context.sqs.sendMessage).to.have.been - .calledWith(context.env.AUDIT_RESULTS_QUEUE_URL, expectedMessage); + }); + expect(context.log.info).to.have.been.calledWith('Successfully audited site1 for broken-backlinks type 
audit'); }); it('should successfully perform an audit to detect broken backlinks and set finalUrl, for baseUrl redirecting to www domain', async () => { diff --git a/test/audits/sitemap.test.js b/test/audits/sitemap.test.js index 7b9426b5..67e322ba 100644 --- a/test/audits/sitemap.test.js +++ b/test/audits/sitemap.test.js @@ -19,7 +19,7 @@ import chaiAsPromised from 'chai-as-promised'; import { checkSitemap, ERROR_CODES, - findSitemap, + obtainSitemapUrls, isSitemapContentValid, checkRobotsForSitemap, sitemapAuditRunner, fetchContent, getBaseUrlPagesFromSitemaps, } from '../../src/sitemap/handler.js'; @@ -481,9 +481,9 @@ describe('Sitemap Audit', () => { }); }); - describe('findSitemap', () => { + describe('obtainSitemapUrls', () => { it('should return error when URL is invalid', async () => { - const result = await findSitemap('not a valid url'); + const result = await obtainSitemapUrls('not a valid url'); expect(result.success).to.equal(false); expect(result.reasons).to.deep.equal([{ error: ERROR_CODES.INVALID_URL, @@ -508,7 +508,7 @@ describe('Sitemap Audit', () => { .head('/bar') .reply(404); - const result = await findSitemap(url); + const result = await obtainSitemapUrls(url); expect(result.success).to.equal(false); expect(result.reasons).to.deep.include({ @@ -534,7 +534,7 @@ describe('Sitemap Audit', () => { .head('/bar') .reply(200); - const result = await findSitemap(url); + const result = await obtainSitemapUrls(url); expect(result.success).to.equal(true); expect(result.paths).to.deep.equal({ [`${url}/sitemap.xml`]: [`${url}/foo`, `${url}/bar`], @@ -566,7 +566,7 @@ describe('Sitemap Audit', () => { .head('/bar') .reply(200); - const result = await findSitemap('https://some-domain.adobe'); + const result = await obtainSitemapUrls('https://some-domain.adobe'); expect(result.success).to.equal(true); expect(result.paths).to.deep.equal({ [`${url}/sitemap.xml`]: [`${url}/foo`, `${url}/bar`], @@ -614,7 +614,7 @@ describe('Sitemap Audit', () => { .head('/cux') 
.reply(200); - const result = await findSitemap(url); + const result = await obtainSitemapUrls(url); expect(result.success).to.equal(true); expect(result.paths).to.deep.equal({ [`${url}/sitemap_foo.xml`]: [`${url}/foo`, `${url}/bar`], @@ -639,7 +639,7 @@ describe('Sitemap Audit', () => { .head('/bar') .reply(200); - const result = await findSitemap(`${protocol}://www.${domain}`); + const result = await obtainSitemapUrls(`${protocol}://www.${domain}`); expect(result.success).to.equal(true); expect(result.paths).to.deep.equal({ [`${url}/sitemap.xml`]: [`${protocol}://www.${domain}/foo`, `${protocol}://www.${domain}/bar`], @@ -659,7 +659,7 @@ describe('Sitemap Audit', () => { .head('/sitemap_index.xml') .reply(404); - const result = await findSitemap(url); + const result = await obtainSitemapUrls(url); expect(result.success).to.equal(false); }); @@ -684,7 +684,7 @@ describe('Sitemap Audit', () => { .get('/sitemap.xml') .reply(200, sitemapInvalidPaths); - const result = await findSitemap(url); + const result = await obtainSitemapUrls(url); expect(result.success).to.equal(false); }); }); diff --git a/test/support/utils.test.js b/test/support/utils.test.js index bdcef654..c1cfa6b6 100644 --- a/test/support/utils.test.js +++ b/test/support/utils.test.js @@ -15,6 +15,7 @@ import { expect, use } from 'chai'; import sinonChai from 'sinon-chai'; import chaiAsPromised from 'chai-as-promised'; import sinon from 'sinon'; +import { LambdaClient } from '@aws-sdk/client-lambda'; import { enhanceBacklinksWithFixes, extractKeywordsFromUrl, @@ -88,85 +89,82 @@ describe('extractKeywordsFromUrl', () => { describe('enhanceBacklinksWithFixes', () => { let log; + let invokeStub; beforeEach(() => { - log = { info: sinon.stub() }; + log = { info: sinon.stub(), error: sinon.stub() }; + invokeStub = sinon.stub(LambdaClient.prototype, 'send').resolves(); }); afterEach(() => { sinon.restore(); }); - it('should prioritize keywords closer to the end of the URL path', async () => { + it('should 
invoke the Lambda function with the correct payload', async () => { + const siteId = 'testSiteId'; const brokenBacklinks = [ - { - url_to: 'https://www.example.com/foo/bar/baz.html', - }, - ]; - - const keywords = [ - { keyword: 'foo', traffic: 100, url: 'https://www.example.com/foo.html' }, - { keyword: 'bar', traffic: 200, url: 'https://www.example.com/foo/bar.html' }, - { keyword: 'baz', traffic: 50, url: 'https://www.example.com/baz.html' }, + { url_to: 'https://www.example.com/foo/bar/baz.html' }, ]; - - const result = enhanceBacklinksWithFixes(brokenBacklinks, keywords, log); - - expect(result).to.be.an('array').that.has.lengthOf(1); - expect(result[0].url_suggested).to.equal('https://www.example.com/baz.html'); - }); - - it('should use traffic as a secondary sort criterion', async () => { - const brokenBacklinks = [ - { - url_to: 'https://www.example.com/foo/bar/baz.html', + const sitemapUrls = ['https://www.example.com/sitemap.xml']; + const config = { + region: 'test-region', + statisticsServiceArn: 'testStatisticsService', + log, + }; + + const result = await enhanceBacklinksWithFixes(siteId, brokenBacklinks, sitemapUrls, config); + + expect(invokeStub.calledOnce).to.be.true; + const [command] = invokeStub.getCall(0).args; + expect(command.input.FunctionName).to.equal('testStatisticsService'); + const payload = JSON.parse(command.input.Payload); + expect(payload).to.deep.equal({ + type: 'broken-backlinks', + payload: { + siteId: 'testSiteId', + brokenBacklinks: [{ url_to: 'https://www.example.com/foo/bar/baz.html' }], + sitemapPaths: ['https://www.example.com/sitemap.xml'], }, - ]; + }); - const keywords = [ - { keyword: 'foo', traffic: 300, url: 'https://www.example.com/foo.html' }, - { keyword: 'another baz', traffic: 200, url: 'https://www.example.com/foo/bar.html' }, - { keyword: 'baz', traffic: 100, url: 'https://www.example.com/baz.html' }, - ]; - - const result = enhanceBacklinksWithFixes(brokenBacklinks, keywords, log); - - 
expect(result).to.be.an('array').that.has.lengthOf(1); - expect(result[0].url_suggested).to.equal('https://www.example.com/foo/bar.html'); + expect(result).to.deep.equal({ status: 'Lambda function invoked for 1 batch(es)' }); }); - it('should correctly handle cases where keywords are split', async () => { + it('should log info message when Lambda function is invoked successfully', async () => { + const siteId = 'testSiteId'; const brokenBacklinks = [ - { - url_to: 'https://www.example.com/foo-bar-baz.html', - }, + { url_to: 'https://www.example.com/foo/bar/baz.html' }, ]; + const sitemapUrls = ['https://www.example.com/sitemap.xml']; + const config = { + region: 'test-region', + statisticsServiceArn: 'testStatisticsService', + log, + }; - const keywords = [ - { keyword: 'foo', traffic: 100, url: 'https://www.example.com/foo.html' }, - { keyword: 'bar', traffic: 300, url: 'https://www.example.com/bar.html' }, - { keyword: 'baz', traffic: 200, url: 'https://www.example.com/baz.html' }, - ]; + await enhanceBacklinksWithFixes(siteId, brokenBacklinks, sitemapUrls, config); - const result = enhanceBacklinksWithFixes(brokenBacklinks, keywords, log); - - expect(result).to.be.an('array').that.has.lengthOf(1); - expect(result[0].url_suggested).to.equal('https://www.example.com/bar.html'); + expect(log.info.calledWith('Lambda function testStatisticsService invoked successfully for batch.')).to.be.true; }); - it('should match keywords only for whole words', () => { + it('should log error message when Lambda function invocation fails', async () => { + invokeStub.rejects(new Error('Invocation failed')); + + const siteId = 'testSiteId'; const brokenBacklinks = [ - { - url_to: 'https://www.example.com/foo/bar.html', - }, - ]; - const keywords = [ - { keyword: 'foobar', traffic: 400, url: 'https://www.example.com/foobar.html' }, - { keyword: 'foo', traffic: 200, url: 'https://www.example.com/foo.html' }, - { keyword: 'bar', traffic: 50, url: 'https://www.example.com/bar.html' }, + 
{ url_to: 'https://www.example.com/foo/bar/baz.html' }, ]; - const result = enhanceBacklinksWithFixes(brokenBacklinks, keywords, log); - expect(result).to.be.an('array').that.has.lengthOf(1); - expect(result[0].url_suggested).to.equal('https://www.example.com/bar.html'); + const sitemapUrls = ['https://www.example.com/sitemap.xml']; + const config = { + region: 'test-region', + statisticsServiceArn: 'testStatisticsService', + log, + }; + + await enhanceBacklinksWithFixes(siteId, brokenBacklinks, sitemapUrls, config); + + expect(log.error.calledOnce).to.be.true; + expect(log.error.args[0][0]).to.equal('Error invoking Lambda function testStatisticsService for batch:'); + expect(log.error.args[0][1]).to.be.an('error').that.has.property('message', 'Invocation failed'); }); });