main.js
const Apify = require('apify');
const querystring = require('querystring');
const { parseInput, proxyConfiguration } = require('./src/utils');
const { BASE_URL, PROJECTS_PER_PAGE } = require('./src/consts');
const { handleStart, handlePagination } = require('./src/routes');
const { log } = Apify.utils;
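// Entry point: builds the search URL from the actor input, seeds the request queue,
// and runs a BasicCrawler that routes each request to a handler by its label.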
Apify.main(async () => {
    const requestQueue = await Apify.openRequestQueue();
    const input = await Apify.getInput();

    // GETTING PARAMS FROM THE INPUT
    const queryParameters = await parseInput(input);
    let { maxResults } = input;
    const { proxyConfig } = input;
    const proxy = await proxyConfiguration({ proxyConfig });
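    // Default cap when maxResults is not provided: 200 pages worth of projects.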
    if (!maxResults) maxResults = 200 * PROJECTS_PER_PAGE;

    const params = querystring.stringify(queryParameters);
    const firstUrl = `${BASE_URL}${params}`;

    // ADD THE FIRST PAGE TO THE QUEUE TO GET THE TOKEN
    await requestQueue.addRequest({
        url: firstUrl,
        userData: {
            page: 1,
            label: 'START',
            searchResults: [],
            itemsToSave: [],
            savedItems: 0,
            maxResults,
        },
    });

    // CRAWLER
    const crawler = new Apify.BasicCrawler({
        requestQueue,
        maxConcurrency: 1,
        useSessionPool: true,
        maxRequestRetries: 10,
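        // Route each request to its handler based on the label stored in userData.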
        handleRequestFunction: async (context) => {
            const { url, userData: { label } } = context.request;
            log.info('Page opened.', { label, url });
            // eslint-disable-next-line default-case
            switch (label) {
                case 'START':
                    return handleStart(context, queryParameters, requestQueue, proxy, maxResults);
                case 'PAGINATION-LIST':
                    return handlePagination(context, requestQueue, proxy);
            }
        },
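        // Called only after a request has exhausted all of its retries.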
        handleFailedRequestFunction: async ({ request, error }) => {
            log.error(`Request ${request.url} failed repeatedly, running out of retries (Error: ${error.message})`);
        },
    });

    log.info('Starting crawler');
    await crawler.run();
    log.info('Crawler finished');
});