From c11b7855419dff34955de0d57ca24f0aeaeb5710 Mon Sep 17 00:00:00 2001 From: Matthias Rolke Date: Tue, 2 Jul 2024 11:47:12 +0200 Subject: [PATCH] refactor: data pipeline --- .gitignore | 1 + .tmp/.gitkeep | 0 README.md | 2 +- config.js | 37 +----------- package.json | 5 +- scripts/add-github-repo-infos.js | 78 ------------------------ scripts/aggregate.js | 72 ++++++++++++++++++++++ scripts/crawl.sh | 8 +++ scripts/get-commands.js | 9 +-- scripts/get-package-dependencies.js | 53 ++++++++++++++++ scripts/get-packages.js | 45 ++++++++++++++ scripts/get-plugins.js | 93 ----------------------------- scripts/get-repos.js | 40 +++++++++++++ scripts/meta.js | 21 ------- scripts/minify.js | 43 ------------- site/index.js | 2 +- utils.js | 49 +++++++++++++++ 17 files changed, 277 insertions(+), 281 deletions(-) create mode 100644 .tmp/.gitkeep delete mode 100755 scripts/add-github-repo-infos.js create mode 100755 scripts/aggregate.js create mode 100755 scripts/crawl.sh create mode 100755 scripts/get-package-dependencies.js create mode 100755 scripts/get-packages.js delete mode 100755 scripts/get-plugins.js create mode 100755 scripts/get-repos.js delete mode 100755 scripts/meta.js delete mode 100755 scripts/minify.js create mode 100644 utils.js diff --git a/.gitignore b/.gitignore index fddc1c8..92236c2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ node_modules/ /site/data/ +/.tmp/ diff --git a/.tmp/.gitkeep b/.tmp/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/README.md b/README.md index 824470f..29dba59 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,6 @@ This website indexes sf plugins based on npmjs.com and GitHub. ``` yarn install -GITHUB_TOKEN="$(gh auth token)" yarn build +GITHUB_TOKEN="$(gh auth token)" yarn run crawl yarn develop ``` diff --git a/config.js b/config.js index 122fe61..8fb3f52 100644 --- a/config.js +++ b/config.js @@ -20,7 +20,7 @@ export const minPluginFields = [ export const minCommandFields = ["pluginName", "id", "description", "link"]; -const excludeRules = [ +export const excludeRules = [ { shouldExcludePackage: (pkg) => pkg.name === "sfdx-falcon-template", reason: @@ -35,38 +35,3 @@ const excludeRules = [ reason: "the GitHub repo is a fork", }, ]; - -function compareWithUndefined(a, b) { - if (a === b) { - return 0; - } - if (a === undefined) { - return -1; - } - if (b === undefined) { - return 1; - } - return a - b; -} - -export function applyConfig(packages) { - return packages - .filter((pkg) => { - let keep = true; - for (const excludeRule of excludeRules) { - if (excludeRule.shouldExcludePackage(pkg)) { - console.error( - `ignoring package "${pkg.name}" because of "${excludeRule.reason}"` - ); - keep = false; - break; - } - } - return keep; - }) - .sort( - (a, b) => - -1 * - compareWithUndefined(a?.gitHubStargazersCount, b?.gitHubStargazersCount) - ); -} diff --git a/package.json b/package.json index c79e24e..8bc69ce 100644 --- a/package.json +++ b/package.json @@ -15,8 +15,9 @@ }, "scripts": { "build:vendor": "bash scripts/build.sh", - "build": "./scripts/get-plugins.js && ./scripts/add-github-repo-infos.js && ./scripts/get-commands.js && ./scripts/meta.js && ./scripts/minify.js", - "develop": "serve ./site", + "crawl": "./scripts/crawl.sh", + "build": "./scripts/aggregate.js", + "develop": "yarn run build && serve ./site", "start": "serve ./site" } } diff --git a/scripts/add-github-repo-infos.js b/scripts/add-github-repo-infos.js deleted file mode 100755 index 21ea798..0000000 --- a/scripts/add-github-repo-infos.js +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env node - -import { mkdir, readFile, writeFile } from "node:fs/promises"; -import { join } from "node:path"; -import pLimit from "p-limit"; -import { applyConfig } from "../config.js"; - -function getGitHubSlug(pkg) { - if (pkg.gitHubLink === undefined) { - return undefined; - } - try { - const url = new URL(pkg.gitHubLink); - if (url.hostname === "github.com") { - const slug = url.pathname.replace(/^\//, "").replace(/\.git$/, ""); - return slug; - } - } catch (_) { - return undefined; - } -} - -async function getRepoInfo(slug) { - const res = await fetch(`https://api.github.com/repos/${slug}`, { - headers: { - Accept: "application/vnd.github+json", - Authorization: `Bearer ${process.env.GITHUB_TOKEN}`, - }, - }); - const json = await res.json(); - return { - gitHubSlug: slug, - gitHubStargazersCount: json.stargazers_count, - gitHubArchived: json.archived, - gitHubFork: json.fork, - }; -} - -async function main() { - if (!process.env.GITHUB_TOKEN) { - throw new Error("Environment variable GITHUB_TOKEN is mandatory."); - } - await mkdir(join("site", "data"), { recursive: true }); - const packages = JSON.parse( - await readFile(join("site", "data", "packages-with-dependencies.json")) - ); - - const gitHubSlugs = packages.map((pkg) => getGitHubSlug(pkg)).filter(Boolean); - const limit = pLimit(10); - const repos = await Promise.all( - gitHubSlugs.map((slug) => limit(() => getRepoInfo(slug))) - ); - - await writeFile( - join("site", "data", "repos.json"), - JSON.stringify(repos), - "utf8" - ); - - const packagesWithDependenciesAndRepos = packages.map((pkg) => { - return { - ...pkg, - ...repos.find((repo) => repo.gitHubSlug === getGitHubSlug(pkg)), - }; - }); - - const result = applyConfig(packagesWithDependenciesAndRepos); - await writeFile( - join("site", "data", "packages-with-dependencies-and-github.json"), - JSON.stringify(result), - "utf8" - ); -} - -main().catch((e) => { - console.error(e); - process.exitCode = 1; -}); diff --git a/scripts/aggregate.js b/scripts/aggregate.js new file mode 100755 index 0000000..7e496ef --- /dev/null +++ b/scripts/aggregate.js @@ -0,0 +1,72 @@ +#!/usr/bin/env node + +import { mkdir, readFile, writeFile } from "node:fs/promises"; +import { join } from "node:path"; +import { excludeRules, minCommandFields, minPluginFields } from "../config.js"; +import { applyConfig, getGitHubSlug } from "../utils.js"; + +function pick(arr, fields) { + return arr.map((obj) => + Object.fromEntries( + Object.entries(obj).filter(([key, _]) => fields.includes(key)) + ) + ); +} + +async function buildPlugins() { + const packages = JSON.parse(await readFile(join(".tmp", "packages.json"))); + const dependencies = JSON.parse( + await readFile(join(".tmp", "package-dependencies.json")) + ); + const repos = JSON.parse(await readFile(join(".tmp", "repos.json"))); + const plugins = applyConfig( + packages.map((pkg) => { + return { + ...pkg, + ...dependencies.find((deps) => deps.name === pkg.name), + ...repos.find((repo) => repo.gitHubSlug === getGitHubSlug(pkg)), + }; + }), + { excludeRules } + ); + const minPlugins = pick(plugins, minPluginFields); + await writeFile( + join("site", "data", "plugins.min.json"), + JSON.stringify(minPlugins), + "utf8" + ); +} + +async function buildCommands() { + const commands = JSON.parse(await readFile(join(".tmp", "commands.json"))); + const minCommands = pick(commands, minCommandFields); + await writeFile( + join("site", "data", "commands.min.json"), + JSON.stringify(minCommands), + "utf8" + ); +} + +async function buildMeta() { + const meta = { + lastUpdated: new Date().toISOString(), + source: "https://github.com/amtrack/sf-plugin-explorer", + }; + await writeFile( + join("site", "data", "meta.json"), + JSON.stringify(meta), + "utf8" + ); +} + +async function main() { + await mkdir(join("site", "data"), { recursive: true }); + await buildPlugins(); + await buildCommands(); + await buildMeta(); +} + +main().catch((e) => { + console.error(e); + process.exitCode = 1; +}); diff --git a/scripts/crawl.sh b/scripts/crawl.sh new file mode 100755 index 0000000..f1b1dac --- /dev/null +++ b/scripts/crawl.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -eo pipefail + +./scripts/get-packages.js +./scripts/get-package-dependencies.js +./scripts/get-repos.js +./scripts/get-commands.js diff --git a/scripts/get-commands.js b/scripts/get-commands.js index 0e5b8eb..fe02bd0 100755 --- a/scripts/get-commands.js +++ b/scripts/get-commands.js @@ -1,6 +1,6 @@ #!/usr/bin/env node -import { mkdir, readFile, writeFile } from "node:fs/promises"; +import { readFile, writeFile } from "node:fs/promises"; import { join } from "node:path"; import pLimit from "p-limit"; @@ -45,15 +45,12 @@ async function getCommands(plugins) { } async function main() { - await mkdir(join("site", "data"), { recursive: true }); const packageResults = JSON.parse( - await readFile( - join("site", "data", "packages-with-dependencies-and-github.json") - ) + await readFile(join(".tmp", "packages.json")) ); const commands = await getCommands(packageResults); await writeFile( - join("site", "data", "commands.json"), + join(".tmp", "commands.json"), JSON.stringify(commands), "utf8" ); diff --git a/scripts/get-package-dependencies.js b/scripts/get-package-dependencies.js new file mode 100755 index 0000000..94b83c2 --- /dev/null +++ b/scripts/get-package-dependencies.js @@ -0,0 +1,53 @@ +#!/usr/bin/env node + +import { readFile, writeFile } from "node:fs/promises"; +import { join } from "node:path"; +import pLimit from "p-limit"; + +async function getPackageDependencies(packageNames) { + const limit = pLimit(50); + const promises = packageNames.map((packageName) => + limit(async () => { + const res = await fetch( + `https://registry.npmjs.org/${packageName}/latest` + ); + return await res.json(); + }) + ); + const packages = await Promise.all(promises); + return packages.map((pkg) => { + return { + name: pkg.name, + dependenciesCount: pkg.dependencies + ? Object.keys(pkg.dependencies).length + : 0, + pluginLibrary: getPluginLibrary(pkg), + }; + }); +} + +function getPluginLibrary(pkg) { + if (pkg.dependencies?.["@salesforce/sf-plugins-core"]) { + return `@salesforce/sf-plugins-core@${pkg.dependencies?.["@salesforce/sf-plugins-core"]}`; + } + if (pkg.dependencies?.["@salesforce/command"]) { + return `@salesforce/command@${pkg.dependencies?.["@salesforce/command"]}`; + } + return "unknown"; +} + +async function main() { + const packages = JSON.parse(await readFile(join(".tmp", "packages.json"))); + const npmPackageNames = packages.map((pkg) => pkg.name); + const dependencies = await getPackageDependencies(npmPackageNames); + await writeFile( + join(".tmp", "package-dependencies.json"), + JSON.stringify(dependencies), + "utf8" + ); +} + +main().catch((e) => { + console.error(e); + process.exitCode = 1; +}); diff --git a/scripts/get-packages.js b/scripts/get-packages.js new file mode 100755 index 0000000..cda42b8 --- /dev/null +++ b/scripts/get-packages.js @@ -0,0 +1,45 @@ +#!/usr/bin/env node + +import { writeFile } from "node:fs/promises"; +import { join } from "node:path"; +import { npmSearchQuery } from "../config.js"; + +async function searchNpmPackages(query, results = [], size = 250, page = 0) { + const from = size * page; + const res = await fetch( + `https://registry.npmjs.org/-/v1/search${query}&size=${size}&from=${from}` + ); + const data = await res.json(); + results.push( + ...data.objects.map((object) => { + return { + name: object.package.name, + version: object.package.version, + date: object.package.date, + description: object.package.description, + authorName: object.package.author?.name, + npmLink: object.package.links.npm, + gitHubLink: object.package.links.repository, + npmScoreFinal: object.score.final, + }; + }) + ); + if (results.length < data.total) { + return await searchNpmPackages(query, results, size, page + 1); + } + return results; +} + +async function main() { + const packages = await searchNpmPackages(npmSearchQuery); + await writeFile( + join(".tmp", "packages.json"), + JSON.stringify(packages), + "utf8" + ); +} + +main().catch((e) => { + console.error(e); + process.exitCode = 1; +}); diff --git a/scripts/get-plugins.js b/scripts/get-plugins.js deleted file mode 100755 index ab9fd9d..0000000 --- a/scripts/get-plugins.js +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env node - -import { mkdir, writeFile } from "node:fs/promises"; -import { join } from "node:path"; -import pLimit from "p-limit"; -import { applyConfig, npmSearchQuery } from "../config.js"; - -async function searchNpmPackages(query, results = [], size = 250, page = 0) { - const from = size * page; - const res = await fetch( - `https://registry.npmjs.org/-/v1/search${query}&size=${size}&from=${from}` - ); - const data = await res.json(); - results.push( - ...data.objects.map((object) => { - return { - name: object.package.name, - version: object.package.version, - date: object.package.date, - description: object.package.description, - authorName: object.package.author?.name, - npmLink: object.package.links.npm, - gitHubLink: object.package.links.repository, - npmScoreFinal: object.score.final, - }; - }) - ); - if (results.length < data.total) { - return await searchNpmPackages(query, results, size, page + 1); - } - return results; -} - -async function getPackageDependencies(packageNames) { - const limit = pLimit(50); - const promises = packageNames.map((packageName) => - limit(async () => { - const res = await fetch( - `https://registry.npmjs.org/${packageName}/latest` - ); - return await res.json(); - }) - ); - const packages = await Promise.all(promises); - return packages.map((pkg) => { - return { - name: pkg.name, - dependenciesCount: pkg.dependencies - ? Object.keys(pkg.dependencies).length - : 0, - pluginLibrary: getPluginLibrary(pkg), - }; - }); -} - -function getPluginLibrary(pkg) { - if (pkg.dependencies?.["@salesforce/sf-plugins-core"]) { - return `@salesforce/sf-plugins-core@${pkg.dependencies?.["@salesforce/sf-plugins-core"]}`; - } - if (pkg.dependencies?.["@salesforce/command"]) { - return `@salesforce/command@${pkg.dependencies?.["@salesforce/command"]}`; - } - return "unknown"; -} - -async function main() { - await mkdir(join("site", "data"), { recursive: true }); - const npmSearchResult = await searchNpmPackages(npmSearchQuery); - await writeFile( - join("site", "data", "npm-search-result.json"), - JSON.stringify(npmSearchResult), - "utf8" - ); - const npmPackageNames = npmSearchResult.map((pkg) => pkg.name); - const packageDependencies = await getPackageDependencies(npmPackageNames); - const packagesWithDependencies = npmSearchResult.map((pkg) => { - return { - ...pkg, - ...packageDependencies.find((deps) => deps.name === pkg.name), - }; - }); - const result = applyConfig(packagesWithDependencies); - await writeFile( - join("site", "data", "packages-with-dependencies.json"), - JSON.stringify(result), - "utf8" - ); -} - -main().catch((e) => { - console.error(e); - process.exitCode = 1; -}); diff --git a/scripts/get-repos.js b/scripts/get-repos.js new file mode 100755 index 0000000..82a807c --- /dev/null +++ b/scripts/get-repos.js @@ -0,0 +1,40 @@ +#!/usr/bin/env node + +import { readFile, writeFile } from "node:fs/promises"; +import { join } from "node:path"; +import pLimit from "p-limit"; +import { getGitHubSlug } from "../utils.js"; + +async function getRepoInfo(slug) { + const res = await fetch(`https://api.github.com/repos/${slug}`, { + headers: { + Accept: "application/vnd.github+json", + Authorization: `Bearer ${process.env.GITHUB_TOKEN}`, + }, + }); + const json = await res.json(); + return { + gitHubSlug: slug, + gitHubStargazersCount: json.stargazers_count, + gitHubArchived: json.archived, + gitHubFork: json.fork, + }; +} + +async function main() { + if (!process.env.GITHUB_TOKEN) { + throw new Error("Environment variable GITHUB_TOKEN is mandatory."); + } + const plugins = JSON.parse(await readFile(join(".tmp", "packages.json"))); + const gitHubSlugs = plugins.map((pkg) => getGitHubSlug(pkg)).filter(Boolean); + const limit = pLimit(10); + const repos = await Promise.all( + gitHubSlugs.map((slug) => limit(() => getRepoInfo(slug))) + ); + await writeFile(join(".tmp", "repos.json"), JSON.stringify(repos), "utf8"); +} + +main().catch((e) => { + console.error(e); + process.exitCode = 1; +}); diff --git a/scripts/meta.js b/scripts/meta.js deleted file mode 100755 index f80137b..0000000 --- a/scripts/meta.js +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env node - -import { writeFile } from "node:fs/promises"; -import { join } from "node:path"; - -async function main() { - const data = { - lastUpdated: new Date().toISOString(), - source: "https://github.com/amtrack/sf-plugin-explorer", - }; - await writeFile( - join("site", "data", "meta.json"), - JSON.stringify(data), - "utf8" - ); -} - -main().catch((e) => { - console.error(e); - process.exitCode = 1; -}); diff --git a/scripts/minify.js b/scripts/minify.js deleted file mode 100755 index 8ff6507..0000000 --- a/scripts/minify.js +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env node - -import { mkdir, readFile, writeFile } from "node:fs/promises"; -import { join } from "node:path"; -import { minPluginFields, minCommandFields } from "../config.js"; - -function pick(arr, fields) { - return arr.map((obj) => - Object.fromEntries( - Object.entries(obj).filter(([key, _]) => fields.includes(key)) - ) - ); -} - -async function main() { - const packages = JSON.parse( - await readFile( - join("site", "data", "packages-with-dependencies-and-github.json") - ) - ); - const minPackages = pick(packages, minPluginFields); - const commands = JSON.parse( - await readFile(join("site", "data", "commands.json")) - ); - const minCommands = pick(commands, minCommandFields); - - await mkdir(join("site", "data"), { recursive: true }); - await writeFile( - join("site", "data", "packages.min.json"), - JSON.stringify(minPackages), - "utf8" - ); - await writeFile( - join("site", "data", "commands.min.json"), - JSON.stringify(minCommands), - "utf8" - ); -} - -main().catch((e) => { - console.error(e); - process.exitCode = 1; -}); diff --git a/site/index.js b/site/index.js index 84bdf55..3de94de 100644 --- a/site/index.js +++ b/site/index.js @@ -100,7 +100,7 @@ const pluginsGrid = new Grid({ }, ], server: { - url: "data/packages.min.json", + url: "data/plugins.min.json", then: (data) => data.map((pkg) => [ pkg.name, diff --git a/utils.js b/utils.js new file mode 100644 index 0000000..7bb4137 --- /dev/null +++ b/utils.js @@ -0,0 +1,49 @@ +export function compareWithUndefined(a, b) { + if (a === b) { + return 0; + } + if (a === undefined) { + return -1; + } + if (b === undefined) { + return 1; + } + return a - b; +} + +export function applyConfig(packages, config) { + return packages + .filter((pkg) => { + let keep = true; + for (const excludeRule of config.excludeRules) { + if (excludeRule.shouldExcludePackage(pkg)) { + console.error( + `ignoring package "${pkg.name}" because of "${excludeRule.reason}"` + ); + keep = false; + break; + } + } + return keep; + }) + .sort( + (a, b) => + -1 * + compareWithUndefined(a?.gitHubStargazersCount, b?.gitHubStargazersCount) + ); +} + +export function getGitHubSlug(pkg) { + if (pkg.gitHubLink === undefined) { + return undefined; + } + try { + const url = new URL(pkg.gitHubLink); + if (url.hostname === "github.com") { + const slug = url.pathname.replace(/^\//, "").replace(/\.git$/, ""); + return slug; + } + } catch (_) { + return undefined; + } +}