Skip to content

Commit

Permalink
Meta: Optimize PR warning insertion (#24)
Browse files Browse the repository at this point in the history
* Use more specific error types.
* For compatibility, switch from `import(jsonFileName)` to
  `JSON.parse(readFile(jsonFileName))`.
* For performance, rather than parsing HTML into a complete AST, use a
  streaming rewriter (and switch to passthrough after prepending to
  `<body>`).
  • Loading branch information
gibson042 authored Jan 1, 2025
1 parent 6d372ad commit 04b49a7
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 44 deletions.
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
"devDependencies": {
"@tc39/ecma262-biblio": "^2.1.2775",
"ecmarkup": "^20.0.0",
"jsdom": "^25.0.1"
"jsdom": "^25.0.1",
"parse5-html-rewriting-stream": "^7.0.0",
"tmp": "^0.2.3"
},
"engines": {
"node": ">= 12"
Expand Down
128 changes: 85 additions & 43 deletions scripts/insert_warning.mjs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import fs from 'node:fs';
import pathlib from 'node:path';
import { pipeline } from 'node:stream/promises';
import { parseArgs } from 'node:util';
import { JSDOM, VirtualConsole } from 'jsdom';
import { JSDOM } from 'jsdom';
import { RewritingStream } from 'parse5-html-rewriting-stream';
import tmp from 'tmp';

const { positionals: cliArgs } = parseArgs({
allowPositionals: true,
Expand All @@ -12,7 +15,7 @@ if (cliArgs.length < 3) {
console.error(`Usage: node ${self} <template.html> <data.json> <file.html>...
{{identifier}} substrings in template.html are replaced from data.json, then
the result is inserted at the start of the body element in each file.html.`);
the result is inserted into each file.html.`);
process.exit(64);
}

Expand All @@ -21,58 +24,97 @@ const main = async args => {

// Substitute data into the template.
const template = fs.readFileSync(templateFile, 'utf8');
const { default: data } =
await import(pathlib.resolve(dataFile), { with: { type: 'json' } });
const data = JSON.parse(fs.readFileSync(dataFile, 'utf8'));
const formatErrors = [];
const placeholderPatt = /[{][{](?:([\p{ID_Start}$_][\p{ID_Continue}$]*)[}][}]|.*?(?:[}][}]|(?=[{][{])|$))/gsu;
const placeholderPatt =
/[{][{](?:([\p{ID_Start}$_][\p{ID_Continue}$]*)[}][}]|.*?(?:[}][}]|(?=[{][{])|$))/gsu;
const resolved = template.replaceAll(placeholderPatt, (m, name, i) => {
if (!name) {
const trunc = m.replace(/([^\n]{29}(?!$)|[^\n]{,29}(?=\n)).*/s, '$1…');
formatErrors.push(Error(`bad placeholder at index ${i}: ${trunc}`));
formatErrors.push(SyntaxError(`bad placeholder at index ${i}: ${trunc}`));
} else if (!Object.hasOwn(data, name)) {
formatErrors.push(Error(`no data for ${m}`));
formatErrors.push(ReferenceError(`no data for ${m}`));
}
return data[name];
});
if (formatErrors.length > 0) throw AggregateError(formatErrors);

// Parse the template into DOM nodes for appending to page <head>s (metadata
// such as <style> elements) or prepending to page <body>s (everything else).
// https://html.spec.whatwg.org/multipage/dom.html#metadata-content-2
// https://html.spec.whatwg.org/multipage/semantics.html#allowed-in-the-body
// https://html.spec.whatwg.org/multipage/links.html#body-ok
const bodyOkRelPatt =
/^(?:dns-prefetch|modulepreload|pingback|preconnect|prefetch|preload|stylesheet)$/i;
const forceHead = node =>
node.matches?.('base, style, title, meta:not([itemprop])') ||
(node.matches?.('link:not([itemprop])') &&
[...node.relList].some(rel => !rel.match(bodyOkRelPatt)));
const insertDom = JSDOM.fragment(resolved);
// Node.js v22+:
// const { headInserts, bodyInserts } = Object.groupBy(
// insertDom.childNodes,
// node => (forceHead(node) ? 'headInserts' : 'bodyInserts'),
// );
const headInserts = [], bodyInserts = [];
for (const node of insertDom.childNodes) {
if (forceHead(node)) headInserts.push(node);
else bodyInserts.push(node);
}
// Parse the template into DOM nodes for appending to page head (metadata such
// as <style> elements) or prepending to page body (everything else).
const jsdomOpts = { contentType: 'text/html; charset=utf-8' };
const { document } = new JSDOM(resolved, jsdomOpts).window;
const headHTML = document.head.innerHTML;
const bodyHTML = document.body.innerHTML;

// Perform the insertions, suppressing JSDOM warnings from e.g. unsupported
// CSS features.
const virtualConsole = new VirtualConsole();
virtualConsole.on('error', () => {});
const jsdomOpts = { contentType: 'text/html; charset=utf-8', virtualConsole };
const getInserts =
files.length > 1 ? nodes => nodes.map(n => n.cloneNode(true)) : x => x;
const results = await Promise.allSettled(files.map(async file => {
let dom = await JSDOM.fromFile(file, jsdomOpts);
const { head, body } = dom.window.document;
if (headInserts.length > 0) head.append(...getInserts(headInserts));
if (bodyInserts.length > 0) body.prepend(...getInserts(bodyInserts));
fs.writeFileSync(file, dom.serialize(), 'utf8');
}));
// Perform the insertions.
const work = files.map(async file => {
await null;
const { name: tmpName, fd, removeCallback } = tmp.fileSync({
tmpdir: pathlib.dirname(file),
prefix: pathlib.basename(file),
postfix: '.tmp',
detachDescriptor: true,
});
try {
// Make a pipeline: fileReader -> inserter -> finisher -> fileWriter
const fileReader = fs.createReadStream(file, 'utf8');
const fileWriter = fs.createWriteStream('', { fd, flush: true });

// Insert headHTML at the end of a possibly implied head, and bodyHTML at
// the beginning of a possibly implied body.
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhtml
let mode = 'before html'; // | 'before head' | 'in head' | 'after head' | '$DONE'
const stayInHead = new Set([
...['base', 'basefont', 'bgsound', 'link', 'meta', 'title'],
...['noscript', 'noframes', 'style', 'script', 'template'],
'head',
]);
const inserter = new RewritingStream();
const onEndTag = function (tag) {
if (tag.tagName === 'head') {
this.emitRaw(headHTML);
mode = 'after head';
}
this.emitEndTag(tag);
};
const onStartTag = function (tag) {
const preserve = () => this.emitStartTag(tag);
if (mode === 'before html' && tag.tagName === 'html') {
mode = 'before head';
} else if (mode !== 'after head' && stayInHead.has(tag.tagName)) {
mode = 'in head';
} else {
if (mode !== 'after head') this.emitRaw(headHTML);
// Emit either `${bodyTag}${bodyHTML}` or `${bodyHTML}${otherTag}`.
const emits = [preserve, () => this.emitRaw(bodyHTML)];
if (tag.tagName !== 'body') emits.reverse();
for (const emit of emits) emit();
mode = '$DONE';
this.removeListener('endTag', onEndTag);
this.removeListener('startTag', onStartTag);
return;
}
preserve();
};
inserter.on('endTag', onEndTag).on('startTag', onStartTag);

// Ensure headHTML/bodyHTML insertion before EOF.
const finisher = async function* (source) {
for await (const chunk of source) yield chunk;
if (mode === '$DONE') return;
if (mode !== 'after head') yield headHTML;
yield bodyHTML;
};

await pipeline(fileReader, inserter, finisher, fileWriter);

// Now that the temp file is complete, overwrite the source file.
fs.renameSync(tmpName, file);
} finally {
removeCallback();
}
});
const results = await Promise.allSettled(work);

const failures = results.filter(result => result.status !== 'fulfilled');
if (failures.length > 0) throw AggregateError(failures.map(r => r.reason));
Expand Down

0 comments on commit 04b49a7

Please sign in to comment.