All files / js encode_corpus.mjs

0% Statements 0/62
0% Branches 0/1
0% Functions 0/1
0% Lines 0/62

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63                                                                                                                             
#!/usr/bin/env node
// Build script: reads mailcorpus_plain.js, XOR-encodes, writes mailcorpus.js
// Usage: node js/encode_corpus.mjs

import vm from 'vm';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

// Directory that holds this script; input and output files live next to it.
const __dir = path.dirname(fileURLToPath(import.meta.url));

// Load the plaintext corpus module as text.
const plainPath = path.join(__dir, 'mailcorpus_plain.js');
const plainSource = fs.readFileSync(plainPath, 'utf8');

// The corpus is written as an ES module. Drop the `export` keyword from its
// `export const` declarations so the text can be run as a plain script, then
// append one assignment per binding that copies it onto `ctx`.
const ctx = {};
const corpusBindings = [
  'SEED_MESSAGES',
  'CORPUS',
  'REPLY_RULES',
  'SOCIAL_ROUTING',
  'SOCIAL_TEMPLATES',
  'HOME_FILES',
  'TALK_CORPUS',
];
const captureLines = corpusBindings.map((name) => `ctx.${name} = ${name};`);
const scriptBody = plainSource.replaceAll('export const ', 'const ');
const script = `${scriptBody}\n${captureLines.join('\n')}\n`;

// `ctx` is exposed to the sandbox under its own name so the appended
// assignments can write into it.
vm.runInNewContext(script, { ctx });

// Serialize the captured corpus, then XOR every byte and base64 the result.
//
// JSON has no RegExp type, so each regular expression is swapped for a
// { __re, __flags } record that survives the JSON round-trip.
// Detection is by shape (a `test` method plus a string `source`) rather than
// `instanceof`: the corpus was evaluated through vm.runInNewContext, which
// gives it its own RegExp class that fails the host-realm instanceof check.
function looksLikeRegExp(value) {
  if (value == null) {
    return false;
  }
  return typeof value.test === 'function' && typeof value.source === 'string';
}

const json = JSON.stringify(ctx, (_key, value) => {
  if (looksLikeRegExp(value)) {
    return { __re: value.source, __flags: value.flags };
  }
  return value;
});

// Single-byte XOR key; the generated decoder hard-codes the same value.
const XOR_KEY = 0x42;
const xored = Buffer.from(json);
for (let i = 0; i < xored.length; i += 1) {
  xored[i] ^= XOR_KEY;
}
const blob = xored.toString('base64');

// Write encoded mailcorpus.js
//
// `blob` is base64 of the JSON's UTF-8 bytes, each XOR-ed with 0x42. The
// generated decoder `_d` therefore:
//   1. base64-decodes with atob (one char per byte),
//   2. undoes the XOR into a Uint8Array,
//   3. decodes those bytes as UTF-8 with TextDecoder,
//   4. JSON-parses, reviving { __re, __flags } records back into RegExp objects.
// Step 3 matters: turning each byte directly into a char code would split every
// non-ASCII character of the corpus into several Latin-1 characters (mojibake).
const out = `// js/mailcorpus.js -- XOR-encoded email corpus.
// Plaintext source: mailcorpus_plain.js
// To regenerate: node js/encode_corpus.mjs
const _k = 0x42;
function _d(s) {
  const b = Uint8Array.from(atob(s), c => c.charCodeAt(0) ^ _k);
  return JSON.parse(
    new TextDecoder().decode(b),
    (_, v) => (v && v.__re !== undefined) ? new RegExp(v.__re, v.__flags) : v
  );
}
const _x = _d('${blob}');
export const SEED_MESSAGES = _x.SEED_MESSAGES;
export const CORPUS = _x.CORPUS;
export const REPLY_RULES = _x.REPLY_RULES;
export const SOCIAL_ROUTING = _x.SOCIAL_ROUTING;
export const SOCIAL_TEMPLATES = _x.SOCIAL_TEMPLATES;
export const HOME_FILES = _x.HOME_FILES;
export const TALK_CORPUS = _x.TALK_CORPUS;
`;

// The generated module is written next to this script.
fs.writeFileSync(path.join(__dir, 'mailcorpus.js'), out);
console.log(`Done. Encoded ${json.length} chars → ${blob.length} base64 chars.`);