diff --git a/packages/compiler/scripts/parseLiquid.js b/packages/compiler/scripts/parseLiquid.js index 2101dfc6..cc5879e2 100644 --- a/packages/compiler/scripts/parseLiquid.js +++ b/packages/compiler/scripts/parseLiquid.js @@ -19,7 +19,7 @@ import {Bench} from 'tinybench'; import {Grammar} from 'ohm-js'; import {compileGrammars} from '../src/api.ts'; import {unparse} from '../test/_helpers.js'; -import {createReader, CstNodeType} from '../../runtime/src/cstReader.ts'; +import {CstNodeType} from '../../runtime/src/cstView.ts'; const __dirname = dirname(fileURLToPath(import.meta.url)); const datadir = join(__dirname, '../test/data'); @@ -35,7 +35,7 @@ const positionalArgs = process.argv.slice(2).filter(a => !a.startsWith('--')); // https://matklad.github.io/2024/03/22/basic-things.html const smallSize = flags.has('--small-size'); const includeUnparse = flags.has('--include-unparse'); -const useCstReader = flags.has('--cst-reader'); +const useCstView = flags.has('--cst-view'); // Get pattern from command line arguments const pattern = positionalArgs[0]; @@ -104,26 +104,26 @@ const pattern = positionalArgs[0]; opts ); - // Walk CST using CstReader, collecting terminal text. - function unparseCstReader(matchResult) { - const reader = createReader(matchResult); + // Walk CST using CstView, collecting terminal text. + function unparseCstView(matchResult) { + const cst = matchResult.cstView(); let ans = ''; function walk(handle) { - if (reader.type(handle) === CstNodeType.TERMINAL) { - ans += reader.sourceString(handle); + if (cst.type(handle) === CstNodeType.TERMINAL) { + ans += cst.sourceString(handle); return; } - reader.forEachChild(handle, (child, _leadingSpaces) => { + cst.forEachChild(handle, (child, _leadingSpaces) => { walk(child); }); } - walk(reader.root); + walk(cst.root); return ans; } const wasmLabel = includeUnparse ? 'Wasm parse+unparse' : 'Wasm parse'; bench.add( - useCstReader ? `${wasmLabel} (CstReader)` : wasmLabel, + useCstView ? `${wasmLabel} (CstView)` : wasmLabel, () => { let overriddenDuration = 0; for (const {input} of files) { @@ -140,7 +140,7 @@ const pattern = positionalArgs[0]; peakWasmMemoryBytes, exports.memory.buffer.byteLength ); - return useCstReader ? unparseCstReader(m) : unparse(g); + return useCstView ? unparseCstView(m) : unparse(g); }); if (includeUnparse) overriddenDuration += bench.now() - start; } diff --git a/packages/compiler/src/parseGrammars.ts b/packages/compiler/src/parseGrammars.ts index 092c4c6f..35211bb6 100644 --- a/packages/compiler/src/parseGrammars.ts +++ b/packages/compiler/src/parseGrammars.ts @@ -3,7 +3,6 @@ // building and validation. import {Grammar} from 'ohm-js'; -import type {CstNode} from 'ohm-js'; import {Grammar as ParsedGrammar} from 'ohm-js-legacy/src/Grammar.js'; @@ -45,7 +44,7 @@ export function grammars(source: string): Record { if (result.failed()) { throw new Error(`Failed to parse grammar:\n${result.message}`); } - buildGrammars(result.getCstRoot() as CstNode, ns, source); + buildGrammars(result.cstView().rootNode(), ns, source); }); return ns; } diff --git a/packages/compiler/test/_test-v24.js b/packages/compiler/test/_test-v24.js index c4ce5b56..15cd2d78 100644 --- a/packages/compiler/test/_test-v24.js +++ b/packages/compiler/test/_test-v24.js @@ -8,12 +8,12 @@ test('nested matching with `using`', async t => { { using outer = g.match('abc'); t.assert(outer.succeeded()); - const outerCst = outer.getCstRoot(); + const outerCst = outer.cstView().rootNode(); { using inner = g.match('1234'); t.assert(inner.succeeded()); - t.is(inner.getCstRoot().sourceString, '1234'); + t.is(inner.cstView().rootNode().sourceString, '1234'); } // Outer CST is still valid after inner is disposed. diff --git a/packages/compiler/test/test-cst-compat.js b/packages/compiler/test/test-cst-compat.js index 50280178..3d2b0b42 100644 --- a/packages/compiler/test/test-cst-compat.js +++ b/packages/compiler/test/test-cst-compat.js @@ -57,7 +57,7 @@ test.failing('compat: arithmetic', async t => { for (const input of ['1', '10 + 20', '1+276*(3+4)', '(10+ 999)- 1 +222']) { matchWithInput(wasmG, input); const wasmShape = serializeCst(wasmG._getCstRoot()); - const v18Shape = serializeCst(v18G.match(input).getCstRoot()); + const v18Shape = serializeCst(v18G.match(input).cstView().rootNode()); t.deepEqual(v18Shape, wasmShape); } }); @@ -76,7 +76,7 @@ test.failing('compat: liquid-html', async t => { for (const input of inputs) { matchWithInput(wasmG, input); const wasmShape = serializeCst(wasmG._getCstRoot()); - const v18Shape = serializeCst(v18G.match(input).getCstRoot()); + const v18Shape = serializeCst(v18G.match(input).cstView().rootNode()); t.deepEqual(v18Shape, wasmShape); } }); diff --git a/packages/compiler/test/test-cstReader.js b/packages/compiler/test/test-cstView.js similarity index 64% rename from packages/compiler/test/test-cstReader.js rename to packages/compiler/test/test-cstView.js index 0b97acdf..29df4f39 100644 --- a/packages/compiler/test/test-cstReader.js +++ b/packages/compiler/test/test-cstView.js @@ -1,54 +1,48 @@ import test from 'ava'; import * as fc from 'fast-check'; -import {createReader, CstNodeType} from '../../runtime/src/cstReader.ts'; -import {createHandle} from '../../runtime/src/cstReaderShared.ts'; +import {CstNodeType} from '../../runtime/src/cstView.ts'; +import {createHandle} from '../../runtime/src/cstCommon.ts'; import {compileAndLoad, matchWithInput} from './_helpers.js'; -const childrenOf = (reader, handle) => { - const arr = []; - reader.forEachChild(handle, c => arr.push(c)); - return arr; -}; - test('root node basics', async t => { const g = await compileAndLoad('G { start = "ab" "cd" }'); t.is(matchWithInput(g, 'abcd'), 1); g.match('abcd').use(mr => { - const reader = createReader(mr); - t.is(reader.type(reader.root), CstNodeType.NONTERMINAL); - t.is(reader.matchLength(reader.root), 4); - t.is(reader.ctorName(reader.root), 'start'); - t.is(reader.childCount(reader.root), 2); - t.is(reader.input, 'abcd'); - t.is(reader.sourceString(reader.root), 'abcd'); - t.is(reader.startIdx(reader.root), 0); + const cst = mr.cstView(); + t.is(cst.type(cst.root), CstNodeType.NONTERMINAL); + t.is(cst.matchLength(cst.root), 4); + t.is(cst.ctorName(cst.root), 'start'); + t.is(cst.childCount(cst.root), 2); + t.is(cst.input, 'abcd'); + t.is(cst.sourceString(cst.root), 'abcd'); + t.is(cst.startIdx(cst.root), 0); }); }); test('terminal children', async t => { const g = await compileAndLoad('G { start = "ab" "cd" }'); g.match('abcd').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); const children = []; - reader.forEachChild(reader.root, (child, leadingSpaces, index) => { - children.push({child, leadingSpaces, startIdx: reader.startIdx(child), index}); + cst.forEachChild(cst.root, (child, leadingSpaces, index) => { + children.push({child, leadingSpaces, startIdx: cst.startIdx(child), index}); }); t.is(children.length, 2); // First child: "ab" - t.is(reader.type(children[0].child), CstNodeType.TERMINAL); - t.is(reader.matchLength(children[0].child), 2); - t.is(reader.ctorName(children[0].child), '_terminal'); - t.is(reader.sourceString(children[0].child), 'ab'); + t.is(cst.type(children[0].child), CstNodeType.TERMINAL); + t.is(cst.matchLength(children[0].child), 2); + t.is(cst.ctorName(children[0].child), '_terminal'); + t.is(cst.sourceString(children[0].child), 'ab'); t.is(children[0].leadingSpaces, 0); t.is(children[0].index, 0); // Second child: "cd" - t.is(reader.type(children[1].child), CstNodeType.TERMINAL); - t.is(reader.matchLength(children[1].child), 2); - t.is(reader.sourceString(children[1].child), 'cd'); + t.is(cst.type(children[1].child), CstNodeType.TERMINAL); + t.is(cst.matchLength(children[1].child), 2); + t.is(cst.sourceString(children[1].child), 'cd'); t.is(children[1].index, 1); }); }); @@ -56,36 +50,36 @@ test('terminal children', async t => { test('nonterminal children', async t => { const g = await compileAndLoad('G { start = a b\na = "x"\nb = "y" }'); g.match('xy').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); const children = []; - reader.forEachChild(reader.root, (child, ls, i) => { - children.push({child, ls, startIdx: reader.startIdx(child), i}); + cst.forEachChild(cst.root, (child, ls, i) => { + children.push({child, ls, startIdx: cst.startIdx(child), i}); }); t.is(children.length, 2); - t.is(reader.ctorName(children[0].child), 'a'); - t.is(reader.ctorName(children[1].child), 'b'); - t.is(reader.type(children[0].child), CstNodeType.NONTERMINAL); - t.is(reader.type(children[1].child), CstNodeType.NONTERMINAL); - t.is(reader.sourceString(children[0].child), 'x'); - t.is(reader.sourceString(children[1].child), 'y'); + t.is(cst.ctorName(children[0].child), 'a'); + t.is(cst.ctorName(children[1].child), 'b'); + t.is(cst.type(children[0].child), CstNodeType.NONTERMINAL); + t.is(cst.type(children[1].child), CstNodeType.NONTERMINAL); + t.is(cst.sourceString(children[0].child), 'x'); + t.is(cst.sourceString(children[1].child), 'y'); }); }); test('iteration (list) node', async t => { const g = await compileAndLoad('G { start = "a"* }'); g.match('aaa').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); let listHandle; - reader.forEachChild(reader.root, child => { + cst.forEachChild(cst.root, child => { listHandle = child; }); - t.is(reader.type(listHandle), CstNodeType.LIST); - t.is(reader.ctorName(listHandle), '_list'); - t.is(reader.childCount(listHandle), 3); + t.is(cst.type(listHandle), CstNodeType.LIST); + t.is(cst.ctorName(listHandle), '_list'); + t.is(cst.childCount(listHandle), 3); const items = []; - reader.forEachChild(listHandle, child => { - items.push(reader.sourceString(child)); + cst.forEachChild(listHandle, child => { + items.push(cst.sourceString(child)); }); t.deepEqual(items, ['a', 'a', 'a']); }); @@ -94,15 +88,15 @@ test('iteration (list) node', async t => { test('iteration with nonterminals', async t => { const g = await compileAndLoad('G { start = letter* }'); g.match('abc').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); let listHandle; - reader.forEachChild(reader.root, child => { + cst.forEachChild(cst.root, child => { listHandle = child; }); - t.is(reader.type(listHandle), CstNodeType.LIST); + t.is(cst.type(listHandle), CstNodeType.LIST); const items = []; - reader.forEachChild(listHandle, child => { - items.push(reader.sourceString(child)); + cst.forEachChild(listHandle, child => { + items.push(cst.sourceString(child)); }); t.is(items.length, 3); t.deepEqual(items, ['a', 'b', 'c']); @@ -112,49 +106,49 @@ test('iteration with nonterminals', async t => { test('optional node: present', async t => { const g = await compileAndLoad('G { start = "a"? }'); g.match('a').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); let opt; - reader.forEachChild(reader.root, child => { + cst.forEachChild(cst.root, child => { opt = child; }); - t.is(reader.type(opt), CstNodeType.OPT); - t.is(reader.ctorName(opt), '_opt'); - t.is(reader.childCount(opt), 1); - t.is(reader.matchLength(opt), 1); + t.is(cst.type(opt), CstNodeType.OPT); + t.is(cst.ctorName(opt), '_opt'); + t.is(cst.childCount(opt), 1); + t.is(cst.matchLength(opt), 1); }); }); test('optional node: absent', async t => { const g = await compileAndLoad('G { start = "a"? }'); g.match('').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); let opt; - reader.forEachChild(reader.root, child => { + cst.forEachChild(cst.root, child => { opt = child; }); - t.is(reader.type(opt), CstNodeType.OPT); - t.is(reader.childCount(opt), 0); - t.is(reader.matchLength(opt), 0); + t.is(cst.type(opt), CstNodeType.OPT); + t.is(cst.childCount(opt), 0); + t.is(cst.matchLength(opt), 0); }); }); test('withChildren, tupleArity, forEachTuple, and isPresent', async t => { const g = await compileAndLoad('G { start = ("a" "b"?)* }'); g.match('abab').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); let list; - reader.forEachChild(reader.root, child => { + cst.forEachChild(cst.root, child => { list = child; }); - t.is(reader.tupleArity(list), 2); + t.is(cst.tupleArity(list), 2); const tuples = []; - reader.forEachTuple(list, (a, b) => { + cst.forEachTuple(list, (a, b) => { tuples.push( - reader.sourceString(a) + - reader.withChildren(b, (_handle, child) => - reader.isPresent(b) ? reader.sourceString(child) : '' + cst.sourceString(a) + + cst.withChildren(b, (_handle, child) => + cst.isPresent(b) ? cst.sourceString(child) : '' ) ); }); @@ -162,16 +156,16 @@ test('withChildren, tupleArity, forEachTuple, and isPresent', async t => { let emptyOpt; g.match('a').use(mr2 => { - const reader2 = createReader(mr2); - reader2.forEachChild(reader2.root, child => { + const cst2 = mr2.cstView(); + cst2.forEachChild(cst2.root, child => { list = child; }); - reader2.forEachTuple(list, (_a, b) => { + cst2.forEachTuple(list, (_a, b) => { emptyOpt = b; }); - t.false(reader2.isPresent(emptyOpt)); + t.false(cst2.isPresent(emptyOpt)); t.is( - reader2.withChildren(emptyOpt, (_handle, child) => + cst2.withChildren(emptyOpt, (_handle, child) => child === undefined ? 'missing' : 'present' ), 'missing' @@ -183,23 +177,23 @@ test('withChildren, tupleArity, forEachTuple, and isPresent', async t => { test('type-specific helpers assert on the wrong handle kind', async t => { const g = await compileAndLoad('G { Start = ("a" "b"?)* }'); g.match('ab').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); let list; - reader.forEachChild(reader.root, child => { + cst.forEachChild(cst.root, child => { list = child; }); let terminal; let opt; - reader.forEachTuple(list, (a, b) => { + cst.forEachTuple(list, (a, b) => { terminal = a; opt = b; }); - t.throws(() => reader.ruleId(list), {message: 'Not a nonterminal'}); - t.throws(() => reader.tupleArity(reader.root), {message: 'Not a list'}); - t.throws(() => reader.isPresent(terminal), {message: 'Not an opt'}); - t.true(reader.isPresent(opt)); + t.throws(() => cst.ruleId(list), {message: 'Not a nonterminal'}); + t.throws(() => cst.tupleArity(cst.root), {message: 'Not a list'}); + t.throws(() => cst.isPresent(terminal), {message: 'Not an opt'}); + t.true(cst.isPresent(opt)); }); }); @@ -208,16 +202,16 @@ test('type-specific helpers assert on the wrong handle kind', async t => { test('unparse: simple terminals', async t => { const g = await compileAndLoad('G { start = "ab" "cd" }'); g.match('abcd').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); let ans = ''; function walk(handle) { - if (reader.type(handle) === CstNodeType.TERMINAL) { - ans += reader.sourceString(handle); + if (cst.type(handle) === CstNodeType.TERMINAL) { + ans += cst.sourceString(handle); return; } - reader.forEachChild(handle, child => walk(child)); + cst.forEachChild(handle, child => walk(child)); } - walk(reader.root); + walk(cst.root); t.is(ans, 'abcd'); }); }); @@ -225,16 +219,16 @@ test('unparse: simple terminals', async t => { test('unparse: with rule application', async t => { const g = await compileAndLoad('G { start = a b\na = "x"\nb = "y" }'); g.match('xy').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); let ans = ''; function walk(handle) { - if (reader.type(handle) === CstNodeType.TERMINAL) { - ans += reader.sourceString(handle); + if (cst.type(handle) === CstNodeType.TERMINAL) { + ans += cst.sourceString(handle); return; } - reader.forEachChild(handle, child => walk(child)); + cst.forEachChild(handle, child => walk(child)); } - walk(reader.root); + walk(cst.root); t.is(ans, 'xy'); }); }); @@ -242,16 +236,16 @@ test('unparse: with rule application', async t => { test('unparse: with nonterminals', async t => { const g = await compileAndLoad('G { start = a b\na = "hello"\nb = "world" }'); g.match('helloworld').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); let ans = ''; function walk(handle) { - if (reader.type(handle) === CstNodeType.TERMINAL) { - ans += reader.sourceString(handle); + if (cst.type(handle) === CstNodeType.TERMINAL) { + ans += cst.sourceString(handle); return; } - reader.forEachChild(handle, child => walk(child)); + cst.forEachChild(handle, child => walk(child)); } - walk(reader.root); + walk(cst.root); t.is(ans, 'helloworld'); }); }); @@ -260,16 +254,16 @@ test('unparse: unicode', async t => { const g = await compileAndLoad('G { start = any* }'); const input = 'Nöö'; g.match(input).use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); let ans = ''; function walk(handle) { - if (reader.type(handle) === CstNodeType.TERMINAL) { - ans += reader.sourceString(handle); + if (cst.type(handle) === CstNodeType.TERMINAL) { + ans += cst.sourceString(handle); return; } - reader.forEachChild(handle, child => walk(child)); + cst.forEachChild(handle, child => walk(child)); } - walk(reader.root); + walk(cst.root); t.is(ans, input); }); }); @@ -279,35 +273,35 @@ test('unparse: unicode', async t => { test('rootLeadingSpacesLen: present', async t => { const g = await compileAndLoad('G { Start = "x" }'); g.match(' x').use(mr => { - const reader = createReader(mr); - t.is(reader.rootLeadingSpacesLen, 2); - t.is(reader.input.slice(0, reader.rootLeadingSpacesLen), ' '); - t.is(reader.startIdx(reader.root), 2); + const cst = mr.cstView(); + t.is(cst.rootLeadingSpacesLen, 2); + t.is(cst.input.slice(0, cst.rootLeadingSpacesLen), ' '); + t.is(cst.startIdx(cst.root), 2); }); }); test('rootLeadingSpacesLen: absent', async t => { const g = await compileAndLoad('G { Start = "x" }'); g.match('x').use(mr => { - const reader = createReader(mr); - t.is(reader.rootLeadingSpacesLen, 0); + const cst = mr.cstView(); + t.is(cst.rootLeadingSpacesLen, 0); }); }); test('child leadingSpaces in syntactic rule', async t => { const g = await compileAndLoad('G { Start = "a" "b" }'); g.match('a b').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); const spacesInfo = []; - reader.forEachChild(reader.root, (child, leadingSpacesLen, index) => { - const childStartIdx = reader.startIdx(child); + cst.forEachChild(cst.root, (child, leadingSpacesLen, index) => { + const childStartIdx = cst.startIdx(child); spacesInfo.push({ index, hasSpaces: leadingSpacesLen > 0, spacesLen: leadingSpacesLen, spacesStr: leadingSpacesLen > 0 - ? reader.input.slice(childStartIdx - leadingSpacesLen, childStartIdx) + ? cst.input.slice(childStartIdx - leadingSpacesLen, childStartIdx) : '', }); }); @@ -336,11 +330,11 @@ const spaceMemoIgnored = test.macro(async (t, twoBody, input = '> xx') => { `); g.match(input).use(mr => { t.true(mr.succeeded()); - const reader = createReader(mr); - const [two] = childrenOf(reader, reader.root); + const cst = mr.cstView(); + const two = cst.withChildren(cst.root, (_h, first) => first); const children = []; - reader.forEachChild(two, (child, leadingSpacesLen) => { - children.push({child, leadingSpacesLen, childStartIdx: reader.startIdx(child)}); + cst.forEachChild(two, (child, leadingSpacesLen) => { + children.push({child, leadingSpacesLen, childStartIdx: cst.startIdx(child)}); }); t.deepEqual( children.map(({leadingSpacesLen}) => leadingSpacesLen), @@ -377,8 +371,8 @@ test( test('ruleId returns a stable rule index for nonterminals', async t => { const g = await compileAndLoad('G { start = a\na = "x" }'); g.match('x').use(mr => { - const reader = createReader(mr); - t.true(reader.ruleId(reader.root) >= 0); + const cst = mr.cstView(); + t.true(cst.ruleId(cst.root) >= 0); }); }); @@ -387,13 +381,13 @@ test('ruleId returns a stable rule index for nonterminals', async t => { test('childCount is 0 for tagged terminals', async t => { const g = await compileAndLoad('G { start = "x" }'); g.match('x').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); let termChild; - reader.forEachChild(reader.root, child => { + cst.forEachChild(cst.root, child => { termChild = child; }); - t.is(reader.type(termChild), CstNodeType.TERMINAL); - t.is(reader.childCount(termChild), 0); + t.is(cst.type(termChild), CstNodeType.TERMINAL); + t.is(cst.childCount(termChild), 0); }); }); @@ -412,18 +406,18 @@ test('createHandle accepts max valid values', t => { t.true(handle > 0); }); -// --- isSyntactic via CstReader --- +// --- isSyntactic via CstView --- test('isSyntactic: true for syntactic rule, false for lexical', async t => { const g = await compileAndLoad('G { Start = inner\ninner = "x" }'); g.match('x').use(mr => { - const reader = createReader(mr); - t.true(reader.isSyntactic(reader.root)); // Start is syntactic + const cst = mr.cstView(); + t.true(cst.isSyntactic(cst.root)); // Start is syntactic let innerHandle; - reader.forEachChild(reader.root, child => { + cst.forEachChild(cst.root, child => { innerHandle = child; }); - t.false(reader.isSyntactic(innerHandle)); // inner is lexical + t.false(cst.isSyntactic(innerHandle)); // inner is lexical }); }); @@ -434,17 +428,17 @@ test('isSyntactic reads compiler-embedded classification', async t => { // directly rather than rederiving from rule names. const g = await compileAndLoad('G { Start = inner\ninner = "x" }'); g.match('x').use(mr => { - const reader = createReader(mr); + const cst = mr.cstView(); // Walk all nonterminals and verify classification function check(handle) { - if (reader.type(handle) === CstNodeType.NONTERMINAL) { - const name = reader.ctorName(handle); - if (name === 'Start') t.true(reader.isSyntactic(handle)); - if (name === 'inner') t.false(reader.isSyntactic(handle)); + if (cst.type(handle) === CstNodeType.NONTERMINAL) { + const name = cst.ctorName(handle); + if (name === 'Start') t.true(cst.isSyntactic(handle)); + if (name === 'inner') t.false(cst.isSyntactic(handle)); } - reader.forEachChild(handle, child => check(child)); + cst.forEachChild(handle, child => check(child)); } - check(reader.root); + check(cst.root); }); }); @@ -452,16 +446,16 @@ test('isSyntactic reads compiler-embedded classification', async t => { // Recursively check all invariants on a CST node and its descendants. // Returns an array of error strings (empty = all good). -function checkInvariants(reader, handle, isLexicalParent) { +function checkInvariants(cst, handle, isLexicalParent) { const errors = []; - const type = reader.type(handle); - const ctor = reader.ctorName(handle); - const start = reader.startIdx(handle); - const len = reader.matchLength(handle); + const type = cst.type(handle); + const ctor = cst.ctorName(handle); + const start = cst.startIdx(handle); + const len = cst.matchLength(handle); // -- Packed-handle span consistency -- - const actual = reader.sourceString(handle); - const expected = reader.input.slice(start, start + len); + const actual = cst.sourceString(handle); + const expected = cst.input.slice(start, start + len); if (actual !== expected) { errors.push( `span mismatch at ${start}: sourceString=${JSON.stringify(actual)}, ` + @@ -474,22 +468,22 @@ function checkInvariants(reader, handle, isLexicalParent) { if (ctor !== '_terminal') { errors.push(`terminal ctorName=${ctor}, expected '_terminal'`); } - if (reader.childCount(handle) !== 0) { - errors.push(`terminal childCount=${reader.childCount(handle)}, expected 0`); + if (cst.childCount(handle) !== 0) { + errors.push(`terminal childCount=${cst.childCount(handle)}, expected 0`); } // Terminals are leaves — nothing more to check. return errors; } if (type === CstNodeType.OPT) { - const cc = reader.childCount(handle); + const cc = cst.childCount(handle); if (cc !== 0 && cc !== 1) { errors.push(`opt childCount=${cc}, expected 0 or 1`); } } // -- Public iteration contract -- - const childCount = reader.childCount(handle); + const childCount = cst.childCount(handle); let callbackCount = 0; const indices = []; @@ -498,8 +492,8 @@ function checkInvariants(reader, handle, isLexicalParent) { let cursor = start; let reconstructed = ''; - reader.forEachChild(handle, (child, leadingSpacesLen, index) => { - const childStartIdx = reader.startIdx(child); + cst.forEachChild(handle, (child, leadingSpacesLen, index) => { + const childStartIdx = cst.startIdx(child); indices.push(index); callbackCount++; @@ -511,7 +505,7 @@ function checkInvariants(reader, handle, isLexicalParent) { } // LIST/OPT edges never report leading spaces (documented contract). - const childType = reader.type(child); + const childType = cst.type(child); if ( (childType === CstNodeType.LIST || childType === CstNodeType.OPT) && leadingSpacesLen > 0 @@ -533,20 +527,20 @@ function checkInvariants(reader, handle, isLexicalParent) { // Round-trip reconstruction: interleave spaces + child text. if (leadingSpacesLen > 0) { - reconstructed += reader.input.slice(childStartIdx - leadingSpacesLen, childStartIdx); + reconstructed += cst.input.slice(childStartIdx - leadingSpacesLen, childStartIdx); } - reconstructed += reader.sourceString(child); + reconstructed += cst.sourceString(child); - cursor = childStartIdx + reader.matchLength(child); + cursor = childStartIdx + cst.matchLength(child); // Recurse. Lexical context propagates through non-nonterminal wrappers. let childIsLexical; if (childType === CstNodeType.NONTERMINAL) { - childIsLexical = !reader.isSyntactic(child); + childIsLexical = !cst.isSyntactic(child); } else { childIsLexical = isLexicalParent; } - errors.push(...checkInvariants(reader, child, childIsLexical)); + errors.push(...checkInvariants(cst, child, childIsLexical)); }); // Tiling: children must cover entire parent span. @@ -575,27 +569,26 @@ function checkInvariants(reader, handle, isLexicalParent) { } // Check all invariants on a full match result, including root-level checks. -function checkMatch(reader) { +function checkMatch(cst) { const errors = []; - const {root, rootLeadingSpacesLen, input} = reader; + const {root, rootLeadingSpacesLen, input} = cst; // -- Root consumption invariant -- - if (reader.startIdx(root) !== rootLeadingSpacesLen) { + if (cst.startIdx(root) !== rootLeadingSpacesLen) { errors.push( - `root startIdx=${reader.startIdx(root)}, ` + - `rootLeadingSpacesLen=${rootLeadingSpacesLen}` + `root startIdx=${cst.startIdx(root)}, ` + `rootLeadingSpacesLen=${rootLeadingSpacesLen}` ); } - if (rootLeadingSpacesLen + reader.matchLength(root) !== input.length) { + if (rootLeadingSpacesLen + cst.matchLength(root) !== input.length) { errors.push( - `rootLeadingSpacesLen(${rootLeadingSpacesLen}) + matchLength(${reader.matchLength(root)}) ` + + `rootLeadingSpacesLen(${rootLeadingSpacesLen}) + matchLength(${cst.matchLength(root)}) ` + `!== input.length(${input.length})` ); } // -- Root round-trip: leadingSpaces + render(root) === input -- const rootSpaces = input.slice(0, rootLeadingSpacesLen); - const rootText = reader.sourceString(root); + const rootText = cst.sourceString(root); if (rootSpaces + rootText !== input) { errors.push( `root round-trip: ${JSON.stringify(rootSpaces + rootText)} !== ${JSON.stringify(input)}` @@ -603,8 +596,8 @@ function checkMatch(reader) { } // Recurse into the tree. - const isLexicalRoot = !reader.isSyntactic(root); - errors.push(...checkInvariants(reader, root, isLexicalRoot)); + const isLexicalRoot = !cst.isSyntactic(root); + errors.push(...checkInvariants(cst, root, isLexicalRoot)); return errors; } @@ -695,8 +688,8 @@ test('fast-check: CST structural invariants', async t => { if (!mr.succeeded()) { throw new Error(`expected match for input=${JSON.stringify(input)}`); } - const reader = createReader(mr); - const errors = checkMatch(reader); + const cst = mr.cstView(); + const errors = checkMatch(cst); if (errors.length > 0) { throw new Error(`input=${JSON.stringify(input)}\n${errors.join('\n')}`); } diff --git a/packages/compiler/test/test-liquid-html.js b/packages/compiler/test/test-liquid-html.js index 26b2e892..557bfeb4 100644 --- a/packages/compiler/test/test-liquid-html.js +++ b/packages/compiler/test/test-liquid-html.js @@ -79,7 +79,7 @@ test('liquidRawTagImpl', async t => { const g = await loadWasmLiquidHTML(); const r = g.match(sourceCode); t.true(r.succeeded()); - const root = r._cst; + const root = r.cstView().rootNode(); t.is(root.ctorName, 'Node'); t.is(root.startIdx, 5); const [opt, list] = root.children; diff --git a/packages/compiler/test/test-wasm.js b/packages/compiler/test/test-wasm.js index 471164f8..0d1febf1 100644 --- a/packages/compiler/test/test-wasm.js +++ b/packages/compiler/test/test-wasm.js @@ -193,7 +193,7 @@ test('cst: leadingSpaces children via lazy parsing', async t => { // Access children within the match lifecycle (evalSpacesFull needs live WASM state). g.match(' x').use(r => { t.true(r.succeeded()); - const root = r.getCstRoot(); + const root = r.cstView().rootNode(); const spaces = root.leadingSpaces; t.truthy(spaces); @@ -221,7 +221,7 @@ test('cst: leadingSpaces with custom spaces rule', async t => { }`); g.match('abc // yo\n def').use(r => { t.true(r.succeeded()); - const root = r.getCstRoot(); + const root = r.cstView().rootNode(); // The Plus list should contain two words. const list = root.children[0]; @@ -262,7 +262,7 @@ test('cst: leadingSpaces children are not corrupted by cached spaces', async t = const g = await compileAndLoad('G { Start = "a" "b" "c" }'); g.match('a b c').use(r => { t.true(r.succeeded()); - const root = r.getCstRoot(); + const root = r.cstView().rootNode(); // "b" has 2 leading spaces, "c" has 3 leading spaces. const [a, b, c] = root.children; @@ -304,7 +304,7 @@ test('cst: leadingSpaces suppressed in prealloc lexical rule', async t => { }`); g.match('a bc').use(r => { t.true(r.succeeded()); - const root = r.getCstRoot(); + const root = r.cstView().rootNode(); // Start > two > x x const two = root.children[1]; t.is(two.ctorName, 'two'); @@ -325,7 +325,7 @@ test('cst: lazy parsing survives memory.grow()', async t => { const g = await compileAndLoad('G { Start = "x" }'); g.match(' x').use(r => { t.true(r.succeeded()); - const root = r.getCstRoot(); + const root = r.cstView().rootNode(); const spaces = root.leadingSpaces; t.truthy(spaces); @@ -1407,9 +1407,9 @@ test('nested matching with use()', async t => { const g = await compileAndLoad('G { Start = letter+ | digit+ }'); g.match('abc').use(outer => { - const outerCst = outer.getCstRoot(); + const outerCst = outer.cstView().rootNode(); g.match('1234').use(inner => { - t.is(inner.getCstRoot().sourceString, '1234'); + t.is(inner.cstView().rootNode().sourceString, '1234'); // Both CSTs valid simultaneously. t.is(outerCst.sourceString, 'abc'); }); @@ -1781,7 +1781,7 @@ test('accessing CST node after dispose throws', async t => { let savedCst; wasmGrammar.match('abc').use(r => { t.true(r.succeeded(), 'match should succeed'); - savedCst = r.getCstRoot(); + savedCst = r.cstView().rootNode(); // Accessing CST inside use() should work fine. t.is(savedCst.sourceString, 'abc'); }); diff --git a/packages/lang-python/bench.ts b/packages/lang-python/bench.ts index c74fdd20..a63a4481 100644 --- a/packages/lang-python/bench.ts +++ b/packages/lang-python/bench.ts @@ -58,7 +58,7 @@ bench.add( const r = matchPython(input); r.use(r => { if (!r.succeeded()) throw new Error('Match failed'); - const cst = r.getCstRoot(); + const cst = r.cstView().rootNode(); const {input: tokenizedInput} = tokenize(input); const root = cst as NonterminalNode; const fullSource = tokenizedInput.slice(0, root.startIdx) + root.sourceString; diff --git a/packages/lang-python/convertToOhm.ts b/packages/lang-python/convertToOhm.ts index 4bdfe3cb..05833c86 100644 --- a/packages/lang-python/convertToOhm.ts +++ b/packages/lang-python/convertToOhm.ts @@ -288,6 +288,6 @@ export function convertToOhm(rawSource: string): string { if (!r.succeeded()) { throw new Error(String(r)); } - return rewrite(r.getCstRoot()); + return rewrite(r.cstView().rootNode()); }); } diff --git a/packages/runtime/index.ts b/packages/runtime/index.ts index adc7c22b..aa66ebf9 100644 --- a/packages/runtime/index.ts +++ b/packages/runtime/index.ts @@ -1,4 +1,5 @@ export {CstNodeType, Grammar} from './src/miniohm.ts'; +export type {CstView} from './src/cstView.ts'; export type { CstNode, CstNodeChildren, diff --git a/packages/runtime/ohm-js.api.md b/packages/runtime/ohm-js.api.md index 08ccfeca..7e12e344 100644 --- a/packages/runtime/ohm-js.api.md +++ b/packages/runtime/ohm-js.api.md @@ -22,6 +22,29 @@ export const CstNodeType: { // @public (undocumented) export type CstNodeType = (typeof CstNodeType)[keyof typeof CstNodeType]; +// @public +export class CstView { + childCount(handle: number): number; + ctorName(handle: number): string; + forEachChild(handle: number, fn: (child: number, leadingSpacesLen: number, index: number) => void): void; + forEachTuple(handle: number, fn: (...children: number[]) => void): void; + get input(): string; + isPresent(handle: number): boolean; + isSyntactic(handle: number): boolean; + matchLength(handle: number): number; + node(handle: number): CstNode; + readonly root: number; + readonly rootLeadingSpacesLen: number; + rootNode(): CstNode; + ruleId(handle: number): number; + get ruleNames(): readonly string[]; + sourceString(handle: number): string; + startIdx(handle: number): number; + tupleArity(handle: number): number; + type(handle: number): CstNodeType; + withChildren(handle: number, fn: (handle: number, ...children: number[]) => R): R; +} + // @public (undocumented) export class FailedMatchResult extends MatchResult { // (undocumented) @@ -163,8 +186,7 @@ export interface SeqNode ex // @public (undocumented) export class SucceededMatchResult extends MatchResult { - // (undocumented) - getCstRoot(): CstNode; + cstView(): CstView; } // @public (undocumented) diff --git a/packages/runtime/package.json b/packages/runtime/package.json index 5ef4b130..60296b47 100644 --- a/packages/runtime/package.json +++ b/packages/runtime/package.json @@ -24,9 +24,9 @@ "types": "./dist/src/unstableDebug.d.ts", "default": "./dist/src/unstableDebug.js" }, - "./cstReader": { - "types": "./dist/src/cstReader.d.ts", - "default": "./dist/src/cstReader.js" + "./cstView": { + "types": "./dist/src/cstView.d.ts", + "default": "./dist/src/cstView.js" } }, "files": ["dist"], diff --git a/packages/runtime/src/cstCommon.ts b/packages/runtime/src/cstCommon.ts new file mode 100644 index 00000000..6f1a2e25 --- /dev/null +++ b/packages/runtime/src/cstCommon.ts @@ -0,0 +1,116 @@ +// CST constants, types, and handle-packing utilities shared by cstView.ts +// and miniohm.ts. This module has no imports from either, so it can be +// imported by both without circular dependencies. + +export const MATCH_RECORD_TYPE_MASK = 0b11; + +// Byte offsets for fields in a CST match record (Wasm linear memory layout). +export const CST_MATCH_LENGTH_OFFSET = 0; +export const CST_TYPE_AND_DETAILS_OFFSET = 4; +export const CST_CHILD_COUNT_OFFSET = 8; +export const CST_CHILDREN_OFFSET = 16; + +/** Bit 1 of a child slot is the HAS_LEADING_SPACES edge flag. */ +export const CST_HAS_LEADING_SPACES_FLAG = 2; + +// Tagged terminal: (matchLength << 2) | 1. Bit 0 distinguishes from real pointers. +// Bit 1 is the HAS_LEADING_SPACES edge flag (set on child slots, not on root handles). +export function isTaggedTerminal(handle: number): boolean { + return (handle & 1) !== 0; +} + +// Extract the MatchRecordType from a raw (non-tagged-terminal) CST pointer. +export function rawMatchRecordType(view: DataView, ptr: number): MatchRecordType { + return (view.getInt32(ptr + CST_TYPE_AND_DETAILS_OFFSET, true) & + MATCH_RECORD_TYPE_MASK) as MatchRecordType; +} + +// A MatchRecord is the representation of a CstNode in Wasm linear memory. +export const MatchRecordType = { + NONTERMINAL: 0, + TERMINAL: 1, // Only for tagged-integer detection, never in heap nodes. + ITER_FLAG: 2, + OPTIONAL: 3, +} as const; + +export type MatchRecordType = (typeof MatchRecordType)[keyof typeof MatchRecordType]; + +// A _CST node_ is the user-facing representation, built from a match record. +export const CstNodeType = { + NONTERMINAL: 0, + TERMINAL: 1, + LIST: 2, + OPT: 3, + SEQ: 4, +} as const; + +// Define types with the same name as the values above. This gives us roughly the +// same functionality as a TypeScript enum, but works with erasableSyntaxOnly. +export type CstNodeType = (typeof CstNodeType)[keyof typeof CstNodeType]; + +export interface MatchContext { + ruleNames: string[]; + ruleIsSyntactic: boolean[]; + view: DataView; + input: string; + getSpacesLenAt?: (pos: number) => number; + evalSpacesFull?: (pos: number) => number; + memory?: WebAssembly.Memory; +} + +// --- Handle packing --- + +const HANDLE_BITS: number = 27; +const SHIFT: number = 2 ** HANDLE_BITS; // 134217728 +const MASK: number = SHIFT - 1; // 0x7FFFFFF +const START_IDX_BITS: number = 53 - HANDLE_BITS; +const START_IDX_LIMIT: number = 2 ** START_IDX_BITS; +const TERMINAL_LENGTH_LIMIT: number = 2 ** (HANDLE_BITS - 2); +const INPUT_LENGTH_LIMIT: number = Math.min(START_IDX_LIMIT, TERMINAL_LENGTH_LIMIT); + +/** + * Pack a raw CST handle and startIdx into a single Number handle. + * Uses 53 of the available integer-precision bits in an IEEE 754 double + * (27 bits for the pointer, 26 bits for startIdx). + */ +function pack(rawHandle: number, startIdx: number): number { + return startIdx * SHIFT + rawHandle; +} + +/** Extract the raw CST pointer from a packed handle. */ +export function rawHandle(handle: number): number { + return handle & MASK; +} + +/** Extract the startIdx from a packed handle. */ +export function unpackStartIdx(handle: number): number { + const raw = rawHandle(handle); + return (handle - raw) / SHIFT; +} + +/** + * Create a packed handle from a raw pointer and startIdx. + * Validates that both values fit in the packed representation. + */ +export function createHandle(rawPtr: number, startIdx: number): number { + if (rawPtr >= SHIFT) { + throw new Error( + `Raw CST pointer ${rawPtr} exceeds ${HANDLE_BITS}-bit limit (max ${SHIFT - 1})` + ); + } + if (startIdx >= START_IDX_LIMIT) { + throw new Error( + `startIdx ${startIdx} exceeds ${START_IDX_BITS}-bit limit (max ${START_IDX_LIMIT - 1})` + ); + } + return pack(rawPtr, startIdx); +} + +export {HANDLE_BITS, INPUT_LENGTH_LIMIT, SHIFT}; + +/** @internal */ +export const _nodeFactory: { + make: ((view: any, handle: number, leadingSpacesLen: number) => any) | null; +} = { + make: null, +}; diff --git a/packages/runtime/src/cstReaderFactory.ts b/packages/runtime/src/cstReaderFactory.ts deleted file mode 100644 index 4e05e2d0..00000000 --- a/packages/runtime/src/cstReaderFactory.ts +++ /dev/null @@ -1,26 +0,0 @@ -import type {MatchContext} from './miniohm.ts'; - -import {CstReader} from './cstReader.ts'; -import {createHandle, HANDLE_BITS, INPUT_LENGTH_LIMIT, SHIFT} from './cstReaderShared.ts'; - -/** - * Create a CstReader from a MatchContext and Wasm exports. - * Validates packed-handle limits (heap size and input length). - */ -export function createReaderFromCtx(ctx: MatchContext, exports: any): CstReader { - const heapTop = exports.__offset.value; - if (heapTop >= SHIFT) { - throw new Error( - `Wasm heap too large for CstReader: ${heapTop} bytes exceeds ${HANDLE_BITS}-bit limit (${SHIFT} bytes)` - ); - } - if (ctx.input.length >= INPUT_LENGTH_LIMIT) { - throw new Error( - `Input too long for CstReader: ${ctx.input.length} chars exceeds limit (${INPUT_LENGTH_LIMIT} chars)` - ); - } - - const rootLeadingSpacesLen = Math.max(0, exports.getSpacesLenAt(0)); - const rootPtr = exports.bindingsAt(0); - return new CstReader(ctx, createHandle(rootPtr, rootLeadingSpacesLen), rootLeadingSpacesLen); -} diff --git a/packages/runtime/src/cstReaderShared.ts b/packages/runtime/src/cstReaderShared.ts deleted file mode 100644 index 5ff82ffd..00000000 --- a/packages/runtime/src/cstReaderShared.ts +++ /dev/null @@ -1,47 +0,0 @@ -const HANDLE_BITS = 27; -const SHIFT = 2 ** HANDLE_BITS; // 134217728 -const MASK = SHIFT - 1; // 0x7FFFFFF -const START_IDX_BITS = 53 - HANDLE_BITS; -const START_IDX_LIMIT = 2 ** START_IDX_BITS; -const TERMINAL_LENGTH_LIMIT = 2 ** (HANDLE_BITS - 2); -const INPUT_LENGTH_LIMIT = Math.min(START_IDX_LIMIT, TERMINAL_LENGTH_LIMIT); - -/** - * Pack a raw CST handle and startIdx into a single Number handle. - * Uses 53 of the available integer-precision bits in an IEEE 754 double - * (27 bits for the pointer, 26 bits for startIdx). - */ -function pack(rawHandle: number, startIdx: number): number { - return startIdx * SHIFT + rawHandle; -} - -/** Extract the raw CST pointer from a packed handle. */ -export function rawHandle(handle: number): number { - return handle & MASK; -} - -/** Extract the startIdx from a packed handle. */ -export function unpackStartIdx(handle: number): number { - const raw = rawHandle(handle); - return (handle - raw) / SHIFT; -} - -/** - * Create a packed handle from a raw pointer and startIdx. - * Validates that both values fit in the packed representation. - */ -export function createHandle(rawPtr: number, startIdx: number): number { - if (rawPtr >= SHIFT) { - throw new Error( - `Raw CST pointer ${rawPtr} exceeds ${HANDLE_BITS}-bit limit (max ${SHIFT - 1})` - ); - } - if (startIdx >= START_IDX_LIMIT) { - throw new Error( - `startIdx ${startIdx} exceeds ${START_IDX_BITS}-bit limit (max ${START_IDX_LIMIT - 1})` - ); - } - return pack(rawPtr, startIdx); -} - -export {HANDLE_BITS, INPUT_LENGTH_LIMIT, SHIFT}; diff --git a/packages/runtime/src/cstReader.ts b/packages/runtime/src/cstView.ts similarity index 93% rename from packages/runtime/src/cstReader.ts rename to packages/runtime/src/cstView.ts index 5b6af244..842ca092 100644 --- a/packages/runtime/src/cstReader.ts +++ b/packages/runtime/src/cstView.ts @@ -1,24 +1,27 @@ import { CST_CHILD_COUNT_OFFSET, CST_CHILDREN_OFFSET, - CST_MATCH_LENGTH_OFFSET, CST_HAS_LEADING_SPACES_FLAG, + CST_MATCH_LENGTH_OFFSET, CST_TYPE_AND_DETAILS_OFFSET, CstNodeType, + createHandle, isTaggedTerminal, MatchRecordType, + rawHandle, rawMatchRecordType, -} from './miniohm.ts'; -import {assert} from './assert.ts'; -import {createReaderFromCtx} from './cstReaderFactory.ts'; -import {createHandle, rawHandle, unpackStartIdx} from './cstReaderShared.ts'; + unpackStartIdx, + _nodeFactory, +} from './cstCommon.ts'; +import {assert, checkNotNull} from './assert.ts'; -import type {MatchContext, SucceededMatchResult} from './miniohm.ts'; +import type {MatchContext} from './cstCommon.ts'; +import type {CstNode} from './miniohm.ts'; export {CstNodeType}; -function nextEdgePos(reader: CstReader, child: number): number { - return reader.startIdx(child) + reader.matchLength(child); +function nextEdgePos(cst: CstView, child: number): number { + return cst.startIdx(child) + cst.matchLength(child); } /** @@ -32,7 +35,7 @@ function nextEdgePos(reader: CstReader, child: number): number { * Leading spaces are edge data (they belong to the parent→child relationship), * not node data. For each child edge: * - startIdx(childHandle) is the child's start position - * - leadingSpacesLen >= 0 + * - leadingSpacesLen \>= 0 * - leading spaces span: start = startIdx(childHandle) - leadingSpacesLen, length = leadingSpacesLen * - child source span: start = startIdx(childHandle), length = matchLength(childHandle) * @@ -40,7 +43,7 @@ function nextEdgePos(reader: CstReader, child: number): number { * - startIdx(root) === rootLeadingSpacesLen * - leading spaces before root are input.slice(0, rootLeadingSpacesLen) */ -export class CstReader { +export class CstView { /** @internal */ private _ctx: MatchContext; @@ -194,9 +197,8 @@ export class CstReader { * The caller must track `edgeStartIdx`: for the first child, it's * `startIdx(parentHandle)`; for subsequent children, it's * `startIdx(prevChild) + matchLength(prevChild)`. - * @internal */ - childAt(handle: number, index: number, edgeStartIdx: number): number { + private childAt(handle: number, index: number, edgeStartIdx: number): number { const raw = rawHandle(handle); const slot = this._ctx.view.getUint32(raw + CST_CHILDREN_OFFSET + index * 4, true); const hasLeadingSpaces = (slot & CST_HAS_LEADING_SPACES_FLAG) !== 0; @@ -346,9 +348,14 @@ export class CstReader { const type = rawMatchRecordType(this._ctx.view, rawChild); return type === MatchRecordType.NONTERMINAL || type === MatchRecordType.TERMINAL; } -} -export function createReader(result: SucceededMatchResult): CstReader { - const exports = (result.grammar as any)._instance.exports; - return createReaderFromCtx(result._ctx, exports); + /** Create a lazy CstNode wrapper for the given handle. */ + node(handle: number): CstNode { + return checkNotNull(_nodeFactory.make)(this, handle, 0); + } + + /** Create a lazy CstNode wrapper for the root, including leading spaces. */ + rootNode(): CstNode { + return checkNotNull(_nodeFactory.make)(this, this.root, this.rootLeadingSpacesLen); + } } diff --git a/packages/runtime/src/miniohm.ts b/packages/runtime/src/miniohm.ts index 9e34e4ab..825f9477 100644 --- a/packages/runtime/src/miniohm.ts +++ b/packages/runtime/src/miniohm.ts @@ -1,54 +1,48 @@ import {assert, checkNotNull} from './assert.ts'; -import {CstReader} from './cstReader.ts'; -import {createReaderFromCtx} from './cstReaderFactory.ts'; -import {createHandle, rawHandle} from './cstReaderShared.ts'; +import {CstView} from './cstView.ts'; +import { + createHandle, + CstNodeType, + HANDLE_BITS, + INPUT_LENGTH_LIMIT, + rawHandle, + SHIFT, + _nodeFactory, +} from './cstCommon.ts'; +import type {MatchContext} from './cstCommon.ts'; import {getLineAndColumn, getLineAndColumnMessage} from './extras.ts'; -export const MATCH_RECORD_TYPE_MASK = 0b11; - -// Byte offsets for fields in a CST match record (Wasm linear memory layout). -export const CST_MATCH_LENGTH_OFFSET = 0; -export const CST_TYPE_AND_DETAILS_OFFSET = 4; -export const CST_CHILD_COUNT_OFFSET = 8; -export const CST_CHILDREN_OFFSET = 16; - -/** Bit 1 of a child slot is the HAS_LEADING_SPACES edge flag. */ -export const CST_HAS_LEADING_SPACES_FLAG = 2; - -// Tagged terminal: (matchLength << 2) | 1. Bit 0 distinguishes from real pointers. -// Bit 1 is the HAS_LEADING_SPACES edge flag (set on child slots, not on root handles). -export function isTaggedTerminal(handle: number): boolean { - return (handle & 1) !== 0; -} - -// Extract the MatchRecordType from a raw (non-tagged-terminal) CST pointer. -export function rawMatchRecordType(view: DataView, ptr: number): MatchRecordType { - return (view.getInt32(ptr + CST_TYPE_AND_DETAILS_OFFSET, true) & - MATCH_RECORD_TYPE_MASK) as MatchRecordType; +function createCstView(ctx: MatchContext, exports: any): CstView { + const heapTop = exports.__offset.value; + if (heapTop >= SHIFT) { + throw new Error( + `Wasm heap too large for CstView: ${heapTop} bytes exceeds ${HANDLE_BITS}-bit limit (${SHIFT} bytes)` + ); + } + if (ctx.input.length >= INPUT_LENGTH_LIMIT) { + throw new Error( + `Input too long for CstView: ${ctx.input.length} chars exceeds limit (${INPUT_LENGTH_LIMIT} chars)` + ); + } + const rootLeadingSpacesLen = Math.max(0, exports.getSpacesLenAt(0)); + const rootPtr = exports.bindingsAt(0); + return new CstView(ctx, createHandle(rootPtr, rootLeadingSpacesLen), rootLeadingSpacesLen); } -// A MatchRecord is the representation of a CstNode in Wasm linear memory. -export const MatchRecordType = { - NONTERMINAL: 0, - TERMINAL: 1, // Only for tagged-integer detection, never in heap nodes. - ITER_FLAG: 2, - OPTIONAL: 3, -} as const; - -export type MatchRecordType = (typeof MatchRecordType)[keyof typeof MatchRecordType]; - -// A _CST node_ is the user-facing representation, built from a match record. -export const CstNodeType = { - NONTERMINAL: 0, - TERMINAL: 1, - LIST: 2, - OPT: 3, - SEQ: 4, -} as const; - -// Define types with the same name as the values above. This gives us roughly the -// same functionality as a TypeScript enum, but works with erasableSyntaxOnly. -export type CstNodeType = (typeof CstNodeType)[keyof typeof CstNodeType]; +export { + CST_CHILD_COUNT_OFFSET, + CST_CHILDREN_OFFSET, + CST_HAS_LEADING_SPACES_FLAG, + CST_MATCH_LENGTH_OFFSET, + CST_TYPE_AND_DETAILS_OFFSET, + CstNodeType, + isTaggedTerminal, + MATCH_RECORD_TYPE_MASK, + MatchRecordType, + rawMatchRecordType, +} from './cstCommon.ts'; +export type {CstNodeType} from './cstCommon.ts'; +export type {MatchContext, MatchRecordType} from './cstCommon.ts'; const EMPTY_CHILDREN: ReadonlyArray = Object.freeze([]); @@ -454,7 +448,7 @@ export class Grammar { evalSpacesFull: exports.evalSpacesFull, memory: exports.memory, }; - const reader = createReaderFromCtx(ctx, exports); + const reader = createCstView(ctx, exports); return new CstNodeImpl(reader, reader.root, reader.rootLeadingSpacesLen) as CstNode; } @@ -473,16 +467,6 @@ export class Grammar { } } -export interface MatchContext { - ruleNames: string[]; - ruleIsSyntactic: boolean[]; - view: DataView; - input: string; - getSpacesLenAt?: (pos: number) => number; - evalSpacesFull?: (pos: number) => number; - memory?: WebAssembly.Memory; -} - export type CstNode = NonterminalNode | TerminalNode | ListNode | OptNode | SeqNode; export type CstNodeChildren = readonly CstNode[]; @@ -552,16 +536,16 @@ export interface SeqNode } class CstNodeImpl implements CstNodeBase { - _reader!: CstReader; + _cstView!: CstView; _handle: number; _children?: CstNodeChildren = undefined; leadingSpaces?: NonterminalNode = undefined; source: {startIdx: number; endIdx: number}; - constructor(reader: CstReader, handle: number, leadingSpacesLen = 0) { + constructor(reader: CstView, handle: number, leadingSpacesLen = 0) { // Non-enumerable properties Object.defineProperties(this, { - _reader: {value: reader}, + _cstView: {value: reader}, _children: {writable: true}, }); this._handle = handle; @@ -580,7 +564,7 @@ class CstNodeImpl implements CstNodeBase { } get startIdx(): number { - return this._reader.startIdx(this._handle); + return this._cstView.startIdx(this._handle); } /** @internal Raw CST pointer (for debug/test use). */ @@ -589,7 +573,7 @@ class CstNodeImpl implements CstNodeBase { } get type(): CstNodeType { - return this._reader.type(this._handle); + return this._cstView.type(this._handle); } isNonterminal(): this is NonterminalNode { @@ -613,11 +597,11 @@ class CstNodeImpl implements CstNodeBase { } get ctorName(): string { - return this._reader.ctorName(this._handle); + return this._cstView.ctorName(this._handle); } get matchLength(): number { - return this._reader.matchLength(this._handle); + return this._cstView.matchLength(this._handle); } get value(): string { @@ -627,7 +611,7 @@ class CstNodeImpl implements CstNodeBase { get children(): CstNodeChildren { if (!this._children) { this._children = this._computeChildren().map((n): CstNode => { - const type = n._reader.type(n._handle); + const type = n._cstView.type(n._handle); if (type === CstNodeType.OPT) { const child: CstNode | undefined = n.children.length <= 1 @@ -635,7 +619,7 @@ class CstNodeImpl implements CstNodeBase { : new SeqNodeImpl(n.children, n.source, n.sourceString); return new OptNodeImpl(child, n.source, n.sourceString); } else if (type === CstNodeType.LIST) { - const arity = n._reader.tupleArity(n._handle); + const arity = n._cstView.tupleArity(n._handle); if (arity <= 1) { return new ListNodeImpl(n.children, n.source, n.sourceString); } @@ -645,7 +629,7 @@ class CstNodeImpl implements CstNodeBase { // FIXME: We don't need any of this nonsense if we actually build the SeqNodes at parse time. const seqChildren = n.children.slice(i, i + arity); const endIdx = checkNotNull(seqChildren.at(-1)).source.endIdx; - const sourceString = n._reader.input.slice(startIdx, endIdx); + const sourceString = n._cstView.input.slice(startIdx, endIdx); arr.push(new SeqNodeImpl(seqChildren, {startIdx, endIdx}, sourceString)); startIdx = endIdx; } @@ -660,7 +644,7 @@ class CstNodeImpl implements CstNodeBase { _computeChildren(): CstNodeImpl[] { const children: CstNodeImpl[] = []; - const reader = this._reader; + const reader = this._cstView; reader.forEachChild(this._handle, (childHandle, leadingSpacesLen) => { children.push(new CstNodeImpl(reader, childHandle, leadingSpacesLen)); }); @@ -668,17 +652,17 @@ class CstNodeImpl implements CstNodeBase { } get sourceString(): string { - return this._reader.sourceString(this._handle); + return this._cstView.sourceString(this._handle); } isSyntactic(): boolean { assert(this.isNonterminal(), 'Not a nonterminal'); - return this._reader.isSyntactic(this._handle); + return this._cstView.isSyntactic(this._handle); } isLexical(): boolean { assert(this.isNonterminal(), 'Not a nonterminal'); - return !this._reader.isSyntactic(this._handle); + return !this._cstView.isSyntactic(this._handle); } toString(): string { @@ -694,14 +678,14 @@ class LazySpacesNode implements NonterminalNode { readonly leadingSpaces = undefined; readonly source: {startIdx: number; endIdx: number}; - private _reader: CstReader; + private _cstView: CstView; private _startIdx: number; private _matchLength: number; private _children?: CstNodeChildren; private _sourceString?: string; - constructor(reader: CstReader, startIdx: number, matchLength: number) { - this._reader = reader; + constructor(reader: CstView, startIdx: number, matchLength: number) { + this._cstView = reader; this._startIdx = startIdx; this._matchLength = matchLength; this.source = {startIdx, endIdx: startIdx + matchLength}; @@ -713,7 +697,7 @@ class LazySpacesNode implements NonterminalNode { get sourceString(): string { if (this._sourceString === undefined) { - this._sourceString = this._reader.input.slice( + this._sourceString = this._cstView.input.slice( this._startIdx, this._startIdx + this._matchLength ); @@ -729,10 +713,10 @@ class LazySpacesNode implements NonterminalNode { } private _parseChildren(): CstNodeChildren { - const ptr = this._reader.evalSpacesFull(this._startIdx); + const ptr = this._cstView.evalSpacesFull(this._startIdx); if (ptr === 0) return EMPTY_CHILDREN; const handle = createHandle(ptr, this._startIdx); - const fullNode = new CstNodeImpl(this._reader, handle); + const fullNode = new CstNodeImpl(this._cstView, handle); return fullNode.children; } @@ -977,9 +961,14 @@ function createMatchResult( ); } +// Register the CstNode factory so CstView.node() / .rootNode() can +// create CstNodeImpl instances without a circular import. +_nodeFactory.make = (view, handle, leadingSpacesLen) => + new CstNodeImpl(view, handle, leadingSpacesLen) as CstNode; + export class SucceededMatchResult extends MatchResult { /** @internal */ - _cst: CstNode; + private _cstView?: CstView; /** @internal */ protected constructor( @@ -989,11 +978,15 @@ export class SucceededMatchResult extends MatchResult { succeeded: boolean ) { super(grammar, startExpr, ctx, succeeded); - this._cst = grammar._getCstRoot(ctx); } - getCstRoot(): CstNode { - return this._cst; + /** Returns a CstView — the canonical, lazy CST access object for this match. */ + cstView(): CstView { + if (!this._cstView) { + const exports = (this.grammar as any)._instance.exports; + this._cstView = createCstView(this._ctx, exports); + } + return this._cstView; } } diff --git a/packages/runtime/src/unstableDebug.ts b/packages/runtime/src/unstableDebug.ts index ae6b0def..a2e14da0 100644 --- a/packages/runtime/src/unstableDebug.ts +++ b/packages/runtime/src/unstableDebug.ts @@ -129,7 +129,7 @@ function walkRecordTree( * the memo table globals reflect the last match. */ export function getMatchStats(result: SucceededMatchResult): MatchStats { - const root = result.getCstRoot() as any; + const root = result.cstView().rootNode() as any; const grammar = result.grammar as any; const exports = grammar._instance.exports; const ctx = (result as any)._ctx; diff --git a/packages/runtime/tsdown.config.ts b/packages/runtime/tsdown.config.ts index f50d49f1..6f60fdac 100644 --- a/packages/runtime/tsdown.config.ts +++ b/packages/runtime/tsdown.config.ts @@ -4,7 +4,7 @@ export default defineConfig({ entry: { index: 'index.ts', 'src/unstableDebug': 'src/unstableDebug.ts', - 'src/cstReader': 'src/cstReader.ts', + 'src/cstView': 'src/cstView.ts', }, format: 'esm', fixedExtension: false, diff --git a/packages/semantics/bench.ts b/packages/semantics/bench.ts index 8770a907..5bc7ba2b 100644 --- a/packages/semantics/bench.ts +++ b/packages/semantics/bench.ts @@ -3,10 +3,9 @@ import process from 'node:process'; import {Bench} from 'tinybench'; import * as ohm from '@ohm-js/compiler/compat'; -import {createReader} from 'ohm-js/cstReader'; import {createOperation} from './src/index.ts'; -import {createReaderOperation} from './src/reader.ts'; +import {createCstViewOperation} from './src/cstViewOps.ts'; const smallSize = process.argv.includes('--small-size'); @@ -36,15 +35,15 @@ const countNodesCstNode = createOperation('countNodes', { }, }); -// --- CstReader-based (createReaderOperation) --- +// --- CstView-based (createCstViewOperation) --- -let _rd: ReturnType; +let _cst: any; -const countNodesCstReader = createReaderOperation('countNodes', { +const countNodesCstView = createCstViewOperation('countNodes', { _nonterminal(h) { let sum = 1; - _rd.forEachChild(h, child => { - sum += countNodesCstReader(_rd, child); + _cst.forEachChild(h, child => { + sum += countNodesCstView(_cst, child); }); return sum; }, @@ -53,8 +52,8 @@ const countNodesCstReader = createReaderOperation('countNodes', { }, _default(h) { let sum = 1; - _rd.forEachChild(h, child => { - sum += countNodesCstReader(_rd, child); + _cst.forEachChild(h, child => { + sum += countNodesCstView(_cst, child); }); return sum; }, @@ -77,16 +76,17 @@ const bench = new Bench({ bench.add( 'createOperation (CstNode)', - () => g.match(input).use((r: any) => countNodesCstNode(r.getCstRoot())), + () => g.match(input).use((r: any) => countNodesCstNode(r.cstView().rootNode())), opts ); bench.add( - 'createReaderOperation (CstReader)', + 'createCstViewOperation (CstView)', () => g.match(input).use((r: any) => { - _rd = createReader(r); - return countNodesCstReader(_rd, _rd.root); + const cst = r.cstView(); + _cst = cst; + return countNodesCstView(_cst, cst.root); }), opts ); @@ -103,6 +103,6 @@ console.log(`Input: ${smallSize ? 'small' : 'underscore-1.8.3.js'} (${input.leng } const cstNodeMean = bench.tasks[0].result!.latency.mean; - const cstReaderMean = bench.tasks[1].result!.latency.mean; - console.log(`\nSpeedup: ${(cstNodeMean / cstReaderMean).toFixed(2)}x`); + const cstViewMean = bench.tasks[1].result!.latency.mean; + console.log(`\nSpeedup: ${(cstNodeMean / cstViewMean).toFixed(2)}x`); })(); diff --git a/packages/semantics/package.json b/packages/semantics/package.json index d4fa353e..e79270d1 100644 --- a/packages/semantics/package.json +++ b/packages/semantics/package.json @@ -14,20 +14,22 @@ "license": "MIT", "author": "Patrick Dubroy ", "type": "module", - "main": "dist/index.js", + "main": "dist/src/index.js", "exports": { ".": { "types": "./dist/src/index.d.ts", "default": "./dist/src/index.js" }, - "./reader": { - "types": "./dist/src/reader.d.ts", - "default": "./dist/src/reader.js" + "./cstView": { + "types": "./dist/src/cstViewOps.d.ts", + "default": "./dist/src/cstViewOps.js" } }, "scripts": { "build": "tsc", - "test": "ava && node --experimental-strip-types bench.ts --small-size" + "bench": "node bench.ts", + "bench:smoke": "node bench.ts --small-size", + "test": "ava" }, "devDependencies": { "@ohm-js/compiler": "workspace:^", diff --git a/packages/semantics/src/cstViewOps.test.ts b/packages/semantics/src/cstViewOps.test.ts new file mode 100644 index 00000000..abb2ee41 --- /dev/null +++ b/packages/semantics/src/cstViewOps.test.ts @@ -0,0 +1,133 @@ +/* global URL */ + +import * as ohm from '@ohm-js/compiler/compat'; +import test from 'ava'; +import {readFileSync} from 'node:fs'; + +import type {CstViewOperation} from './cstViewOps.ts'; +import {createCstViewOperation} from './cstViewOps.ts'; +import type {ReaderOperation} from './reader.ts'; +import {createReaderOperation} from './reader.ts'; + +const scriptRel = (relPath: string) => new URL(relPath, import.meta.url); + +test('cstView-based: arithmetic', t => { + const g2 = ohm.grammar(readFileSync(scriptRel('../../ohm-js/test/arithmetic.ohm'), 'utf8')); + g2.match('1+(2*3)').use(r => { + if (!r.succeeded()) return t.fail('parse failed'); + const cst = r.cstView(); + + const evalIt: CstViewOperation = createCstViewOperation('evalIt', { + addExp_plus(h, a, _, b) { + return evalIt(cst, a) + evalIt(cst, b); + }, + addExp_minus(h, a, _, b) { + return evalIt(cst, a) - evalIt(cst, b); + }, + mulExp_times(h, a, _, b) { + return evalIt(cst, a) * evalIt(cst, b); + }, + mulExp_divide(h, a, _, b) { + return evalIt(cst, a) / evalIt(cst, b); + }, + priExp_paren(h, _, e, _2) { + return evalIt(cst, e); + }, + number(h, _) { + return parseInt(cst.sourceString(h), 10); + }, + }); + t.is(evalIt(cst, cst.root), 7); + }); +}); + +test('reader compatibility exports still work', t => { + const g2 = ohm.grammar(readFileSync(scriptRel('../../ohm-js/test/arithmetic.ohm'), 'utf8')); + g2.match('1+(2*3)').use(r => { + if (!r.succeeded()) return t.fail('parse failed'); + const cst = r.cstView(); + + const evalIt: ReaderOperation = createReaderOperation('evalIt', { + addExp_plus(h, a, _, b) { + return evalIt(cst, a) + evalIt(cst, b); + }, + addExp_minus(h, a, _, b) { + return evalIt(cst, a) - evalIt(cst, b); + }, + mulExp_times(h, a, _, b) { + return evalIt(cst, a) * evalIt(cst, b); + }, + mulExp_divide(h, a, _, b) { + return evalIt(cst, a) / evalIt(cst, b); + }, + priExp_paren(h, _, e, _2) { + return evalIt(cst, e); + }, + number(h, _) { + return parseInt(cst.sourceString(h), 10); + }, + }); + + t.is(createReaderOperation, createCstViewOperation); + t.is(evalIt(cst, cst.root), 7); + }); +}); + +test('cstView-based: list and opt', t => { + const g = ohm.grammar(String.raw` + G { + Start = ~end #"a" &(letter "c") ("b"+ letter?)* punc? + punc = ("!" space?)+ + } + `); + + g.match('abcbc!!').use(r => { + if (!r.succeeded()) return t.fail('parse failed'); + const cst = r.cstView(); + + const reversed: CstViewOperation = createCstViewOperation('reversed', { + Start(h, a, list, opt) { + const parts: string[] = []; + cst.forEachTuple(list, (b, optLetter) => { + parts.push(reversed(cst, optLetter) + reversed(cst, b)); + }); + return reversed(cst, opt) + parts.reverse().join('') + reversed(cst, a); + }, + punc(h, list) { + return reversed(cst, list); + }, + _list(h) { + const parts: string[] = []; + cst.forEachTuple(h, (...children) => { + let text = ''; + for (const child of children) { + text += reversed(cst, child); + } + parts.push(text); + }); + return parts.join(''); + }, + _opt(h) { + if (!cst.isPresent(h)) return ''; + return cst.withChildren(h, (_handle, ...children) => { + let text = ''; + for (const child of children) { + text += reversed(cst, child); + } + return text; + }); + }, + _terminal(h) { + return cst.sourceString(h); + }, + _default(h) { + let result = ''; + cst.forEachChild(h, child => { + result += reversed(cst, child); + }); + return result; + }, + }); + t.is(reversed(cst, cst.root), '!!cbcba'); + }); +}); diff --git a/packages/semantics/src/cstViewOps.ts b/packages/semantics/src/cstViewOps.ts new file mode 100644 index 00000000..75792c23 --- /dev/null +++ b/packages/semantics/src/cstViewOps.ts @@ -0,0 +1,109 @@ +import type {CstView} from 'ohm-js/cstView'; +import {CstNodeType} from 'ohm-js/cstView'; + +export type CstViewActionDict = { + _list?: (handle: number) => R; + _nonterminal?: (handle: number) => R; + _opt?: (handle: number) => R; + _terminal?: (handle: number) => R; + _default?: (handle: number) => R; + [ruleName: string]: ((handle: number, ...children: number[]) => R) | undefined; +}; + +export type CstViewOperation = (cst: CstView, handle: number) => R; + +type ActionFn = (handle: number, ...children: number[]) => R; + +// Sentinel values used in the dispatch table for fallback actions. +const NO_ACTION = 0; +const USE_NONTERMINAL = 1; +const USE_DEFAULT = 2; + +export function createCstViewOperation( + name: string, + actions: CstViewActionDict +): CstViewOperation { + // Lazily-built dispatch table: actionTable[ruleId] is either an action + // function or a sentinel (NO_ACTION / USE_NONTERMINAL / USE_DEFAULT). + let actionTable: (ActionFn | number)[] | undefined; + let cachedRuleNames: readonly string[] | undefined; + const listAction = actions._list; + const terminalAction = actions._terminal; + const nonterminalAction = actions._nonterminal; + const optAction = actions._opt; + const defaultAction = actions._default; + + function fail(cst: CstView, handle: number): never { + throw new Error(`missing semantic action for '${cst.ctorName(handle)}' in '${name}'`); + } + + function buildTable(ruleNames: readonly string[]): (ActionFn | number)[] { + const table: (ActionFn | number)[] = new Array(ruleNames.length); + for (let i = 0; i < ruleNames.length; i++) { + const ctorName = ruleNames[i].split('<')[0]; + const action = actions[ctorName]; + if (action) { + table[i] = action; + } else if (nonterminalAction) { + table[i] = USE_NONTERMINAL; + } else if (defaultAction) { + table[i] = USE_DEFAULT; + } else { + table[i] = NO_ACTION; + } + } + return table; + } + + function getTable(cst: CstView): (ActionFn | number)[] { + const ruleNames = cst.ruleNames; + if (actionTable && cachedRuleNames === ruleNames) return actionTable; + cachedRuleNames = ruleNames; + actionTable = buildTable(ruleNames); + return actionTable; + } + + const doIt: CstViewOperation = (cst: CstView, handle: number): R => { + const nodeType = cst.type(handle); + + // Terminal — no children, no table lookup needed. + if (nodeType === CstNodeType.TERMINAL) { + if (terminalAction) return terminalAction(handle); + if (defaultAction) return defaultAction(handle); + return fail(cst, handle); + } + + if (nodeType === CstNodeType.LIST) { + if (listAction) return listAction(handle); + if (defaultAction) return defaultAction(handle); + return fail(cst, handle); + } + + if (nodeType === CstNodeType.OPT) { + if (optAction) return optAction(handle); + if (defaultAction) return defaultAction(handle); + return fail(cst, handle); + } + + // Nonterminal — use dispatch table indexed by ruleId. + const table = getTable(cst); + const ruleId = cst.ruleId(handle); + const entry = table[ruleId]; + + if (typeof entry === 'function') { + return cst.withChildren(handle, entry); + } + if (entry === USE_NONTERMINAL) { + return nonterminalAction!(handle); + } + if (entry === USE_DEFAULT) { + return defaultAction!(handle); + } + if (cst.childCount(handle) === 1) { + return cst.withChildren(handle, (_handle, child) => doIt(cst, child)); + } + return fail(cst, handle); + }; + + return doIt; +} diff --git a/packages/semantics/src/index.test.ts b/packages/semantics/src/index.test.ts index f49bc635..d7627d0d 100644 --- a/packages/semantics/src/index.test.ts +++ b/packages/semantics/src/index.test.ts @@ -41,7 +41,7 @@ test('it basically works', t => { }, }); if (r.succeeded()) { - t.is(evalIt(r.getCstRoot()), 7); + t.is(evalIt(r.cstView().rootNode()), 7); } else { t.fail('parse failed'); } @@ -95,9 +95,7 @@ test('it handles v17 CSTs', t => { }); const r = g.match('abcbc!!'); if (r.succeeded()) { - // t.is(reversed(adaptCstFromMatchResult(r)), '!!cbcba'); - t.is(reversed(r.getCstRoot()), '!!cbcba'); - // t.pass(); + t.is(reversed(r.cstView().rootNode()), '!!cbcba'); } else { t.fail('parse failed'); } @@ -128,7 +126,7 @@ test('missing action does not corrupt the action stack', t => { }); const r = twoChildG.match('abcd'); assert(r.succeeded(), 'match should succeed'); - const err = t.throws(() => op(r.getCstRoot()), { + const err = t.throws(() => op(r.cstView().rootNode()), { message: /missing semantic action: alsoBroken/, }); // The error trace should show that we're inside 'start'. diff --git a/packages/semantics/src/reader.test.ts b/packages/semantics/src/reader.test.ts deleted file mode 100644 index fa7621e2..00000000 --- a/packages/semantics/src/reader.test.ts +++ /dev/null @@ -1,100 +0,0 @@ -/* global URL */ - -import * as ohm from '@ohm-js/compiler/compat'; -import {createReader} from 'ohm-js/cstReader'; -import test from 'ava'; -import {readFileSync} from 'node:fs'; - -import type {ReaderOperation} from './reader.ts'; -import {createReaderOperation} from './reader.ts'; - -const scriptRel = (relPath: string) => new URL(relPath, import.meta.url); - -test('reader-based: arithmetic', t => { - const g2 = ohm.grammar(readFileSync(scriptRel('../../ohm-js/test/arithmetic.ohm'), 'utf8')); - g2.match('1+(2*3)').use(r => { - if (!r.succeeded()) return t.fail('parse failed'); - const rd = createReader(r); - - const evalIt: ReaderOperation = createReaderOperation('evalIt', { - addExp_plus(h, a, _, b) { - return evalIt(rd, a) + evalIt(rd, b); - }, - addExp_minus(h, a, _, b) { - return evalIt(rd, a) - evalIt(rd, b); - }, - mulExp_times(h, a, _, b) { - return evalIt(rd, a) * evalIt(rd, b); - }, - mulExp_divide(h, a, _, b) { - return evalIt(rd, a) / evalIt(rd, b); - }, - priExp_paren(h, _, e, _2) { - return evalIt(rd, e); - }, - number(h, _) { - return parseInt(rd.sourceString(h), 10); - }, - }); - t.is(evalIt(rd, rd.root), 7); - }); -}); - -test('reader-based: list and opt', t => { - const g = ohm.grammar(String.raw` - G { - Start = ~end #"a" &(letter "c") ("b"+ letter?)* punc? - punc = ("!" space?)+ - } - `); - - g.match('abcbc!!').use(r => { - if (!r.succeeded()) return t.fail('parse failed'); - const rd = createReader(r); - - const reversed: ReaderOperation = createReaderOperation('reversed', { - Start(h, a, list, opt) { - const parts: string[] = []; - rd.forEachTuple(list, (b, optLetter) => { - parts.push(reversed(rd, optLetter) + reversed(rd, b)); - }); - return reversed(rd, opt) + parts.reverse().join('') + reversed(rd, a); - }, - punc(h, list) { - return reversed(rd, list); - }, - _list(h) { - const parts: string[] = []; - rd.forEachTuple(h, (...children) => { - let text = ''; - for (const child of children) { - text += reversed(rd, child); - } - parts.push(text); - }); - return parts.join(''); - }, - _opt(h) { - if (!rd.isPresent(h)) return ''; - return rd.withChildren(h, (_handle, ...children) => { - let text = ''; - for (const child of children) { - text += reversed(rd, child); - } - return text; - }); - }, - _terminal(h) { - return rd.sourceString(h); - }, - _default(h) { - let result = ''; - rd.forEachChild(h, child => { - result += reversed(rd, child); - }); - return result; - }, - }); - t.is(reversed(rd, rd.root), '!!cbcba'); - }); -}); diff --git a/packages/semantics/src/reader.ts b/packages/semantics/src/reader.ts index ee7aae50..aadea49e 100644 --- a/packages/semantics/src/reader.ts +++ b/packages/semantics/src/reader.ts @@ -1,109 +1,11 @@ -import type {CstReader} from 'ohm-js/cstReader'; -import {CstNodeType} from 'ohm-js/cstReader'; - -export type ReaderActionDict = { - _list?: (handle: number) => R; - _nonterminal?: (handle: number) => R; - _opt?: (handle: number) => R; - _terminal?: (handle: number) => R; - _default?: (handle: number) => R; - [ruleName: string]: ((handle: number, ...children: number[]) => R) | undefined; -}; - -export type ReaderOperation = (reader: CstReader, handle: number) => R; - -type ActionFn = (handle: number, ...children: number[]) => R; - -// Sentinel values used in the dispatch table for fallback actions. -const NO_ACTION = 0; -const USE_NONTERMINAL = 1; -const USE_DEFAULT = 2; - -export function createReaderOperation( - name: string, - actions: ReaderActionDict -): ReaderOperation { - // Lazily-built dispatch table: actionTable[ruleId] is either an action - // function or a sentinel (NO_ACTION / USE_NONTERMINAL / USE_DEFAULT). - let actionTable: (ActionFn | number)[] | undefined; - let cachedRuleNames: readonly string[] | undefined; - const listAction = actions._list; - const terminalAction = actions._terminal; - const nonterminalAction = actions._nonterminal; - const optAction = actions._opt; - const defaultAction = actions._default; - - function fail(reader: CstReader, handle: number): never { - throw new Error(`missing semantic action for '${reader.ctorName(handle)}' in '${name}'`); - } - - function buildTable(ruleNames: readonly string[]): (ActionFn | number)[] { - const table: (ActionFn | number)[] = new Array(ruleNames.length); - for (let i = 0; i < ruleNames.length; i++) { - const ctorName = ruleNames[i].split('<')[0]; - const action = actions[ctorName]; - if (action) { - table[i] = action; - } else if (nonterminalAction) { - table[i] = USE_NONTERMINAL; - } else if (defaultAction) { - table[i] = USE_DEFAULT; - } else { - table[i] = NO_ACTION; - } - } - return table; - } - - function getTable(reader: CstReader): (ActionFn | number)[] { - const ruleNames = reader.ruleNames; - if (actionTable && cachedRuleNames === ruleNames) return actionTable; - cachedRuleNames = ruleNames; - actionTable = buildTable(ruleNames); - return actionTable; - } - - const doIt: ReaderOperation = (reader: CstReader, handle: number): R => { - const nodeType = reader.type(handle); - - // Terminal — no children, no table lookup needed. - if (nodeType === CstNodeType.TERMINAL) { - if (terminalAction) return terminalAction(handle); - if (defaultAction) return defaultAction(handle); - return fail(reader, handle); - } - - if (nodeType === CstNodeType.LIST) { - if (listAction) return listAction(handle); - if (defaultAction) return defaultAction(handle); - return fail(reader, handle); - } - - if (nodeType === CstNodeType.OPT) { - if (optAction) return optAction(handle); - if (defaultAction) return defaultAction(handle); - return fail(reader, handle); - } - - // Nonterminal — use dispatch table indexed by ruleId. - const table = getTable(reader); - const ruleId = reader.ruleId(handle); - const entry = table[ruleId]; - - if (typeof entry === 'function') { - return reader.withChildren(handle, entry); - } - if (entry === USE_NONTERMINAL) { - return nonterminalAction!(handle); - } - if (entry === USE_DEFAULT) { - return defaultAction!(handle); - } - if (reader.childCount(handle) === 1) { - return reader.withChildren(handle, (_handle, child) => doIt(reader, child)); - } - return fail(reader, handle); - }; - - return doIt; -} +export { + createCstViewOperation, + createCstViewOperation as createReaderOperation, +} from './cstViewOps.ts'; + +export type { + CstViewActionDict, + CstViewActionDict as ReaderActionDict, + CstViewOperation, + CstViewOperation as ReaderOperation, +} from './cstViewOps.ts'; diff --git a/packages/to-ast-compat/src/createToAst.ts b/packages/to-ast-compat/src/createToAst.ts index 692d03ae..549ef4c6 100644 --- a/packages/to-ast-compat/src/createToAst.ts +++ b/packages/to-ast-compat/src/createToAst.ts @@ -155,7 +155,7 @@ export class AstBuilder { if (typeof (nodeOrResult as MatchResult)._succeeded === 'boolean') { const matchResult = nodeOrResult as MatchResult; assert(matchResult._succeeded, 'Cannot convert failed match result to AST'); - node = (matchResult as SucceededMatchResult).getCstRoot(); + node = (matchResult as SucceededMatchResult).cstView().rootNode(); } let ans; this._depth++;