diff --git a/features.txt b/features.txt index 4d99a8878ca..5bd68e9d4fc 100644 --- a/features.txt +++ b/features.txt @@ -43,6 +43,10 @@ decorators # https://github.com/tc39/proposal-duplicate-named-capturing-groups regexp-duplicate-named-groups +# RegExp Buffer Boundaries +# https://github.com/tc39/proposal-regexp-buffer-boundaries +regexp-buffer-boundaries + # https://tc39.es/proposal-array-from-async/ Array.fromAsync diff --git a/test/annexB/built-ins/RegExp/buffer-boundaries/not-supported-outside-unicode-modes.js b/test/annexB/built-ins/RegExp/buffer-boundaries/not-supported-outside-unicode-modes.js new file mode 100644 index 00000000000..85a03a78c52 --- /dev/null +++ b/test/annexB/built-ins/RegExp/buffer-boundaries/not-supported-outside-unicode-modes.js @@ -0,0 +1,44 @@ +// Copyright 2026 Ron Buckton. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +author: Ron Buckton +description: > + `\A` and `\z` (and `\Z`) are treated as IdentityEscape in Annex B outside any unicode mode +info: | + Patterns + + Assertion[UnicodeMode, UnicodeSetsMode, NamedCaptureGroups] :: + ... + [+UnicodeMode] `\` `A` + [+UnicodeMode] `\` `z` + ... 
+ + IdentityEscape[UnicodeMode, NamedCaptureGroups] :: + [+UnicodeMode] SyntaxCharacter + [+UnicodeMode] `/` + [~UnicodeMode] SourceCharacterIdentityEscape[?NamedCaptureGroups] + + SourceCharacterIdentityEscape[NamedCaptureGroups] :: + [~NamedCaptureGroups] SourceCharacter but not `c` + [+NamedCaptureGroups] SourceCharacter but not one of `c` or `k` + +esid: sec-regular-expressions-patterns +features: [regexp-buffer-boundaries] +---*/ + +// `\A` is |IdentityEscape| outside of any unicode mode +assert(/^\A$/.test("A"), "Expected \\A to match literal 'A' outside of unicode mode."); +assert(!/\Ax/.test("x"), "Expected \\A to not match start of buffer outside of unicode mode."); +assert(new RegExp("^\\A$").test("A"), "Expected \\A to match literal 'A' outside of unicode mode."); +assert(!new RegExp("\\Ax").test("x"), "Expected \\A to not match start of buffer outside of unicode mode."); + +// `\z` is |IdentityEscape| outside of any unicode mode +assert(/^\z$/.test("z"), "Expected \\z to match literal 'z' outside of unicode mode."); +assert(!/x\z/.test("x"), "Expected \\z to not match end of buffer outside of unicode mode."); +assert(new RegExp("^\\z$").test("z"), "Expected \\z to match literal 'z' outside of unicode mode."); +assert(!new RegExp("x\\z").test("x"), "Expected \\z to not match end of buffer outside of unicode mode."); + +// NOTE: ensures reserved `\Z` is |IdentityEscape| outside of any unicode mode +assert(/^\Z$/.test("Z"), "Expected \\Z to match literal 'Z' outside of unicode mode."); +assert(new RegExp("^\\Z$").test("Z"), "Expected \\Z to match literal 'Z' outside of unicode mode."); diff --git a/test/built-ins/RegExp/buffer-boundaries/slash-lower-case-z-matches-end-of-buffer.js b/test/built-ins/RegExp/buffer-boundaries/slash-lower-case-z-matches-end-of-buffer.js new file mode 100644 index 00000000000..f4914b84fb4 --- /dev/null +++ b/test/built-ins/RegExp/buffer-boundaries/slash-lower-case-z-matches-end-of-buffer.js @@ -0,0 +1,51 @@ +// Copyright 2026 Ron 
Buckton. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +author: Ron Buckton +description: > + `\z` (lower-case z) matches end of buffer in any unicode mode. +info: | + Runtime Semantics: CompileAssertion + + Assertion :: \z + 1. Return a new Matcher with parameters (x, c) that captures nothing and performs the following steps when called: + a. Assert: x is a MatchState. + b. Assert: c is a MatcherContinuation. + c. Let Input be x.[[Input]]. + d. Let e be x.[[EndIndex]]. + e. Let InputLength be the number of elements in Input. + f. If e = InputLength, return c(x). + g. Return failure. + +esid: sec-patterns +features: [regexp-buffer-boundaries] +---*/ + +assert(/x\z/u.test("x"), "Expected \\z to match the end of the buffer outside of multiline mode"); +assert(/x\z/um.test("x"), "Expected \\z to match the end of the buffer inside of multiline mode"); +assert(!/x\z/u.test("xy"), "Expected \\z to not match when not at the end of the buffer outside of multiline mode"); +assert(!/x\z/um.test("xy"), "Expected \\z to not match when not at the end of the buffer inside of multiline mode"); +assert(!/x\z/u.test("x\ny"), "Expected \\z to not match when only at the end of a line outside of multiline mode"); +assert(!/x\z/um.test("x\ny"), "Expected \\z to not match when only at the end of a line inside of multiline mode"); + +assert(/x\z/v.test("x"), "Expected \\z to match the end of the buffer outside of multiline mode"); +assert(/x\z/vm.test("x"), "Expected \\z to match the end of the buffer inside of multiline mode"); +assert(!/x\z/v.test("xy"), "Expected \\z to not match when not at the end of the buffer outside of multiline mode"); +assert(!/x\z/vm.test("xy"), "Expected \\z to not match when not at the end of the buffer inside of multiline mode"); +assert(!/x\z/v.test("x\ny"), "Expected \\z to not match when only at the end of a line outside of multiline mode"); +assert(!/x\z/vm.test("x\ny"), "Expected \\z to not match 
when only at the end of a line inside of multiline mode"); + +assert(new RegExp("x\\z", "u").test("x"), "Expected \\z to match the end of the buffer outside of multiline mode"); +assert(new RegExp("x\\z", "um").test("x"), "Expected \\z to match the end of the buffer inside of multiline mode"); +assert(!new RegExp("x\\z", "u").test("xy"), "Expected \\z to not match when not at the end of the buffer outside of multiline mode"); +assert(!new RegExp("x\\z", "um").test("xy"), "Expected \\z to not match when not at the end of the buffer inside of multiline mode"); +assert(!new RegExp("x\\z", "u").test("x\ny"), "Expected \\z to not match when only at the end of a line outside of multiline mode"); +assert(!new RegExp("x\\z", "um").test("x\ny"), "Expected \\z to not match when only at the end of a line inside of multiline mode"); + +assert(new RegExp("x\\z", "v").test("x"), "Expected \\z to match the end of the buffer outside of multiline mode"); +assert(new RegExp("x\\z", "vm").test("x"), "Expected \\z to match the end of the buffer inside of multiline mode"); +assert(!new RegExp("x\\z", "v").test("xy"), "Expected \\z to not match when not at the end of the buffer outside of multiline mode"); +assert(!new RegExp("x\\z", "vm").test("xy"), "Expected \\z to not match when not at the end of the buffer inside of multiline mode"); +assert(!new RegExp("x\\z", "v").test("x\ny"), "Expected \\z to not match when only at the end of a line outside of multiline mode"); +assert(!new RegExp("x\\z", "vm").test("x\ny"), "Expected \\z to not match when only at the end of a line inside of multiline mode"); diff --git a/test/built-ins/RegExp/buffer-boundaries/slash-upper-case-a-matches-start-of-buffer.js b/test/built-ins/RegExp/buffer-boundaries/slash-upper-case-a-matches-start-of-buffer.js new file mode 100644 index 00000000000..44aa6148bdf --- /dev/null +++ b/test/built-ins/RegExp/buffer-boundaries/slash-upper-case-a-matches-start-of-buffer.js @@ -0,0 +1,49 @@ +// Copyright 2026 Ron 
Buckton. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +author: Ron Buckton +description: > + `\A` matches start of buffer in any unicode mode. +info: | + Runtime Semantics: CompileAssertion + + Assertion :: \A + 1. Return a new Matcher with parameters (x, c) that captures nothing and performs the following steps when called: + a. Assert: x is a MatchState. + b. Assert: c is a MatcherContinuation. + c. Let e be x.[[EndIndex]]. + d. If e = 0, return c(x). + e. Return failure. + +esid: sec-patterns +features: [regexp-buffer-boundaries] +---*/ + +assert(/\Ax/u.test("x"), "Expected \\A to match the start of the buffer outside of multiline mode"); +assert(/\Ax/um.test("x"), "Expected \\A to match the start of the buffer inside of multiline mode"); +assert(!/\Ax/u.test("yx"), "Expected \\A to not match when not at the start of the buffer outside of multiline mode"); +assert(!/\Ax/um.test("yx"), "Expected \\A to not match when not at the start of the buffer inside of multiline mode"); +assert(!/\Ax/u.test("y\nx"), "Expected \\A to not match when at the start of a new line outside of multiline mode"); +assert(!/\Ax/um.test("y\nx"), "Expected \\A to not match when at the start of a new line inside of multiline mode"); + +assert(/\Ax/v.test("x"), "Expected \\A to match the start of the buffer outside of multiline mode"); +assert(/\Ax/vm.test("x"), "Expected \\A to match the start of the buffer inside of multiline mode"); +assert(!/\Ax/v.test("yx"), "Expected \\A to not match when not at the start of the buffer outside of multiline mode"); +assert(!/\Ax/vm.test("yx"), "Expected \\A to not match when not at the start of the buffer inside of multiline mode"); +assert(!/\Ax/v.test("y\nx"), "Expected \\A to not match when at the start of a new line outside of multiline mode"); +assert(!/\Ax/vm.test("y\nx"), "Expected \\A to not match when at the start of a new line inside of multiline mode"); + +assert(new RegExp("\\Ax", 
"u").test("x"), "Expected \\A to match the start of the buffer outside of multiline mode"); +assert(new RegExp("\\Ax", "um").test("x"), "Expected \\A to match the start of the buffer inside of multiline mode"); +assert(!new RegExp("\\Ax", "u").test("yx"), "Expected \\A to not match when not at the start of the buffer outside of multiline mode"); +assert(!new RegExp("\\Ax", "um").test("yx"), "Expected \\A to not match when not at the start of the buffer inside of multiline mode"); +assert(!new RegExp("\\Ax", "u").test("y\nx"), "Expected \\A to not match when at the start of a new line outside of multiline mode"); +assert(!new RegExp("\\Ax", "um").test("y\nx"), "Expected \\A to not match when at the start of a new line inside of multiline mode"); + +assert(new RegExp("\\Ax", "v").test("x"), "Expected \\A to match the start of the buffer outside of multiline mode"); +assert(new RegExp("\\Ax", "vm").test("x"), "Expected \\A to match the start of the buffer inside of multiline mode"); +assert(!new RegExp("\\Ax", "v").test("yx"), "Expected \\A to not match when not at the start of the buffer outside of multiline mode"); +assert(!new RegExp("\\Ax", "vm").test("yx"), "Expected \\A to not match when not at the start of the buffer inside of multiline mode"); +assert(!new RegExp("\\Ax", "v").test("y\nx"), "Expected \\A to not match when at the start of a new line outside of multiline mode"); +assert(!new RegExp("\\Ax", "vm").test("y\nx"), "Expected \\A to not match when at the start of a new line inside of multiline mode"); diff --git a/test/built-ins/RegExp/buffer-boundaries/syntax/u-mode.js b/test/built-ins/RegExp/buffer-boundaries/syntax/u-mode.js new file mode 100644 index 00000000000..27305d5280b --- /dev/null +++ b/test/built-ins/RegExp/buffer-boundaries/syntax/u-mode.js @@ -0,0 +1,48 @@ +// Copyright 2026 Ron Buckton. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. 
+ +/*--- +author: Ron Buckton +description: > + `\A` and `\z` are parsed successfully in u-mode +info: | + Patterns + + Assertion[UnicodeMode, UnicodeSetsMode, NamedCaptureGroups] :: + ... + [+UnicodeMode] `\` `A` + [+UnicodeMode] `\` `z` + ... + +esid: sec-patterns +features: [regexp-buffer-boundaries] +---*/ + +/\A/u; +/\A/um; +/\A/umi; +/\A/us; +/\A/usi; +/\A/usm; +/\A/usmi; +/\z/u; +/\z/um; +/\z/umi; +/\z/us; +/\z/usi; +/\z/usm; +/\z/usmi; +new RegExp("\\A", "u"); +new RegExp("\\A", "um"); +new RegExp("\\A", "umi"); +new RegExp("\\A", "us"); +new RegExp("\\A", "usi"); +new RegExp("\\A", "usm"); +new RegExp("\\A", "usmi"); +new RegExp("\\z", "u"); +new RegExp("\\z", "um"); +new RegExp("\\z", "umi"); +new RegExp("\\z", "us"); +new RegExp("\\z", "usi"); +new RegExp("\\z", "usm"); +new RegExp("\\z", "usmi"); diff --git a/test/built-ins/RegExp/buffer-boundaries/syntax/upper-z-escape-reserved-in-unicode-modes-literal-u-mode.js b/test/built-ins/RegExp/buffer-boundaries/syntax/upper-z-escape-reserved-in-unicode-modes-literal-u-mode.js new file mode 100644 index 00000000000..948df73d221 --- /dev/null +++ b/test/built-ins/RegExp/buffer-boundaries/syntax/upper-z-escape-reserved-in-unicode-modes-literal-u-mode.js @@ -0,0 +1,23 @@ +// Copyright 2026 Ron Buckton. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +author: Ron Buckton +description: > + `\Z` (upper-case Z) is reserved in Unicode ('u') mode. +info: | + Patterns + + Note: While the sequence `\Z` is already an error when either the `u` (unicode) or `v` (unicode sets) modes are + present, it is reserved for possible future use as an extension of the `\A` and `\z` assertions. 
+ +esid: sec-patterns +features: [regexp-buffer-boundaries] +negative: + phase: parse + type: SyntaxError +---*/ + +$DONOTEVALUATE(); + +/\Z/u; diff --git a/test/built-ins/RegExp/buffer-boundaries/syntax/upper-z-escape-reserved-in-unicode-modes-literal-v-mode.js b/test/built-ins/RegExp/buffer-boundaries/syntax/upper-z-escape-reserved-in-unicode-modes-literal-v-mode.js new file mode 100644 index 00000000000..7a349841b04 --- /dev/null +++ b/test/built-ins/RegExp/buffer-boundaries/syntax/upper-z-escape-reserved-in-unicode-modes-literal-v-mode.js @@ -0,0 +1,23 @@ +// Copyright 2026 Ron Buckton. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +author: Ron Buckton +description: > + `\Z` (upper-case Z) is reserved in Unicode Sets ('v') mode. +info: | + Patterns + + Note: While the sequence `\Z` is already an error when either the `u` (unicode) or `v` (unicode sets) modes are + present, it is reserved for possible future use as an extension of the `\A` and `\z` assertions. + +esid: sec-patterns +features: [regexp-buffer-boundaries] +negative: + phase: parse + type: SyntaxError +---*/ + +$DONOTEVALUATE(); + +/\Z/v; diff --git a/test/built-ins/RegExp/buffer-boundaries/syntax/upper-z-escape-reserved-in-unicode-modes.js b/test/built-ins/RegExp/buffer-boundaries/syntax/upper-z-escape-reserved-in-unicode-modes.js new file mode 100644 index 00000000000..8e62a65ca1e --- /dev/null +++ b/test/built-ins/RegExp/buffer-boundaries/syntax/upper-z-escape-reserved-in-unicode-modes.js @@ -0,0 +1,19 @@ +// Copyright 2026 Ron Buckton. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +author: Ron Buckton +description: > + `\Z` (upper-case Z) is reserved in any unicode mode. 
+info: | + Patterns + + Note: While the sequence `\Z` is already an error when either the `u` (unicode) or `v` (unicode sets) modes are + present, it is reserved for possible future use as an extension of the `\A` and `\z` assertions. + +esid: sec-patterns +features: [regexp-buffer-boundaries] +---*/ + +assert.throws(SyntaxError, function() { new RegExp("\\Z", "u"); }, 'new RegExp("\\Z", "u")'); +assert.throws(SyntaxError, function() { new RegExp("\\Z", "v"); }, 'new RegExp("\\Z", "v")'); diff --git a/test/built-ins/RegExp/buffer-boundaries/syntax/v-mode.js b/test/built-ins/RegExp/buffer-boundaries/syntax/v-mode.js new file mode 100644 index 00000000000..0f5c6ac2e35 --- /dev/null +++ b/test/built-ins/RegExp/buffer-boundaries/syntax/v-mode.js @@ -0,0 +1,48 @@ +// Copyright 2026 Ron Buckton. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +author: Ron Buckton +description: > + `\A` and `\z` are parsed successfully in v-mode +info: | + Patterns + + Assertion[UnicodeMode, UnicodeSetsMode, NamedCaptureGroups] :: + ... + [+UnicodeMode] `\` `A` + [+UnicodeMode] `\` `z` + ... 
+ +esid: sec-patterns +features: [regexp-buffer-boundaries] +---*/ + +/\A/v; +/\A/vm; +/\A/vmi; +/\A/vs; +/\A/vsi; +/\A/vsm; +/\A/vsmi; +/\z/v; +/\z/vm; +/\z/vmi; +/\z/vs; +/\z/vsi; +/\z/vsm; +/\z/vsmi; +new RegExp("\\A", "v"); +new RegExp("\\A", "vm"); +new RegExp("\\A", "vmi"); +new RegExp("\\A", "vs"); +new RegExp("\\A", "vsi"); +new RegExp("\\A", "vsm"); +new RegExp("\\A", "vsmi"); +new RegExp("\\z", "v"); +new RegExp("\\z", "vm"); +new RegExp("\\z", "vmi"); +new RegExp("\\z", "vs"); +new RegExp("\\z", "vsi"); +new RegExp("\\z", "vsm"); +new RegExp("\\z", "vsmi"); diff --git a/test/staging/sm/RegExp/unicode-disallow-extended.js b/test/staging/sm/RegExp/unicode-disallow-extended.js index bbcd6ca470e..a43203110fd 100644 --- a/test/staging/sm/RegExp/unicode-disallow-extended.js +++ b/test/staging/sm/RegExp/unicode-disallow-extended.js @@ -13,7 +13,6 @@ esid: pending assert.compareArray(/\^\$\\\.\*\+\?\(\)\[\]\{\}\|/u.exec("^$\\.*+?()[]{}|"), ["^$\\.*+?()[]{}|"]); -assert.throws(SyntaxError, () => eval(`/\\A/u`)); assert.throws(SyntaxError, () => eval(`/\\-/u`)); assert.throws(SyntaxError, () => eval(`/\\U{10}/u`)); assert.throws(SyntaxError, () => eval(`/\\U0000/u`));