Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Lib/_pyrepl/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class unix_word_rubout(KillCommand):
def do(self) -> None:
r = self.reader
for i in range(r.get_arg()):
self.kill_range(r.bow(), r.pos)
self.kill_range(r.bow_whitespace(), r.pos)


class kill_word(KillCommand):
Expand Down
16 changes: 16 additions & 0 deletions Lib/_pyrepl/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,22 @@ def bow(self, p: int | None = None) -> int:
p -= 1
return p + 1

def bow_whitespace(self, p: int | None = None) -> int:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe unix_bow() instead? Or bow_ws(), since we use quite short names?

"""Return the 0-based index of the whitespace-delimited word break
preceding p most immediately.
p defaults to self.pos; only whitespace is considered a word
boundary, matching the behavior of unix-word-rubout in bash/readline."""
if p is None:
p = self.pos
b = self.buffer
p -= 1
while p >= 0 and b[p] in (" ", "\n"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
while p >= 0 and b[p] in (" ", "\n"):
while p >= 0 and b[p] in " \n":

p -= 1
while p >= 0 and b[p] not in (" ", "\n"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
while p >= 0 and b[p] not in (" ", "\n"):
while p >= 0 and b[p] not in " \n":

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity: newlines are also counted, but what about tabs? Or do we convert them to 4-space indents in the REPL?

p -= 1
return p + 1

def eow(self, p: int | None = None) -> int:
"""Return the 0-based index of the word break following p most
immediately.
Expand Down
44 changes: 44 additions & 0 deletions Lib/test/test_pyrepl/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,3 +558,47 @@ def test_control_characters(self):
reader, _ = handle_all_events(events)
self.assert_screen_equal(reader, 'flag = "🏳️\\u200d🌈"', clean=True)
self.assert_screen_equal(reader, 'flag {o}={z} {s}"🏳️\\u200d🌈"{z}'.format(**colors))


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove those extra blanks

class TestBowWhitespace(TestCase):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This entire test case can have one single reference to the GH issue as a comment and we can remove them from the methods.

However I would prefer that we extend the existing test case with the bow tests and place them where existing ones are.

def test_bow_whitespace_stops_at_whitespace(self):
# GH#146044
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

        # See https://github.com/python/cpython/issues/146044

Use a link like that so that I can click on it in my IDE. I created the link from memory, so just check that it is the correct URL.

# unix-word-rubout (ctrl-w) should use whitespace boundaries,
# not punctuation boundaries like bow() does
reader = prepare_reader(prepare_console([]))
reader.buffer = list("foo.bar baz")
reader.pos = len(reader.buffer) # cursor at end

# bow_whitespace from end should jump to start of "baz"
result = reader.bow_whitespace()
self.assertEqual(result, 8) # index of 'b' in "baz"

def test_bow_whitespace_includes_punctuation_in_word(self):
# GH#146044
reader = prepare_reader(prepare_console([]))
reader.buffer = list("foo.bar(baz) qux")
reader.pos = 12 # cursor after ")"

# bow_whitespace should treat "foo.bar(baz)" as one word
result = reader.bow_whitespace()
self.assertEqual(result, 0)

def test_bow_stops_at_punctuation(self):
# Verify existing bow() still uses syntax_table (punctuation boundary)
reader = prepare_reader(prepare_console([]))
reader.buffer = list("foo.bar baz")
reader.pos = len(reader.buffer)

result = reader.bow()
self.assertEqual(result, 8) # same — "baz" is all word chars
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self.assertEqual(result, 8) # same — "baz" is all word chars
self.assertEqual(result, 8) # same: "baz" is all word chars

Avoid LLM-style long dashes and use regular English, please.


def test_bow_vs_bow_whitespace_difference(self):
# The key difference: bow() stops at '.', bow_whitespace() does not
reader = prepare_reader(prepare_console([]))
reader.buffer = list("foo.bar")
reader.pos = len(reader.buffer)

# bow() stops at '.' → returns index of 'b' in "bar"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# bow() stops at '.' → returns index of 'b' in "bar"
# bow() stops at '.' so we return the index of 'b' in "bar"

self.assertEqual(reader.bow(), 4)
# bow_whitespace() treats entire "foo.bar" as one word
self.assertEqual(reader.bow_whitespace(), 0)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix ``unix-word-rubout`` (Ctrl-W) in the REPL to use whitespace-only word
boundaries, matching bash/readline behavior. Previously it used
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
boundaries, matching bash/readline behavior. Previously it used
boundaries, matching the behavior of the basic REPL. Previously it used

syntax-table boundaries which treated punctuation as word separators.
Loading