From 19f848497d5a930c27aee37e023b94e2b679da96 Mon Sep 17 00:00:00 2001 From: Innes Anderson-Morrison Date: Mon, 26 May 2025 10:44:18 +0100 Subject: [PATCH 1/4] #123 add regression test for utf-8 multibyte search --- .../editor-scenarios/issues/issue_123.txtar | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tests/data/editor-scenarios/issues/issue_123.txtar diff --git a/tests/data/editor-scenarios/issues/issue_123.txtar b/tests/data/editor-scenarios/issues/issue_123.txtar new file mode 100644 index 00000000..7607e0e7 --- /dev/null +++ b/tests/data/editor-scenarios/issues/issue_123.txtar @@ -0,0 +1,19 @@ +Using the built-in "search in file" behaviour should work with multibyte UTF-8 +characters. + +Issue (gh#123) originally reported this and in order to trigger the panic that +was being seen there we need to type both the multibyte character itself and +then another after it which was being inserted one byte into the multibyte +sequence due to incorrect tracking of byte offsets vs character offsets within +the input buffer. + +-- config -- +path: tests/data/config/without-fsys.toml +-- file-hello.txt -- +before +dača +after +-- actions -- +type: /ča\n +-- expected-buffer-dot-1 -- +dača From c63e475cc905d0faed53e5eb4015746edc063501 Mon Sep 17 00:00:00 2001 From: Innes Anderson-Morrison Date: Sun, 1 Jun 2025 09:16:55 +0100 Subject: [PATCH 2/4] add helper methods to GapBuffer for accessing raw content --- src/buffer/internal.rs | 82 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 3 deletions(-) diff --git a/src/buffer/internal.rs b/src/buffer/internal.rs index 27447039..82842ff7 100644 --- a/src/buffer/internal.rs +++ b/src/buffer/internal.rs @@ -51,7 +51,7 @@ type CharOffset = usize; /// An implementation of a gap buffer that tracks internal meta-data to help with accessing /// sub-regions of the text such as character ranges and lines. -#[derive(Default, Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct GapBuffer { /// the raw data being stored (both buffer content and the gap) data: Box<[u8]>, @@ -70,6 +70,12 @@ pub struct GapBuffer { n_chars: usize, } +impl Default for GapBuffer { + fn default() -> Self { + Self::new() + } +} + fn compute_line_endings(s: &str) -> (usize, BTreeMap) { let mut n_chars = 0; let mut line_endings = BTreeMap::new(); @@ -201,6 +207,11 @@ macro_rules! assert_line_endings { } impl GapBuffer { + /// Construct a new empty GapBuffer + pub fn new() -> Self { + Self::from("") + } + /// Number of bytes in the gap #[inline] fn gap(&self) -> usize { @@ -220,7 +231,47 @@ impl GapBuffer { self.cap == self.gap() } - /// The raw content of the buffer + /// Rearrange the internal storage of this buffer so that the active data is in a single + /// contiguous slice which is then returned. + pub fn make_contiguous(&mut self) -> &[u8] { + self.move_gap_to(0); + &self.data[self.gap_end..] + } + + /// The contents of the buffer as a single `&str`. + /// + /// This method requires a mutable reference as we need to move the gap in order to ensure that + /// all of the active data within the buffer is contiguous. + pub fn as_str(&mut self) -> &str { + let raw = self.make_contiguous(); + + // SAFETY: we know we have valid utf-8 data internally and as_bytes moves the gap so that + // `raw` contains all of the live data within the buffer. + unsafe { std::str::from_utf8_unchecked(raw) } + } + + /// The contents of the buffer either side of the current gap. + /// + /// If [make_contiguous][GapBuffer::make_contiguous] was previously called, the first `&str` + /// will be empty and the full content of the buffer will be in the second `&str`. + pub fn as_strs(&self) -> (&str, &str) { + let left = &self.data[0..self.gap_start]; + let right = &self.data[self.gap_end..]; + + // SAFETY: we know that we have valid utf8 data internally and that the position of the gap + // does not split any utf-8 codepoints. + unsafe { + ( + std::str::from_utf8_unchecked(left), + std::str::from_utf8_unchecked(right), + ) + } + } + + /// The raw content of the active data within the buffer. + /// + /// For a non allocating version of this when you are able to mutate the buffer (by moving the + /// gap) see [make_contiguous][GapBuffer::make_contiguous]. pub fn bytes(&self) -> Vec { let mut v = Vec::with_capacity(self.len()); v.extend(&self.data[..self.gap_start]); @@ -231,7 +282,7 @@ impl GapBuffer { /// Iterate over the characters of the buffer pub fn chars(&self) -> Chars<'_> { - self.slice(0, self.n_chars).chars() + self.as_slice().chars() } /// Iterate over the lines of the buffer @@ -1793,4 +1844,29 @@ mod tests { "// does it need to be a doc comment? that is a long enough line to\n" ); } + + #[test_case(0, "", "foo bar"; "gap at start")] + #[test_case(3, "foo", " bar"; "gap in between")] + #[test] + fn as_strs_works(byte_idx: usize, left: &str, right: &str) { + let mut gb = GapBuffer::from("foo bar"); + gb.move_gap_to(byte_idx); + + let (l, r) = gb.as_strs(); + + assert_eq!((l, r), (left, right)); + } + + #[test_case(0; "gap at start")] + #[test_case(3; "gap in between")] + #[test] + fn as_str_works(byte_idx: usize) { + let mut gb = GapBuffer::from("foo bar"); + gb.move_gap_to(byte_idx); + + let s = gb.as_str(); + + assert_eq!(s, "foo bar"); + assert_eq!(gb.gap_start, 0); + } } From 1294f6f836dd5b70978b9866618500f8dd37b354 Mon Sep 17 00:00:00 2001 From: Innes Anderson-Morrison Date: Sun, 1 Jun 2025 09:22:12 +0100 Subject: [PATCH 3/4] #123 move over to using a Buffer for handling minibuffer input --- src/editor/minibuffer.rs | 64 +++++++++++-------- .../editor-scenarios/issues/issue_123.txtar | 1 + 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/editor/minibuffer.rs b/src/editor/minibuffer.rs index 8a8e8dca..b8e16364 100644 --- a/src/editor/minibuffer.rs +++ b/src/editor/minibuffer.rs @@ -3,10 +3,10 @@ //! //! Conceptually this is operates as an embedded dmenu. use crate::{ - buffer::{Buffer, Buffers, GapBuffer}, + buffer::{Buffer, Buffers, GapBuffer, Slice}, config_handle, dot::TextObject, - editor::{Actions, Editor}, + editor::{Action, Actions, Editor}, key::{Arrow, Input}, system::System, Config, @@ -20,13 +20,15 @@ use std::{ }; use tracing::trace; +const MINIBUFFER_ID: usize = usize::MAX - 1; + #[derive(Debug, Default)] pub struct MiniBufferState<'a> { pub(crate) cx: usize, pub(crate) n_visible_lines: usize, pub(crate) selected_line_idx: usize, pub(crate) prompt: &'a str, - pub(crate) input: &'a str, + pub(crate) input: Slice<'a>, pub(crate) b: Option<&'a Buffer>, pub(crate) top: usize, pub(crate) bottom: usize, @@ -44,16 +46,16 @@ pub(crate) enum MiniBufferSelection { /// Conceptually this is operates as an embedded dmenu. pub(crate) struct MiniBuffer where - F: Fn(&str) -> Option>, + F: Fn(&GapBuffer) -> Option>, { on_change: F, prompt: String, - input: String, + n_prompt_chars: usize, + input: Buffer, initial_lines: Vec, line_indices: Vec, b: Buffer, max_height: usize, - x: usize, y: usize, selected_line_idx: usize, n_visible_lines: usize, @@ -64,7 +66,7 @@ where impl fmt::Debug for MiniBuffer where - F: Fn(&str) -> Option>, + F: Fn(&GapBuffer) -> Option>, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("MiniBuffer") @@ -76,7 +78,7 @@ where impl MiniBuffer where - F: Fn(&str) -> Option>, + F: Fn(&GapBuffer) -> Option>, { pub fn new( prompt: String, @@ -86,16 +88,17 @@ where config: Arc>, ) -> Self { let line_indices = Vec::with_capacity(lines.len()); + let n_prompt_chars = prompt.chars().count(); Self { on_change, prompt, - input: String::new(), + n_prompt_chars, + input: Buffer::new_unnamed(MINIBUFFER_ID, "", config.clone()), initial_lines: lines, line_indices, b: Buffer::new_minibuffer(config), max_height, - x: 0, y: 0, selected_line_idx: 0, n_visible_lines: 0, @@ -107,7 +110,7 @@ where #[inline] fn handle_on_change(&mut self) { - if let Some(lines) = (self.on_change)(&self.input) { + if let Some(lines) = (self.on_change)(&self.input.txt) { self.b.txt = GapBuffer::from(lines.join("\n")); self.b.dot.clamp_idx(self.b.txt.len_chars()); }; @@ -118,7 +121,7 @@ where self.b.txt.clear(); self.line_indices.clear(); - let input_fragments: Vec<&str> = self.input.split_whitespace().collect(); + let input_fragments: Vec<&str> = self.input.txt.as_str().split_whitespace().collect(); let mut visible_lines = vec![]; for (i, line) in self.initial_lines.iter().enumerate() { @@ -162,10 +165,10 @@ where #[inline] fn current_state(&self) -> MiniBufferState<'_> { MiniBufferState { - cx: self.x + self.prompt.len(), + cx: self.input.dot.active_cur().idx + self.n_prompt_chars, n_visible_lines: self.n_visible_lines, prompt: &self.prompt, - input: &self.input, + input: self.input.txt.as_slice(), selected_line_idx: self.selected_line_idx, b: if self.show_buffer_content { Some(&self.b) @@ -181,38 +184,47 @@ where fn handle_input(&mut self, input: Input) -> Option { match input { Input::Char(c) => { - self.input.insert(self.x, c); - self.x += 1; + self.input + .handle_action(Action::InsertChar { c }, Source::Keyboard); self.handle_on_change(); } Input::Ctrl('h') | Input::Backspace | Input::Del => { - if self.x > 0 && self.x <= self.input.len() { - self.input.remove(self.x - 1); - self.x = self.x.saturating_sub(1); - self.handle_on_change(); - } + self.input.handle_action( + Action::DotSet(TextObject::Arr(Arrow::Left), 1), + Source::Keyboard, + ); + self.input.handle_action(Action::Delete, Source::Keyboard); + self.handle_on_change(); } Input::Esc => return Some(MiniBufferSelection::Cancelled), Input::Return => { let selection = match self.b.line(self.y) { Some(_) if self.line_indices.is_empty() => MiniBufferSelection::UserInput { - input: self.input.clone(), + input: self.input.txt.to_string(), }, Some(l) => MiniBufferSelection::Line { cy: self.line_indices[self.y], line: l.to_string(), }, None => MiniBufferSelection::UserInput { - input: self.input.clone(), + input: self.input.txt.to_string(), }, }; return Some(selection); } - Input::Alt('h') | Input::Arrow(Arrow::Left) => self.x = self.x.saturating_sub(1), + Input::Alt('h') | Input::Arrow(Arrow::Left) => { + self.input.handle_action( + Action::DotSet(TextObject::Arr(Arrow::Left), 1), + Source::Keyboard, + ); + } Input::Alt('l') | Input::Arrow(Arrow::Right) => { - self.x = min(self.x + 1, self.input.len()) + self.input.handle_action( + Action::DotSet(TextObject::Arr(Arrow::Right), 1), + Source::Keyboard, + ); } Input::Alt('k') | Input::Arrow(Arrow::Up) => { if self.selected_line_idx == 0 { @@ -240,7 +252,7 @@ impl Editor where S: System, { - fn prompt_w_callback Option>>( + fn prompt_w_callback Option>>( &mut self, prompt: &str, initial_lines: Vec, diff --git a/tests/data/editor-scenarios/issues/issue_123.txtar b/tests/data/editor-scenarios/issues/issue_123.txtar index 7607e0e7..2efef9d5 100644 --- a/tests/data/editor-scenarios/issues/issue_123.txtar +++ b/tests/data/editor-scenarios/issues/issue_123.txtar @@ -17,3 +17,4 @@ after type: /ča\n -- expected-buffer-dot-1 -- dača + From a2a416b06b4ed2fa04d64ed4862acc419a648f99 Mon Sep 17 00:00:00 2001 From: Innes Anderson-Morrison Date: Sun, 1 Jun 2025 09:54:12 +0100 Subject: [PATCH 4/4] move allowing unicode bidi codepoints to the crate level --- src/lib.rs | 3 +++ src/ui/tui.rs | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index c5ba8cb9..dd2d5d35 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,9 @@ rustdoc::all, clippy::undocumented_unsafe_blocks )] +// Required for testing rendering behaviour. +// As of https://github.com/rust-lang/rust/issues/140281 this needs to be at the crate level +#![allow(text_direction_codepoint_in_literal)] use libc::termios as Termios; use std::{io::Stdout, process, sync::OnceLock}; diff --git a/src/ui/tui.rs b/src/ui/tui.rs index 1a7487d4..328c1406 100644 --- a/src/ui/tui.rs +++ b/src/ui/tui.rs @@ -950,7 +950,6 @@ mod tests { // https://i18n-puzzles.com/puzzle/18/ #[test] fn render_chars_correctly_handles_bidi_markers() { - #[allow(text_direction_codepoint_in_literal)] let line = GapBuffer::from("⁧foo⁦bar⁩baz⁩"); let expected = format!("�foo�bar�baz�{RESET_STYLE}");