diff --git a/src/sed/compiler.rs b/src/sed/compiler.rs index dd6878a..9519082 100644 --- a/src/sed/compiler.rs +++ b/src/sed/compiler.rs @@ -12,7 +12,9 @@ use crate::sed::command::{ Address, Command, CommandData, ProcessingContext, ReplacementPart, ReplacementTemplate, Substitution, Transliteration, }; -use crate::sed::delimited_parser::{parse_char_escape, parse_regex, parse_transliteration}; +use crate::sed::delimited_parser::{ + RegexMode, parse_char_escape, parse_regex, parse_transliteration, +}; use crate::sed::error_handling::{ScriptLocation, compilation_error, semantic_error}; use crate::sed::fast_regex::Regex; use crate::sed::named_writer::NamedWriter; @@ -286,7 +288,6 @@ fn compile_sequence( let n_addr = compile_address_range(lines, line, &mut cmd, context)?; line.eat_spaces(); let mut cmd_spec = get_verified_cmd_spec(lines, line, n_addr, context.posix)?; - // Compile the command according to its specification. let mut cmd_mut = cmd.borrow_mut(); cmd_mut.code = line.current(); @@ -331,10 +332,8 @@ fn compile_address_range( let mut is_line0 = false; line.eat_spaces(); - if !line.eol() - && is_address_char(line.current()) - && let Ok(addr1) = compile_address(lines, line, context) - { + if !line.eol() && is_address_char(line.current()) { + let addr1 = compile_address(lines, line, context)?; is_line0 = matches!(addr1, Address::Line(0)); cmd.addr1 = Some(addr1); if is_line0 && context.posix { @@ -364,9 +363,8 @@ fn compile_address_range( } // Look for second address. - if !line.eol() - && let Ok(addr2) = compile_address(lines, line, context) - { + if !line.eol() { + let addr2 = compile_address(lines, line, context)?; // Set step_n to the number specified in the (required numeric) address. 
let step_n = if is_step_match || is_step_end { match addr2 { @@ -449,7 +447,12 @@ fn compile_address( // The next character is an arbitrary delimiter line.advance(); } - let re = parse_regex(lines, line)?; + let regex_mode = if context.regex_extended { + RegexMode::Extended + } else { + RegexMode::Basic + }; + let re = parse_regex(lines, line, regex_mode)?; // Skip over delimiter line.advance(); @@ -533,7 +536,7 @@ fn parse_command_ending( } /// Convert a primitive BRE pattern to a safe ERE-compatible pattern string. -/// - Replaces `\(`, `\)`, `\?`, `\+` and `\|` with `(`, `)`, `?`, `+` and `|`. +/// - Replaces `\(`, `\)`, `\?`, `\+`, `\|`, `\{` and `\}` with `(`, `)`, `?`, `+`, `|`, `{` and `}`. /// - Puts single-digit back-references in non-capturing groups.. /// - Escapes ERE-only metacharacters: `+ ? { } | ( )`. /// - Leaves all other characters as-is. @@ -566,6 +569,14 @@ fn bre_to_ere(pattern: &str) -> String { chars.next(); result.push('|'); // Alternation operator } + Some('{') => { + chars.next(); + result.push('{'); // Brace quantifier start + } + Some('}') => { + chars.next(); + result.push('}'); // Brace quantifier end + } Some(v) if v.is_ascii_digit() => { // Back-reference. In sed BREs these are single-digit // (\1-\9) whereas fancy_regex supports multi-digit @@ -636,7 +647,7 @@ fn compile_regex( // Convert basic to extended regular expression if needed. let pattern = if context.regex_extended { - pattern + &pattern.replace("{,}", "*") } else { &bre_to_ere(pattern) }; @@ -645,7 +656,7 @@ fn compile_regex( let pattern = if icase { format!("(?i){pattern}") } else { - pattern.to_string() + pattern.clone() }; // Compile into engine. 
@@ -788,8 +799,12 @@ fn compile_subst_command( ); } - let pattern = parse_regex(lines, line)?; - + let regex_mode = if context.regex_extended { + RegexMode::Extended + } else { + RegexMode::Basic + }; + let pattern = parse_regex(lines, line, regex_mode)?; let mut subst = Box::new(Substitution::default()); subst.replacement = compile_replacement(lines, line)?; @@ -819,7 +834,6 @@ fn compile_subst_command( ), ); } - cmd.data = CommandData::Substitution(subst); parse_command_ending(lines, line, cmd)?; @@ -1571,6 +1585,21 @@ mod tests { assert!(!regex.is_match(&mut IOChunk::new_from_str("ABC")).unwrap()); } + #[test] + fn test_compile_re_extended() { + let (lines, chars) = make_providers("acaa\nbbb\nccc"); + let mut ctx = ctx(); + ctx.regex_extended = true; + let regex = compile_regex(&lines, &chars, "cc{,}", &ctx, false) + .unwrap() + .expect("regex should be present"); + assert!( + regex + .is_match(&mut IOChunk::new_from_str("acaa\nccc")) + .unwrap() + ); + } + #[test] fn test_compile_re_case_insensitive() { let (lines, chars) = dummy_providers(); @@ -1801,6 +1830,17 @@ mod tests { } } + #[test] + fn test_compile_address_range_error_propagation() { + let (lines, mut chars) = make_providers("1,/abc"); + let mut cmd = Rc::new(RefCell::new(Command::default())); + let result = compile_address_range(&lines, &mut chars, &mut cmd, &ctx()); + + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!(msg.contains("unterminated regular expression")); + } + // compile_sequence fn empty_line() -> ScriptCharProvider { ScriptCharProvider::new("") @@ -2208,6 +2248,11 @@ mod tests { assert_eq!(bre_to_ere(r"a\(b\)c"), "a(b)c"); } + #[test] + fn test_bre_brace_quantifier_translation() { + assert_eq!(bre_to_ere(r"\{1,4\}"), "{1,4}"); + } + #[test] fn test_ere_metacharacters_escaped() { assert_eq!(bre_to_ere(r"a+b?c{1}|(d)"), r"a\+b\?c\{1\}\|\(d\)"); diff --git a/src/sed/delimited_parser.rs b/src/sed/delimited_parser.rs index 06d73ae..cc749ca 100644 --- 
a/src/sed/delimited_parser.rs +++ b/src/sed/delimited_parser.rs @@ -15,6 +15,14 @@ use crate::sed::script_line_provider::ScriptLineProvider; use std::char; use uucore::error::UResult; +/// Defines whether regex patterns use Basic Regular Expression (BRE) or +/// Extended Regular Expression (ERE) syntax. +#[derive(Copy, Clone, Debug)] +pub enum RegexMode { + Basic, + Extended, +} + /// Return true if c is a valid octal digit fn is_ascii_octal_digit(c: char) -> bool { matches!(c, '0'..='7') @@ -312,11 +320,16 @@ fn scan_delimiter(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> /// Parse the regular expression delimited by the current line /// character and return it as a string. -/// On return the line is on the closing delimiter. -pub fn parse_regex(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> UResult { +/// On return, the line is on the closing delimiter. +/// In Basic mode, quantifiers like {m,n} must be escaped (\{m,n\}). +/// In Extended mode, quantifiers like {m,n} don't require escaping. 
+pub fn parse_regex( + lines: &ScriptLineProvider, + line: &mut ScriptCharProvider, + regex_mode: RegexMode, +) -> UResult { let delimiter = scan_delimiter(lines, line)?; let mut result = String::new(); - while !line.eol() { match line.current() { '[' if delimiter != '[' => { @@ -335,6 +348,20 @@ pub fn parse_regex(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> line.advance(); continue; } + if line.current() == '{' && matches!(regex_mode, RegexMode::Basic) { + validate_quantifier_structure(lines, line, delimiter, RegexMode::Basic)?; + let quantifier = validate_quantifier_numbers(lines, line)?; + result.push('\\'); + result.push('{'); + result.push_str(&quantifier); + continue; + } + if line.current() == '}' { + result.push('\\'); + result.push('}'); + line.advance(); + continue; + } if let Some(decoded) = parse_char_escape(line) { result.push(decoded); } else { @@ -345,6 +372,19 @@ pub fn parse_regex(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> } continue; } + '{' if matches!(regex_mode, RegexMode::Extended) => { + validate_quantifier_structure(lines, line, delimiter, RegexMode::Extended)?; + let quantifier = validate_quantifier_numbers(lines, line)?; + result.push('{'); + result.push_str(&quantifier); + continue; + } + '}' => { + result.push('}'); + line.advance(); + continue; + } + c if c == delimiter => return Ok(result), c => result.push(c), } @@ -353,6 +393,173 @@ pub fn parse_regex(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) -> compilation_error(lines, line, "unterminated regular expression") } +// Check for closing brace and the structure/content. 
+fn validate_quantifier_structure( + lines: &ScriptLineProvider, + line: &mut ScriptCharProvider, + delimiter: char, + regex_mode: RegexMode, +) -> UResult { + let invalid_content_error_msg = "Invalid content of \\{\\}"; + let mut found_closing_brace = false; + let mut seen_comma = false; + let mut invalid_content_detected = false; + let mut is_quantifier_empty = true; + let initial_pos = line.get_pos(); + line.advance(); + + while !line.eol() && line.current() != delimiter { + match regex_mode { + RegexMode::Extended => { + // In ERE mode, look for } + if line.current() == '}' { + // Empty quantifier {} is not valid + if is_quantifier_empty { + invalid_content_detected = true; + } + found_closing_brace = true; + break; + } else { + // Entering means there is no } immediately after the { + is_quantifier_empty = false; + // Only digits and one comma allowed + if line.current() == ',' { + if seen_comma { + invalid_content_detected = true; + } + seen_comma = true; + } else if !line.current().is_ascii_digit() { + invalid_content_detected = true; + } + line.advance(); + } + } + RegexMode::Basic => { + // In BRE mode, look for \} + if line.current() == '\\' { + line.advance(); + if !line.eol() && line.current() == '}' { + if is_quantifier_empty { + invalid_content_detected = true; + } + found_closing_brace = true; + break; + } else { + invalid_content_detected = true; + } + } else { + is_quantifier_empty = false; + if line.current() == ',' { + if seen_comma { + invalid_content_detected = true; + } + seen_comma = true; + } else if !line.current().is_ascii_digit() { + invalid_content_detected = true; + } + line.advance(); + } + } + } + } + + if !found_closing_brace { + return compilation_error(lines, line, "Unmatched \\{"); + } + + if invalid_content_detected { + return compilation_error(lines, line, invalid_content_error_msg); + } + + line.set_position(initial_pos); + Ok(initial_pos) +} + +// Performs validations on m and/or n values of the quantifier +// and returns the
valid content as a string (without braces). +fn validate_quantifier_numbers( + lines: &ScriptLineProvider, + line: &mut ScriptCharProvider, +) -> UResult { + line.advance(); + + // Handle {,} (zero or more) special case + if line.current() == ',' { + line.advance(); + if line.current() == '}' { + return Ok(",".to_string()); + } + + // Continue to parse n value + let mut result = String::new(); + result.push('0'); + result.push(','); + while line.current() != '}' && line.current() != '\\' { + result.push(line.current()); + line.advance(); + } + return Ok(result); + } + // Parse m value + let mut m = String::new(); + while line.current() != ',' && line.current() != '}' && line.current() != '\\' { + m.push(line.current()); + line.advance(); + } + let m_val: u32 = match m.parse() { + Ok(val) => { + if val > 255 { + return compilation_error(lines, line, "Regular expression too big"); + } + val + } + //never happens due to previous validation, but needed to satisfy the type checker + Err(_) => return compilation_error(lines, line, "Invalid content of \\{\\}"), + }; + + // Parse n if comma is present + let mut n = String::new(); + let has_comma = line.current() == ','; + if has_comma { + line.advance(); + while line.current() != '}' && line.current() != '\\' { + n.push(line.current()); + line.advance(); + } + } + let n_val: Option = if n.is_empty() { + None + } else { + match n.parse::() { + Ok(val) => { + if val > 255 { + return compilation_error(lines, line, "Regular expression too big"); + } + Some(val) + } + Err(_) => return compilation_error(lines, line, "Invalid content of \\{\\}"), + } + }; + + // Validate m <= n if both present + if let Some(n_val) = n_val + && m_val > n_val + { + return compilation_error(lines, line, "Invalid content of \\{\\}"); + } + + // Valid quantifier content (without braces) + let mut result = m.clone(); + if has_comma { + result.push(','); + if !n.is_empty() { + result.push_str(&n); + } + } + + Ok(result) +} + /// Parse the 
transliteration string delimited by the current line /// character and return it as a string. /// On return the line is on the closing delimiter. @@ -756,7 +963,7 @@ mod tests { #[test] fn test_simple_regex() { let (lines, mut line) = make_providers("/abc/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "abc"); assert_eq!(line.current(), '/'); } @@ -764,7 +971,7 @@ mod tests { #[test] fn test_regex_with_escaped_delimiter() { let (lines, mut line) = make_providers("/ab\\/c/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "ab/c"); assert_eq!(line.current(), '/'); } @@ -772,7 +979,7 @@ mod tests { #[test] fn test_regex_with_capture() { let (lines, mut line) = make_providers(r"/\(.\)/c/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, r"\(.\)"); assert_eq!(line.current(), '/'); } @@ -780,29 +987,101 @@ mod tests { #[test] fn test_regex_with_escape_sequence() { let (lines, mut line) = make_providers("/ab\\n/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "ab\n"); assert_eq!(line.current(), '/'); } + #[test] + fn test_basic_regex_quantifier() { + let (lines, mut line) = make_providers("/a\\{2,3\\}/p"); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); + assert_eq!(parsed, "a\\{2,3\\}"); + assert_eq!(line.current(), '/'); + } + + #[test] + fn test_basic_regex_with_unmatched_brace_quantifier() { + let (lines, mut line) = make_providers("/a\\{2,3/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Unmatched \\{")); + } + + #[test] + fn test_basic_regex_with_invalid_content() { + 
let (lines, mut line) = make_providers("/a\\{2d,3\\}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_extended_regex_quantifier() { + let (lines, mut line) = make_providers("/a{2,3}/p"); + let parsed = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap(); + assert_eq!(parsed, "a{2,3}"); + assert_eq!(line.current(), '/'); + } + + #[test] + fn test_extended_regex_with_unmatched_brace_quantifier() { + let (lines, mut line) = make_providers("/a{2,3/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Unmatched \\{")); + } + + #[test] + fn test_extended_regex_with_empty_quantifier() { + let (lines, mut line) = make_providers("/a{}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_extended_regex_with_whitespace_quantifier() { + let (lines, mut line) = make_providers("/a{}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_extended_regex_with_invalid_m() { + let (lines, mut line) = make_providers("/a{2d,3}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_extended_regex_with_invalid_n() { + let (lines, mut line) = make_providers("/a{2,-3}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_extended_regex_with_m_gt_n() { + let (lines, mut line) = make_providers("/a{3,2}/p"); + let err = parse_regex(&lines, &mut line, RegexMode::Extended).unwrap_err(); + 
assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + #[test] fn errors_on_unterminated_regex() { let (lines, mut line) = make_providers("/unterminated"); - let err = parse_regex(&lines, &mut line).unwrap_err(); + let err = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap_err(); assert!(err.to_string().contains("unterminated regular expression")); } #[test] fn errors_on_esc_at_re_eol() { let (lines, mut line) = make_providers("/foo\\"); - let err = parse_regex(&lines, &mut line).unwrap_err(); + let err = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap_err(); assert!(err.to_string().contains("unterminated regular expression")); } #[test] fn errors_on_backslash_delimiter() { let (lines, mut line) = make_providers("\\bad"); - let err = parse_regex(&lines, &mut line).unwrap_err(); + let err = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap_err(); assert!( err.to_string() .contains("\\ cannot be used as a string delimiter") @@ -812,7 +1091,7 @@ mod tests { #[test] fn test_regex_with_character_class() { let (lines, mut line) = make_providers("/[a-z]/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "[a-z]"); assert_eq!(line.current(), '/'); } @@ -820,7 +1099,7 @@ mod tests { #[test] fn test_regex_with_bracket_delimiter() { let (lines, mut line) = make_providers("[abc["); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "abc"); assert_eq!(line.current(), '['); } @@ -828,7 +1107,7 @@ mod tests { #[test] fn test_bracket_regex_with_bracket_delimiter() { let (lines, mut line) = make_providers("[a\\[0-9]bc["); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "a[0-9]bc"); assert_eq!(line.current(), '['); } @@ -836,7 +1115,7 @@ mod tests { 
#[test] fn test_regex_with_escaped_bracket_in_character_class() { let (lines, mut line) = make_providers("/[a\\]z]/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "[a\\]z]"); assert_eq!(line.current(), '/'); } @@ -844,7 +1123,7 @@ mod tests { #[test] fn test_regex_with_delimiter_inside_character_class() { let (lines, mut line) = make_providers("/[a/c]/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "[a/c]"); assert_eq!(line.current(), '/'); } @@ -852,11 +1131,150 @@ mod tests { #[test] fn test_regex_with_escaped_paren_and_backslash() { let (lines, mut line) = make_providers("/\\(\\\\/"); - let parsed = parse_regex(&lines, &mut line).unwrap(); + let parsed = parse_regex(&lines, &mut line, RegexMode::Basic).unwrap(); assert_eq!(parsed, "\\(\\\\"); assert_eq!(line.current(), '/'); } + // validate_quantifier_structure + //BRE tests + #[test] + fn test_validate_quantifier_structure_bre_valid() { + let (lines, mut line) = make_providers("{2,3\\}"); + let result = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Basic).unwrap(); + assert_eq!(result, 0); + assert_eq!(line.current(), '{'); // Line should be back on the opening brace + } + + #[test] + fn test_validate_quantifier_structure_bre_with_unmatched_brace() { + let (lines, mut line) = make_providers("{2,3"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Unmatched \\{")); + } + + #[test] + fn test_validate_quantifier_structure_bre_with_empty_content() { + let (lines, mut line) = make_providers("{\\}"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn 
test_validate_quantifier_structure_bre_with_invalid_char() { + let (lines, mut line) = make_providers("{2d,3\\}"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_validate_quantifier_structure_bre_with_double_comma() { + let (lines, mut line) = make_providers("{2,3,\\}"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Basic).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + // ERE tests + #[test] + fn test_validate_quantifier_structure_ere_valid() { + let (lines, mut line) = make_providers("{2,3}"); + let result = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Extended).unwrap(); + assert_eq!(result, 0); + assert_eq!(line.current(), '{'); // Line should be back on the opening brace + } + + #[test] + fn test_validate_quantifier_structure_ere_with_unmatched_brace() { + let (lines, mut line) = make_providers("{2,3"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Unmatched \\{")); + } + + #[test] + fn test_validate_quantifier_structure_ere_with_empty_content() { + let (lines, mut line) = make_providers("{}"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_validate_quantifier_structure_ere_with_invalid_char() { + let (lines, mut line) = make_providers("{2d,3}"); + let err = + validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + #[test] + fn test_validate_quantifier_structure_ere_with_double_comma() { + let (lines, mut line) = make_providers("{2,3,}"); + let err = + 
validate_quantifier_structure(&lines, &mut line, '/', RegexMode::Extended).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + + // validate_quantifier_numbers + #[test] + fn test_validate_quantifier_numbers_with_m() { + let (lines, mut line) = make_providers("{2}"); + let result = validate_quantifier_numbers(&lines, &mut line).unwrap(); + assert_eq!(result, "2"); + assert_eq!(line.current(), '}'); + } + + #[test] + fn test_validate_quantifier_numbers_with_single_comma() { + let (lines, mut line) = make_providers("{,}"); + let result = validate_quantifier_numbers(&lines, &mut line).unwrap(); + assert_eq!(result, ","); + assert_eq!(line.current(), '}'); + } + + #[test] + fn test_validate_quantifier_numbers_with_comma_n() { + let (lines, mut line) = make_providers("{,3}"); + let result = validate_quantifier_numbers(&lines, &mut line).unwrap(); + assert_eq!(result, "0,3"); + assert_eq!(line.current(), '}'); + } + + #[test] + fn test_validate_quantifier_numbers_valid() { + let (lines, mut line) = make_providers("{2,3}"); + let result = validate_quantifier_numbers(&lines, &mut line).unwrap(); + assert_eq!(result, "2,3"); + assert_eq!(line.current(), '}'); + } + + #[test] + fn test_validate_quantifier_numbers_with_m_too_big() { + let (lines, mut line) = make_providers("{256}"); + let err = validate_quantifier_numbers(&lines, &mut line).unwrap_err(); + assert!(err.to_string().contains("Regular expression too big")); + } + + #[test] + fn test_validate_quantifier_numbers_with_n_too_big() { + let (lines, mut line) = make_providers("{2,256}"); + let err = validate_quantifier_numbers(&lines, &mut line).unwrap_err(); + assert!(err.to_string().contains("Regular expression too big")); + } + + #[test] + fn test_validate_quantifier_numbers_with_m_gt_n() { + let (lines, mut line) = make_providers("{3,2}"); + let err = validate_quantifier_numbers(&lines, &mut line).unwrap_err(); + assert!(err.to_string().contains("Invalid content of \\{\\}")); + } + 
// parse_transliteration #[test] fn test_simple_transliteration() { diff --git a/src/sed/script_char_provider.rs b/src/sed/script_char_provider.rs index a3e3a85..4e4ecd1 100644 --- a/src/sed/script_char_provider.rs +++ b/src/sed/script_char_provider.rs @@ -34,6 +34,11 @@ impl ScriptCharProvider { self.pos = self.pos.saturating_sub(n); } + /// Sets new current position. + pub fn set_position(&mut self, pos: usize) { + self.pos = pos; + } + /// Returns the current character. Panics if out of bounds. pub fn current(&self) -> char { self.line[self.pos] diff --git a/tests/by-util/test_sed.rs b/tests/by-util/test_sed.rs index 13a6576..b317b90 100644 --- a/tests/by-util/test_sed.rs +++ b/tests/by-util/test_sed.rs @@ -275,6 +275,157 @@ check_output!(addr_range_step_zero, ["-n", "10~0p", LINES1]); check_output!(addr_range_end_multiple, ["-n", "/l1_2/,~10p", LINES1]); //////////////////////////////////////////////////////////// + +// Quantifiers: {m,n} +// m and n are considered to be the first and second numbers in the interval, respectively. 
+ +const REGEX_QUANTIFIERS_INPUT: &str = + "Hello World\nHelo World\nHelllllo World\nHeo Word\nHeo Worl}d\n"; + +#[test] +fn ere_quantifier_exactly_m() { + new_ucmd!() + .args(&["-n", "-E", "-e", "/l{2}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is("Hello World\nHelllllo World\n"); +} + +#[test] +fn ere_quantifier_minimum_m() { + new_ucmd!() + .args(&["-n", "-E", "-e", "/l{1,}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is("Hello World\nHelo World\nHelllllo World\nHeo Worl}d\n"); +} + +#[test] +fn ere_quantifier_m_to_n() { + new_ucmd!() + .args(&["-n", "-E", "-e", "/l{3,4}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is("Helllllo World\n"); +} + +#[test] +fn ere_quantifier_comma_n() { + new_ucmd!() + .args(&["-n", "-E", "-e", "/l{,4}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is(REGEX_QUANTIFIERS_INPUT); +} + +#[test] +fn bre_quantifier_minimum_m() { + new_ucmd!() + .args(&["-n", "-e", "/l\\{3,\\}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is("Helllllo World\n"); +} + +#[test] +fn bre_quantifier_comma() { + new_ucmd!() + .args(&["-n", "-e", "/l\\{,\\}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is(REGEX_QUANTIFIERS_INPUT); +} + +#[test] +fn bre_quantifier_only_closing_brace() { + new_ucmd!() + .args(&["-n", "-e", "/l\\}/p"]) + .pipe_in(REGEX_QUANTIFIERS_INPUT) + .succeeds() + .stdout_is("Heo Worl}d\n"); +} + +#[test] +fn test_ere_quantifier_n_gt_m() { + new_ucmd!() + .args(&["-E", "-e", "/l{3,2}/p"]) + .fails() + .code_is(1) + .stderr_contains("Invalid content of \\{\\}"); +} + +#[test] +fn test_ere_quantifier_negative_m() { + new_ucmd!() + .args(&["-E", "-e", "/l{-2,4}/p"]) + .fails() + .code_is(1) + .stderr_contains("Invalid content of \\{\\}"); +} + +#[test] +fn test_ere_quantifier_invalid_m() { + new_ucmd!() + .args(&["-E", "-e", "/l{d,}/p"]) + .fails() + .code_is(1) + .stderr_contains("Invalid content of \\{\\}"); +} + +#[test] +fn 
test_ere_quantifier_m_too_big() { + new_ucmd!() + .args(&["-E", "-e", "/l{300,}/p"]) + .fails() + .code_is(1) + .stderr_contains("Regular expression too big"); +} + +#[test] +fn test_ere_quantifier_empty() { + new_ucmd!() + .args(&["-E", "-e", "/l{}/p"]) + .fails() + .code_is(1) + .stderr_contains("Invalid content of \\{\\}"); +} + +#[test] +fn test_ere_quantifier_whitespace() { + new_ucmd!() + .args(&["-E", "-e", "/l{ }/p"]) + .fails() + .code_is(1) + .stderr_contains("Invalid content of \\{\\}"); +} + +#[test] +fn test_ere_quantifier_unmatched_brace() { + new_ucmd!() + .args(&["-E", "-e", "/l{,/p"]) + .fails() + .code_is(1) + .stderr_contains("Unmatched \\{"); +} + +#[test] +fn test_ere_quantifier_unmatched_brace_2() { + new_ucmd!() + .args(&["-E", "-e", "/l{m,n/p"]) + .fails() + .code_is(1) + .stderr_contains("Unmatched \\{"); +} + +#[test] +fn test_bre_quantifier_unmatched_brace() { + new_ucmd!() + .args(&["-e", "/l\\{1,2}/p"]) + .fails() + .code_is(1) + .stderr_contains("Unmatched \\{"); +} + // Substitution: s check_output!(subst_any, ["-e", r"s/./X/g", LINES1]); check_output!(subst_any_global, ["-e", r"s,.,X,g", LINES1]); diff --git a/tests/fixtures/sed/output/bre_quantifier_comma b/tests/fixtures/sed/output/bre_quantifier_comma new file mode 100644 index 0000000..4d77d3f --- /dev/null +++ b/tests/fixtures/sed/output/bre_quantifier_comma @@ -0,0 +1,5 @@ +Hello World +Helo World +Helllllo World +Heo Word +Heo Worl}d diff --git a/tests/fixtures/sed/output/bre_quantifier_minimum_m b/tests/fixtures/sed/output/bre_quantifier_minimum_m new file mode 100644 index 0000000..f498293 --- /dev/null +++ b/tests/fixtures/sed/output/bre_quantifier_minimum_m @@ -0,0 +1 @@ +Helllllo World diff --git a/tests/fixtures/sed/output/bre_quantifier_only_closing_brace b/tests/fixtures/sed/output/bre_quantifier_only_closing_brace new file mode 100644 index 0000000..5164ebc --- /dev/null +++ b/tests/fixtures/sed/output/bre_quantifier_only_closing_brace @@ -0,0 +1 @@ +Heo Worl}d diff 
--git a/tests/fixtures/sed/output/ere_quantifier_comma_n b/tests/fixtures/sed/output/ere_quantifier_comma_n new file mode 100644 index 0000000..4d77d3f --- /dev/null +++ b/tests/fixtures/sed/output/ere_quantifier_comma_n @@ -0,0 +1,5 @@ +Hello World +Helo World +Helllllo World +Heo Word +Heo Worl}d diff --git a/tests/fixtures/sed/output/ere_quantifier_exactly_m b/tests/fixtures/sed/output/ere_quantifier_exactly_m new file mode 100644 index 0000000..ac78241 --- /dev/null +++ b/tests/fixtures/sed/output/ere_quantifier_exactly_m @@ -0,0 +1,2 @@ +Hello World +Helllllo World diff --git a/tests/fixtures/sed/output/ere_quantifier_m_to_n b/tests/fixtures/sed/output/ere_quantifier_m_to_n new file mode 100644 index 0000000..f498293 --- /dev/null +++ b/tests/fixtures/sed/output/ere_quantifier_m_to_n @@ -0,0 +1 @@ +Helllllo World diff --git a/tests/fixtures/sed/output/ere_quantifier_minimum_m b/tests/fixtures/sed/output/ere_quantifier_minimum_m new file mode 100644 index 0000000..10e0352 --- /dev/null +++ b/tests/fixtures/sed/output/ere_quantifier_minimum_m @@ -0,0 +1,4 @@ +Hello World +Helo World +Helllllo World +Heo Worl}d