Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 152 additions & 16 deletions library/alloc/src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,23 @@ where
result
}

/// Helper for final sigma lowercase
#[cfg(not(no_global_oom_handling))]
fn map_uppercase_sigma(from: &str, i: usize) -> char {
fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
match iter.skip_while(|&c| c.is_case_ignorable()).next() {
Some(c) => c.is_cased(),
None => false,
}
}

// See https://www.unicode.org/versions/latest/core-spec/chapter-3/#G54277
// for the definition of `Final_Sigma`.
let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
&& !case_ignorable_then_cased(from[i + const { 'Σ'.len_utf8() }..].chars());
if is_word_final { 'ς' } else { 'σ' }
}

#[stable(feature = "rust1", since = "1.0.0")]
impl Borrow<str> for String {
#[inline]
Expand Down Expand Up @@ -345,20 +362,20 @@ impl str {
///
/// Unlike [`char::to_lowercase()`], this method fully handles the context-dependent
/// casing of Greek sigma. However, like that method, it does not handle locale-specific
/// casing, like Turkish and Azeri I/ı/İ/i. See that method's documentation
/// casing, like Turkish and Azeri I/ı/İ/i. See its documentation
/// for more information.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// let s = "HELLO";
/// let s = "HELLO WORLD";
///
/// assert_eq!("hello", s.to_lowercase());
/// assert_eq!("hello world", s.to_lowercase());
/// ```
///
/// A tricky example, with sigma:
/// Tricky examples, with sigma:
///
/// ```
/// let sigma = "Σ";
Expand All @@ -369,6 +386,10 @@ impl str {
/// let odysseus = "ὈΔΥΣΣΕΎΣ";
///
/// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
///
/// let odysseus_king_of_ithaca = "Ο ΟΔΥΣΣΈΑΣ ΒΑΣΙΛΙΆΣ ΤΗΣ ΙΘΆΚΗΣ";
///
/// assert_eq!("ο οδυσσέας βασιλιάς της ιθάκης", odysseus_king_of_ithaca.to_lowercase());
/// ```
///
/// Languages without case are not changed:
Expand Down Expand Up @@ -415,21 +436,136 @@ impl str {
}
}
return s;
}

fn map_uppercase_sigma(from: &str, i: usize) -> char {
// See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
// for the definition of `Final_Sigma`.
let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
&& !case_ignorable_then_cased(from[i + const { 'Σ'.len_utf8() }..].chars());
if is_word_final { 'ς' } else { 'σ' }
/// Returns the titlecase equivalent of this string slice,
/// which is assumed to represent a single word,
/// as a new [`String`].
///
/// Essentially, this consists of uppercasing the first cased letter
/// (with [`char::to_titlecase()`]), and lowercasing everything that follows.
///
/// 'Titlecase' is defined according to the terms of
/// [Chapter 3 (Conformance)](https://www.unicode.org/versions/latest/core-spec/chapter-3/#G34082)
/// of the Unicode standard.
///
/// Since some characters can expand into multiple characters when changing
/// the case, this function returns a [`String`] instead of modifying the
/// parameter in-place.
///
/// Unlike [`char::to_lowercase()`], this method fully handles the context-dependent
/// casing of Greek sigma. However, like that method, it does not handle locale-specific
/// casing, like Turkish and Azeri I/ı/İ/i. See its documentation
/// for more information.
///
/// This method does not perform any kind of word segmentation.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(titlecase)]
/// let s = "HELLO WORLD";
///
/// assert_eq!("Hello world", s.word_to_titlecase());
/// ```
///
/// The first *cased* letter is uppercased:
///
/// ```
/// #![feature(titlecase)]
/// let the_night_before_christmas = "'twas";
///
/// assert_eq!("'Twas", the_night_before_christmas.word_to_titlecase());
/// ```
///
/// Languages without case are not changed:
///
/// ```
/// #![feature(titlecase)]
/// let new_year = "农历新年";
///
/// assert_eq!(new_year, new_year.word_to_titlecase());
/// ```
///
/// Georgian uppercase ("Mtavruli") letters are not used in titlecase:
///
/// ```
/// #![feature(titlecase)]
/// let georgian = "ერთობაშია";
///
/// assert_eq!(georgian, georgian.word_to_titlecase());
/// ```
///
/// No word segmentation is performed,
/// so only the first cased letter in the whole string gets uppercased:
///
/// ```
/// #![feature(titlecase)]
/// let blazingly_fast = "ferris and I";
///
/// assert_eq!("Ferris and i", blazingly_fast.word_to_titlecase());
/// ```
///
/// Tricky examples, with sigma:
///
/// ```
/// #![feature(titlecase)]
/// let odysseus = "ὈΔΥΣΣΕΎΣ";
///
/// assert_eq!("Ὀδυσσεύς", odysseus.word_to_titlecase());
///
/// let odysseus_king_of_ithaca = "Ο ΟΔΥΣΣΈΑΣ ΒΑΣΙΛΙΆΣ ΤΗΣ ΙΘΆΚΗΣ";
///
/// assert_eq!("Ο οδυσσέας βασιλιάς της ιθάκης", odysseus_king_of_ithaca.word_to_titlecase());
/// ```
#[cfg(not(no_global_oom_handling))]
#[rustc_allow_incoherent_impl]
#[must_use = "this returns the titlecase word as a new String, \
without modifying the original"]
#[unstable(feature = "titlecase", issue = "153892")]
pub fn word_to_titlecase(&self) -> String {
// FIXME: add ASCII fast path

let mut s = String::with_capacity(self.len());
let mut chars = self.char_indices();

'until_first_cased_char: for (_, c) in chars.by_ref() {
if c.is_cased() {
s.extend(c.to_titlecase());
break 'until_first_cased_char;
} else {
s.push(c);
}
}

fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
match iter.skip_while(|&c| c.is_case_ignorable()).next() {
Some(c) => c.is_cased(),
None => false,
for (i, c) in chars {
if c == 'Σ' {
// Σ maps to σ, except at the end of a word where it maps to ς.
// This is the only conditional (contextual) but language-independent mapping
// in `SpecialCasing.txt`,
// so hard-code it rather than have a generic "condition" mechanism.
// See https://github.com/rust-lang/rust/issues/26035
let sigma_lowercase = map_uppercase_sigma(self, i);
s.push(sigma_lowercase);
} else {
match conversions::to_lower(c) {
[a, '\0', _] => s.push(a),
[a, b, '\0'] => {
s.push(a);
s.push(b);
}
[a, b, c] => {
s.push(a);
s.push(b);
s.push(c);
}
}
}
}

s
}

/// Returns the uppercase equivalent of this string slice, as a new [`String`].
Expand All @@ -451,9 +587,9 @@ impl str {
/// Basic usage:
///
/// ```
/// let s = "hello";
/// let s = "hello world";
///
/// assert_eq!("HELLO", s.to_uppercase());
/// assert_eq!("HELLO WORLD", s.to_uppercase());
/// ```
///
/// Scripts without case are not changed:
Expand Down
Loading