rust-lang · Jules-Bertholet · Apr 2, 2026 · Apr 3, 2026 · Apr 3, 2026 · Apr 18, 2026
diff --git a/Cargo.lock b/Cargo.lock
@@ -6085,6 +6085,7 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
 name = "unicode-table-generator"
 version = "0.1.0"
 dependencies = [
+ "rustc-hash 2.1.1",
  "ucd-parse",
 ]
 

diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs
@@ -497,6 +497,106 @@ impl str {
         s
     }
 
+    /// Returns the case-folded equivalent of this string slice, as a new [`String`].
+    ///
+    /// Case folding is a transformation, mostly matching lowercase, that is meant to be used
+    /// for case-insensitive string comparisons. Case-folded strings should not usually
+    /// be exposed directly to users.
+    ///
+    /// For the precise specification of case folding, see
+    /// [Chapter 3 (Conformance)](https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63737)
+    /// of the Unicode standard.
+    ///
+    /// Since some characters can expand into multiple characters when case folding,
+    /// this function returns a [`String`] instead of modifying the parameter in-place.
+    ///
+    /// This function does not perform any [normalization] (e.g. NFC),
+    /// so semantically and visually identical strings may compare unequal.
+    ///
+    /// Like [`char::to_casefold()`], this method does not handle language-specific
+    /// casing, like Turkish and Azeri I/ı/İ/i. See that method's documentation
+    /// for more information.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// let s0 = "HELLO";
+    /// let s1 = "Hello";
+    ///
+    /// assert_eq!(s0.to_casefold(), s1.to_casefold());
+    /// assert_eq!(s0.to_casefold(), "hello");
+    /// ```
+    ///
+    /// Scripts without case are not changed:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// let new_year = "农历新年";
+    ///
+    /// assert_eq!(new_year, new_year.to_casefold());
+    /// ```
+    ///
+    /// One character can become multiple:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// let s0 = "TSCHÜẞ";
+    /// let s1 = "TSCHÜSS";
+    /// let s2 = "tschüß";
+    ///
+    /// assert_eq!(s0.to_casefold(), s1.to_casefold());
+    /// assert_eq!(s0.to_casefold(), s2.to_casefold());
+    /// assert_eq!(s0.to_casefold(), "tschüss");
+    /// ```
+    ///
+    /// No NFC [normalization] is performed:
+    ///
+    /// ```rust
+    /// #![feature(casefold)]
+    /// // These two strings are visually and semantically identical...
+    /// let comp = "Á";
+    /// let decomp = "Á";
+    ///
+    /// // ... but not codepoint-for-codepoint equal.
+    /// assert_eq!(comp, "\u{C1}");
+    /// assert_eq!(decomp, "A\u{0301}");
+    ///
+    /// // Their case-foldings are likewise unequal:
+    /// assert_eq!(comp.to_casefold(), "\u{E1}");
+    /// assert_eq!(decomp.to_casefold(), "a\u{0301}");
+    /// ```
+    ///
+    /// [normalization]: https://www.unicode.org/faq/normalization
+    #[cfg(not(no_global_oom_handling))]
+    #[rustc_allow_incoherent_impl]
+    #[must_use = "this returns the case-folded string as a new String, \
+                  without modifying the original"]
+    #[unstable(feature = "casefold", issue = "none")]
+    pub fn to_casefold(&self) -> String {
+        // SAFETY: `u8::to_ascii_lowercase` maps ASCII bytes to ASCII bytes, so the
+        // converted prefix returned in `s` remains valid UTF-8.
+        let (mut s, rest) = unsafe { convert_while_ascii(self, u8::to_ascii_lowercase) };
+
+        for c in rest.chars() {
+            match conversions::to_casefold(c) {
+                [a, '\0', _] => s.push(a), // a '\0' in a later slot is treated as padding
+                [a, b, '\0'] => {
+                    s.push(a);
+                    s.push(b);
+                }
+                [a, b, c] => {
+                    s.push(a);
+                    s.push(b);
+                    s.push(c);
+                }
+            }
+        }
+        s
+    }
+
     /// Converts a [`Box<str>`] into a [`String`] without copying or allocating.
     ///
     /// # Examples

diff --git a/library/alloctests/tests/lib.rs b/library/alloctests/tests/lib.rs
@@ -3,6 +3,7 @@
 #![feature(const_heap)]
 #![feature(deque_extend_front)]
 #![feature(iter_array_chunks)]
+#![feature(casefold)]
 #![feature(cow_is_borrowed)]
 #![feature(core_intrinsics)]
 #![feature(downcast_unchecked)]

diff --git a/library/alloctests/tests/str.rs b/library/alloctests/tests/str.rs
@@ -1867,7 +1867,13 @@ fn to_lowercase() {
 #[test]
 fn to_uppercase() {
     assert_eq!("".to_uppercase(), "");
-    assert_eq!("aéǅßﬁᾀ".to_uppercase(), "AÉǄSSFIἈΙ");
+    assert_eq!("aéǅßẞﬁᾀ".to_uppercase(), "AÉǄSSẞFIἈΙ");
+}
+
+#[test]
+fn to_casefold() {
+    assert_eq!("".to_casefold(), ""); // empty input stays empty
+    assert_eq!("ꮿﬁῲὼ\u{0345}ßẞΣς".to_casefold(), "Ꮿfiὼιὼιssssσσ"); // 'ꮿ' folds to 'Ꮿ'
 }
 
 #[test]

diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
@@ -1075,16 +1075,17 @@ impl char {
     }
 
     /// Returns `true` if this `char` has the `Case_Ignorable` property. This narrow-use property
-    /// is used to implement context-dependent casing for the Greek letter sigma (uppercase Σ),
+    /// is used to implement context-dependent casing for the Greek letter sigma (uppercase 'Σ'),
     /// which has two lowercase forms.
     ///
     /// `Case_Ignorable` is [described][D136] in Chapter 3 (Conformance) of the Unicode Core Specification,
-    /// and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`];
-    /// see those resources for more information.
+    /// and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
+    /// See those resources, as well as [`to_lowercase()`]'s documentation, for more information.
     ///
     /// [D136]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
     /// [ucd]: https://www.unicode.org/reports/tr44/
     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
+    /// [`to_lowercase()`]: Self::to_lowercase()
     #[must_use]
     #[inline]
     #[unstable(feature = "case_ignorable", issue = "154848")]
@@ -1154,8 +1155,6 @@ impl char {
     /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
     /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
     ///
-    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
-    ///
     /// This operation performs an unconditional mapping without tailoring. That is, the conversion
     /// is independent of context and language. See [below](#notes-on-context-and-locale)
     /// for more information.
@@ -1210,28 +1209,39 @@ impl char {
     ///
     /// ## Greek sigma
     ///
-    /// In Greek, the letter simga (uppercase Σ) has two lowercase forms:
-    /// ς which is used only at the end of a word, and σ which is used everywhere else.
-    /// `to_lowercase()` always uses the second form:
+    /// In Greek, the letter sigma (uppercase 'Σ') has two lowercase forms:
+    /// 'σ' which is used in most situations, and 'ς' which appears only
+    /// at the end of a word. [`char::to_lowercase()`] always uses the first form:
     ///
     /// ```
     /// assert_eq!('Σ'.to_lowercase().to_string(), "σ");
     /// ```
     ///
+    /// `str::to_lowercase()` (only available with the `alloc` crate)
+    /// *does* properly handle this contextual mapping,
+    /// so prefer using that method if you can. Alternatively, you can use
+    /// [`is_cased()`] and [`is_case_ignorable()`] to implement it yourself.
+    /// See `Final_Sigma` in [Table 3.17] of the Unicode Standard,
+    /// along with [`SpecialCasing.txt`], for more details.
+    ///
+    /// [`is_cased()`]: Self::is_cased()
+    /// [`is_case_ignorable()`]: Self::is_case_ignorable()
+    /// [Table 3.17]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G54277
+    ///
     /// ## Turkish and Azeri I/ı/İ/i
     ///
     /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
     ///
     /// * 'Dotless': I / ı, sometimes written ï
     /// * 'Dotted': İ / i
     ///
-    /// Note that the uppercase undotted 'I' is the same as the Latin. Therefore:
+    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
     ///
     /// ```
     /// let lower_i = 'I'.to_lowercase().to_string();
     /// ```
     ///
-    /// The value of `lower_i` here relies on the language of the text: if we're
+    /// `'I'`'s correct lowercase relies on the language of the text: if we're
     /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
     /// be `"ı"`. `to_lowercase()` does not take this into account, and so:
     ///
@@ -1242,6 +1252,8 @@ impl char {
     /// ```
     ///
     /// holds across languages.
+    ///
+    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
     #[must_use = "this returns the lowercased character as a new iterator, \
                   without modifying the original"]
     #[stable(feature = "rust1", since = "1.0.0")]
@@ -1392,22 +1404,22 @@ impl char {
     /// As stated above, this method is locale-insensitive.
     /// If you need locale support, consider using an external crate,
     /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
-    /// which is developed by Unicode. A description of a common
-    /// locale-dependent casing issue follows:
+    /// which is developed by Unicode. A description of one common
+    /// locale-dependent casing issue follows (there are others):
     ///
     /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
     ///
     /// * 'Dotless': I / ı, sometimes written ï
     /// * 'Dotted': İ / i
     ///
-    /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
+    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
     ///
     /// ```
     /// #![feature(titlecase)]
     /// let upper_i = 'i'.to_titlecase().to_string();
     /// ```
     ///
-    /// The value of `upper_i` here relies on the language of the text: if we're
+    /// `'i'`'s correct titlecase relies on the language of the text: if we're
     /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
     /// be `"İ"`. `to_titlecase()` does not take this into account, and so:
     ///
@@ -1504,21 +1516,21 @@ impl char {
     /// As stated above, this method is locale-insensitive.
     /// If you need locale support, consider using an external crate,
     /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
-    /// which is developed by Unicode. A description of a common
-    /// locale-dependent casing issue follows:
+    /// which is developed by Unicode. A description of one common
+    /// locale-dependent casing issue follows (there are others):
     ///
     /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
     ///
     /// * 'Dotless': I / ı, sometimes written ï
     /// * 'Dotted': İ / i
     ///
-    /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
+    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
     ///
     /// ```
     /// let upper_i = 'i'.to_uppercase().to_string();
     /// ```
     ///
-    /// The value of `upper_i` here relies on the language of the text: if we're
+    /// `'i'`'s correct uppercase relies on the language of the text: if we're
     /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
     /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
     ///
@@ -1539,6 +1551,88 @@ impl char {
         ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
     }
 
+    /// Returns an iterator that yields the case folding of this `char` as one or more
+    /// `char`s.
+    ///
+    /// Case folding is meant to be used when performing case-insensitive string comparisons.
+    /// Case-folded strings should not usually be exposed directly to users. For most,
+    /// but not all, characters, the casefold mapping is identical to the lowercase one.
+    ///
+    /// This iterator yields the `char`(s) in the common or full case folding for this `char`,
+    /// as given by the [Unicode Character Database][ucd] [`CaseFolding.txt`].
+    /// The maximum number of `char`s in a case folding is 3.
+    ///
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`CaseFolding.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
+    ///
+    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
+    /// is independent of context and language.
+    ///
+    /// It also does not perform any [normalization] (e.g. NFC).
+    ///
+    /// [normalization]: https://www.unicode.org/faq/normalization
+    ///
+    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case folding in
+    /// general and Chapter 3 (Conformance) discusses the default algorithm for case folding.
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+    ///
+    /// # Examples
+    ///
+    /// The German sharp S `'ß'` (U+00DF) is a single Unicode code point
+    /// that case-folds to `"ss"`. Its uppercase variant `'ẞ'` (U+1E9E)
+    /// has the same case folding.
+    ///
+    /// As an iterator:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// assert!('ß'.to_casefold().eq(['s', 's']));
+    /// assert!('ẞ'.to_casefold().eq(['s', 's']));
+    /// ```
+    ///
+    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// assert_eq!('ß'.to_casefold().to_string(), "ss");
+    /// assert_eq!('ẞ'.to_casefold().to_string(), "ss");
+    /// ```
+    ///
+    /// # Note on locale
+    ///
+    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
+    ///
+    /// * 'Dotless': I / ı, sometimes written ï
+    /// * 'Dotted': İ / i
+    ///
+    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// let casefold_i = 'I'.to_casefold().to_string();
+    /// ```
+    ///
+    /// `'I'`'s correct case folding relies on the language of the text: if we're
+    /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
+    /// be `"ı"`. `to_casefold()` does not take this into account, and so:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// let casefold_i = 'I'.to_casefold().to_string();
+    ///
+    /// assert_eq!(casefold_i, "i");
+    /// ```
+    ///
+    /// holds across languages.
+    #[must_use = "this returns the case-folded character as a new iterator, \
+                  without modifying the original"]
+    #[unstable(feature = "casefold", issue = "none")]
+    #[inline]
+    pub fn to_casefold(self) -> ToCasefold {
+        ToCasefold(CaseMappingIter::new(conversions::to_casefold(self)))
+    }
+
     /// Checks if the value is within the ASCII range.
     ///
     /// # Examples

diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs
@@ -516,6 +516,21 @@ casemappingiter_impls! {
     ToLowercase
 }
 
+casemappingiter_impls! {
+    #[unstable(feature = "casefold", issue = "none")]
+    #[unstable(feature = "casefold", issue = "none")]
+    #[unstable(feature = "casefold", issue = "none")]
+    #[unstable(feature = "casefold", issue = "none")]
+    #[unstable(feature = "casefold", issue = "none")]
+    /// An iterator that yields the case-folded equivalent of a `char`.
+    ///
+    /// This `struct` is created by the [`to_casefold`] method on [`char`]. See
+    /// its documentation for more.
+    ///
+    /// [`to_casefold`]: char::to_casefold
+    ToCasefold
+}
+
 #[derive(Debug, Clone)]
 struct CaseMappingIter(core::array::IntoIter<char, 3>);