From c6bf0acbc502661f9630d9ff0fd0a86fcd4ba37b Mon Sep 17 00:00:00 2001
From: Jules Bertholet <julesbertholet@quoi.xyz>
Date: Thu, 2 Apr 2026 00:09:54 -0400
Subject: [PATCH 1/5] Add `char::to_casefold()`

---
 Cargo.lock                                    |  1 +
 library/core/src/char/methods.rs              | 79 ++++++++++++++++
 library/core/src/char/mod.rs                  | 15 +++
 library/core/src/unicode/unicode_data.rs      | 82 +++++++++++++++-
 library/coretests/tests/char.rs               | 35 +++++++
 library/coretests/tests/lib.rs                |  1 +
 library/coretests/tests/unicode.rs            | 14 +++
 library/coretests/tests/unicode/test_data.rs  | 91 ++++++++++++++++++
 src/tools/unicode-table-generator/Cargo.toml  |  1 +
 .../src/cascading_map.rs                      |  5 +-
 .../src/case_mapping.rs                       | 85 +++++++++++++++--
 src/tools/unicode-table-generator/src/main.rs | 94 +++++++++++++++++--
 .../src/raw_emitter.rs                        | 17 ++--
 .../src/unicode_download.rs                   |  9 +-
 14 files changed, 498 insertions(+), 31 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index bac0aeb37c600..bfc5598bea709 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6085,6 +6085,7 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
 name = "unicode-table-generator"
 version = "0.1.0"
 dependencies = [
+ "rustc-hash 2.1.1",
  "ucd-parse",
 ]
 
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index 00b735e91a377..2fb70214a9c34 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -1539,6 +1539,85 @@ impl char {
         ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
     }
 
+    /// Returns an iterator that yields the case folding of this `char` as one or more
+    /// `char`s.
+    ///
+    /// Case folding is meant to be used when performing case-insensitive string comparisons,
+    /// but case-folded strings should not generally be exposed directly to users. For most,
+    /// but not all, characters, the casefold mapping is identical to the lowercase one.
+    ///
+    /// This iterator yields the `char`(s) in the common or full case folding for this `char`,
+    /// as given by the [Unicode Character Database][ucd] [`CaseFolding.txt`].
+    ///
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`CaseFolding.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
+    ///
+    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
+    /// is independent of context and language.
+    ///
+    /// It also does not perform any normalization (e.g. NFC).
+    ///
+    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case folding in
+    /// general and Chapter 3 (Conformance) discusses the default algorithm for case folding.
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+    ///
+    /// # Examples
+    ///
+    /// The German sharp S `'ß'` (U+00DF) is a single Unicode code point
+    /// that casefolds to `"ss"`. Its uppercase variant `'ẞ'` (U+1E9E)
+    /// has the same case folding.
+    ///
+    /// As an iterator:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// assert!('ß'.to_casefold().eq(['s', 's']));
+    /// assert!('ẞ'.to_casefold().eq(['s', 's']));
+    /// ```
+    ///
+    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// assert_eq!('ß'.to_casefold().to_string(), "ss");
+    /// assert_eq!('ẞ'.to_casefold().to_string(), "ss");
+    /// ```
+    ///
+    /// # Note on locale
+    ///
+    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
+    ///
+    /// * 'Dotless': I / ı, sometimes written ï
+    /// * 'Dotted': İ / i
+    ///
+    /// Note that the uppercase undotted 'I' is the same as the Latin. Therefore:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// let casefold_i = 'I'.to_casefold().to_string();
+    /// ```
+    ///
+    /// The value of `casefold_i` here relies on the language of the text: if we're
+    /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
+    /// be `"ı"`. `to_casefold()` does not take this into account, and so:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// let casefold_i = 'I'.to_casefold().to_string();
+    ///
+    /// assert_eq!(casefold_i, "i");
+    /// ```
+    ///
+    /// holds across languages.
+    #[must_use = "this returns the case-folded character as a new iterator, \
+                  without modifying the original"]
+    #[unstable(feature = "casefold", issue = "none")]
+    #[inline]
+    pub fn to_casefold(self) -> ToCasefold {
+        ToCasefold(CaseMappingIter::new(conversions::to_casefold(self)))
+    }
+
     /// Checks if the value is within the ASCII range.
     ///
     /// # Examples
diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs
index 3231c4193064c..7e10c46265d1b 100644
--- a/library/core/src/char/mod.rs
+++ b/library/core/src/char/mod.rs
@@ -516,6 +516,21 @@ casemappingiter_impls! {
     ToLowercase
 }
 
+casemappingiter_impls! {
+    #[unstable(feature = "casefold", issue = "none")]
+    #[unstable(feature = "casefold", issue = "none")]
+    #[unstable(feature = "casefold", issue = "none")]
+    #[unstable(feature = "casefold", issue = "none")]
+    #[unstable(feature = "casefold", issue = "none")]
+    /// Returns an iterator that yields the case-folded equivalent of a `char`.
+    ///
+    /// This `struct` is created by the [`to_casefold`] method on [`char`]. See
+    /// its documentation for more.
+    ///
+    /// [`to_casefold`]: char::to_casefold
+    ToCasefold
+}
+
 #[derive(Debug, Clone)]
 struct CaseMappingIter(core::array::IntoIter<char, 3>);
 
diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs
index 83d3808051840..729234506f60c 100644
--- a/library/core/src/unicode/unicode_data.rs
+++ b/library/core/src/unicode/unicode_data.rs
@@ -10,7 +10,8 @@
 // to_lower        :  1112 bytes,   1462 codepoints in 185 ranges (U+0000C0 - U+01E921) using 2-level LUT
 // to_upper        :  1998 bytes,   1554 codepoints in 299 ranges (U+0000B5 - U+01E943) using 2-level LUT
 // to_title        :   340 bytes,    135 codepoints in  49 ranges (U+0000DF - U+00FB17) using 2-level LUT
-// Total           :  9629 bytes
+// to_casefold     :    32 bytes,    174 codepoints in   5 ranges (U+000131 - U+00ABBF) using 2-level LUT
+// Total           :  9661 bytes
 
 #[inline(always)]
 const fn bitset_search<
@@ -846,7 +847,7 @@ pub mod conversions {
     }
 
     pub fn to_lower(c: char) -> [char; 3] {
-        // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Lowercased%253A%5D-%5B%253AASCII%253A%5D&abb=on
+        // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=[:Changes_When_Lowercased:]-[:ASCII:]&abb=on
         if c < '\u{C0}' {
             return [c.to_ascii_lowercase(), '\0', '\0'];
         }
@@ -855,7 +856,7 @@ pub mod conversions {
     }
 
     pub fn to_upper(c: char) -> [char; 3] {
-        // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Uppercased%253A%5D-%5B%253AASCII%253A%5D&abb=on
+        // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=[:Changes_When_Uppercased:]-[:ASCII:]&abb=on
         if c < '\u{B5}' {
             return [c.to_ascii_uppercase(), '\0', '\0'];
         }
@@ -864,7 +865,7 @@ pub mod conversions {
     }
 
     pub fn to_title(c: char) -> [char; 3] {
-        // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Titlecased%253A%5D-%5B%253AASCII%253A%5D&abb=on
+        // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=[:Changes_When_Titlecased:]-[:ASCII:]&abb=on
         if c < '\u{B5}' {
             return [c.to_ascii_uppercase(), '\0', '\0'];
         }
@@ -872,6 +873,59 @@ pub mod conversions {
         lookup(c, &TITLECASE_LUT).or_else(|| lookup(c, &UPPERCASE_LUT)).unwrap_or([c, '\0', '\0'])
     }
 
+    pub fn to_casefold(c: char) -> [char; 3] {
+        // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=[:Changes_When_Casefolded:]-[:ASCII:]&abb=on
+        if c < '\u{B5}' {
+            return [c.to_ascii_lowercase(), '\0', '\0'];
+        }
+
+
+        lookup(c, &CASEFOLD_LUT).unwrap_or_else(|| {
+            // fall back to lowercase of uppercase
+
+            let uppercase = lookup(c, &UPPERCASE_LUT).unwrap_or([c, '\0', '\0']);
+            let mut final_result = to_lower(uppercase[0]);
+            if uppercase[1] != '\0' {
+                let lowercase_1 = to_lower(uppercase[1]);
+                debug_assert_eq!(lowercase_1[2], '\0');
+
+                // If, after updating the Unicode data
+                // to a new Unicode version, the below
+                // assertion starts to fail in tests,
+                // delete it, and uncomment the
+                // `if` condition and corresponding
+                // `else` block below it.
+                debug_assert_eq!(final_result[1], '\0');
+                //if final_result[1] == '\0' {
+
+                final_result[1] = lowercase_1[0];
+
+                if uppercase[2] != '\0' {
+                    debug_assert_eq!(lowercase_1[1], '\0');
+                    let lowercase_2 = to_lower(uppercase[2]);
+                    debug_assert_eq!(lowercase_2[1], '\0');
+                    debug_assert_eq!(lowercase_2[2], '\0');
+                    final_result[2] = lowercase_2[0];
+                } else {
+                    // If, after updating the Unicode data
+                    // to a new Unicode version, the below
+                    // assertion starts to fail in tests,
+                    // delete it and uncomment the line
+                    // below it.
+                    debug_assert_eq!(lowercase_1[1], '\0');
+                    //final_result[2] = lowercase_1[1];
+                }
+
+                /*} else {
+                    final_result[2] = lowercase_1[0];
+                    debug_assert_eq!(lowercase_1[1], '\0');
+                    debug_assert_eq!(uppercase[2], '\0')
+                }*/
+            }
+            final_result
+        })
+    }
+
     static LOWERCASE_LUT: L1Lut = L1Lut {
         l2_luts: [
             L2Lut {
@@ -1188,4 +1242,24 @@ pub mod conversions {
             },
         ],
     };
+
+    static CASEFOLD_LUT: L1Lut = L1Lut {
+        l2_luts: [
+            L2Lut {
+                singles: &[ // 4 entries, 24 bytes
+                    (Range::singleton(0x0131), 0), (Range::step_by_1(0x13a0..=0x13f5), 0),
+                    (Range::step_by_1(0x13f8..=0x13fd), -8), (Range::step_by_1(0xab70..=0xabbf), 26672),
+                ],
+                multis: &[ // 1 entries, 8 bytes
+                    (0x1e9e, [0x0073, 0x0073, 0x0000]),
+                ],
+            },
+            L2Lut {
+                singles: &[ // 0 entries, 0 bytes
+                ],
+                multis: &[ // 0 entries, 0 bytes
+                ],
+            },
+        ],
+    };
 }
diff --git a/library/coretests/tests/char.rs b/library/coretests/tests/char.rs
index 877017f682c97..3c43e4db7b330 100644
--- a/library/coretests/tests/char.rs
+++ b/library/coretests/tests/char.rs
@@ -212,6 +212,41 @@ fn test_to_uppercase() {
     assert_eq!(upper('ᾀ'), "ἈΙ");
 }
 
+#[test]
+fn test_to_casefold() {
+    fn fold(c: char) -> String {
+        let to_casefold = c.to_casefold();
+        assert_eq!(to_casefold.len(), to_casefold.count());
+        let iter: String = c.to_casefold().collect();
+        let disp: String = c.to_casefold().to_string();
+        assert_eq!(iter, disp);
+        let iter_rev: String = c.to_casefold().rev().collect();
+        let disp_rev: String = disp.chars().rev().collect();
+        assert_eq!(iter_rev, disp_rev);
+        iter
+    }
+    assert_eq!(fold('A'), "a");
+    assert_eq!(fold('Ö'), "ö");
+    assert_eq!(fold('ß'), "ss");
+    assert_eq!(fold('ẞ'), "ss");
+    assert_eq!(fold('Ü'), "ü");
+    assert_eq!(fold('💩'), "💩");
+    assert_eq!(fold('Σ'), "σ");
+    assert_eq!(fold('ς'), "σ");
+    assert_eq!(fold('Τ'), "τ");
+    assert_eq!(fold('Ι'), "ι");
+    assert_eq!(fold('Γ'), "γ");
+    assert_eq!(fold('Μ'), "μ");
+    assert_eq!(fold('Α'), "α");
+    assert_eq!(fold('ǅ'), "ǆ");
+    assert_eq!(fold('ﬁ'), "fi");
+    assert_eq!(fold('İ'), "i\u{307}");
+    assert_eq!(fold('ꮿ'), "Ꮿ");
+    assert_eq!(fold('Ꮿ'), "Ꮿ");
+    assert_eq!(fold('ῲ'), "ὼι");
+    assert_eq!(fold('\u{0345}'), "ι");
+}
+
 #[test]
 fn test_is_control() {
     assert!('\u{0}'.is_control());
diff --git a/library/coretests/tests/lib.rs b/library/coretests/tests/lib.rs
index 2c561b5b0529e..cd5f4d80039f4 100644
--- a/library/coretests/tests/lib.rs
+++ b/library/coretests/tests/lib.rs
@@ -10,6 +10,7 @@
 #![feature(bool_to_result)]
 #![feature(borrowed_buf_init)]
 #![feature(bstr)]
+#![feature(casefold)]
 #![feature(cfg_target_has_reliable_f16_f128)]
 #![feature(char_internals)]
 #![feature(char_max_len)]
diff --git a/library/coretests/tests/unicode.rs b/library/coretests/tests/unicode.rs
index 12eed25a1feae..6ca45661f7d83 100644
--- a/library/coretests/tests/unicode.rs
+++ b/library/coretests/tests/unicode.rs
@@ -124,3 +124,17 @@ fn to_titlecase() {
         unicode_data::conversions::to_upper,
     );
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // Miri is too slow
+fn to_casefold() {
+    test_case_mapping(test_data::TO_CASEFOLD, unicode_data::conversions::to_casefold, |c| {
+        let upper = unicode_data::conversions::to_upper(c);
+        let lower = upper.map(unicode_data::conversions::to_lower);
+        let mut result = ['\0'; 3];
+        for (i, c) in lower.into_iter().flatten().filter(|&c| c != '\0').enumerate() {
+            result[i] = c;
+        }
+        result
+    });
+}
diff --git a/library/coretests/tests/unicode/test_data.rs b/library/coretests/tests/unicode/test_data.rs
index 962770a0ff830..77b976c489c9b 100644
--- a/library/coretests/tests/unicode/test_data.rs
+++ b/library/coretests/tests/unicode/test_data.rs
@@ -2931,3 +2931,94 @@ pub(super) static TO_TITLE: &[(char, [char; 3]); 135] = &[
     ('\u{fb16}', ['\u{54e}', '\u{576}', '\u{0}']),
     ('\u{fb17}', ['\u{544}', '\u{56d}', '\u{0}']),
 ];
+
+#[rustfmt::skip]
+pub(super) static TO_CASEFOLD: &[(char, [char; 3]); 174] = &[
+    ('\u{131}', ['\u{131}', '\u{0}', '\u{0}']), ('\u{13a0}', ['\u{13a0}', '\u{0}', '\u{0}']),
+    ('\u{13a1}', ['\u{13a1}', '\u{0}', '\u{0}']), ('\u{13a2}', ['\u{13a2}', '\u{0}', '\u{0}']),
+    ('\u{13a3}', ['\u{13a3}', '\u{0}', '\u{0}']), ('\u{13a4}', ['\u{13a4}', '\u{0}', '\u{0}']),
+    ('\u{13a5}', ['\u{13a5}', '\u{0}', '\u{0}']), ('\u{13a6}', ['\u{13a6}', '\u{0}', '\u{0}']),
+    ('\u{13a7}', ['\u{13a7}', '\u{0}', '\u{0}']), ('\u{13a8}', ['\u{13a8}', '\u{0}', '\u{0}']),
+    ('\u{13a9}', ['\u{13a9}', '\u{0}', '\u{0}']), ('\u{13aa}', ['\u{13aa}', '\u{0}', '\u{0}']),
+    ('\u{13ab}', ['\u{13ab}', '\u{0}', '\u{0}']), ('\u{13ac}', ['\u{13ac}', '\u{0}', '\u{0}']),
+    ('\u{13ad}', ['\u{13ad}', '\u{0}', '\u{0}']), ('\u{13ae}', ['\u{13ae}', '\u{0}', '\u{0}']),
+    ('\u{13af}', ['\u{13af}', '\u{0}', '\u{0}']), ('\u{13b0}', ['\u{13b0}', '\u{0}', '\u{0}']),
+    ('\u{13b1}', ['\u{13b1}', '\u{0}', '\u{0}']), ('\u{13b2}', ['\u{13b2}', '\u{0}', '\u{0}']),
+    ('\u{13b3}', ['\u{13b3}', '\u{0}', '\u{0}']), ('\u{13b4}', ['\u{13b4}', '\u{0}', '\u{0}']),
+    ('\u{13b5}', ['\u{13b5}', '\u{0}', '\u{0}']), ('\u{13b6}', ['\u{13b6}', '\u{0}', '\u{0}']),
+    ('\u{13b7}', ['\u{13b7}', '\u{0}', '\u{0}']), ('\u{13b8}', ['\u{13b8}', '\u{0}', '\u{0}']),
+    ('\u{13b9}', ['\u{13b9}', '\u{0}', '\u{0}']), ('\u{13ba}', ['\u{13ba}', '\u{0}', '\u{0}']),
+    ('\u{13bb}', ['\u{13bb}', '\u{0}', '\u{0}']), ('\u{13bc}', ['\u{13bc}', '\u{0}', '\u{0}']),
+    ('\u{13bd}', ['\u{13bd}', '\u{0}', '\u{0}']), ('\u{13be}', ['\u{13be}', '\u{0}', '\u{0}']),
+    ('\u{13bf}', ['\u{13bf}', '\u{0}', '\u{0}']), ('\u{13c0}', ['\u{13c0}', '\u{0}', '\u{0}']),
+    ('\u{13c1}', ['\u{13c1}', '\u{0}', '\u{0}']), ('\u{13c2}', ['\u{13c2}', '\u{0}', '\u{0}']),
+    ('\u{13c3}', ['\u{13c3}', '\u{0}', '\u{0}']), ('\u{13c4}', ['\u{13c4}', '\u{0}', '\u{0}']),
+    ('\u{13c5}', ['\u{13c5}', '\u{0}', '\u{0}']), ('\u{13c6}', ['\u{13c6}', '\u{0}', '\u{0}']),
+    ('\u{13c7}', ['\u{13c7}', '\u{0}', '\u{0}']), ('\u{13c8}', ['\u{13c8}', '\u{0}', '\u{0}']),
+    ('\u{13c9}', ['\u{13c9}', '\u{0}', '\u{0}']), ('\u{13ca}', ['\u{13ca}', '\u{0}', '\u{0}']),
+    ('\u{13cb}', ['\u{13cb}', '\u{0}', '\u{0}']), ('\u{13cc}', ['\u{13cc}', '\u{0}', '\u{0}']),
+    ('\u{13cd}', ['\u{13cd}', '\u{0}', '\u{0}']), ('\u{13ce}', ['\u{13ce}', '\u{0}', '\u{0}']),
+    ('\u{13cf}', ['\u{13cf}', '\u{0}', '\u{0}']), ('\u{13d0}', ['\u{13d0}', '\u{0}', '\u{0}']),
+    ('\u{13d1}', ['\u{13d1}', '\u{0}', '\u{0}']), ('\u{13d2}', ['\u{13d2}', '\u{0}', '\u{0}']),
+    ('\u{13d3}', ['\u{13d3}', '\u{0}', '\u{0}']), ('\u{13d4}', ['\u{13d4}', '\u{0}', '\u{0}']),
+    ('\u{13d5}', ['\u{13d5}', '\u{0}', '\u{0}']), ('\u{13d6}', ['\u{13d6}', '\u{0}', '\u{0}']),
+    ('\u{13d7}', ['\u{13d7}', '\u{0}', '\u{0}']), ('\u{13d8}', ['\u{13d8}', '\u{0}', '\u{0}']),
+    ('\u{13d9}', ['\u{13d9}', '\u{0}', '\u{0}']), ('\u{13da}', ['\u{13da}', '\u{0}', '\u{0}']),
+    ('\u{13db}', ['\u{13db}', '\u{0}', '\u{0}']), ('\u{13dc}', ['\u{13dc}', '\u{0}', '\u{0}']),
+    ('\u{13dd}', ['\u{13dd}', '\u{0}', '\u{0}']), ('\u{13de}', ['\u{13de}', '\u{0}', '\u{0}']),
+    ('\u{13df}', ['\u{13df}', '\u{0}', '\u{0}']), ('\u{13e0}', ['\u{13e0}', '\u{0}', '\u{0}']),
+    ('\u{13e1}', ['\u{13e1}', '\u{0}', '\u{0}']), ('\u{13e2}', ['\u{13e2}', '\u{0}', '\u{0}']),
+    ('\u{13e3}', ['\u{13e3}', '\u{0}', '\u{0}']), ('\u{13e4}', ['\u{13e4}', '\u{0}', '\u{0}']),
+    ('\u{13e5}', ['\u{13e5}', '\u{0}', '\u{0}']), ('\u{13e6}', ['\u{13e6}', '\u{0}', '\u{0}']),
+    ('\u{13e7}', ['\u{13e7}', '\u{0}', '\u{0}']), ('\u{13e8}', ['\u{13e8}', '\u{0}', '\u{0}']),
+    ('\u{13e9}', ['\u{13e9}', '\u{0}', '\u{0}']), ('\u{13ea}', ['\u{13ea}', '\u{0}', '\u{0}']),
+    ('\u{13eb}', ['\u{13eb}', '\u{0}', '\u{0}']), ('\u{13ec}', ['\u{13ec}', '\u{0}', '\u{0}']),
+    ('\u{13ed}', ['\u{13ed}', '\u{0}', '\u{0}']), ('\u{13ee}', ['\u{13ee}', '\u{0}', '\u{0}']),
+    ('\u{13ef}', ['\u{13ef}', '\u{0}', '\u{0}']), ('\u{13f0}', ['\u{13f0}', '\u{0}', '\u{0}']),
+    ('\u{13f1}', ['\u{13f1}', '\u{0}', '\u{0}']), ('\u{13f2}', ['\u{13f2}', '\u{0}', '\u{0}']),
+    ('\u{13f3}', ['\u{13f3}', '\u{0}', '\u{0}']), ('\u{13f4}', ['\u{13f4}', '\u{0}', '\u{0}']),
+    ('\u{13f5}', ['\u{13f5}', '\u{0}', '\u{0}']), ('\u{13f8}', ['\u{13f0}', '\u{0}', '\u{0}']),
+    ('\u{13f9}', ['\u{13f1}', '\u{0}', '\u{0}']), ('\u{13fa}', ['\u{13f2}', '\u{0}', '\u{0}']),
+    ('\u{13fb}', ['\u{13f3}', '\u{0}', '\u{0}']), ('\u{13fc}', ['\u{13f4}', '\u{0}', '\u{0}']),
+    ('\u{13fd}', ['\u{13f5}', '\u{0}', '\u{0}']), ('\u{1e9e}', ['s', 's', '\u{0}']),
+    ('\u{ab70}', ['\u{13a0}', '\u{0}', '\u{0}']), ('\u{ab71}', ['\u{13a1}', '\u{0}', '\u{0}']),
+    ('\u{ab72}', ['\u{13a2}', '\u{0}', '\u{0}']), ('\u{ab73}', ['\u{13a3}', '\u{0}', '\u{0}']),
+    ('\u{ab74}', ['\u{13a4}', '\u{0}', '\u{0}']), ('\u{ab75}', ['\u{13a5}', '\u{0}', '\u{0}']),
+    ('\u{ab76}', ['\u{13a6}', '\u{0}', '\u{0}']), ('\u{ab77}', ['\u{13a7}', '\u{0}', '\u{0}']),
+    ('\u{ab78}', ['\u{13a8}', '\u{0}', '\u{0}']), ('\u{ab79}', ['\u{13a9}', '\u{0}', '\u{0}']),
+    ('\u{ab7a}', ['\u{13aa}', '\u{0}', '\u{0}']), ('\u{ab7b}', ['\u{13ab}', '\u{0}', '\u{0}']),
+    ('\u{ab7c}', ['\u{13ac}', '\u{0}', '\u{0}']), ('\u{ab7d}', ['\u{13ad}', '\u{0}', '\u{0}']),
+    ('\u{ab7e}', ['\u{13ae}', '\u{0}', '\u{0}']), ('\u{ab7f}', ['\u{13af}', '\u{0}', '\u{0}']),
+    ('\u{ab80}', ['\u{13b0}', '\u{0}', '\u{0}']), ('\u{ab81}', ['\u{13b1}', '\u{0}', '\u{0}']),
+    ('\u{ab82}', ['\u{13b2}', '\u{0}', '\u{0}']), ('\u{ab83}', ['\u{13b3}', '\u{0}', '\u{0}']),
+    ('\u{ab84}', ['\u{13b4}', '\u{0}', '\u{0}']), ('\u{ab85}', ['\u{13b5}', '\u{0}', '\u{0}']),
+    ('\u{ab86}', ['\u{13b6}', '\u{0}', '\u{0}']), ('\u{ab87}', ['\u{13b7}', '\u{0}', '\u{0}']),
+    ('\u{ab88}', ['\u{13b8}', '\u{0}', '\u{0}']), ('\u{ab89}', ['\u{13b9}', '\u{0}', '\u{0}']),
+    ('\u{ab8a}', ['\u{13ba}', '\u{0}', '\u{0}']), ('\u{ab8b}', ['\u{13bb}', '\u{0}', '\u{0}']),
+    ('\u{ab8c}', ['\u{13bc}', '\u{0}', '\u{0}']), ('\u{ab8d}', ['\u{13bd}', '\u{0}', '\u{0}']),
+    ('\u{ab8e}', ['\u{13be}', '\u{0}', '\u{0}']), ('\u{ab8f}', ['\u{13bf}', '\u{0}', '\u{0}']),
+    ('\u{ab90}', ['\u{13c0}', '\u{0}', '\u{0}']), ('\u{ab91}', ['\u{13c1}', '\u{0}', '\u{0}']),
+    ('\u{ab92}', ['\u{13c2}', '\u{0}', '\u{0}']), ('\u{ab93}', ['\u{13c3}', '\u{0}', '\u{0}']),
+    ('\u{ab94}', ['\u{13c4}', '\u{0}', '\u{0}']), ('\u{ab95}', ['\u{13c5}', '\u{0}', '\u{0}']),
+    ('\u{ab96}', ['\u{13c6}', '\u{0}', '\u{0}']), ('\u{ab97}', ['\u{13c7}', '\u{0}', '\u{0}']),
+    ('\u{ab98}', ['\u{13c8}', '\u{0}', '\u{0}']), ('\u{ab99}', ['\u{13c9}', '\u{0}', '\u{0}']),
+    ('\u{ab9a}', ['\u{13ca}', '\u{0}', '\u{0}']), ('\u{ab9b}', ['\u{13cb}', '\u{0}', '\u{0}']),
+    ('\u{ab9c}', ['\u{13cc}', '\u{0}', '\u{0}']), ('\u{ab9d}', ['\u{13cd}', '\u{0}', '\u{0}']),
+    ('\u{ab9e}', ['\u{13ce}', '\u{0}', '\u{0}']), ('\u{ab9f}', ['\u{13cf}', '\u{0}', '\u{0}']),
+    ('\u{aba0}', ['\u{13d0}', '\u{0}', '\u{0}']), ('\u{aba1}', ['\u{13d1}', '\u{0}', '\u{0}']),
+    ('\u{aba2}', ['\u{13d2}', '\u{0}', '\u{0}']), ('\u{aba3}', ['\u{13d3}', '\u{0}', '\u{0}']),
+    ('\u{aba4}', ['\u{13d4}', '\u{0}', '\u{0}']), ('\u{aba5}', ['\u{13d5}', '\u{0}', '\u{0}']),
+    ('\u{aba6}', ['\u{13d6}', '\u{0}', '\u{0}']), ('\u{aba7}', ['\u{13d7}', '\u{0}', '\u{0}']),
+    ('\u{aba8}', ['\u{13d8}', '\u{0}', '\u{0}']), ('\u{aba9}', ['\u{13d9}', '\u{0}', '\u{0}']),
+    ('\u{abaa}', ['\u{13da}', '\u{0}', '\u{0}']), ('\u{abab}', ['\u{13db}', '\u{0}', '\u{0}']),
+    ('\u{abac}', ['\u{13dc}', '\u{0}', '\u{0}']), ('\u{abad}', ['\u{13dd}', '\u{0}', '\u{0}']),
+    ('\u{abae}', ['\u{13de}', '\u{0}', '\u{0}']), ('\u{abaf}', ['\u{13df}', '\u{0}', '\u{0}']),
+    ('\u{abb0}', ['\u{13e0}', '\u{0}', '\u{0}']), ('\u{abb1}', ['\u{13e1}', '\u{0}', '\u{0}']),
+    ('\u{abb2}', ['\u{13e2}', '\u{0}', '\u{0}']), ('\u{abb3}', ['\u{13e3}', '\u{0}', '\u{0}']),
+    ('\u{abb4}', ['\u{13e4}', '\u{0}', '\u{0}']), ('\u{abb5}', ['\u{13e5}', '\u{0}', '\u{0}']),
+    ('\u{abb6}', ['\u{13e6}', '\u{0}', '\u{0}']), ('\u{abb7}', ['\u{13e7}', '\u{0}', '\u{0}']),
+    ('\u{abb8}', ['\u{13e8}', '\u{0}', '\u{0}']), ('\u{abb9}', ['\u{13e9}', '\u{0}', '\u{0}']),
+    ('\u{abba}', ['\u{13ea}', '\u{0}', '\u{0}']), ('\u{abbb}', ['\u{13eb}', '\u{0}', '\u{0}']),
+    ('\u{abbc}', ['\u{13ec}', '\u{0}', '\u{0}']), ('\u{abbd}', ['\u{13ed}', '\u{0}', '\u{0}']),
+    ('\u{abbe}', ['\u{13ee}', '\u{0}', '\u{0}']), ('\u{abbf}', ['\u{13ef}', '\u{0}', '\u{0}']),
+];
diff --git a/src/tools/unicode-table-generator/Cargo.toml b/src/tools/unicode-table-generator/Cargo.toml
index 3ca6e9e316f1d..3be916dc69bf5 100644
--- a/src/tools/unicode-table-generator/Cargo.toml
+++ b/src/tools/unicode-table-generator/Cargo.toml
@@ -7,3 +7,4 @@ edition = "2024"
 
 [dependencies]
 ucd-parse = "0.1.3"
+rustc-hash = "2.0.0"
diff --git a/src/tools/unicode-table-generator/src/cascading_map.rs b/src/tools/unicode-table-generator/src/cascading_map.rs
index 56e6401908dcf..da06049beb575 100644
--- a/src/tools/unicode-table-generator/src/cascading_map.rs
+++ b/src/tools/unicode-table-generator/src/cascading_map.rs
@@ -1,7 +1,8 @@
-use std::collections::HashMap;
 use std::fmt::Write as _;
 use std::ops::Range;
 
+use rustc_hash::FxHashMap;
+
 use crate::fmt_list;
 use crate::raw_emitter::RawEmitter;
 
@@ -27,7 +28,7 @@ impl RawEmitter {
         println!("there are {} points", points.len());
 
         // how many distinct ranges need to be counted?
-        let mut codepoints_by_high_bytes = HashMap::<usize, Vec<u32>>::new();
+        let mut codepoints_by_high_bytes = FxHashMap::<usize, Vec<u32>>::default();
         for point in points {
             // assert that there is no whitespace over the 0x3000 range.
             assert!(point <= 0x3000, "the highest unicode whitespace value has changed");
diff --git a/src/tools/unicode-table-generator/src/case_mapping.rs b/src/tools/unicode-table-generator/src/case_mapping.rs
index b7b385542ef53..ee4dfc2514c20 100644
--- a/src/tools/unicode-table-generator/src/case_mapping.rs
+++ b/src/tools/unicode-table-generator/src/case_mapping.rs
@@ -48,21 +48,33 @@ use std::ops::RangeInclusive;
 use crate::fmt_helpers::Hex;
 use crate::{UnicodeData, fmt_list};
 
-pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [(String, usize); 3]) {
+pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [(String, usize); 4]) {
     let mut file = String::new();
 
     file.push_str("\n\n");
     file.push_str(HEADER.trim_start());
     file.push('\n');
-    let (lower_tables, lower_desc, lower_size) = generate_tables("LOWER", &data.to_lower);
+    let (lower_tables, lower_desc, lower_size) = generate_tables("LOWERCASE", &data.to_lower);
     file.push_str(&lower_tables);
     file.push_str("\n\n");
-    let (upper_tables, upper_desc, upper_size) = generate_tables("UPPER", &data.to_upper);
+    let (upper_tables, upper_desc, upper_size) = generate_tables("UPPERCASE", &data.to_upper);
     file.push_str(&upper_tables);
     file.push_str("\n\n");
-    let (title_tables, title_desc, title_size) = generate_tables("TITLE", &data.to_title);
+    let (title_tables, title_desc, title_size) = generate_tables("TITLECASE", &data.to_title);
     file.push_str(&title_tables);
-    (file, [(lower_desc, lower_size), (upper_desc, upper_size), (title_desc, title_size)])
+    file.push_str("\n\n");
+    let (casefold_tables, casefold_desc, casefold_size) =
+        generate_tables("CASEFOLD", &data.to_casefold);
+    file.push_str(&casefold_tables);
+    (
+        file,
+        [
+            (lower_desc, lower_size),
+            (upper_desc, upper_size),
+            (title_desc, title_size),
+            (casefold_desc, casefold_size),
+        ],
+    )
 }
 
 // So far, only planes 0 and 1 (Basic Multilingual Plane and Supplementary
@@ -205,7 +217,7 @@ fn generate_tables(case: &str, data: &BTreeMap<u32, [u32; 3]>) -> (String, Strin
                     output_high, input_high,
                     "Case-mapping a character should not change its plane"
                 );
-                let delta = output_low as i16 - input_low as i16;
+                let delta = output_low.wrapping_sub(input_low).cast_signed();
                 let range = Range::singleton(input_low);
                 l2_lut.singles.push((range, delta));
             }
@@ -264,7 +276,7 @@ fn generate_tables(case: &str, data: &BTreeMap<u32, [u32; 3]>) -> (String, Strin
     let size = l1_lut.size();
     let num_ranges =
         l1_lut.l2_luts.iter().map(|l2| l2.singles.len() + l2.multis.len()).sum::<usize>();
-    let table = format!("static {case}CASE_LUT: L1Lut = {l1_lut:#?};");
+    let table = format!("static {case}_LUT: L1Lut = {l1_lut:#?};");
     let desc = format!(
         "{:6} codepoints in {:3} ranges (U+{:06X} - U+{:06X}) using 2-level LUT",
         data.len(),
@@ -381,7 +393,7 @@ fn lookup(input: char, l1_lut: &L1Lut) -> Option<[char; 3]> {
 }
 
 pub fn to_lower(c: char) -> [char; 3] {
-    // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Lowercased%253A%5D-%5B%253AASCII%253A%5D&abb=on
+    // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=[:Changes_When_Lowercased:]-[:ASCII:]&abb=on
     if c < '\u{C0}' {
         return [c.to_ascii_lowercase(), '\0', '\0'];
     }
@@ -390,7 +402,7 @@ pub fn to_lower(c: char) -> [char; 3] {
 }
 
 pub fn to_upper(c: char) -> [char; 3] {
-    // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Uppercased%253A%5D-%5B%253AASCII%253A%5D&abb=on
+    // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=[:Changes_When_Uppercased:]-[:ASCII:]&abb=on
     if c < '\u{B5}' {
         return [c.to_ascii_uppercase(), '\0', '\0'];
     }
@@ -399,11 +411,64 @@ pub fn to_upper(c: char) -> [char; 3] {
 }
 
 pub fn to_title(c: char) -> [char; 3] {
-    // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Titlecased%253A%5D-%5B%253AASCII%253A%5D&abb=on
+    // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=[:Changes_When_Titlecased:]-[:ASCII:]&abb=on
     if c < '\u{B5}' {
         return [c.to_ascii_uppercase(), '\0', '\0'];
     }
 
     lookup(c, &TITLECASE_LUT).or_else(|| lookup(c, &UPPERCASE_LUT)).unwrap_or([c, '\0', '\0'])
 }
+
+pub fn to_casefold(c: char) -> [char; 3] {
+    // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=[:Changes_When_Casefolded:]-[:ASCII:]&abb=on
+    if c < '\u{B5}' {
+        return [c.to_ascii_lowercase(), '\0', '\0'];
+    }
+
+
+    lookup(c, &CASEFOLD_LUT).unwrap_or_else(|| {
+        // fall back to lowercase of uppercase
+
+        let uppercase = lookup(c, &UPPERCASE_LUT).unwrap_or([c, '\0', '\0']);
+        let mut final_result = to_lower(uppercase[0]);
+        if uppercase[1] != '\0' {
+            let lowercase_1 = to_lower(uppercase[1]);
+            debug_assert_eq!(lowercase_1[2], '\0');
+
+            // If, after updating the Unicode data
+            // to a new Unicode version, the below
+            // assertion starts to fail in tests,
+            // delete it, and uncomment the
+            // `if` condition and corresponding
+            // `else` block below it.
+            debug_assert_eq!(final_result[1], '\0');
+            //if final_result[1] == '\0' {
+
+            final_result[1] = lowercase_1[0];
+
+            if uppercase[2] != '\0' {
+                debug_assert_eq!(lowercase_1[1], '\0');
+                let lowercase_2 = to_lower(uppercase[2]);
+                debug_assert_eq!(lowercase_2[1], '\0');
+                debug_assert_eq!(lowercase_2[2], '\0');
+                final_result[2] = lowercase_2[0];
+            } else {
+                // If, after updating the Unicode data
+                // to a new Unicode version, the below
+                // assertion starts to fail in tests,
+                // delete it and uncomment the line
+                // below it.
+                debug_assert_eq!(lowercase_1[1], '\0');
+                //final_result[2] = lowercase_1[1];
+            }
+
+            /*} else {
+                final_result[2] = lowercase_1[0];
+                debug_assert_eq!(lowercase_1[1], '\0');
+                debug_assert_eq!(uppercase[2], '\0')
+            }*/
+        }
+        final_result
+    })
+}
 ";
diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs
index 398b4c7b7ec5a..a55cd2f657a6d 100644
--- a/src/tools/unicode-table-generator/src/main.rs
+++ b/src/tools/unicode-table-generator/src/main.rs
@@ -71,11 +71,12 @@
 //! index of that offset is utilized as the answer to whether we're in the set
 //! or not.
 
-use std::collections::{BTreeMap, HashMap};
+use std::collections::BTreeMap;
 use std::fmt::Write;
 use std::ops::Range;
 
-use ucd_parse::Codepoints;
+use rustc_hash::{FxHashMap, FxHashSet};
+use ucd_parse::{Codepoint, Codepoints};
 
 mod cascading_map;
 mod case_mapping;
@@ -106,6 +107,9 @@ struct UnicodeData {
     to_title: BTreeMap<u32, [u32; 3]>,
     /// Only stores mappings that are not to self
     to_lower: BTreeMap<u32, [u32; 3]>,
+    /// Only stores mappings that differ from
+    /// `to_upper` followed by `to_lower`
+    to_casefold: BTreeMap<u32, [u32; 3]>,
 }
 
 fn to_mapping(
@@ -126,7 +130,7 @@ static UNICODE_DIRECTORY: &str = "unicode-downloads";
 fn load_data() -> UnicodeData {
     unicode_download::fetch_latest();
 
-    let mut properties = HashMap::new();
+    let mut properties = FxHashMap::default();
     for row in ucd_parse::parse::<_, ucd_parse::CoreProperty>(&UNICODE_DIRECTORY).unwrap() {
         if let Some(name) = PROPERTIES.iter().find(|prop| **prop == row.property.as_str()) {
             properties.entry(*name).or_insert_with(Vec::new).push(row.codepoints);
@@ -138,7 +142,8 @@ fn load_data() -> UnicodeData {
         }
     }
 
-    let [mut to_lower, mut to_upper, mut to_title] = [const { BTreeMap::new() }; 3];
+    let [mut to_lower, mut to_upper, mut to_title, mut to_casefold] =
+        [const { BTreeMap::new() }; 4];
     for row in ucd_parse::UnicodeDataExpander::new(
         ucd_parse::parse::<_, ucd_parse::UnicodeData>(&UNICODE_DIRECTORY).unwrap(),
     ) {
@@ -189,6 +194,78 @@ fn load_data() -> UnicodeData {
         }
     }
 
+    fn get_mapping_from_btreemap<'a>(
+        cp: Codepoint,
+        map: &'a BTreeMap<u32, [u32; 3]>,
+    ) -> Vec<Codepoint> {
+        let mapping =
+            map.get(&cp.value()).copied().map(|cs| cs.map(|c| Codepoint::from_u32(c).unwrap()));
+
+        mapping
+            .as_ref()
+            .map(|cs| {
+                let nul = Codepoint::from_u32(0).unwrap();
+                if cs[1] == nul {
+                    &cs[..1]
+                } else if cs[2] == nul {
+                    &cs[..2]
+                } else {
+                    &cs[..]
+                }
+            })
+            .map_or_else(|| vec![cp], ToOwned::to_owned)
+    }
+
+    let mut nontrivial_casefold = FxHashSet::default();
+
+    for row in ucd_parse::parse::<_, ucd_parse::CaseFold>(&UNICODE_DIRECTORY).unwrap() {
+        use ucd_parse::{CaseStatus, Codepoint};
+        if matches!(row.status, CaseStatus::Common | CaseStatus::Full) {
+            let key = row.codepoint.value();
+            nontrivial_casefold.insert(key);
+
+            // We store case-fold data only for characters whose case-folding
+            // differs from the lowercase of their uppercase.
+
+            let lower_upper_mapping: Vec<Codepoint> =
+                get_mapping_from_btreemap(row.codepoint, &to_upper)
+                    .into_iter()
+                    .flat_map(|cp| get_mapping_from_btreemap(cp, &to_lower))
+                    .collect();
+
+            if let Some(casefold) = to_mapping(&lower_upper_mapping, &row.mapping) {
+                to_casefold.insert(key, casefold);
+            }
+        }
+    }
+
+    // Now, account for characters that remain unchanged by case-folding
+    // (and are therefore omitted from `CaseFolding.txt`),
+    // yet still differ from the lowercase of their uppercase.
+
+    for c in '\0'..=char::MAX {
+        let cnum: u32 = c.into();
+        if !nontrivial_casefold.contains(&cnum) {
+            let cp = Codepoint::from_u32(cnum).unwrap();
+
+            use std::collections::btree_map::Entry;
+            match to_casefold.entry(cnum) {
+                Entry::Vacant(vacant_entry) => {
+                    let lower_upper_mapping: Vec<Codepoint> =
+                        get_mapping_from_btreemap(cp, &to_upper)
+                            .into_iter()
+                            .flat_map(|cp| get_mapping_from_btreemap(cp, &to_lower))
+                            .collect();
+
+                    if let Some(casefold) = to_mapping(&lower_upper_mapping, &[cp]) {
+                        vacant_entry.insert(casefold);
+                    }
+                }
+                Entry::Occupied(_) => {}
+            }
+        }
+    }
+
     // Filter out ASCII codepoints.
     to_lower.retain(|&c, _| c > 0x7f);
     to_upper.retain(|&c, _| c > 0x7f);
@@ -207,7 +284,7 @@ fn load_data() -> UnicodeData {
         .collect();
 
     properties.sort_by_key(|p| p.0);
-    UnicodeData { ranges: properties, to_lower, to_title, to_upper }
+    UnicodeData { ranges: properties, to_lower, to_title, to_upper, to_casefold }
 }
 
 fn main() {
@@ -259,7 +336,9 @@ fn main() {
         total_bytes += emitter.bytes_used;
     }
     let (conversions, sizes) = case_mapping::generate_case_mapping(&unicode_data);
-    for (name, (desc, size)) in ["to_lower", "to_upper", "to_title"].iter().zip(sizes) {
+    for (name, (desc, size)) in
+        ["to_lower", "to_upper", "to_title", "to_casefold"].iter().zip(sizes)
+    {
         table_file.push_str(&format!("// {:16}: {:5} bytes, {desc}\n", name, size,));
         total_bytes += size;
     }
@@ -369,10 +448,11 @@ pub(super) static {prop_upper}: &[RangeInclusive<char>; {is_true_len}] = &[{is_t
         .unwrap();
     }
 
-    for (name, lut) in ["TO_LOWER", "TO_UPPER", "TO_TITLE"].iter().zip([
+    for (name, lut) in ["TO_LOWER", "TO_UPPER", "TO_TITLE", "TO_CASEFOLD"].iter().zip([
         &data.to_lower,
         &data.to_upper,
         &data.to_title,
+        &data.to_casefold,
     ]) {
         let lut = lut
             .iter()
diff --git a/src/tools/unicode-table-generator/src/raw_emitter.rs b/src/tools/unicode-table-generator/src/raw_emitter.rs
index 297965615c1a5..de3395df3806e 100644
--- a/src/tools/unicode-table-generator/src/raw_emitter.rs
+++ b/src/tools/unicode-table-generator/src/raw_emitter.rs
@@ -1,7 +1,9 @@
-use std::collections::{BTreeMap, BTreeSet, HashMap};
+use std::collections::{BTreeMap, BTreeSet};
 use std::fmt::{self, Write};
 use std::ops::Range;
 
+use rustc_hash::FxHashMap;
+
 use crate::fmt_list;
 
 #[derive(Clone)]
@@ -126,8 +128,11 @@ impl RawEmitter {
         for chunk in compressed_words.chunks(chunk_length) {
             chunks.insert(chunk);
         }
-        let chunk_map =
-            chunks.iter().enumerate().map(|(idx, &chunk)| (chunk, idx)).collect::<HashMap<_, _>>();
+        let chunk_map = chunks
+            .iter()
+            .enumerate()
+            .map(|(idx, &chunk)| (chunk, idx))
+            .collect::<FxHashMap<_, _>>();
         let mut chunk_indices = Vec::new();
         for chunk in compressed_words.chunks(chunk_length) {
             chunk_indices.push(chunk_map[chunk]);
@@ -186,7 +191,7 @@ struct Canonicalized {
 
     /// Maps an input unique word to the associated index (u8) which is into
     /// canonical_words or canonicalized_words (in order).
-    unique_mapping: HashMap<u64, u8>,
+    unique_mapping: FxHashMap<u64, u8>,
 }
 
 impl Canonicalized {
@@ -253,7 +258,7 @@ impl Canonicalized {
         // These are mapped words, which will be represented by an index into
         // the canonical_words and a Mapping; u16 when encoded.
         let mut canonicalized_words = Vec::new();
-        let mut unique_mapping = HashMap::new();
+        let mut unique_mapping = FxHashMap::default();
 
         #[derive(Debug, PartialEq, Eq)]
         enum UniqueMapping {
@@ -361,7 +366,7 @@ impl Canonicalized {
                     },
                 )
             })
-            .collect::<HashMap<_, _>>();
+            .collect::<FxHashMap<_, _>>();
 
         let mut distinct_indices = BTreeSet::new();
         for &w in unique_words {
diff --git a/src/tools/unicode-table-generator/src/unicode_download.rs b/src/tools/unicode-table-generator/src/unicode_download.rs
index c9826170905c2..b2fcf6444033d 100644
--- a/src/tools/unicode-table-generator/src/unicode_download.rs
+++ b/src/tools/unicode-table-generator/src/unicode_download.rs
@@ -7,8 +7,13 @@ static URL_PREFIX: &str = "https://www.unicode.org/Public/UCD/latest/ucd/";
 
 static README: &str = "ReadMe.txt";
 
-static RESOURCES: &[&str] =
-    &["DerivedCoreProperties.txt", "PropList.txt", "UnicodeData.txt", "SpecialCasing.txt"];
+static RESOURCES: &[&str] = &[
+    "CaseFolding.txt",
+    "DerivedCoreProperties.txt",
+    "PropList.txt",
+    "SpecialCasing.txt",
+    "UnicodeData.txt",
+];
 
 #[track_caller]
 fn fetch(url: &str) -> Output {

From a5d82ca7fbe78ffc2ddff687ded2c383fe269029 Mon Sep 17 00:00:00 2001
From: Jules Bertholet <julesbertholet@quoi.xyz>
Date: Thu, 2 Apr 2026 20:26:14 -0400
Subject: [PATCH 2/5] Add `str::to_casefold()`

---
 library/alloc/src/str.rs        | 97 +++++++++++++++++++++++++++++++++
 library/alloctests/tests/lib.rs |  1 +
 library/alloctests/tests/str.rs |  8 ++-
 3 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs
index 2966f3ccc1791..db9f96cdd7e70 100644
--- a/library/alloc/src/str.rs
+++ b/library/alloc/src/str.rs
@@ -497,6 +497,103 @@ impl str {
         s
     }
 
+    /// Returns the case-folded equivalent of this string slice, as a new [`String`].
+    ///
+    /// Case folding is a transformation, mostly matching lowercase, that is meant to be used
+    /// for case-insensitive string comparisons. Case-folded strings should not usually
+    /// be exposed directly to users.
+    ///
+    /// For the precise specification of case folding, see
+    /// [Chapter 3 (Conformance)](https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63737)
+    /// of the Unicode standard.
+    ///
+    /// Since some characters can expand into multiple characters when case folding,
+    /// this function returns a [`String`] instead of modifying the parameter in-place.
+    ///
+    /// This function does not perform any normalization (e.g. NFC).
+    ///
+    /// Like [`char::to_casefold()`] this method does not handle language-specific
+    /// casing, like Turkish and Azeri I/ı/İ/i. See that method's documentation
+    /// for more information.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// let s0 = "HELLO";
+    /// let s1 = "Hello";
+    ///
+    /// assert_eq!(s0.to_casefold(), s1.to_casefold());
+    /// assert_eq!(s0.to_casefold(), "hello")
+    /// ```
+    ///
+    /// Scripts without case are not changed:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// let new_year = "农历新年";
+    ///
+    /// assert_eq!(new_year, new_year.to_casefold());
+    /// ```
+    ///
+    /// One character can become multiple:
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// let s0 = "TSCHÜẞ";
+    /// let s1 = "TSCHÜSS";
+    /// let s2 = "tschüß";
+    ///
+    /// assert_eq!(s0.to_casefold(), s1.to_casefold());
+    /// assert_eq!(s0.to_casefold(), s2.to_casefold());
+    /// assert_eq!(s0.to_casefold(), "tschüss");
+    /// ```
+    ///
+    /// No NFC normalization is performed:
+    ///
+    /// ```rust
+    /// #![feature(casefold)]
+    /// // These two strings are visually and semantically identical...
+    /// let comp = "Á";
+    /// let decomp = "Á";
+    ///
+    /// // ... but not codepoint-for-codepoint equal.
+    ///
+    /// assert_eq!(comp, "\u{C1}");
+    /// assert_eq!(decomp, "A\u{0301}");
+    ///
+    /// // Their case-foldings are likewise unequal:
+    ///
+    /// assert_eq!(comp.to_casefold(), "\u{E1}");
+    /// assert_eq!(decomp.to_casefold(), "a\u{0301}");
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[rustc_allow_incoherent_impl]
+    #[must_use = "this returns the case-folded string as a new String, \
+                  without modifying the original"]
+    #[unstable(feature = "casefold", issue = "none")]
+    pub fn to_casefold(&self) -> String {
+        let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_lowercase);
+
+        for c in rest.chars() {
+            match conversions::to_casefold(c) {
+                [a, '\0', _] => s.push(a),
+                [a, b, '\0'] => {
+                    s.push(a);
+                    s.push(b);
+                }
+                [a, b, c] => {
+                    s.push(a);
+                    s.push(b);
+                    s.push(c);
+                }
+            }
+        }
+        s
+    }
+
     /// Converts a [`Box<str>`] into a [`String`] without copying or allocating.
     ///
     /// # Examples
diff --git a/library/alloctests/tests/lib.rs b/library/alloctests/tests/lib.rs
index 699a5010282b0..5067fc45eb29b 100644
--- a/library/alloctests/tests/lib.rs
+++ b/library/alloctests/tests/lib.rs
@@ -3,6 +3,7 @@
 #![feature(const_heap)]
 #![feature(deque_extend_front)]
 #![feature(iter_array_chunks)]
+#![feature(casefold)]
 #![feature(cow_is_borrowed)]
 #![feature(core_intrinsics)]
 #![feature(downcast_unchecked)]
diff --git a/library/alloctests/tests/str.rs b/library/alloctests/tests/str.rs
index c0bcdb8500af6..42fbdd0ea9a9a 100644
--- a/library/alloctests/tests/str.rs
+++ b/library/alloctests/tests/str.rs
@@ -1867,7 +1867,13 @@ fn to_lowercase() {
 #[test]
 fn to_uppercase() {
     assert_eq!("".to_uppercase(), "");
-    assert_eq!("aéǅßﬁᾀ".to_uppercase(), "AÉǄSSFIἈΙ");
+    assert_eq!("aéǅßẞﬁᾀ".to_uppercase(), "AÉǄSSẞFIἈΙ");
+}
+
+#[test]
+fn to_casefold() {
+    assert_eq!("".to_casefold(), "");
+    assert_eq!("ꮿﬁῲὼ\u{0345}ßẞΣς".to_casefold(), "Ꮿfiὼιὼιssssσσ");
 }
 
 #[test]

From 66b91bdf5070bd30c19566bd29007f10d866751f Mon Sep 17 00:00:00 2001
From: Jules Bertholet <julesbertholet@quoi.xyz>
Date: Thu, 2 Apr 2026 20:38:23 -0400
Subject: [PATCH 3/5] Add `str::eq_ignore_case()`

With an unoptimized, non-`const` implementation
for now.
---
 library/alloc/src/str.rs    |  4 +++-
 library/core/src/str/mod.rs | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs
index db9f96cdd7e70..7ac7495e7f701 100644
--- a/library/alloc/src/str.rs
+++ b/library/alloc/src/str.rs
@@ -575,7 +575,9 @@ impl str {
                   without modifying the original"]
     #[unstable(feature = "casefold", issue = "none")]
     pub fn to_casefold(&self) -> String {
-        let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_lowercase);
+        // SAFETY: `to_ascii_lowercase` preserves ASCII bytes, so the converted
+        // prefix remains valid UTF-8.
+        let (mut s, rest) = unsafe { convert_while_ascii(self, u8::to_ascii_lowercase) };
 
         for c in rest.chars() {
             match conversions::to_casefold(c) {
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 5af399ab1b34c..a8fc30a632642 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -2826,6 +2826,9 @@ impl str {
     /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
     /// but without allocating and copying temporaries.
     ///
+    /// For Unicode-aware case-insensitive matching, consider
+    /// [`str::eq_ignore_case`].
+    ///
     /// # Examples
     ///
     /// ```
@@ -2841,6 +2844,38 @@ impl str {
         self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
     }
 
+    /// Checks that two strings are a caseless match, according to
+    /// [Definition 144] in Chapter 3 of the Unicode Standard.
+    ///
+    /// [Definition 144]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G53513
+    ///
+    /// Same as `a.to_casefold() == b.to_casefold()`,
+    /// but without allocating. See that method's documentation,
+    /// and [`char::to_casefold()`],
+    /// for more information about case folding.
+    ///
+    /// No normalization (e.g. NFC) is performed,
+    /// so visually and semantically identical strings
+    /// might still compare unequal. In addition,
+    /// this method is independent of language/locale,
+    /// so the special behavior of  I/ı/İ/i
+    /// in Turkish and Azeri is not handled.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(casefold)]
+    /// assert!("Ferris".eq_ignore_case("FERRIS"));
+    /// assert!("Ferrös".eq_ignore_case("FERRÖS"));
+    /// assert!("ẞ".eq_ignore_case("ss"));
+    /// ```
+    #[unstable(feature = "casefold", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn eq_ignore_case(&self, other: &str) -> bool {
+        self.chars().flat_map(char::to_casefold).eq(other.chars().flat_map(char::to_casefold))
+    }
+
     /// Converts this string to its ASCII upper case equivalent in-place.
     ///
     /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',

From b14b43baad04900daa2e6815fe65c2a78100aeb1 Mon Sep 17 00:00:00 2001
From: Jules Bertholet <julesbertholet@quoi.xyz>
Date: Sat, 18 Apr 2026 00:29:21 -0400
Subject: [PATCH 4/5] Address review comments

---
 library/alloc/src/str.rs         |  9 ++---
 library/core/src/char/methods.rs | 61 ++++++++++++++++++++------------
 library/core/src/str/mod.rs      | 24 +++++++++++--
 3 files changed, 64 insertions(+), 30 deletions(-)

diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs
index 7ac7495e7f701..4946c7f034678 100644
--- a/library/alloc/src/str.rs
+++ b/library/alloc/src/str.rs
@@ -510,7 +510,8 @@ impl str {
     /// Since some characters can expand into multiple characters when case folding,
     /// this function returns a [`String`] instead of modifying the parameter in-place.
     ///
-    /// This function does not perform any normalization (e.g. NFC).
+    /// This function does not perform any [normalization] (e.g. NFC),
+    /// so semantically and visually identical strings may compare unequal.
     ///
     /// Like [`char::to_casefold()`] this method does not handle language-specific
     /// casing, like Turkish and Azeri I/ı/İ/i. See that method's documentation
@@ -551,7 +552,7 @@ impl str {
     /// assert_eq!(s0.to_casefold(), "tschüss");
     /// ```
     ///
-    /// No NFC normalization is performed:
+    /// No NFC [normalization] is performed:
     ///
     /// ```rust
     /// #![feature(casefold)]
@@ -560,15 +561,15 @@ impl str {
     /// let decomp = "Á";
     ///
     /// // ... but not codepoint-for-codepoint equal.
-    ///
     /// assert_eq!(comp, "\u{C1}");
     /// assert_eq!(decomp, "A\u{0301}");
     ///
     /// // Their case-foldings are likewise unequal:
-    ///
     /// assert_eq!(comp.to_casefold(), "\u{E1}");
     /// assert_eq!(decomp.to_casefold(), "a\u{0301}");
     /// ```
+    ///
+    /// [normalization]: https://www.unicode.org/faq/normalization
     #[cfg(not(no_global_oom_handling))]
     #[rustc_allow_incoherent_impl]
     #[must_use = "this returns the case-folded string as a new String, \
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index 2fb70214a9c34..a87d11d7fa1a6 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -1075,16 +1075,17 @@ impl char {
     }
 
     /// Returns `true` if this `char` has the `Case_Ignorable` property. This narrow-use property
-    /// is used to implement context-dependent casing for the Greek letter sigma (uppercase Σ),
+    /// is used to implement context-dependent casing for the Greek letter sigma (uppercase 'Σ'),
     /// which has two lowercase forms.
     ///
     /// `Case_Ignorable` is [described][D136] in Chapter 3 (Conformance) of the Unicode Core Specification,
-    /// and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`];
-    /// see those resources for more information.
+    /// and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
+    /// See those resources, as well as [`to_lowercase()`]'s documentation, for more information.
     ///
     /// [D136]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
     /// [ucd]: https://www.unicode.org/reports/tr44/
     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
+    /// [`to_lowercase()`]: Self::to_lowercase()
     #[must_use]
     #[inline]
     #[unstable(feature = "case_ignorable", issue = "154848")]
@@ -1154,8 +1155,6 @@ impl char {
     /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
     /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
     ///
-    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
-    ///
     /// This operation performs an unconditional mapping without tailoring. That is, the conversion
     /// is independent of context and language. See [below](#notes-on-context-and-locale)
     /// for more information.
@@ -1210,14 +1209,25 @@ impl char {
     ///
     /// ## Greek sigma
     ///
-    /// In Greek, the letter simga (uppercase Σ) has two lowercase forms:
-    /// ς which is used only at the end of a word, and σ which is used everywhere else.
-    /// `to_lowercase()` always uses the second form:
+    /// In Greek, the letter sigma (uppercase 'Σ') has two lowercase forms:
+    /// 'σ' which is used in most situations, and 'ς' which appears only
+    /// at the end of a word. [`char::to_lowercase()`] always uses the first form:
     ///
     /// ```
     /// assert_eq!('Σ'.to_lowercase().to_string(), "σ");
     /// ```
     ///
+    /// `str::to_lowercase()` (only available with the `alloc` crate)
+    /// *does* properly handle this contextual mapping,
+    /// so prefer using that method if you can. Alternatively, you can use
+    /// [`is_cased()`] and [`is_case_ignorable()`] to implement it yourself.
+    /// See `Final_Sigma` in [Table 3.17] of the Unicode Standard,
+    /// along with [`SpecialCasing.txt`], for more details.
+    ///
+    /// [`is_cased()`]: Self::is_cased()
+    /// [`is_case_ignorable()`]: Self::is_case_ignorable()
+    /// [Table 3.17]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G54277
+    ///
     /// ## Turkish and Azeri I/ı/İ/i
     ///
     /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
@@ -1225,13 +1235,13 @@ impl char {
     /// * 'Dotless': I / ı, sometimes written ï
     /// * 'Dotted': İ / i
     ///
-    /// Note that the uppercase undotted 'I' is the same as the Latin. Therefore:
+    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
     ///
     /// ```
     /// let lower_i = 'I'.to_lowercase().to_string();
     /// ```
     ///
-    /// The value of `lower_i` here relies on the language of the text: if we're
+    /// `'I'`'s correct lowercase relies on the language of the text: if we're
     /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
     /// be `"ı"`. `to_lowercase()` does not take this into account, and so:
     ///
@@ -1242,6 +1252,8 @@ impl char {
     /// ```
     ///
     /// holds across languages.
+    ///
+    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
     #[must_use = "this returns the lowercased character as a new iterator, \
                   without modifying the original"]
     #[stable(feature = "rust1", since = "1.0.0")]
@@ -1392,22 +1404,22 @@ impl char {
     /// As stated above, this method is locale-insensitive.
     /// If you need locale support, consider using an external crate,
     /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
-    /// which is developed by Unicode. A description of a common
-    /// locale-dependent casing issue follows:
+    /// which is developed by Unicode. A description of one common
+    /// locale-dependent casing issue follows (there are others):
     ///
     /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
     ///
     /// * 'Dotless': I / ı, sometimes written ï
     /// * 'Dotted': İ / i
     ///
-    /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
+    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
     ///
     /// ```
     /// #![feature(titlecase)]
     /// let upper_i = 'i'.to_titlecase().to_string();
     /// ```
     ///
-    /// The value of `upper_i` here relies on the language of the text: if we're
+    /// `'i'`'s correct titlecase relies on the language of the text: if we're
     /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
     /// be `"İ"`. `to_titlecase()` does not take this into account, and so:
     ///
@@ -1504,21 +1516,21 @@ impl char {
     /// As stated above, this method is locale-insensitive.
     /// If you need locale support, consider using an external crate,
     /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
-    /// which is developed by Unicode. A description of a common
-    /// locale-dependent casing issue follows:
+    /// which is developed by Unicode. A description of one common
+    /// locale-dependent casing issue follows (there are others):
     ///
     /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
     ///
     /// * 'Dotless': I / ı, sometimes written ï
     /// * 'Dotted': İ / i
     ///
-    /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
+    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
     ///
     /// ```
     /// let upper_i = 'i'.to_uppercase().to_string();
     /// ```
     ///
-    /// The value of `upper_i` here relies on the language of the text: if we're
+    /// `'i'`'s correct uppercase relies on the language of the text: if we're
     /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
     /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
     ///
@@ -1542,12 +1554,13 @@ impl char {
     /// Returns an iterator that yields the case folding of this `char` as one or more
     /// `char`s.
     ///
-    /// Case folding is meant to be used when performing case-insensitive string comparisons,
-    /// but case-folded strings should not generally be exposed directly to users. For most,
+    /// Case folding is meant to be used when performing case-insensitive string comparisons.
+    /// Case-folded strings should not usually be exposed directly to users. For most,
     /// but not all, characters, the casefold mapping is identical to the lowercase one.
     ///
     /// This iterator yields the `char`(s) in the common or full case folding for this `char`,
     /// as given by the [Unicode Character Database][ucd] [`CaseFolding.txt`].
+    /// The maximum number of `char`s in a case folding is 3.
     ///
     /// [ucd]: https://www.unicode.org/reports/tr44/
     /// [`CaseFolding.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
@@ -1555,7 +1568,9 @@ impl char {
     /// This operation performs an unconditional mapping without tailoring. That is, the conversion
     /// is independent of context and language.
     ///
-    /// It also does not perform any normalization (e.g. NFC).
+    /// It also does not perform any [normalization] (e.g. NFC).
+    ///
+    /// [normalization]: https://www.unicode.org/faq/normalization
     ///
     /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case folding in
     /// general and Chapter 3 (Conformance) discusses the default algorithm for case folding.
@@ -1591,14 +1606,14 @@ impl char {
     /// * 'Dotless': I / ı, sometimes written ï
     /// * 'Dotted': İ / i
     ///
-    /// Note that the uppercase undotted 'I' is the same as the Latin. Therefore:
+    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
     ///
     /// ```
     /// #![feature(casefold)]
     /// let casefold_i = 'I'.to_casefold().to_string();
     /// ```
     ///
-    /// The value of `casefold_i` here relies on the language of the text: if we're
+    /// `'I'`'s correct case folding relies on the language of the text: if we're
     /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
     /// be `"ı"`. `to_casefold()` does not take this into account, and so:
     ///
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index a8fc30a632642..9ff0fd2bd3f31 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -2851,14 +2851,14 @@ impl str {
     ///
     /// Same as `a.to_casefold() == b.to_casefold()`,
     /// but without allocating. See that method's documentation,
-    /// and [`char::to_casefold()`],
+    /// as well as [`char::to_casefold()`],
     /// for more information about case folding.
     ///
-    /// No normalization (e.g. NFC) is performed,
+    /// No [normalization] (e.g. NFC) is performed,
     /// so visually and semantically identical strings
     /// might still compare unequal. In addition,
     /// this method is independent of language/locale,
-    /// so the special behavior of  I/ı/İ/i
+    /// so the special behavior of I/ı/İ/i
     /// in Turkish and Azeri is not handled.
     ///
     /// # Examples
@@ -2869,6 +2869,24 @@ impl str {
     /// assert!("Ferrös".eq_ignore_case("FERRÖS"));
     /// assert!("ẞ".eq_ignore_case("ss"));
     /// ```
+    ///
+    /// No NFC [normalization] is performed:
+    ///
+    /// ```rust
+    /// #![feature(casefold)]
+    /// // These two strings are visually and semantically identical...
+    /// let comp = "Á";
+    /// let decomp = "Á";
+    ///
+    /// // ... but not codepoint-for-codepoint equal.
+    /// assert_eq!(comp, "\u{C1}");
+    /// assert_eq!(decomp, "A\u{0301}");
+    ///
+    /// // Their case-foldings are likewise unequal:
+    /// assert_eq!(!comp.eq_ignore_case(decomp));
+    /// ```
+    ///
+    /// [normalization]: https://www.unicode.org/faq/normalization
     #[unstable(feature = "casefold", issue = "none")]
     #[must_use]
     #[inline]

From d82bc3946f6875e3398a568411ad22ab679ca73a Mon Sep 17 00:00:00 2001
From: Jules Bertholet <julesbertholet@quoi.xyz>
Date: Sat, 18 Apr 2026 07:07:56 -0400
Subject: [PATCH 5/5] Fix doctest

---
 library/core/src/str/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 9ff0fd2bd3f31..8b0bdd230a540 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -2883,7 +2883,7 @@ impl str {
     /// assert_eq!(decomp, "A\u{0301}");
     ///
     /// // Their case-foldings are likewise unequal:
-    /// assert_eq!(!comp.eq_ignore_case(decomp));
+    /// assert!(!comp.eq_ignore_case(decomp));
     /// ```
     ///
     /// [normalization]: https://www.unicode.org/faq/normalization