servo · fernandolins · Apr 13, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -67,6 +67,8 @@ jobs:
         # Run tests
       - name: Run tests
         run: cargo test
+      - name: Run percent-encoding tests with the iri feature
+        run: cargo test -p percent-encoding --features iri
         # Run tests enabling the serde feature
       - name: Run tests with the serde feature
         run: cargo test --features "url/serde,url/expose_internals"

diff --git a/percent_encoding/Cargo.toml b/percent_encoding/Cargo.toml
@@ -13,6 +13,9 @@ rust-version = "1.51"
 default = ["std"]
 std = ["alloc"]
 alloc = []
+# Encode only ASCII bytes that are in the `AsciiSet`; leave every byte >= 0x80 literal (IRI-style).
+# Used by Anki for Unicode paths in file:/media references; see `AsciiSet::should_percent_encode`.
+iri = []
 
 [package.metadata.docs.rs]
 rustdoc-args = ["--generate-link-to-definition"]
diff --git a/percent_encoding/src/ascii_set.rs b/percent_encoding/src/ascii_set.rs
@@ -50,7 +50,14 @@ impl AsciiSet {
     }
 
     pub(crate) fn should_percent_encode(&self, byte: u8) -> bool {
-        !byte.is_ascii() || self.contains(byte)
+        #[cfg(feature = "iri")] // IRI mode: only ASCII bytes that are members of the set are encoded.
+        {
+            byte.is_ascii() && self.contains(byte) // any byte >= 0x80 short-circuits to `false`
+        }
+        #[cfg(not(feature = "iri"))] // Default: every non-ASCII byte is encoded, plus ASCII bytes in the set.
+        {
+            !byte.is_ascii() || self.contains(byte)
+        }
     }
 
     pub const fn add(&self, byte: u8) -> Self {
@@ -211,3 +218,28 @@ mod tests {
         assert!(COMPLEMENT.contains(b'C'));
     }
 }
+
+#[cfg(all(test, feature = "iri"))] // compiled only for test builds with the `iri` feature enabled
+mod iri_tests {
+    use super::*;
+
+    #[test]
+    fn should_percent_encode_leaves_non_ascii_utf8_unencoded() {
+        let set = AsciiSet::EMPTY.add(b'/').add(b'%'); // neither byte occurs in the input below
+        for &byte in "日本語.mp3".as_bytes() { // non-ASCII UTF-8 bytes followed by ASCII bytes
+            assert!(
+                !set.should_percent_encode(byte),
+                "byte {:#x} should not be percent-encoded",
+                byte
+            );
+        }
+    }
+
+    #[test]
+    fn should_percent_encode_still_encodes_ascii_in_set() {
+        let set = AsciiSet::EMPTY.add(b' ').add(b'?');
+        assert!(set.should_percent_encode(b' ')); // ASCII and in the set: encoded
+        assert!(set.should_percent_encode(b'?'));
+        assert!(!set.should_percent_encode(b'a')); // ASCII but not in the set: left alone
+    }
+}
diff --git a/percent_encoding/src/lib.rs b/percent_encoding/src/lib.rs
@@ -98,7 +98,8 @@ pub fn percent_encode_byte(byte: u8) -> &'static str {
 
 /// Percent-encode the given bytes with the given set.
 ///
-/// Non-ASCII bytes and bytes in `ascii_set` are encoded.
+/// Bytes in `ascii_set` are encoded. Non-ASCII bytes are also encoded unless the crate
+/// feature `iri` is enabled (IRI-style: every byte >= 0x80 passes through unencoded).
 ///
 /// The return type:
 ///
@@ -418,6 +419,16 @@ mod tests {
         );
     }
 
+    #[cfg(feature = "iri")] // the expected output below only holds when non-ASCII bytes pass through
+    #[test]
+    fn utf8_percent_encode_unicode_filename_unchanged_for_path_set() {
+        const PATHISH: &AsciiSet = &CONTROLS.add(b'#').add(b'?').add(b'{').add(b'}'); // added bytes do not occur in the input
+        assert_eq!(
+            super::utf8_percent_encode("日本語.mp3", PATHISH).collect::<String>(),
+            "日本語.mp3" // output is expected to equal the input exactly
+        );
+    }
+
     #[test]
     fn percent_decode() {
         assert_eq!(