Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ jobs:
# Run tests
- name: Run tests
run: cargo test
- name: Run percent-encoding IRI (iri) tests
run: cargo test -p percent-encoding --features iri
# Run tests enabling the serde feature
- name: Run tests with the serde feature
run: cargo test --features "url/serde,url/expose_internals"
Expand Down
3 changes: 3 additions & 0 deletions percent_encoding/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ rust-version = "1.51"
default = ["std"]
std = ["alloc"]
alloc = []
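# IRI-style encoding: percent-encode only the ASCII bytes listed in an `AsciiSet` and emit
# non-ASCII (UTF-8) bytes literally instead of escaping them.
# Used by Anki for Unicode paths in file:/media references; see `AsciiSet::should_percent_encode`.
# NOTE: this feature changes existing behavior rather than adding to it. Cargo unifies features
# across the dependency graph, so enabling it affects every crate in the build that uses
# `percent-encoding`.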
iri = []

[package.metadata.docs.rs]
rustdoc-args = ["--generate-link-to-definition"]
34 changes: 33 additions & 1 deletion percent_encoding/src/ascii_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,14 @@ impl AsciiSet {
}

pub(crate) fn should_percent_encode(&self, byte: u8) -> bool {
!byte.is_ascii() || self.contains(byte)
#[cfg(feature = "iri")]
{
byte.is_ascii() && self.contains(byte)
}
#[cfg(not(feature = "iri"))]
{
!byte.is_ascii() || self.contains(byte)
}
}

pub const fn add(&self, byte: u8) -> Self {
Expand Down Expand Up @@ -211,3 +218,28 @@ mod tests {
assert!(COMPLEMENT.contains(b'C'));
}
}

/// Tests for `AsciiSet::should_percent_encode` that only apply when the `iri` feature is on.
#[cfg(all(test, feature = "iri"))]
mod iri_tests {
    use super::*;

    /// Walks every byte of a file name that mixes multi-byte UTF-8 with ASCII not listed in
    /// the set and checks that none of them is flagged for encoding.
    #[test]
    fn should_percent_encode_leaves_non_ascii_utf8_unencoded() {
        let reserved = AsciiSet::EMPTY.add(b'/').add(b'%');
        let file_name = "日本語.mp3";
        for byte in file_name.bytes() {
            let flagged = reserved.should_percent_encode(byte);
            assert!(
                !flagged,
                "byte {:#x} should not be percent-encoded",
                byte
            );
        }
    }

    /// ASCII members of the set are still flagged; an ASCII byte outside the set is not.
    #[test]
    fn should_percent_encode_still_encodes_ascii_in_set() {
        let set = AsciiSet::EMPTY.add(b' ').add(b'?');
        assert!(set.should_percent_encode(b' '));
        assert!(set.should_percent_encode(b'?'));
        assert!(!set.should_percent_encode(b'a'));
    }
}
13 changes: 12 additions & 1 deletion percent_encoding/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ pub fn percent_encode_byte(byte: u8) -> &'static str {

/// Percent-encode the given bytes with the given set.
///
/// Bytes in `ascii_set` are always encoded. Non-ASCII bytes are encoded as well, unless the
/// crate feature `iri` is enabled, in which case they pass through unescaped (IRI-style).
///
/// The return type:
///
Expand Down Expand Up @@ -418,6 +419,16 @@ mod tests {
);
}

/// With the `iri` feature enabled, checks that `utf8_percent_encode` returns a file name
/// containing non-ASCII UTF-8 unchanged when encoding with the `PATHISH` set.
#[cfg(feature = "iri")]
#[test]
fn utf8_percent_encode_unicode_filename_unchanged_for_path_set() {
    const PATHISH: &AsciiSet = &CONTROLS.add(b'#').add(b'?').add(b'{').add(b'}');
    let file_name = "日本語.mp3";
    let encoded: String = super::utf8_percent_encode(file_name, PATHISH).collect();
    assert_eq!(encoded, file_name);
}

#[test]
fn percent_decode() {
assert_eq!(
Expand Down