Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 40 additions & 25 deletions url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3122,6 +3122,45 @@ fn file_url_segments_to_pathbuf(
file_url_segments_to_pathbuf_windows(estimated_capacity, host, segments)
}

// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
//
// Converts the first path segment of a `file:` URL into a Windows drive prefix,
// optionally followed by an absolute path.
//
// The segment is percent-decoded before it is inspected, so separators that were
// escaped inside a single segment (e.g. "C:%5Cfoo", as produced by
// `path_segments_mut()`) are recovered. Accepted decoded forms:
//   * "C:"                  -> returned unchanged
//   * "C:\..." or "C:/..."  -> returned with every '/' rewritten to '\'
// Everything else yields `Err(())`: bytes that are not valid UTF-8 after decoding,
// a missing ASCII drive letter or ':', or a drive-relative path such as "C:Users".
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn decode_windows_drive_path_segment(segment: &str) -> Result<String, ()> {
    use percent_encoding::percent_decode_str;

    let decoded = percent_decode_str(segment).decode_utf8().map_err(|_| ())?;

    // The decoded text has to open with "<ASCII letter>:".
    let has_drive_prefix = match decoded.as_bytes() {
        [letter, b':', ..] => parser::ascii_alpha(char::from(*letter)),
        _ => false,
    };
    if !has_drive_prefix {
        return Err(());
    }

    // Whatever follows the colon decides the outcome.
    let after_colon = decoded.as_bytes().get(2).copied();
    match after_colon {
        // Bare drive such as "C:" needs no rewriting.
        None => Ok(decoded.into_owned()),
        // Absolute path: emit native separators. The two-character drive prefix
        // contains no '/', so replacing across the whole string is safe.
        Some(b'\\') | Some(b'/') => Ok(decoded.replace('/', "\\")),
        // Drive-relative paths ("C:Users") are rejected.
        Some(_) => Err(()),
    }
}

// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
Expand All @@ -3138,31 +3177,7 @@ fn file_url_segments_to_pathbuf_windows(
string.push_str(host);
} else {
let first = segments.next().ok_or(())?;

match first.len() {
2 => {
if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
return Err(());
}

string.push_str(first);
}

4 => {
if !first.starts_with(parser::ascii_alpha) {
return Err(());
}
let bytes = first.as_bytes();
if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
return Err(());
}

string.push_str(&first[0..1]);
string.push(':');
}

_ => return Err(()),
}
string.push_str(&decode_windows_drive_path_segment(first)?);
};

for segment in segments {
Expand Down
40 changes: 40 additions & 0 deletions url/tests/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,46 @@ fn new_path_windows_fun() {
assert_eq!(url.to_file_path(), Ok(PathBuf::from(r"C:\foo\bar")));
}

#[test]
#[cfg(all(feature = "std", windows))]
fn windows_to_file_path_accepts_encoded_separators_in_first_segment() {
    // An escaped backslash (%5C) and an escaped forward slash (%2F) inside the
    // first segment must both come back as native Windows separators.
    let expected = PathBuf::from(r"C:\foo\bar");
    let inputs = ["file:///C:%5Cfoo%5Cbar", "file:///C:%2Ffoo%2Fbar"];

    for &input in inputs.iter() {
        let parsed = Url::parse(input).unwrap();
        assert_eq!(parsed.to_file_path(), Ok(expected.clone()));
    }
}

#[test]
#[cfg(all(feature = "std", windows))]
fn windows_to_file_path_accepts_drive_path_from_path_segments_mut() {
    let drive_path = r"C:\foo\bar";
    let mut url = Url::parse("file://").unwrap();

    // Push the whole Windows path as one segment; the segments handle is scoped
    // so it is released before the URL is inspected.
    {
        let mut segments = url.path_segments_mut().unwrap();
        segments.pop_if_empty().push(drive_path);
    }

    // The backslashes are percent-encoded inside the single segment...
    assert_eq!(url.as_str(), "file:///C:%5Cfoo%5Cbar");
    // ...and converting back to a file path restores the original.
    assert_eq!(url.to_file_path(), Ok(PathBuf::from(drive_path)));
}

/// https://github.com/servo/rust-url/issues/1077
#[test]
#[cfg(all(feature = "std", windows))]
fn issue_1077_path_segments_mut_extend_roundtrips_windows_path() {
    // Mirrors the object_store usage from the issue: the complete Windows path is
    // handed to `extend` as one item, so it lands in a single URL segment.
    // `to_file_path()` has to give the original path back.
    let expected = Path::new(r"C:\Users\me\data\file.parquet");
    let raw = expected.to_str().unwrap();

    let mut url = Url::parse("file://").unwrap();
    {
        let mut segments = url.path_segments_mut().unwrap();
        segments.pop_if_empty().extend(Some(raw));
    }

    assert_eq!(url.to_file_path(), Ok(expected.to_path_buf()));
}

#[test]
#[cfg(all(
feature = "std",
Expand Down