// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Decodes the first path segment of a Windows `file:` URL into a drive-rooted
/// path prefix.
///
/// `path_segments_mut()` percent-encodes separators that appear inside a single
/// segment, so an entire Windows path can end up in the first segment
/// (e.g. `C:%5CUsers%5Cme`). This helper percent-decodes that segment and accepts
/// it only when the decoded text is either a bare drive (`C:`) or a drive followed
/// by an absolute path (`C:\...` or `C:/...`).
///
/// On success the decoded text is returned with every `/` replaced by `\`.
///
/// # Errors
///
/// Returns `Err(())` when the percent-decoded bytes are not valid UTF-8, when the
/// text does not start with an ASCII letter followed by `:`, or when the drive is
/// followed by anything other than a path separator (a drive-relative path such
/// as `C:Users`).
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn decode_windows_drive_path_segment(segment: &str) -> Result<String, ()> {
    use percent_encoding::percent_decode_str;

    // e.g. "C%3A%5CUsers" becomes "C:\Users".
    let decoded = percent_decode_str(segment).decode_utf8().map_err(|_| ())?;

    // Inspect the prefix through a slice pattern so no index can go out of bounds.
    // Only `Copy` data leaves the match, so `decoded` can be consumed afterwards.
    let (has_drive, after_drive) = match decoded.as_bytes() {
        [letter, b':', rest @ ..] => (letter.is_ascii_alphabetic(), rest.first().copied()),
        _ => (false, None),
    };
    if !has_drive {
        return Err(());
    }

    match after_drive {
        // A bare drive such as "C:" is valid as-is.
        None => Ok(decoded.into_owned()),
        // The drive prefix is ASCII and contains no '/', so replacing over the whole
        // string only touches the path part.
        Some(b'\\') | Some(b'/') => Ok(decoded.replace('/', "\\")),
        // Drive-relative paths like "C:Users" have no leading separator; reject them.
        Some(_) => Err(()),
    }
}
/// Regression test for https://github.com/servo/rust-url/issues/1077
#[test]
#[cfg(all(feature = "std", windows))]
fn issue_1077_path_segments_mut_extend_roundtrips_windows_path() {
    // Scenario from the issue: a caller (object_store) hands a whole Windows path
    // to `extend` as one segment, so the drive letter and the rest of the path are
    // stored percent-encoded in the URL's first segment. `to_file_path()` has to
    // give back the path that went in.
    let windows_path = Path::new(r"C:\Users\me\data\file.parquet");
    let single_segment = windows_path.to_str().unwrap();

    let mut file_url = Url::parse("file://").unwrap();
    {
        // Keep the segment guard in its own scope so its mutable borrow of the URL
        // ends before the URL is read back.
        let mut segments = file_url.path_segments_mut().unwrap();
        segments.pop_if_empty();
        segments.extend(std::iter::once(single_segment));
    }

    let round_tripped = file_url.to_file_path();
    assert_eq!(round_tripped, Ok(windows_path.to_path_buf()));
}