diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml
new file mode 100644
index 000000000..e9623e7cf
--- /dev/null
+++ b/.github/workflows/cifuzz.yml
@@ -0,0 +1,32 @@
+name: CIFuzz
+on:
+  pull_request:
+    branches:
+      - main
+
+permissions: {}
+
+jobs:
+  Fuzzing:
+    runs-on: ubuntu-latest
+    permissions:
+      security-events: write
+    steps:
+      - name: Build Fuzzers
+        id: build
+        uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+        with:
+          oss-fuzz-project-name: "rust-url"
+          language: rust
+      - name: Run Fuzzers
+        uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+        with:
+          oss-fuzz-project-name: "rust-url"
+          language: rust
+          fuzz-seconds: 600
+      - name: Upload Crash
+        uses: actions/upload-artifact@v4
+        if: failure() && steps.build.outcome == 'success'
+        with:
+          name: artifacts
+          path: ./out/artifacts
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
new file mode 100644
index 000000000..382e30ace
--- /dev/null
+++ b/fuzz/Cargo.toml
@@ -0,0 +1,57 @@
+[package]
+name = "rust-url-fuzz"
+version = "0.0.1"
+authors = ["Automatically generated"]
+publish = false
+edition = "2021"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+url = { path = "../url" }
+idna = { path = "../idna", features = ["std"] }
+percent-encoding = { path = "../percent_encoding", features = ["alloc"] }
+form_urlencoded = { path = "../form_urlencoded", features = ["alloc"] }
+data-url = { path = "../data-url", features = ["std"] }
+
+# --- Fuzz targets ---
+
+[[bin]]
+name = "fuzz_url_parse_roundtrip"
+path = "fuzz_targets/fuzz_url_parse_roundtrip.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_url_differential"
+path = "fuzz_targets/fuzz_url_differential.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_url_setters"
+path = "fuzz_targets/fuzz_url_setters.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_idna"
+path = "fuzz_targets/fuzz_idna.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_data_url"
+path = "fuzz_targets/fuzz_data_url.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_form_urlencoded"
+path = "fuzz_targets/fuzz_form_urlencoded.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_percent_encoding"
+path = "fuzz_targets/fuzz_percent_encoding.rs"
+doc = false
+
+[workspace]
+members = ["."]
diff --git a/fuzz/corpus/seed/idna_01 b/fuzz/corpus/seed/idna_01
new file mode 100644
index 000000000..06c159d73
--- /dev/null
+++ b/fuzz/corpus/seed/idna_01
@@ -0,0 +1 @@
+münchen.de
\ No newline at end of file
diff --git a/fuzz/corpus/seed/idna_02 b/fuzz/corpus/seed/idna_02
new file mode 100644
index 000000000..99b3b7437
--- /dev/null
+++ b/fuzz/corpus/seed/idna_02
@@ -0,0 +1 @@
+xn--mnchen-3ya.de
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_01 b/fuzz/corpus/seed/url_01
new file mode 100644
index 000000000..bf54804e9
--- /dev/null
+++ b/fuzz/corpus/seed/url_01
@@ -0,0 +1 @@
+https://example.com/path?query=value#fragment
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_02 b/fuzz/corpus/seed/url_02
new file mode 100644
index 000000000..dfd944647
--- /dev/null
+++ b/fuzz/corpus/seed/url_02
@@ -0,0 +1 @@
+http://user:password@host.example.com:8080/path/to/resource?key=val&key2=val2#frag
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_03 b/fuzz/corpus/seed/url_03
new file mode 100644
index 000000000..e36d2e67a
--- /dev/null
+++ b/fuzz/corpus/seed/url_03
@@ -0,0 +1 @@
+ftp://ftp.example.com/pub/files/readme.txt
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_04 b/fuzz/corpus/seed/url_04
new file mode 100644
index 000000000..2609dbc7d
--- /dev/null
+++ b/fuzz/corpus/seed/url_04
@@ -0,0 +1 @@
+file:///tmp/local/file.txt
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_05 b/fuzz/corpus/seed/url_05
new file mode 100644
index 000000000..504138580
--- /dev/null
+++ b/fuzz/corpus/seed/url_05
@@ -0,0 +1 @@
+https://[::1]:443/ipv6
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_06 b/fuzz/corpus/seed/url_06
new file mode 100644
index 000000000..1927b5602
--- /dev/null
+++ b/fuzz/corpus/seed/url_06
@@ -0,0 +1 @@
+https://xn--nxasmq6b.example.com/idn
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_07 b/fuzz/corpus/seed/url_07
new file mode 100644
index 000000000..a763ffe79
--- /dev/null
+++ b/fuzz/corpus/seed/url_07
@@ -0,0 +1 @@
+data:text/plain;base64,SGVsbG8gV29ybGQh
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_08 b/fuzz/corpus/seed/url_08
new file mode 100644
index 000000000..59ca05f80
--- /dev/null
+++ b/fuzz/corpus/seed/url_08
@@ -0,0 +1 @@
+data:text/html,%3Ch1%3EHello%3C%2Fh1%3E
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_09 b/fuzz/corpus/seed/url_09
new file mode 100644
index 000000000..c0644cc06
--- /dev/null
+++ b/fuzz/corpus/seed/url_09
@@ -0,0 +1 @@
+https://example.com/path%20with%20spaces?q=%E4%B8%AD%E6%96%87
\ No newline at end of file
diff --git a/fuzz/corpus/seed/url_10 b/fuzz/corpus/seed/url_10
new file mode 100644
index 000000000..ac7a50b37
--- /dev/null
+++ b/fuzz/corpus/seed/url_10
@@ -0,0 +1 @@
+https://example.com/?foo=bar&baz=qux&empty=&key+with+plus=value+with+plus
\ No newline at end of file
diff --git a/fuzz/fuzz.dict b/fuzz/fuzz.dict
new file mode 100644
index 000000000..023cd90ea
--- /dev/null
+++ b/fuzz/fuzz.dict
@@ -0,0 +1,81 @@
+# URL schemes
+"http://"
+"https://"
+"ftp://"
+"file://"
+"data:"
+"blob:"
+"ws://"
+"wss://"
+"custom://"
+
+# URL delimiters
+"://"
+":/"
+"//"
+"/"
+"?"
+"#"
+"@"
+":"
+";"
+
+# Common URL components
+"example.com"
+"localhost"
+"127.0.0.1"
+"[::1]"
+"[2001:db8::1]"
+"0.0.0.0"
+
+# Percent encoding
+"%00"
+"%20"
+"%25"
+"%2F"
+"%3A"
+"%3F"
+"%40"
+"%23"
+"%26"
+"%3D"
+"%C3%A9"
+"%E4%B8%AD"
+
+# Form URL encoded
+"&"
+"="
+"+"
+"key=value"
+"a=b&c=d"
+
+# IDNA / Punycode
+"xn--"
+"xn--nxasmq6b"
+".com"
+".de"
+".org"
+
+# Data URL
+"data:,"
+"data:text/plain,"
+"data:text/plain;base64,"
+"data:text/html,"
+"data:application/octet-stream;base64,"
+";base64"
+";charset=utf-8"
+";charset=US-ASCII"
+
+# Base64
+"SGVsbG8="
+"AAAA"
+"////+"
+
+# Special characters
+"\x09"
+"\x0a"
+"\x0d"
+" "
+"\x5c"
+".."
+"."
diff --git a/fuzz/fuzz_targets/fuzz_data_url.rs b/fuzz/fuzz_targets/fuzz_data_url.rs
new file mode 100644
index 000000000..774737afe
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_data_url.rs
@@ -0,0 +1,48 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use std::str;
+
+fuzz_target!(|data: &[u8]| {
+    let Ok(utf8) = str::from_utf8(data) else {
+        return;
+    };
+
+    let Ok(data_url) = data_url::DataUrl::process(utf8) else {
+        return;
+    };
+
+    // Access MIME type (should not panic)
+    let mime = data_url.mime_type();
+    let _ = mime.type_.len();
+    let _ = mime.subtype.len();
+    for (name, value) in &mime.parameters {
+        let _ = name.len();
+        let _ = value.len();
+    }
+
+    // Decode body (should not panic)
+    match data_url.decode_to_vec() {
+        Ok((body, fragment)) => {
+            // Body must be valid bytes
+            let _ = body.len();
+            if let Some(frag) = fragment {
+                // Fragment percent-encoding should produce valid UTF-8
+                let _ = frag.to_percent_encoded();
+            }
+        }
+        Err(_) => {
+            // Base64 decode errors are expected for malformed input
+        }
+    }
+
+    // Test streaming decode
+    let mut chunks = Vec::new();
+    let _ = data_url.decode(|bytes| {
+        chunks.push(bytes.to_vec());
+        Ok::<(), std::convert::Infallible>(())
+    });
+
+    // Test forgiving_base64 directly
+    let _ = data_url::forgiving_base64::decode_to_vec(data);
+});
diff --git a/fuzz/fuzz_targets/fuzz_form_urlencoded.rs b/fuzz/fuzz_targets/fuzz_form_urlencoded.rs
new file mode 100644
index 000000000..673e4998f
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_form_urlencoded.rs
@@ -0,0 +1,35 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+
+fuzz_target!(|data: &[u8]| {
+    // Parse the input as form-urlencoded data
+    let pairs: Vec<(String, String)> = form_urlencoded::parse(data)
+        .into_owned()
+        .collect();
+
+    // Roundtrip invariant: serialize and re-parse should produce the same pairs
+    let mut serializer = form_urlencoded::Serializer::new(String::new());
+    for (name, value) in &pairs {
+        serializer.append_pair(name, value);
+    }
+    let serialized = serializer.finish();
+
+    let reparsed: Vec<(String, String)> = form_urlencoded::parse(serialized.as_bytes())
+        .into_owned()
+        .collect();
+
+    // The key insight: form_urlencoded uses lossy UTF-8 decoding,
+    // so we need to compare the parsed pairs (not raw bytes).
+    // After one roundtrip through parse->serialize->parse, the result should be stable.
+    assert_eq!(
+        pairs, reparsed,
+        "form_urlencoded roundtrip mismatch: serialized={:?}",
+        serialized
+    );
+
+    // Test byte_serialize roundtrip
+    let byte_serialized: String = form_urlencoded::byte_serialize(data).collect();
+    // byte_serialize output should be valid UTF-8 (it produces &str slices)
+    let _ = byte_serialized.len();
+});
diff --git a/fuzz/fuzz_targets/fuzz_idna.rs b/fuzz/fuzz_targets/fuzz_idna.rs
new file mode 100644
index 000000000..3e36b9328
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_idna.rs
@@ -0,0 +1,64 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use std::str;
+
+fuzz_target!(|data: &[u8]| {
+    // Test domain_to_ascii_cow (primary entry point, takes &[u8])
+    let _ = idna::domain_to_ascii_cow(data, idna::AsciiDenyList::URL);
+    let _ = idna::domain_to_ascii_cow(data, idna::AsciiDenyList::EMPTY);
+    let _ = idna::domain_to_ascii_cow(data, idna::AsciiDenyList::STD3);
+
+    let Ok(utf8) = str::from_utf8(data) else {
+        return;
+    };
+
+    // Test domain_to_ascii (takes &str)
+    let ascii_result = idna::domain_to_ascii(utf8);
+    let strict_result = idna::domain_to_ascii_strict(utf8);
+
+    // Roundtrip invariant: if we can convert to ASCII, converting to Unicode
+    // and back to ASCII should produce the same result
+    if let Ok(ref ascii) = ascii_result {
+        let (unicode, unicode_result) = idna::domain_to_unicode(ascii);
+        if unicode_result.is_ok() {
+            if let Ok(back_to_ascii) = idna::domain_to_ascii(&unicode) {
+                assert_eq!(
+                    ascii.to_lowercase(),
+                    back_to_ascii.to_lowercase(),
+                    "IDNA roundtrip mismatch: input={:?}, ascii={:?}, unicode={:?}, back={:?}",
+                    utf8,
+                    ascii,
+                    unicode,
+                    back_to_ascii
+                );
+            }
+        }
+    }
+
+    // Consistency: strict mode should be a subset of non-strict
+    if strict_result.is_ok() {
+        assert!(
+            ascii_result.is_ok(),
+            "strict succeeded but non-strict failed for {:?}",
+            utf8
+        );
+    }
+
+    // Test domain_to_unicode
+    let (unicode_str, _result) = idna::domain_to_unicode(utf8);
+
+    // The Unicode result should itself be valid UTF-8 (it's a String)
+    let _ = unicode_str.len();
+
+    // Test Punycode encode/decode roundtrip
+    if let Some(encoded) = idna::punycode::encode_str(utf8) {
+        if let Some(decoded) = idna::punycode::decode_to_string(&encoded) {
+            assert_eq!(
+                utf8, decoded,
+                "Punycode roundtrip mismatch: input={:?}, encoded={:?}, decoded={:?}",
+                utf8, encoded, decoded
+            );
+        }
+    }
+});
diff --git a/fuzz/fuzz_targets/fuzz_percent_encoding.rs b/fuzz/fuzz_targets/fuzz_percent_encoding.rs
new file mode 100644
index 000000000..7b178a661
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_percent_encoding.rs
@@ -0,0 +1,82 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use percent_encoding::{
+    percent_decode, percent_decode_str, percent_encode, utf8_percent_encode, AsciiSet, CONTROLS,
+    NON_ALPHANUMERIC,
+};
+use std::borrow::Cow;
+use std::str;
+
+/// https://url.spec.whatwg.org/#fragment-percent-encode-set
+const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
+
+/// https://url.spec.whatwg.org/#path-percent-encode-set
+const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
+
+/// https://url.spec.whatwg.org/#userinfo-percent-encode-set
+const USERINFO: &AsciiSet = &PATH
+    .add(b'/')
+    .add(b':')
+    .add(b';')
+    .add(b'=')
+    .add(b'@')
+    .add(b'[')
+    .add(b'\\')
+    .add(b']')
+    .add(b'^')
+    .add(b'|');
+
+fuzz_target!(|data: &[u8]| {
+    if data.is_empty() {
+        return;
+    }
+
+    // Use NON_ALPHANUMERIC for roundtrip tests since it includes '%',
+    // ensuring encode→decode is a true roundtrip. Sets that don't encode '%'
+    // will cause percent_decode to interpret literal %XX in the input.
+    let ascii_sets: [&AsciiSet; 4] = [&CONTROLS, NON_ALPHANUMERIC, FRAGMENT, USERINFO];
+    let set_idx = data[0] as usize % ascii_sets.len();
+    let ascii_set = ascii_sets[set_idx];
+    let input = &data[1..];
+
+    // Test percent_encode -> percent_decode roundtrip with NON_ALPHANUMERIC
+    // (which encodes '%', guaranteeing a clean roundtrip)
+    let safe_encoded: Cow<str> = percent_encode(input, NON_ALPHANUMERIC).into();
+    let safe_decoded: Cow<[u8]> = percent_decode(safe_encoded.as_bytes()).into();
+    assert_eq!(
+        &*safe_decoded, input,
+        "percent_encode/decode roundtrip mismatch with NON_ALPHANUMERIC"
+    );
+
+    // Test that encoding with the selected set produces valid output
+    let encoded: Cow<str> = percent_encode(input, ascii_set).into();
+    let _ = encoded.len();
+
+    // Test UTF-8 path: if input is valid UTF-8, utf8_percent_encode should work too
+    if let Ok(utf8_input) = str::from_utf8(input) {
+        let utf8_encoded = utf8_percent_encode(utf8_input, NON_ALPHANUMERIC).to_string();
+        let utf8_decoded = percent_decode_str(&utf8_encoded)
+            .decode_utf8()
+            .expect("decoding percent-encoded UTF-8 must produce valid UTF-8");
+        assert_eq!(
+            utf8_input, &*utf8_decoded,
+            "utf8_percent_encode roundtrip mismatch"
+        );
+    }
+
+    // Test percent_decode directly on raw input
+    let direct_decoded: Cow<[u8]> = percent_decode(input).into();
+    // Re-encoding with NON_ALPHANUMERIC and decoding again should be stable
+    let re_encoded: Cow<str> = percent_encode(&direct_decoded, NON_ALPHANUMERIC).into();
+    let re_decoded: Cow<[u8]> = percent_decode(re_encoded.as_bytes()).into();
+    assert_eq!(
+        &*direct_decoded, &*re_decoded,
+        "double roundtrip mismatch"
+    );
+
+    // Test percent_decode_str if input is valid UTF-8
+    if let Ok(utf8_input) = str::from_utf8(input) {
+        let _ = percent_decode_str(utf8_input).decode_utf8_lossy();
+    }
+});
diff --git a/fuzz/fuzz_targets/fuzz_url_differential.rs b/fuzz/fuzz_targets/fuzz_url_differential.rs
new file mode 100644
index 000000000..ee97207cd
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_url_differential.rs
@@ -0,0 +1,60 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use std::str;
+use url::Url;
+
+fuzz_target!(|data: &[u8]| {
+    if data.len() < 2 {
+        return;
+    }
+
+    let Ok(utf8) = str::from_utf8(data) else {
+        return;
+    };
+
+    // Split input into a base URL part and a relative part.
+    // Ensure we split on a char boundary.
+    let split = (data[0] as usize) % utf8.len().max(1);
+    let split = match utf8.char_indices().find(|&(i, _)| i >= split) {
+        Some((i, _)) => i,
+        None => utf8.len(),
+    };
+    let (base_str, relative_str) = utf8.split_at(split);
+
+    // Try parsing base as absolute URL
+    let Ok(base) = Url::parse(base_str) else {
+        return;
+    };
+
+    // Test relative URL resolution
+    if let Ok(resolved) = base.join(relative_str) {
+        // The resolved URL must be valid
+        let serialized = resolved.as_str();
+        let reparsed =
+            Url::parse(serialized).expect("re-parsing a resolved URL must succeed");
+        assert_eq!(resolved.as_str(), reparsed.as_str());
+
+        // make_relative + join should roundtrip for non-opaque paths
+        if !base.cannot_be_a_base() && !resolved.cannot_be_a_base() {
+            if let Some(relative) = base.make_relative(&resolved) {
+                // Re-resolving the relative URL from base should give the same result
+                if let Ok(re_resolved) = base.join(&relative) {
+                    // Scheme and host should match
+                    assert_eq!(re_resolved.scheme(), resolved.scheme());
+                    assert_eq!(re_resolved.host_str(), resolved.host_str());
+                }
+            }
+        }
+    }
+
+    // Test parse_with_params
+    if utf8.len() < 500 {
+        let params = [("key", "value"), ("a", "b")];
+        if let Ok(with_params) = Url::parse_with_params(utf8, &params) {
+            let query = with_params.query().unwrap_or("");
+            assert!(query.contains("key=value"));
+            assert!(query.contains("a=b"));
+        }
+    }
+});
diff --git a/fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs b/fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs
new file mode 100644
index 000000000..8df8ef62a
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_url_parse_roundtrip.rs
@@ -0,0 +1,44 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use std::str;
+use url::Url;
+
+fuzz_target!(|data: &[u8]| {
+    let Ok(utf8) = str::from_utf8(data) else {
+        return;
+    };
+
+    // Parse the input as a URL
+    let Ok(parsed) = Url::parse(utf8) else {
+        return;
+    };
+
+    // Roundtrip invariant: serializing and re-parsing must produce the same URL
+    let serialized = parsed.as_str();
+    let reparsed = Url::parse(serialized).expect("re-parsing a serialized URL must succeed");
+    assert_eq!(
+        parsed.as_str(),
+        reparsed.as_str(),
+        "roundtrip mismatch for input: {:?}",
+        utf8
+    );
+
+    // Component invariant: individual components must be consistent
+    assert_eq!(parsed.scheme(), reparsed.scheme());
+    assert_eq!(parsed.username(), reparsed.username());
+    assert_eq!(parsed.password(), reparsed.password());
+    assert_eq!(parsed.host_str(), reparsed.host_str());
+    assert_eq!(parsed.port(), reparsed.port());
+    assert_eq!(parsed.path(), reparsed.path());
+    assert_eq!(parsed.query(), reparsed.query());
+    assert_eq!(parsed.fragment(), reparsed.fragment());
+
+    // Join invariant: joining an absolute URL with itself yields the same URL
+    if let Ok(joined) = parsed.join(serialized) {
+        assert_eq!(joined.as_str(), serialized);
+    }
+
+    // Origin consistency
+    let _ = parsed.origin();
+});
diff --git a/fuzz/fuzz_targets/fuzz_url_setters.rs b/fuzz/fuzz_targets/fuzz_url_setters.rs
new file mode 100644
index 000000000..7402d04d2
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_url_setters.rs
@@ -0,0 +1,78 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use std::str;
+use url::Url;
+
+fuzz_target!(|data: &[u8]| {
+    if data.len() < 3 {
+        return;
+    }
+
+    let Ok(utf8) = str::from_utf8(&data[2..]) else {
+        return;
+    };
+
+    // Use first byte to select a base URL, second byte to select which setter to test
+    let base_urls = [
+        "https://example.com/path?query#fragment",
+        "http://user:pass@host:8080/a/b/c",
+        "ftp://files.example.com/pub",
+        "file:///tmp/test",
+        "custom://example",
+    ];
+
+    let base_idx = data[0] as usize % base_urls.len();
+    let setter_idx = data[1] % 10;
+
+    let mut url = Url::parse(base_urls[base_idx]).unwrap();
+    let original = url.as_str().to_string();
+
+    match setter_idx {
+        0 => {
+            let _ = url.set_scheme(utf8);
+        }
+        1 => {
+            let _ = url.set_host(Some(utf8));
+        }
+        2 => {
+            let _ = url.set_host(None);
+        }
+        3 => {
+            let _ = url.set_username(utf8);
+        }
+        4 => {
+            let _ = url.set_password(Some(utf8));
+        }
+        5 => {
+            url.set_path(utf8);
+        }
+        6 => {
+            url.set_query(Some(utf8));
+        }
+        7 => {
+            url.set_fragment(Some(utf8));
+        }
+        8 => {
+            if let Ok(port) = utf8.parse::<u16>() {
+                let _ = url.set_port(Some(port));
+            }
+        }
+        9 => {
+            if let Ok(mut segs) = url.path_segments_mut() {
+                segs.push(utf8);
+            }
+        }
+        _ => {}
+    }
+
+    // After mutation, the URL must still be valid (roundtrip)
+    let modified = url.as_str().to_string();
+    let reparsed = Url::parse(&modified).unwrap_or_else(|e| {
+        panic!(
+            "URL became invalid after mutation: {:?}\noriginal: {}\nmodified: {}\nerror: {}",
+            setter_idx, original, modified, e
+        );
+    });
+    assert_eq!(url.as_str(), reparsed.as_str());
+});