diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs index 9fa5501433453..4a5172a237e71 100644 --- a/compiler/rustc_lint/src/lib.rs +++ b/compiler/rustc_lint/src/lib.rs @@ -24,6 +24,7 @@ #![feature(box_patterns)] #![feature(iter_order_by)] #![feature(rustc_attrs)] +#![feature(titlecase)] #![feature(try_blocks)] // tidy-alphabetical-end diff --git a/compiler/rustc_lint/src/nonstandard_style.rs b/compiler/rustc_lint/src/nonstandard_style.rs index 13429f328565d..6cec7b0efd82f 100644 --- a/compiler/rustc_lint/src/nonstandard_style.rs +++ b/compiler/rustc_lint/src/nonstandard_style.rs @@ -47,34 +47,46 @@ declare_lint! { declare_lint_pass!(NonCamelCaseTypes => [NON_CAMEL_CASE_TYPES]); -/// Some unicode characters *have* case, are considered upper case or lower case, but they *can't* -/// be upper cased or lower cased. For the purposes of the lint suggestion, we care about being able +/// Some unicode characters *have* case, are considered upper, title, or lower case, but they *can't* +/// be title cased or lower cased. For the purposes of the lint suggestion, we care about being able /// to change the char's case. 
fn char_has_case(c: char) -> bool { - !c.to_lowercase().eq(c.to_uppercase()) + !c.to_lowercase().eq(c.to_titlecase()) } -// contains a capitalisable character followed by, or preceded by, an underscore -fn has_underscore_case(s: &str) -> bool { +/// FIXME: we should add a more efficient version +/// in the stdlib for this +fn changes_when_titlecased(c: char) -> bool { + !c.to_titlecase().eq([c]) +} + +// contains a capitalisable character followed by, or preceded by, an underscore, +// or contains an uppercase character that changes when titlecased, +// or contains `__` +fn not_camel_case(s: &str) -> bool { let mut last = '\0'; - s.chars().any(|c| match (std::mem::replace(&mut last, c), c) { - ('_', cs) | (cs, '_') => char_has_case(cs), - _ => false, + s.chars().any(|snd| { + let fst = std::mem::replace(&mut last, snd); + match (fst, snd) { + ('_', '_') => return true, + ('_', _) if char_has_case(snd) => return true, + (_, '_') if char_has_case(fst) => return true, + _ => snd.is_uppercase() && changes_when_titlecased(snd), + } }) } -fn is_camel_case(name: &str) -> bool { +fn is_upper_camel_case(name: &str) -> bool { let name = name.trim_matches('_'); let Some(first) = name.chars().next() else { return true; }; - // start with a non-lowercase letter rather than uppercase - // ones (some scripts don't have a concept of upper/lowercase) - !(first.is_lowercase() || name.contains("__") || has_underscore_case(name)) + // some scripts don't have a concept of upper/lowercase + !(changes_when_titlecased(first) || not_camel_case(name)) } -fn to_camel_case(s: &str) -> String { +fn to_upper_camel_case(s: &str) -> String { s.trim_matches('_') .split('_') .filter(|component| !component.is_empty()) @@ -83,24 +95,31 @@ fn to_camel_case(s: &str) -> String { let mut new_word = true; let mut prev_is_lower_case = true; + let mut prev_is_lowercased_sigma = false; for c in component.chars() { // Preserve the case if an uppercase letter follows a lowercase letter, so that // `camelCase` 
is converted to `CamelCase`. - if prev_is_lower_case && c.is_uppercase() { + if prev_is_lower_case && (c.is_uppercase() | c.is_titlecase()) { new_word = true; } if new_word { - camel_cased_component.extend(c.to_uppercase()); + camel_cased_component.extend(c.to_titlecase()); } else { camel_cased_component.extend(c.to_lowercase()); } - prev_is_lower_case = c.is_lowercase(); + prev_is_lower_case = c.is_lowercase() || c.is_titlecase(); + prev_is_lowercased_sigma = !new_word && c == 'Σ'; new_word = false; } + if prev_is_lowercased_sigma { + camel_cased_component.pop(); + camel_cased_component.push('ς'); + } + camel_cased_component }) .fold((String::new(), None), |(acc, prev): (String, Option<String>), next| { @@ -122,8 +141,8 @@ impl NonCamelCaseTypes { fn check_case(&self, cx: &EarlyContext<'_>, sort: &str, ident: &Ident) { let name = ident.name.as_str(); - if !is_camel_case(name) { - let cc = to_camel_case(name); + if !is_upper_camel_case(name) { + let cc = to_upper_camel_case(name); let sub = if *name != cc { NonCamelCaseTypeSub::Suggestion { span: ident.span, replace: cc } } else { @@ -235,14 +254,20 @@ impl NonSnakeCase { continue; } for ch in s.chars() { - if !buf.is_empty() && buf != "'" && ch.is_uppercase() && !last_upper { - words.push(buf); + if !buf.is_empty() + && buf != "'" + && (ch.is_uppercase() || ch.is_titlecase()) + && !last_upper + { + // We lowercase only at the end, to handle final sigma correctly + words.push(buf.to_lowercase()); buf = String::new(); } - last_upper = ch.is_uppercase(); - buf.extend(ch.to_lowercase()); + last_upper = ch.is_uppercase() || ch.is_titlecase(); + buf.push(ch); } - words.push(buf); + // We lowercase only at the end, to handle final sigma correctly + words.push(buf.to_lowercase()); } words.join("_") } @@ -262,7 +287,8 @@ impl NonSnakeCase { // This correctly handles letters in languages with and without // cases, as well as numbers and underscores. 
- !ident.chars().any(char::is_uppercase) + // FIXME: we should add a standard library impl of `c.to_lowercase().eq([c])` + ident.chars().all(|c| c.to_lowercase().eq([c])) } let name = ident.name.as_str(); @@ -474,10 +500,12 @@ impl<'a, 'b, F: FnOnce() -> NonUpperCaseGlobal<'b>> Diagnostic<'a, ()> impl NonUpperCaseGlobals { fn check_upper_case(cx: &LateContext<'_>, sort: &str, did: Option<LocalDefId>, ident: &Ident) { let name = ident.name.as_str(); - if name.chars().any(|c| c.is_lowercase()) { + // FIXME: we should add a more efficient version + // in the stdlib for `c.to_uppercase().eq([c])` + if !name.chars().all(|c| c.to_uppercase().eq([c])) { let uc = NonSnakeCase::to_snake_case(name).to_uppercase(); - // If the item is exported, suggesting changing it's name would be breaking-change + // If the item is exported, suggesting changing its name would be a breaking change // and could break users without a "nice" applicable fix, so let's avoid it. let can_change_usages = if let Some(did) = did { !cx.tcx.effective_visibilities(()).is_exported(did) diff --git a/compiler/rustc_lint/src/nonstandard_style/tests.rs b/compiler/rustc_lint/src/nonstandard_style/tests.rs index 39c525b8623d0..773ef99775462 100644 --- a/compiler/rustc_lint/src/nonstandard_style/tests.rs +++ b/compiler/rustc_lint/src/nonstandard_style/tests.rs @@ -1,21 +1,37 @@ -use super::{is_camel_case, to_camel_case}; +use super::{is_upper_camel_case, to_upper_camel_case}; #[test] fn camel_case() { - assert!(!is_camel_case("userData")); - assert_eq!(to_camel_case("userData"), "UserData"); + assert!(!is_upper_camel_case("userData")); + assert_eq!(to_upper_camel_case("userData"), "UserData"); - assert!(is_camel_case("X86_64")); + assert!(is_upper_camel_case("X86_64")); - assert!(!is_camel_case("X86__64")); - assert_eq!(to_camel_case("X86__64"), "X86_64"); + assert!(!is_upper_camel_case("X86__64")); + assert_eq!(to_upper_camel_case("X86__64"), "X86_64"); - assert!(!is_camel_case("Abc_123")); - 
assert_eq!(to_camel_case("Abc_123"), "Abc123"); + assert!(!is_upper_camel_case("Abc_123")); + assert_eq!(to_upper_camel_case("Abc_123"), "Abc123"); - assert!(!is_camel_case("A1_b2_c3")); - assert_eq!(to_camel_case("A1_b2_c3"), "A1B2C3"); + assert!(!is_upper_camel_case("A1_b2_c3")); + assert_eq!(to_upper_camel_case("A1_b2_c3"), "A1B2C3"); - assert!(!is_camel_case("ONE_TWO_THREE")); - assert_eq!(to_camel_case("ONE_TWO_THREE"), "OneTwoThree"); + assert!(!is_upper_camel_case("ONE_TWO_THREE")); + assert_eq!(to_upper_camel_case("ONE_TWO_THREE"), "OneTwoThree"); + + // FIXME(@Jules-Bertholet): This test doesn't work due to what I believe + // is a Unicode spec bug - uppercase Georgian letters have + // incorrect titlecase mappings. + // I've reported it to Unicode. + // Georgian mtavruli is only used in all-caps + //assert!(!is_upper_camel_case("ᲫალაᲔრთობაშია")); + //assert_eq!(to_upper_camel_case("ᲫალაᲔრთობაშია"), "ძალა_ერთობაშია"); + + assert!(!is_upper_camel_case("LJNJaaaDŽooo")); + assert_eq!(to_upper_camel_case("LJNJaaLjNJaDŽooo"), "LjnjaaLjNjaDžooo"); + + // Final sigma + assert!(!is_upper_camel_case("ΦΙΛΟΣ_ΦΙΛΟΣ")); + assert_eq!(to_upper_camel_case("ΦΙΛΟΣ_ΦΙΛΟΣ"), "ΦιλοςΦιλος"); + assert!(is_upper_camel_case("ΦιλοσΦιλοσ")); } diff --git a/tests/ui/lint/lint-nonstandard-style-unicode-1.rs b/tests/ui/lint/lint-nonstandard-style-unicode-1.rs index 7c45c099304fa..95738b2bec5a4 100644 --- a/tests/ui/lint/lint-nonstandard-style-unicode-1.rs +++ b/tests/ui/lint/lint-nonstandard-style-unicode-1.rs @@ -42,8 +42,18 @@ struct Hello_World; struct 你_ӟ; //~^ ERROR type `你_ӟ` should have an upper camel case name -// and this is ok: +struct ΦΙΛΟΣ_Σ; +//~^ ERROR type `ΦΙΛΟΣ_Σ` should have an upper camel case name + +struct Σ_ΦΙΛΟΣ; +//~^ ERROR type `Σ_ΦΙΛΟΣ` should have an upper camel case name + +// these are ok: struct 你_好; +struct ძალა_ერთობაშია; + +struct Σ; + fn main() {} diff --git a/tests/ui/lint/lint-nonstandard-style-unicode-1.stderr 
b/tests/ui/lint/lint-nonstandard-style-unicode-1.stderr index 6c2aa225e602e..c319395e6ce31 100644 --- a/tests/ui/lint/lint-nonstandard-style-unicode-1.stderr +++ b/tests/ui/lint/lint-nonstandard-style-unicode-1.stderr @@ -46,5 +46,17 @@ error: type `你_ӟ` should have an upper camel case name LL | struct 你_ӟ; | ^^^^ help: convert the identifier to upper camel case: `你Ӟ` -error: aborting due to 7 previous errors +error: type `ΦΙΛΟΣ_Σ` should have an upper camel case name + --> $DIR/lint-nonstandard-style-unicode-1.rs:45:8 + | +LL | struct ΦΙΛΟΣ_Σ; + | ^^^^^^^ help: convert the identifier to upper camel case: `ΦιλοςΣ` + +error: type `Σ_ΦΙΛΟΣ` should have an upper camel case name + --> $DIR/lint-nonstandard-style-unicode-1.rs:48:8 + | +LL | struct Σ_ΦΙΛΟΣ; + | ^^^^^^^ help: convert the identifier to upper camel case: `ΣΦιλος` + +error: aborting due to 9 previous errors diff --git a/tests/ui/lint/lint-nonstandard-style-unicode-2.rs b/tests/ui/lint/lint-nonstandard-style-unicode-2.rs new file mode 100644 index 0000000000000..43c98c1f26b3e --- /dev/null +++ b/tests/ui/lint/lint-nonstandard-style-unicode-2.rs @@ -0,0 +1,31 @@ +#![allow(dead_code)] +#![forbid(non_snake_case)] + +// 2. 
non_snake_case + + +fn LJNJaaLjNJaDŽooo() {} +//~^ ERROR function `LJNJaaLjNJaDŽooo` should have a snake case name +//~| WARN identifier contains 5 non normalized (NFKC) characters + +fn LjnjaaLjNjaDžooo() {} +//~^ ERROR function `LjnjaaLjNjaDžooo` should have a snake case name +//~| WARN identifier contains 5 non normalized (NFKC) characters + +// test final sigma casing +fn ΦΙΛΟΣ_ΦΙΛΟΣ() {} +//~^ ERROR function `ΦΙΛΟΣ_ΦΙΛΟΣ` should have a snake case name + +fn Σ() {} +//~^ ERROR function `Σ` should have a snake case name + +fn ΦΙΛΟΣ_Σ() {} +//~^ ERROR function `ΦΙΛΟΣ_Σ` should have a snake case name + +fn Σ_ΦΙΛΟΣ() {} +//~^ ERROR function `Σ_ΦΙΛΟΣ` should have a snake case name + +// this is ok +fn φιλοσ_φιλοσ() {} + +fn main() {} diff --git a/tests/ui/lint/lint-nonstandard-style-unicode-2.stderr b/tests/ui/lint/lint-nonstandard-style-unicode-2.stderr new file mode 100644 index 0000000000000..3e5fffb22ebc4 --- /dev/null +++ b/tests/ui/lint/lint-nonstandard-style-unicode-2.stderr @@ -0,0 +1,61 @@ +warning: identifier contains 5 non normalized (NFKC) characters: 'LJ', 'NJ', 'Lj', 'NJ', and 'DŽ' + --> $DIR/lint-nonstandard-style-unicode-2.rs:7:4 + | +LL | fn LJNJaaLjNJaDŽooo() {} + | ^^^^^^^^^^^ + | + = note: these characters are included in the Not_NFKC Unicode general security profile + = note: `#[warn(uncommon_codepoints)]` on by default + +warning: identifier contains 5 non normalized (NFKC) characters: 'Lj', 'nj', 'Lj', 'Nj', and 'Dž' + --> $DIR/lint-nonstandard-style-unicode-2.rs:11:4 + | +LL | fn LjnjaaLjNjaDžooo() {} + | ^^^^^^^^^^^ + | + = note: these characters are included in the Not_NFKC Unicode general security profile + +error: function `LJNJaaLjNJaDŽooo` should have a snake case name + --> $DIR/lint-nonstandard-style-unicode-2.rs:7:4 + | +LL | fn LJNJaaLjNJaDŽooo() {} + | ^^^^^^^^^^^ help: convert the identifier to snake case: `ljnjaa_ljnja_džooo` + | +note: the lint level is defined here + --> $DIR/lint-nonstandard-style-unicode-2.rs:2:11 + | +LL | 
#![forbid(non_snake_case)] + | ^^^^^^^^^^^^^^ + +error: function `LjnjaaLjNjaDžooo` should have a snake case name + --> $DIR/lint-nonstandard-style-unicode-2.rs:11:4 + | +LL | fn LjnjaaLjNjaDžooo() {} + | ^^^^^^^^^^^ help: convert the identifier to snake case: `ljnjaa_ljnja_džooo` + +error: function `ΦΙΛΟΣ_ΦΙΛΟΣ` should have a snake case name + --> $DIR/lint-nonstandard-style-unicode-2.rs:16:4 + | +LL | fn ΦΙΛΟΣ_ΦΙΛΟΣ() {} + | ^^^^^^^^^^^ help: convert the identifier to snake case: `φιλος_φιλος` + +error: function `Σ` should have a snake case name + --> $DIR/lint-nonstandard-style-unicode-2.rs:19:4 + | +LL | fn Σ() {} + | ^ help: convert the identifier to snake case: `σ` + +error: function `ΦΙΛΟΣ_Σ` should have a snake case name + --> $DIR/lint-nonstandard-style-unicode-2.rs:22:4 + | +LL | fn ΦΙΛΟΣ_Σ() {} + | ^^^^^^^ help: convert the identifier to snake case: `φιλος_σ` + +error: function `Σ_ΦΙΛΟΣ` should have a snake case name + --> $DIR/lint-nonstandard-style-unicode-2.rs:25:4 + | +LL | fn Σ_ΦΙΛΟΣ() {} + | ^^^^^^^ help: convert the identifier to snake case: `σ_φιλος` + +error: aborting due to 6 previous errors; 2 warnings emitted + diff --git a/tests/ui/lint/lint-nonstandard-style-unicode-3.rs b/tests/ui/lint/lint-nonstandard-style-unicode-3.rs index 9175be7a0f49d..cb835e914704b 100644 --- a/tests/ui/lint/lint-nonstandard-style-unicode-3.rs +++ b/tests/ui/lint/lint-nonstandard-style-unicode-3.rs @@ -21,4 +21,11 @@ static τεχ: f32 = 3.14159265; static __密__封__线__内__禁__止__答__题__: bool = true; +static ძალა_ერთობაშია: () = (); +//~^ ERROR static variable `ძალა_ერთობაშია` should have an upper case name + +static Nj: () = (); +//~^ ERROR static variable `Nj` should have an upper case name +//~| WARN identifier contains a non normalized (NFKC) character + fn main() {} diff --git a/tests/ui/lint/lint-nonstandard-style-unicode-3.stderr b/tests/ui/lint/lint-nonstandard-style-unicode-3.stderr index 3d4337bbc6f92..223620ab22942 100644 --- 
a/tests/ui/lint/lint-nonstandard-style-unicode-3.stderr +++ b/tests/ui/lint/lint-nonstandard-style-unicode-3.stderr @@ -1,3 +1,12 @@ +warning: identifier contains a non normalized (NFKC) character: 'Nj' + --> $DIR/lint-nonstandard-style-unicode-3.rs:27:8 + | +LL | static Nj: () = (); + | ^ + | + = note: this character is included in the Not_NFKC Unicode general security profile + = note: `#[warn(uncommon_codepoints)]` on by default + error: static variable `τεχ` should have an upper case name --> $DIR/lint-nonstandard-style-unicode-3.rs:17:8 | @@ -10,5 +19,17 @@ note: the lint level is defined here LL | #![forbid(non_upper_case_globals)] | ^^^^^^^^^^^^^^^^^^^^^^ -error: aborting due to 1 previous error +error: static variable `ძალა_ერთობაშია` should have an upper case name + --> $DIR/lint-nonstandard-style-unicode-3.rs:24:8 + | +LL | static ძალა_ერთობაშია: () = (); + | ^^^^^^^^^^^^^^ help: convert the identifier to upper case: `ᲫᲐᲚᲐ_ᲔᲠᲗᲝᲑᲐᲨᲘᲐ` + +error: static variable `Nj` should have an upper case name + --> $DIR/lint-nonstandard-style-unicode-3.rs:27:8 + | +LL | static Nj: () = (); + | ^ help: convert the identifier to upper case: `NJ` + +error: aborting due to 3 previous errors; 1 warning emitted diff --git a/tests/ui/lint/special-upper-lower-cases.rs b/tests/ui/lint/special-upper-lower-cases.rs index d77ffbcbfa3d1..6384909911a35 100644 --- a/tests/ui/lint/special-upper-lower-cases.rs +++ b/tests/ui/lint/special-upper-lower-cases.rs @@ -1,23 +1,23 @@ // (#77273) These characters are in the general categories of -// "Uppercase/Lowercase Letter". -// The diagnostics don't provide meaningful suggestions for them -// as we cannot convert them properly. +// "Uppercase/Lowercase Letter", +// but casing operations map them to themselves. +// Therefore, we do not warn about casing +// (but do warn about uncommon codepoints). 
//@ check-pass -#![allow(uncommon_codepoints, unused)] +#![allow(unused)] struct 𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝; -//~^ WARN: type `𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name +//~^ WARN identifier contains 9 non normalized (NFKC) characters -// FIXME: How we should handle this? struct 𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝; -//~^ WARN: type `𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name +//~^ WARN identifier contains 9 non normalized (NFKC) characters static 𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲: i32 = 1; -//~^ WARN: static variable `𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲` should have an upper case name +//~^ WARN identifier contains 12 non normalized (NFKC) characters fn main() { let 𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢 = 1; - //~^ WARN: variable `𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢` should have a snake case name + //~^ WARN identifier contains 9 non normalized (NFKC) characters } diff --git a/tests/ui/lint/special-upper-lower-cases.stderr b/tests/ui/lint/special-upper-lower-cases.stderr index 0f5cf336aec2d..18e366f176fb3 100644 --- a/tests/ui/lint/special-upper-lower-cases.stderr +++ b/tests/ui/lint/special-upper-lower-cases.stderr @@ -1,32 +1,35 @@ -warning: type `𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name - --> $DIR/special-upper-lower-cases.rs:10:8 +warning: identifier contains 9 non normalized (NFKC) characters: '𝕟', '𝕠', '𝕥', '𝕒', '𝕔', '𝕒', '𝕞', '𝕖', and '𝕝' + --> $DIR/special-upper-lower-cases.rs:11:8 | LL | struct 𝕟𝕠𝕥𝕒𝕔𝕒𝕞𝕖𝕝; - | ^^^^^^^^^ should have an UpperCamelCase name + | ^^^^^^^^^ | - = note: `#[warn(non_camel_case_types)]` (part of `#[warn(nonstandard_style)]`) on by default + = note: these characters are included in the Not_NFKC Unicode general security profile + = note: `#[warn(uncommon_codepoints)]` on by default -warning: type `𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝` should have an upper camel case name +warning: identifier contains 9 non normalized (NFKC) characters: '𝕟', '𝕠', '𝕥', '𝕒', '𝕔', '𝕒', '𝕞', '𝕖', and '𝕝' --> $DIR/special-upper-lower-cases.rs:14:8 | LL | struct 𝕟𝕠𝕥_𝕒_𝕔𝕒𝕞𝕖𝕝; - | ^^^^^^^^^^^ should have an UpperCamelCase name + | ^^^^^^^^^^^ + | + = note: these characters are included in the Not_NFKC 
Unicode general security profile -warning: static variable `𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲` should have an upper case name +warning: identifier contains 12 non normalized (NFKC) characters: '𝗻', '𝗼', '𝗻', '𝘂', '𝗽', '𝗽', '𝗲', '𝗿', '𝗰', '𝗮', '𝘀', and '𝗲' --> $DIR/special-upper-lower-cases.rs:17:8 | LL | static 𝗻𝗼𝗻𝘂𝗽𝗽𝗲𝗿𝗰𝗮𝘀𝗲: i32 = 1; - | ^^^^^^^^^^^^ should have an UPPER_CASE name + | ^^^^^^^^^^^^ | - = note: `#[warn(non_upper_case_globals)]` (part of `#[warn(nonstandard_style)]`) on by default + = note: these characters are included in the Not_NFKC Unicode general security profile -warning: variable `𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢` should have a snake case name +warning: identifier contains 9 non normalized (NFKC) characters: '𝓢', '𝓝', '𝓐', '𝓐', '𝓐', '𝓐', '𝓚', '𝓔', and '𝓢' --> $DIR/special-upper-lower-cases.rs:21:9 | LL | let 𝓢𝓝𝓐𝓐𝓐𝓐𝓚𝓔𝓢 = 1; - | ^^^^^^^^^ should have a snake_case name + | ^^^^^^^^^ | - = note: `#[warn(non_snake_case)]` (part of `#[warn(nonstandard_style)]`) on by default + = note: these characters are included in the Not_NFKC Unicode general security profile warning: 4 warnings emitted