Skip to content

Commit 0dca307

Browse files
Account for titlecase in casing lints
1 parent 30837cb commit 0dca307

11 files changed

Lines changed: 252 additions & 62 deletions

compiler/rustc_lint/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#![feature(box_patterns)]
2525
#![feature(iter_order_by)]
2626
#![feature(rustc_attrs)]
27+
#![feature(titlecase)]
2728
#![feature(try_blocks)]
2829
// tidy-alphabetical-end
2930

compiler/rustc_lint/src/nonstandard_style.rs

Lines changed: 54 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -47,34 +47,46 @@ declare_lint! {
4747

4848
declare_lint_pass!(NonCamelCaseTypes => [NON_CAMEL_CASE_TYPES]);
4949

50-
/// Some unicode characters *have* case, are considered upper case or lower case, but they *can't*
51-
/// be upper cased or lower cased. For the purposes of the lint suggestion, we care about being able
50+
/// Some unicode characters *have* case, are considered upper, title, or lower case, but they *can't*
51+
/// be title cased or lower cased. For the purposes of the lint suggestion, we care about being able
5252
/// to change the char's case.
5353
fn char_has_case(c: char) -> bool {
54-
!c.to_lowercase().eq(c.to_uppercase())
54+
!c.to_lowercase().eq(c.to_titlecase())
5555
}
5656

57-
// contains a capitalisable character followed by, or preceded by, an underscore
58-
fn has_underscore_case(s: &str) -> bool {
57+
/// FIXME: we should add a more efficient version
58+
/// in the stdlib for this
59+
fn changes_when_titlecased(c: char) -> bool {
60+
!c.to_titlecase().eq([c])
61+
}
62+
63+
// contains a capitalisable character followed by, or preceded by, an underscore,
64+
// or contains an uppercase character that changes when titlecased,
65+
// or contains `__`
66+
fn not_camel_case(s: &str) -> bool {
5967
let mut last = '\0';
60-
s.chars().any(|c| match (std::mem::replace(&mut last, c), c) {
61-
('_', cs) | (cs, '_') => char_has_case(cs),
62-
_ => false,
68+
s.chars().any(|snd| {
69+
let fst = std::mem::replace(&mut last, snd);
70+
match (fst, snd) {
71+
('_', '_') => return true,
72+
('_', _) if char_has_case(snd) => return true,
73+
(_, '_') if char_has_case(fst) => return true,
74+
_ => snd.is_uppercase() && changes_when_titlecased(snd),
75+
}
6376
})
6477
}
6578

66-
fn is_camel_case(name: &str) -> bool {
79+
fn is_upper_camel_case(name: &str) -> bool {
6780
let name = name.trim_matches('_');
6881
let Some(first) = name.chars().next() else {
6982
return true;
7083
};
7184

72-
// start with a non-lowercase letter rather than uppercase
73-
// ones (some scripts don't have a concept of upper/lowercase)
74-
!(first.is_lowercase() || name.contains("__") || has_underscore_case(name))
85+
// some scripts don't have a concept of upper/lowercase
86+
!(changes_when_titlecased(first) || not_camel_case(name))
7587
}
7688

77-
fn to_camel_case(s: &str) -> String {
89+
fn to_upper_camel_case(s: &str) -> String {
7890
s.trim_matches('_')
7991
.split('_')
8092
.filter(|component| !component.is_empty())
@@ -83,24 +95,31 @@ fn to_camel_case(s: &str) -> String {
8395

8496
let mut new_word = true;
8597
let mut prev_is_lower_case = true;
98+
let mut prev_is_lowercased_sigma = false;
8699

87100
for c in component.chars() {
88101
// Preserve the case if an uppercase letter follows a lowercase letter, so that
89102
// `camelCase` is converted to `CamelCase`.
90-
if prev_is_lower_case && c.is_uppercase() {
103+
if prev_is_lower_case && (c.is_uppercase() | c.is_titlecase()) {
91104
new_word = true;
92105
}
93106

94107
if new_word {
95-
camel_cased_component.extend(c.to_uppercase());
108+
camel_cased_component.extend(c.to_titlecase());
96109
} else {
97110
camel_cased_component.extend(c.to_lowercase());
98111
}
99112

100-
prev_is_lower_case = c.is_lowercase();
113+
prev_is_lower_case = c.is_lowercase() || c.is_titlecase();
114+
prev_is_lowercased_sigma = !new_word && c == 'Σ';
101115
new_word = false;
102116
}
103117

118+
if prev_is_lowercased_sigma {
119+
camel_cased_component.pop();
120+
camel_cased_component.push('ς');
121+
}
122+
104123
camel_cased_component
105124
})
106125
.fold((String::new(), None), |(acc, prev): (String, Option<String>), next| {
@@ -122,8 +141,8 @@ impl NonCamelCaseTypes {
122141
fn check_case(&self, cx: &EarlyContext<'_>, sort: &str, ident: &Ident) {
123142
let name = ident.name.as_str();
124143

125-
if !is_camel_case(name) {
126-
let cc = to_camel_case(name);
144+
if !is_upper_camel_case(name) {
145+
let cc = to_upper_camel_case(name);
127146
let sub = if *name != cc {
128147
NonCamelCaseTypeSub::Suggestion { span: ident.span, replace: cc }
129148
} else {
@@ -235,14 +254,20 @@ impl NonSnakeCase {
235254
continue;
236255
}
237256
for ch in s.chars() {
238-
if !buf.is_empty() && buf != "'" && ch.is_uppercase() && !last_upper {
239-
words.push(buf);
257+
if !buf.is_empty()
258+
&& buf != "'"
259+
&& (ch.is_uppercase() || ch.is_titlecase())
260+
&& !last_upper
261+
{
262+
// We lowercase only at the end, to handle final sigma correctly
263+
words.push(buf.to_lowercase());
240264
buf = String::new();
241265
}
242-
last_upper = ch.is_uppercase();
243-
buf.extend(ch.to_lowercase());
266+
last_upper = ch.is_uppercase() || ch.is_titlecase();
267+
buf.push(ch);
244268
}
245-
words.push(buf);
269+
// We lowercase only at the end, to handle final sigma correctly
270+
words.push(buf.to_lowercase());
246271
}
247272
words.join("_")
248273
}
@@ -262,7 +287,8 @@ impl NonSnakeCase {
262287

263288
// This correctly handles letters in languages with and without
264289
// cases, as well as numbers and underscores.
265-
!ident.chars().any(char::is_uppercase)
290+
// FIXME: we should add a standard library impl of `c.to_lowercase().eq([c])`
291+
ident.chars().all(|c| c.to_lowercase().eq([c]))
266292
}
267293

268294
let name = ident.name.as_str();
@@ -474,10 +500,12 @@ impl<'a, 'b, F: FnOnce() -> NonUpperCaseGlobal<'b>> Diagnostic<'a, ()>
474500
impl NonUpperCaseGlobals {
475501
fn check_upper_case(cx: &LateContext<'_>, sort: &str, did: Option<LocalDefId>, ident: &Ident) {
476502
let name = ident.name.as_str();
477-
if name.chars().any(|c| c.is_lowercase()) {
503+
// FIXME: we should add a more efficient version
504+
// in the stdlib for `c.to_uppercase().eq([c])`
505+
if !name.chars().all(|c| c.to_uppercase().eq([c])) {
478506
let uc = NonSnakeCase::to_snake_case(name).to_uppercase();
479507

480-
// If the item is exported, suggesting changing it's name would be breaking-change
508+
// If the item is exported, suggesting changing its name would be a breaking change
481509
// and could break users without a "nice" applicable fix, so let's avoid it.
482510
let can_change_usages = if let Some(did) = did {
483511
!cx.tcx.effective_visibilities(()).is_exported(did)
Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,37 @@
1-
use super::{is_camel_case, to_camel_case};
1+
use super::{is_upper_camel_case, to_upper_camel_case};
22

33
#[test]
44
fn camel_case() {
5-
assert!(!is_camel_case("userData"));
6-
assert_eq!(to_camel_case("userData"), "UserData");
5+
assert!(!is_upper_camel_case("userData"));
6+
assert_eq!(to_upper_camel_case("userData"), "UserData");
77

8-
assert!(is_camel_case("X86_64"));
8+
assert!(is_upper_camel_case("X86_64"));
99

10-
assert!(!is_camel_case("X86__64"));
11-
assert_eq!(to_camel_case("X86__64"), "X86_64");
10+
assert!(!is_upper_camel_case("X86__64"));
11+
assert_eq!(to_upper_camel_case("X86__64"), "X86_64");
1212

13-
assert!(!is_camel_case("Abc_123"));
14-
assert_eq!(to_camel_case("Abc_123"), "Abc123");
13+
assert!(!is_upper_camel_case("Abc_123"));
14+
assert_eq!(to_upper_camel_case("Abc_123"), "Abc123");
1515

16-
assert!(!is_camel_case("A1_b2_c3"));
17-
assert_eq!(to_camel_case("A1_b2_c3"), "A1B2C3");
16+
assert!(!is_upper_camel_case("A1_b2_c3"));
17+
assert_eq!(to_upper_camel_case("A1_b2_c3"), "A1B2C3");
1818

19-
assert!(!is_camel_case("ONE_TWO_THREE"));
20-
assert_eq!(to_camel_case("ONE_TWO_THREE"), "OneTwoThree");
19+
assert!(!is_upper_camel_case("ONE_TWO_THREE"));
20+
assert_eq!(to_upper_camel_case("ONE_TWO_THREE"), "OneTwoThree");
21+
22+
// FIXME(@Jules-Bertholet): This test doesn't work due to what I believe
23+
// is a Unicode spec bug - uppercase Georgian letters have
24+
// incorrect titlecase mappings.
25+
// I've reported it to Unicode.
26+
// Georgian mtavruli is only used in all-caps
27+
//assert!(!is_upper_camel_case("ᲫალაᲔრთობაშია"));
28+
//assert_eq!(to_upper_camel_case("ᲫალაᲔრთობაშია"), "ძალა_ერთობაშია");
29+
30+
assert!(!is_upper_camel_case("ǇǊaaaǄooo"));
31+
assert_eq!(to_upper_camel_case("ǇǊaaǈǊaǄooo"), "ǈǌaaǈǋaǅooo");
32+
33+
// Final sigma
34+
assert!(!is_upper_camel_case("ΦΙΛΟΣ_ΦΙΛΟΣ"));
35+
assert_eq!(to_upper_camel_case("ΦΙΛΟΣ_ΦΙΛΟΣ"), "ΦιλοςΦιλος");
36+
assert!(is_upper_camel_case("ΦιλοσΦιλοσ"));
2137
}

tests/ui/lint/lint-nonstandard-style-unicode-1.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,18 @@ struct Hello_World;
4242
struct 你_ӟ;
4343
//~^ ERROR type `你_ӟ` should have an upper camel case name
4444

45-
// and this is ok:
45+
struct ΦΙΛΟΣ_Σ;
46+
//~^ ERROR type `ΦΙΛΟΣ_Σ` should have an upper camel case name
47+
48+
struct Σ_ΦΙΛΟΣ;
49+
//~^ ERROR type `Σ_ΦΙΛΟΣ` should have an upper camel case name
50+
51+
// these are ok:
4652

4753
struct 你_好;
4854

55+
struct ძალა_ერთობაშია;
56+
57+
struct Σ;
58+
4959
fn main() {}

tests/ui/lint/lint-nonstandard-style-unicode-1.stderr

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,5 +46,17 @@ error: type `你_ӟ` should have an upper camel case name
4646
LL | struct 你_ӟ;
4747
| ^^^^ help: convert the identifier to upper camel case: `你Ӟ`
4848

49-
error: aborting due to 7 previous errors
49+
error: type `ΦΙΛΟΣ_Σ` should have an upper camel case name
50+
--> $DIR/lint-nonstandard-style-unicode-1.rs:45:8
51+
|
52+
LL | struct ΦΙΛΟΣ_Σ;
53+
| ^^^^^^^ help: convert the identifier to upper camel case: `ΦιλοςΣ`
54+
55+
error: type `Σ_ΦΙΛΟΣ` should have an upper camel case name
56+
--> $DIR/lint-nonstandard-style-unicode-1.rs:48:8
57+
|
58+
LL | struct Σ_ΦΙΛΟΣ;
59+
| ^^^^^^^ help: convert the identifier to upper camel case: `ΣΦιλος`
60+
61+
error: aborting due to 9 previous errors
5062

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#![allow(dead_code)]
2+
#![forbid(non_snake_case)]
3+
4+
// 2. non_snake_case
5+
6+
7+
fn ǇǊaaǈǊaǄooo() {}
8+
//~^ ERROR function `ǇǊaaǈǊaǄooo` should have a snake case name
9+
//~| WARN identifier contains 5 non normalized (NFKC) characters
10+
11+
fn ǈǌaaǈǋaǅooo() {}
12+
//~^ ERROR function `ǈǌaaǈǋaǅooo` should have a snake case name
13+
//~| WARN identifier contains 5 non normalized (NFKC) characters
14+
15+
// test final sigma casing
16+
fn ΦΙΛΟΣ_ΦΙΛΟΣ() {}
17+
//~^ ERROR function `ΦΙΛΟΣ_ΦΙΛΟΣ` should have a snake case name
18+
19+
fn Σ() {}
20+
//~^ ERROR function `Σ` should have a snake case name
21+
22+
fn ΦΙΛΟΣ_Σ() {}
23+
//~^ ERROR function `ΦΙΛΟΣ_Σ` should have a snake case name
24+
25+
fn Σ_ΦΙΛΟΣ() {}
26+
//~^ ERROR function `Σ_ΦΙΛΟΣ` should have a snake case name
27+
28+
// this is ok
29+
fn φιλοσ_φιλοσ() {}
30+
31+
fn main() {}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
warning: identifier contains 5 non normalized (NFKC) characters: 'Ǉ', 'Ǌ', 'ǈ', 'Ǌ', and 'Ǆ'
2+
--> $DIR/lint-nonstandard-style-unicode-2.rs:7:4
3+
|
4+
LL | fn ǇǊaaǈǊaǄooo() {}
5+
| ^^^^^^^^^^^
6+
|
7+
= note: these characters are included in the Not_NFKC Unicode general security profile
8+
= note: `#[warn(uncommon_codepoints)]` on by default
9+
10+
warning: identifier contains 5 non normalized (NFKC) characters: 'ǈ', 'ǌ', 'ǈ', 'ǋ', and 'ǅ'
11+
--> $DIR/lint-nonstandard-style-unicode-2.rs:11:4
12+
|
13+
LL | fn ǈǌaaǈǋaǅooo() {}
14+
| ^^^^^^^^^^^
15+
|
16+
= note: these characters are included in the Not_NFKC Unicode general security profile
17+
18+
error: function `ǇǊaaǈǊaǄooo` should have a snake case name
19+
--> $DIR/lint-nonstandard-style-unicode-2.rs:7:4
20+
|
21+
LL | fn ǇǊaaǈǊaǄooo() {}
22+
| ^^^^^^^^^^^ help: convert the identifier to snake case: `ǉǌaa_ǉǌa_ǆooo`
23+
|
24+
note: the lint level is defined here
25+
--> $DIR/lint-nonstandard-style-unicode-2.rs:2:11
26+
|
27+
LL | #![forbid(non_snake_case)]
28+
| ^^^^^^^^^^^^^^
29+
30+
error: function `ǈǌaaǈǋaǅooo` should have a snake case name
31+
--> $DIR/lint-nonstandard-style-unicode-2.rs:11:4
32+
|
33+
LL | fn ǈǌaaǈǋaǅooo() {}
34+
| ^^^^^^^^^^^ help: convert the identifier to snake case: `ǉǌaa_ǉǌa_ǆooo`
35+
36+
error: function `ΦΙΛΟΣ_ΦΙΛΟΣ` should have a snake case name
37+
--> $DIR/lint-nonstandard-style-unicode-2.rs:16:4
38+
|
39+
LL | fn ΦΙΛΟΣ_ΦΙΛΟΣ() {}
40+
| ^^^^^^^^^^^ help: convert the identifier to snake case: `φιλος_φιλος`
41+
42+
error: function `Σ` should have a snake case name
43+
--> $DIR/lint-nonstandard-style-unicode-2.rs:19:4
44+
|
45+
LL | fn Σ() {}
46+
| ^ help: convert the identifier to snake case: `σ`
47+
48+
error: function `ΦΙΛΟΣ_Σ` should have a snake case name
49+
--> $DIR/lint-nonstandard-style-unicode-2.rs:22:4
50+
|
51+
LL | fn ΦΙΛΟΣ_Σ() {}
52+
| ^^^^^^^ help: convert the identifier to snake case: `φιλος_σ`
53+
54+
error: function `Σ_ΦΙΛΟΣ` should have a snake case name
55+
--> $DIR/lint-nonstandard-style-unicode-2.rs:25:4
56+
|
57+
LL | fn Σ_ΦΙΛΟΣ() {}
58+
| ^^^^^^^ help: convert the identifier to snake case: `σ_φιλος`
59+
60+
error: aborting due to 6 previous errors; 2 warnings emitted
61+

tests/ui/lint/lint-nonstandard-style-unicode-3.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,11 @@ static τεχ: f32 = 3.14159265;
2121

2222
static __密__封__线__内__禁__止__答__题__: bool = true;
2323

24+
static ძალა_ერთობაშია: () = ();
25+
//~^ ERROR static variable `ძალა_ერთობაშია` should have an upper case name
26+
27+
static ǋ: () = ();
28+
//~^ ERROR static variable `ǋ` should have an upper case name
29+
//~| WARN identifier contains a non normalized (NFKC) character
30+
2431
fn main() {}

tests/ui/lint/lint-nonstandard-style-unicode-3.stderr

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
warning: identifier contains a non normalized (NFKC) character: 'ǋ'
2+
--> $DIR/lint-nonstandard-style-unicode-3.rs:27:8
3+
|
4+
LL | static ǋ: () = ();
5+
| ^
6+
|
7+
= note: this character is included in the Not_NFKC Unicode general security profile
8+
= note: `#[warn(uncommon_codepoints)]` on by default
9+
110
error: static variable `τεχ` should have an upper case name
211
--> $DIR/lint-nonstandard-style-unicode-3.rs:17:8
312
|
@@ -10,5 +19,17 @@ note: the lint level is defined here
1019
LL | #![forbid(non_upper_case_globals)]
1120
| ^^^^^^^^^^^^^^^^^^^^^^
1221

13-
error: aborting due to 1 previous error
22+
error: static variable `ძალა_ერთობაშია` should have an upper case name
23+
--> $DIR/lint-nonstandard-style-unicode-3.rs:24:8
24+
|
25+
LL | static ძალა_ერთობაშია: () = ();
26+
| ^^^^^^^^^^^^^^ help: convert the identifier to upper case: `ᲫᲐᲚᲐ_ᲔᲠᲗᲝᲑᲐᲨᲘᲐ`
27+
28+
error: static variable `ǋ` should have an upper case name
29+
--> $DIR/lint-nonstandard-style-unicode-3.rs:27:8
30+
|
31+
LL | static ǋ: () = ();
32+
| ^ help: convert the identifier to upper case: `Ǌ`
33+
34+
error: aborting due to 3 previous errors; 1 warning emitted
1435

0 commit comments

Comments
 (0)