@@ -13,12 +13,20 @@ use crate::rules::ruff::rules::Context;
13
13
use crate :: settings:: LinterSettings ;
14
14
15
15
/// ## What it does
16
- /// Checks for ambiguous unicode characters in strings.
16
+ /// Checks for ambiguous Unicode characters in strings.
17
17
///
18
18
/// ## Why is this bad?
19
- /// The use of ambiguous unicode characters can confuse readers and cause
19
+ /// Some Unicode characters are visually similar to ASCII characters, but have
20
+ /// different code points. For example, `GREEK CAPITAL LETTER ALPHA` (`U+0391`) is
21
+ /// visually similar, but not identical, to the ASCII character `A`.
22
+ ///
23
+ /// The use of ambiguous Unicode characters can confuse readers and cause
20
24
/// subtle bugs.
21
25
///
26
+ /// In [preview], this rule will also flag Unicode characters that are
27
+ /// confusable with other, non-preferred Unicode characters. For example, the
28
+ /// spec recommends `GREEK CAPITAL LETTER OMEGA` over `OHM SIGN`.
29
+ ///
22
30
/// ## Example
23
31
/// ```python
24
32
/// print("Ηello, world!") # "Η" is the Greek eta (`U+0397`).
@@ -28,6 +36,8 @@ use crate::settings::LinterSettings;
28
36
/// ```python
29
37
/// print("Hello, world!") # "H" is the Latin capital H (`U+0048`).
30
38
/// ```
39
+ ///
40
+ /// [preview]: https://docs.astral.sh/ruff/preview/
31
41
#[ violation]
32
42
pub struct AmbiguousUnicodeCharacterString {
33
43
confusable : char ,
@@ -50,12 +60,20 @@ impl Violation for AmbiguousUnicodeCharacterString {
50
60
}
51
61
52
62
/// ## What it does
53
- /// Checks for ambiguous unicode characters in docstrings.
63
+ /// Checks for ambiguous Unicode characters in docstrings.
54
64
///
55
65
/// ## Why is this bad?
56
- /// The use of ambiguous unicode characters can confuse readers and cause
66
+ /// Some Unicode characters are visually similar to ASCII characters, but have
67
+ /// different code points. For example, `GREEK CAPITAL LETTER ALPHA` (`U+0391`) is
68
+ /// visually similar, but not identical, to the ASCII character `A`.
69
+ ///
70
+ /// The use of ambiguous Unicode characters can confuse readers and cause
57
71
/// subtle bugs.
58
72
///
73
+ /// In [preview], this rule will also flag Unicode characters that are
74
+ /// confusable with other, non-preferred Unicode characters. For example, the
75
+ /// spec recommends `GREEK CAPITAL LETTER OMEGA` over `OHM SIGN`.
76
+ ///
59
77
/// ## Example
60
78
/// ```python
61
79
/// """A lovely docstring (with a `U+FF09` parenthesis)."""
@@ -65,6 +83,8 @@ impl Violation for AmbiguousUnicodeCharacterString {
65
83
/// ```python
66
84
/// """A lovely docstring (with no strange parentheses)."""
67
85
/// ```
86
+ ///
87
+ /// [preview]: https://docs.astral.sh/ruff/preview/
68
88
#[ violation]
69
89
pub struct AmbiguousUnicodeCharacterDocstring {
70
90
confusable : char ,
@@ -87,12 +107,20 @@ impl Violation for AmbiguousUnicodeCharacterDocstring {
87
107
}
88
108
89
109
/// ## What it does
90
- /// Checks for ambiguous unicode characters in comments.
110
+ /// Checks for ambiguous Unicode characters in comments.
91
111
///
92
112
/// ## Why is this bad?
93
- /// The use of ambiguous unicode characters can confuse readers and cause
113
+ /// Some Unicode characters are visually similar to ASCII characters, but have
114
+ /// different code points. For example, `GREEK CAPITAL LETTER ALPHA` (`U+0391`) is
115
+ /// visually similar, but not identical, to the ASCII character `A`.
116
+ ///
117
+ /// The use of ambiguous Unicode characters can confuse readers and cause
94
118
/// subtle bugs.
95
119
///
120
+ /// In [preview], this rule will also flag Unicode characters that are
121
+ /// confusable with other, non-preferred Unicode characters. For example, the
122
+ /// spec recommends `GREEK CAPITAL LETTER OMEGA` over `OHM SIGN`.
123
+ ///
96
124
/// ## Example
97
125
/// ```python
98
126
/// foo() # nоqa # "о" is Cyrillic (`U+043E`)
@@ -102,6 +130,8 @@ impl Violation for AmbiguousUnicodeCharacterDocstring {
102
130
/// ```python
103
131
/// foo() # noqa # "o" is Latin (`U+006F`)
104
132
/// ```
133
+ ///
134
+ /// [preview]: https://docs.astral.sh/ruff/preview/
105
135
#[ violation]
106
136
pub struct AmbiguousUnicodeCharacterComment {
107
137
confusable : char ,
@@ -159,7 +189,9 @@ pub(crate) fn ambiguous_unicode_character(
159
189
// Check if the boundary character is itself an ambiguous Unicode character, in which
160
190
// case, it's always included as a diagnostic.
161
191
if !current_char. is_ascii ( ) {
162
- if let Some ( representant) = confusable ( current_char as u32 ) {
192
+ if let Some ( representant) = confusable ( current_char as u32 )
193
+ . filter ( |representant| settings. preview . is_enabled ( ) || representant. is_ascii ( ) )
194
+ {
163
195
let candidate = Candidate :: new (
164
196
TextSize :: try_from ( relative_offset) . unwrap ( ) + range. start ( ) ,
165
197
current_char,
@@ -173,7 +205,9 @@ pub(crate) fn ambiguous_unicode_character(
173
205
} else if current_char. is_ascii ( ) {
174
206
// The current word contains at least one ASCII character.
175
207
word_flags |= WordFlags :: ASCII ;
176
- } else if let Some ( representant) = confusable ( current_char as u32 ) {
208
+ } else if let Some ( representant) = confusable ( current_char as u32 )
209
+ . filter ( |representant| settings. preview . is_enabled ( ) || representant. is_ascii ( ) )
210
+ {
177
211
// The current word contains an ambiguous Unicode character.
178
212
word_candidates. push ( Candidate :: new (
179
213
TextSize :: try_from ( relative_offset) . unwrap ( ) + range. start ( ) ,
0 commit comments