Skip to content

Commit b972313

Browse files
committed
Provide aliases instead of Cpp1 multi-word keywords, and require them
For example, write `ushort` instead of `unsigned short int`. Multi-word spellings are rejected with a diagnostic that encourages using fixed-width types instead. Sample diagnostic: ``` test.cpp2(7,8): error: 'signed char' - did you mean 'i8' (usually best) or '__schar'? test.cpp2(7,8): error: 'signed char' is an old-style C/C++ multi-word keyword type - most such types should be used only for interoperability with older code - using those when you need them is fine, but name them with these short names instead: ushort, uint, ulong, longlong, ulonglong, __schar, __uchar - see also cpp2util.h > "Convenience names for integer types"```
1 parent 7554108 commit b972313

6 files changed

+107
-28
lines changed

include/cpp2util.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,13 +223,15 @@ namespace cpp2 {
223223

224224
//-----------------------------------------------------------------------
225225
//
226-
// Convenience names for fixed-width integer types
226+
// Convenience names for fundamental types
227227
//
228228
// Note: De jure, some of these are optional per the C and C++ standards
229229
// De facto, all of these are supported in all implementations I know of
230230
//
231231
//-----------------------------------------------------------------------
232232
//
233+
234+
// Encouraged by default: Fixed-precision names
233235
using i8 = std::int8_t ;
234236
using i16 = std::int16_t ;
235237
using i32 = std::int32_t ;
@@ -239,6 +241,7 @@ using u16 = std::uint16_t ;
239241
using u32 = std::uint32_t ;
240242
using u64 = std::uint64_t ;
241243

244+
// Rarely, when really needed for speed optimization: Fastest type with at least N bits
242245
using i8_fast = std::int_fast8_t ;
243246
using i16_fast = std::int_fast16_t ;
244247
using i32_fast = std::int_fast32_t ;
@@ -248,6 +251,7 @@ using u16_fast = std::uint_fast16_t ;
248251
using u32_fast = std::uint_fast32_t ;
249252
using u64_fast = std::uint_fast64_t ;
250253

254+
// Rarely, when really needed for space optimization: Smallest type with at least N bits
251255
using i8_small = std::int_least8_t ;
252256
using i16_small = std::int_least16_t ;
253257
using i32_small = std::int_least32_t ;
@@ -257,6 +261,20 @@ using u16_small = std::uint_least16_t;
257261
using u32_small = std::uint_least32_t;
258262
using u64_small = std::uint_least64_t;
259263

264+
//  Discouraged: Variable precision names
//
//  Single-word spellings for the Cpp1 multi-word keyword types. Each
//  indented comment names a keyword type that is already a single word
//  (and so needs no alias); the aliases that follow it are the related
//  multi-word types.
//      short
using ushort        = unsigned short;
//      int
//  'uint' is suggested by the lexer's multi-word-keyword diagnostic, so it
//  has to be declared here alongside the other aliases.
using uint          = unsigned int;
//      long
using ulong         = unsigned long;
using longlong      = long long;
using ulonglong     = unsigned long long;
using longdouble    = long double;

//  Strongly discouraged, for compatibility/interop only
//  NOTE(review): identifiers containing a double underscore are reserved to
//  the implementation in C++; the names are kept as-is here because the
//  lexer diagnostics refer to them -- consider renaming both together.
using __schar       = signed char;      // normally use i8 instead
using __uchar       = unsigned char;    // normally use u8 instead
277+
260278

261279
//-----------------------------------------------------------------------
262280
//
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
pure2-cpp1-multitoken-fundamental-types-error.cpp2...
2+
pure2-cpp1-multitoken-fundamental-types-error.cpp2(3,8): error: 'signed short int' - did you mean 'short'?
3+
pure2-cpp1-multitoken-fundamental-types-error.cpp2(3,8): error: 'signed short int' is an old-style C/C++ multi-word keyword type
4+
- most such types should be used only for interoperability with older code
5+
- using those when you need them is fine, but name them with these short names instead:
6+
ushort, uint, ulong, longlong, ulonglong, __schar, __uchar
7+
- see also cpp2util.h > "Convenience names for integer types"
8+
pure2-cpp1-multitoken-fundamental-types-error.cpp2(4,8): error: 'short int signed' - did you mean 'short'?
9+
pure2-cpp1-multitoken-fundamental-types-error.cpp2(4,8): error: 'short int signed' is an old-style C/C++ multi-word keyword type
10+
- most such types should be used only for interoperability with older code
11+
- using those when you need them is fine, but name them with these short names instead:
12+
ushort, uint, ulong, longlong, ulonglong, __schar, __uchar
13+
- see also cpp2util.h > "Convenience names for integer types"
14+
pure2-cpp1-multitoken-fundamental-types-error.cpp2(5,8): error: 'long long unsigned int' - did you mean 'ulonglong'?
15+
pure2-cpp1-multitoken-fundamental-types-error.cpp2(5,8): error: 'long long unsigned int' is an old-style C/C++ multi-word keyword type
16+
- most such types should be used only for interoperability with older code
17+
- using those when you need them is fine, but name them with these short names instead:
18+
ushort, uint, ulong, longlong, ulonglong, __schar, __uchar
19+
- see also cpp2util.h > "Convenience names for integer types"
20+
pure2-cpp1-multitoken-fundamental-types-error.cpp2(6,8): error: 'long double' - did you mean 'long'?
21+
pure2-cpp1-multitoken-fundamental-types-error.cpp2(6,8): error: 'long double' is an old-style C/C++ multi-word keyword type
22+
- most such types should be used only for interoperability with older code
23+
- using those when you need them is fine, but name them with these short names instead:
24+
ushort, uint, ulong, longlong, ulonglong, __schar, __uchar
25+
- see also cpp2util.h > "Convenience names for integer types"
26+
pure2-cpp1-multitoken-fundamental-types-error.cpp2(7,8): error: 'unsigned char' - did you mean 'u8' (usually best) or '__uchar'?
27+
pure2-cpp1-multitoken-fundamental-types-error.cpp2(7,8): error: 'unsigned char' is an old-style C/C++ multi-word keyword type
28+
- most such types should be used only for interoperability with older code
29+
- using those when you need them is fine, but name them with these short names instead:
30+
ushort, uint, ulong, longlong, ulonglong, __schar, __uchar
31+
- see also cpp2util.h > "Convenience names for integer types"
32+

regression-tests/test-results/pure2-cpp1-multitoken-fundamental-types.cpp

Lines changed: 0 additions & 21 deletions
This file was deleted.

regression-tests/test-results/pure2-cpp1-multitoken-fundamental-types.cpp2.output

Lines changed: 0 additions & 2 deletions
This file was deleted.

source/lex.h

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -350,13 +350,34 @@ auto lex_line(
350350

351351
assert(tokens.back().type() == lexeme::Cpp1MultiKeyword);
352352
auto pos = tokens.back().position();
353-
generated_text.push_back( tokens.back().to_string(true) );
354-
tokens.pop_back();
355353

356-
while( !tokens.empty() && tokens.back().type() == lexeme::Cpp1MultiKeyword) {
357-
generated_text.back() = tokens.back().to_string(true) + " " + generated_text.back();
354+
auto num_merged_tokens = 0;
355+
auto is_char = 0;
356+
auto is_short = 0;
357+
auto is_int = 0;
358+
auto is_long = 0;
359+
auto is_double = 0;
360+
auto is_signed = 0;
361+
auto is_unsigned = 0;
362+
generated_text.push_back( "" );
363+
while( !tokens.empty() && tokens.back().type() == lexeme::Cpp1MultiKeyword)
364+
{
365+
auto text = tokens.back().to_string(true);
366+
if (text == "char" ) { ++is_char ; }
367+
if (text == "short" ) { ++is_short ; }
368+
if (text == "int" ) { ++is_int ; }
369+
if (text == "long" ) { ++is_long ; }
370+
if (text == "double" ) { ++is_double ; }
371+
if (text == "signed" ) { ++is_signed ; }
372+
if (text == "unsigned") { ++is_unsigned; }
373+
374+
if (num_merged_tokens > 0) {
375+
generated_text.back() = " " + generated_text.back();
376+
}
377+
generated_text.back() = text + generated_text.back();
358378
pos = tokens.back().position();
359379
tokens.pop_back();
380+
++num_merged_tokens;
360381
}
361382

362383
tokens.push_back({
@@ -366,6 +387,37 @@ auto lex_line(
366387
lexeme::Keyword
367388
});
368389

390+
//  Two or more adjacent Cpp1 keyword tokens were merged into a single type
//  name: reject the multi-word spelling, and where a single-word alias
//  exists, first emit a short "did you mean" hint naming it.
if (num_merged_tokens > 1)
{
    auto const merged_name = tokens.back().to_string(true);

    //  Choose the suggested replacement. Order matters: 'long double' has
    //  to be tested before the integral 'long' cases, otherwise it matches
    //  'is_long == 1 && !is_unsigned' and is misreported as plain 'long'.
    auto alt = std::string{};
    if      (is_double && is_long)          { alt = "'longdouble'"                    ; }
    else if (is_char   && is_signed)        { alt = "'i8' (usually best) or '__schar'"; }
    else if (is_char   && is_unsigned)      { alt = "'u8' (usually best) or '__uchar'"; }
    else if (is_short  && !is_unsigned)     { alt = "'short'"                         ; }
    else if (is_short  && is_unsigned)      { alt = "'ushort'"                        ; }
    else if (is_long == 1 && !is_unsigned)  { alt = "'long'"                          ; }
    else if (is_long == 1 && is_unsigned)   { alt = "'ulong'"                         ; }
    else if (is_long >  1 && !is_unsigned)  { alt = "'longlong'"                      ; }
    else if (is_long >  1 && is_unsigned)   { alt = "'ulonglong'"                     ; }
    else if (is_int    && !is_unsigned)     { alt = "'int'"                           ; }
    else if (is_int    && is_unsigned)      { alt = "'uint'"                          ; }

    //  Specific hint, only when a replacement was recognized
    if (!alt.empty()) {
        errors.emplace_back(
            pos,
            "'" + merged_name + "' - did you mean " + alt + "?"
        );
    }

    //  General explanation, always emitted for a multi-word type name.
    //  The alias list includes 'longdouble', and the header section is
    //  cited by its current title.
    errors.emplace_back(
        pos,
        "'" + merged_name + "' is an old-style C/C++ multi-word keyword type\n"
        "    - most such types should be used only for interoperability with older code\n"
        "    - using those when you need them is fine, but name them with these short names instead:\n"
        "        ushort, uint, ulong, longlong, ulonglong, longdouble, __schar, __uchar\n"
        "    - see also cpp2util.h > \"Convenience names for fundamental types\""
    );
}
420+
369421
tokens.push_back(last_token);
370422
};
371423

0 commit comments

Comments
 (0)