Skip to content

Commit f88a294

Browse files
owenca authored and tstellar committed
[clang-format] Fix a bug that changes keyword `or` to an identifier (llvm#128410)
Backports ffc61dc, 0968df9, and 2d585cc. Fixes llvm#105482.
1 parent b23c3cc commit f88a294

File tree

11 files changed

+106
-48
lines changed

11 files changed

+106
-48
lines changed

clang/docs/ClangFormatStyleOptions.rst

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4735,15 +4735,24 @@ the configuration (without a prefix: ``Auto``).
47354735
.. _Language:
47364736

47374737
**Language** (``LanguageKind``) :versionbadge:`clang-format 3.5` :ref:`<Language>`
4738-
Language, this format style is targeted at.
4738+
The language that this format style targets.
4739+
4740+
.. note::
4741+
4742+
You can specify the language (``C``, ``Cpp``, or ``ObjC``) for ``.h``
4743+
files by adding a ``// clang-format Language:`` line before the first
4744+
non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``.
47394745

47404746
Possible values:
47414747

47424748
* ``LK_None`` (in configuration: ``None``)
47434749
Do not use.
47444750

4751+
* ``LK_C`` (in configuration: ``C``)
4752+
Should be used for C.
4753+
47454754
* ``LK_Cpp`` (in configuration: ``Cpp``)
4746-
Should be used for C, C++.
4755+
Should be used for C++.
47474756

47484757
* ``LK_CSharp`` (in configuration: ``CSharp``)
47494758
Should be used for C#.

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1358,6 +1358,10 @@ clang-format
13581358
- Adds ``WrapNamespaceBodyWithEmptyLines`` option.
13591359
- Adds the ``IndentExportBlock`` option.
13601360
- Adds ``PenaltyBreakBeforeMemberAccess`` option.
1361+
- Add the C language instead of treating it like C++.
1362+
- Allow specifying the language (C, C++, or Objective-C) for a ``.h`` file by
1363+
adding a special comment (e.g. ``// clang-format Language: ObjC``) near the
1364+
top of the file.
13611365

13621366
libclang
13631367
--------

clang/include/clang/Format/Format.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3275,7 +3275,9 @@ struct FormatStyle {
32753275
enum LanguageKind : int8_t {
32763276
/// Do not use.
32773277
LK_None,
3278-
/// Should be used for C, C++.
3278+
/// Should be used for C.
3279+
LK_C,
3280+
/// Should be used for C++.
32793281
LK_Cpp,
32803282
/// Should be used for C#.
32813283
LK_CSharp,
@@ -3300,7 +3302,9 @@ struct FormatStyle {
33003302
/// https://sci-hub.st/10.1109/IEEESTD.2018.8299595
33013303
LK_Verilog
33023304
};
3303-
bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; }
3305+
bool isCpp() const {
3306+
return Language == LK_Cpp || Language == LK_C || Language == LK_ObjC;
3307+
}
33043308
bool isCSharp() const { return Language == LK_CSharp; }
33053309
bool isJson() const { return Language == LK_Json; }
33063310
bool isJavaScript() const { return Language == LK_JavaScript; }
@@ -3310,7 +3314,12 @@ struct FormatStyle {
33103314
}
33113315
bool isTableGen() const { return Language == LK_TableGen; }
33123316

3313-
/// Language, this format style is targeted at.
3317+
/// The language that this format style targets.
3318+
/// \note
3319+
/// You can specify the language (``C``, ``Cpp``, or ``ObjC``) for ``.h``
3320+
/// files by adding a ``// clang-format Language:`` line before the first
3321+
/// non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``.
3322+
/// \endnote
33143323
/// \version 3.5
33153324
LanguageKind Language;
33163325

@@ -5665,6 +5674,8 @@ FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code);
56655674
// Returns a string representation of ``Language``.
56665675
inline StringRef getLanguageName(FormatStyle::LanguageKind Language) {
56675676
switch (Language) {
5677+
case FormatStyle::LK_C:
5678+
return "C";
56685679
case FormatStyle::LK_Cpp:
56695680
return "C++";
56705681
case FormatStyle::LK_CSharp:

clang/lib/Format/Format.cpp

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ template <> struct MappingTraits<FormatStyle::KeepEmptyLinesStyle> {
401401

402402
template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
403403
static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
404+
IO.enumCase(Value, "C", FormatStyle::LK_C);
404405
IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
405406
IO.enumCase(Value, "Java", FormatStyle::LK_Java);
406407
IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
@@ -3952,7 +3953,12 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) {
39523953
LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11;
39533954

39543955
LangOpts.LineComment = 1;
3955-
LangOpts.CXXOperatorNames = Style.isCpp();
3956+
3957+
const auto Language = Style.Language;
3958+
LangOpts.C17 = Language == FormatStyle::LK_C;
3959+
LangOpts.CXXOperatorNames =
3960+
Language == FormatStyle::LK_Cpp || Language == FormatStyle::LK_ObjC;
3961+
39563962
LangOpts.Bool = 1;
39573963
LangOpts.ObjC = 1;
39583964
LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
@@ -3977,6 +3983,8 @@ const char *StyleOptionHelpDescription =
39773983
" --style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
39783984

39793985
static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
3986+
if (FileName.ends_with(".c"))
3987+
return FormatStyle::LK_C;
39803988
if (FileName.ends_with(".java"))
39813989
return FormatStyle::LK_Java;
39823990
if (FileName.ends_with_insensitive(".js") ||
@@ -4016,6 +4024,35 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
40164024
return FormatStyle::LK_Cpp;
40174025
}
40184026

4027+
static FormatStyle::LanguageKind getLanguageByComment(const Environment &Env) {
4028+
const auto ID = Env.getFileID();
4029+
const auto &SourceMgr = Env.getSourceManager();
4030+
4031+
LangOptions LangOpts;
4032+
LangOpts.CPlusPlus = 1;
4033+
LangOpts.LineComment = 1;
4034+
4035+
Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
4036+
Lex.SetCommentRetentionState(true);
4037+
4038+
for (Token Tok; !Lex.LexFromRawLexer(Tok) && Tok.is(tok::comment);) {
4039+
auto Text = StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
4040+
Tok.getLength());
4041+
if (!Text.consume_front("// clang-format Language:"))
4042+
continue;
4043+
4044+
Text = Text.trim();
4045+
if (Text == "C")
4046+
return FormatStyle::LK_C;
4047+
if (Text == "Cpp")
4048+
return FormatStyle::LK_Cpp;
4049+
if (Text == "ObjC")
4050+
return FormatStyle::LK_ObjC;
4051+
}
4052+
4053+
return FormatStyle::LK_None;
4054+
}
4055+
40194056
FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) {
40204057
const auto GuessedLanguage = getLanguageByFileName(FileName);
40214058
if (GuessedLanguage == FormatStyle::LK_Cpp) {
@@ -4025,6 +4062,10 @@ FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) {
40254062
if (!Code.empty() && (Extension.empty() || Extension == ".h")) {
40264063
auto NonEmptyFileName = FileName.empty() ? "guess.h" : FileName;
40274064
Environment Env(Code, NonEmptyFileName, /*Ranges=*/{});
4065+
if (const auto Language = getLanguageByComment(Env);
4066+
Language != FormatStyle::LK_None) {
4067+
return Language;
4068+
}
40284069
ObjCHeaderStyleGuesser Guesser(Env, getLLVMStyle());
40294070
Guesser.process();
40304071
if (Guesser.isObjC())

clang/lib/Format/FormatToken.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@ static SmallVector<StringRef> CppNonKeywordTypes = {
4242
};
4343

4444
bool FormatToken::isTypeName(const LangOptions &LangOpts) const {
45-
const bool IsCpp = LangOpts.CXXOperatorNames;
46-
return is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts) ||
47-
(IsCpp && is(tok::identifier) &&
48-
std::binary_search(CppNonKeywordTypes.begin(),
49-
CppNonKeywordTypes.end(), TokenText));
45+
if (is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts))
46+
return true;
47+
return (LangOpts.CXXOperatorNames || LangOpts.C17) && is(tok::identifier) &&
48+
std::binary_search(CppNonKeywordTypes.begin(),
49+
CppNonKeywordTypes.end(), TokenText);
5050
}
5151

5252
bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const {

clang/lib/Format/FormatToken.h

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -743,29 +743,6 @@ struct FormatToken {
743743
return isOneOf(tok::star, tok::amp, tok::ampamp);
744744
}
745745

746-
bool isCppAlternativeOperatorKeyword() const {
747-
assert(!TokenText.empty());
748-
if (!isalpha(TokenText[0]))
749-
return false;
750-
751-
switch (Tok.getKind()) {
752-
case tok::ampamp:
753-
case tok::ampequal:
754-
case tok::amp:
755-
case tok::pipe:
756-
case tok::tilde:
757-
case tok::exclaim:
758-
case tok::exclaimequal:
759-
case tok::pipepipe:
760-
case tok::pipeequal:
761-
case tok::caret:
762-
case tok::caretequal:
763-
return true;
764-
default:
765-
return false;
766-
}
767-
}
768-
769746
bool isUnaryOperator() const {
770747
switch (Tok.getKind()) {
771748
case tok::plus:

clang/lib/Format/TokenAnnotator.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ class AnnotatingParser {
129129
: Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
130130
IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)),
131131
Keywords(Keywords), Scopes(Scopes), TemplateDeclarationDepth(0) {
132-
assert(IsCpp == LangOpts.CXXOperatorNames);
132+
assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17));
133133
Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
134134
resetTokenMetadata();
135135
}
@@ -3820,7 +3820,7 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts,
38203820
};
38213821

38223822
const auto *Next = Current.Next;
3823-
const bool IsCpp = LangOpts.CXXOperatorNames;
3823+
const bool IsCpp = LangOpts.CXXOperatorNames || LangOpts.C17;
38243824

38253825
// Find parentheses of parameter list.
38263826
if (Current.is(tok::kw_operator)) {

clang/lib/Format/TokenAnnotator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ class TokenAnnotator {
225225
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
226226
: Style(Style), IsCpp(Style.isCpp()),
227227
LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {
228-
assert(IsCpp == LangOpts.CXXOperatorNames);
228+
assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17));
229229
}
230230

231231
/// Adapts the indent levels of comment lines to the indent of the

clang/lib/Format/UnwrappedLineParser.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ UnwrappedLineParser::UnwrappedLineParser(
168168
: IG_Inited),
169169
IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
170170
Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
171-
assert(IsCpp == LangOpts.CXXOperatorNames);
171+
assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17));
172172
}
173173

174174
void UnwrappedLineParser::reset() {
@@ -1712,12 +1712,6 @@ void UnwrappedLineParser::parseStructuralElement(
17121712
OpeningBrace && OpeningBrace->isOneOf(TT_RequiresExpressionLBrace,
17131713
TT_CompoundRequirementLBrace);
17141714
!eof();) {
1715-
if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
1716-
if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
1717-
Next && Next->isBinaryOperator()) {
1718-
FormatTok->Tok.setKind(tok::identifier);
1719-
}
1720-
}
17211715
const FormatToken *Previous = FormatTok->Previous;
17221716
switch (FormatTok->Tok.getKind()) {
17231717
case tok::at:

clang/unittests/Format/FormatTest.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17784,9 +17784,11 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeAssignmentOperators) {
1778417784
verifyFormat("int a = 5;");
1778517785
verifyFormat("a += 42;");
1778617786
verifyFormat("a or_eq 8;");
17787-
verifyFormat("xor = foo;");
1778817787

17789-
FormatStyle Spaces = getLLVMStyle();
17788+
auto Spaces = getLLVMStyle(FormatStyle::LK_C);
17789+
verifyFormat("xor = foo;", Spaces);
17790+
17791+
Spaces.Language = FormatStyle::LK_Cpp;
1779017792
Spaces.SpaceBeforeAssignmentOperators = false;
1779117793
verifyFormat("int a= 5;", Spaces);
1779217794
verifyFormat("a+= 42;", Spaces);
@@ -24683,6 +24685,7 @@ TEST_F(FormatTest, StructuredBindings) {
2468324685
}
2468424686

2468524687
TEST_F(FormatTest, FileAndCode) {
24688+
EXPECT_EQ(FormatStyle::LK_C, guessLanguage("foo.c", ""));
2468624689
EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.cc", ""));
2468724690
EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.m", ""));
2468824691
EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.mm", ""));
@@ -24848,6 +24851,18 @@ TEST_F(FormatTest, GuessLanguageWithChildLines) {
2484824851
guessLanguage("foo.h", "#define FOO ({ foo(); ({ NSString *s; }) })"));
2484924852
}
2485024853

24854+
TEST_F(FormatTest, GetLanguageByComment) {
24855+
EXPECT_EQ(FormatStyle::LK_C,
24856+
guessLanguage("foo.h", "// clang-format Language: C\n"
24857+
"int i;"));
24858+
EXPECT_EQ(FormatStyle::LK_Cpp,
24859+
guessLanguage("foo.h", "// clang-format Language: Cpp\n"
24860+
"int DoStuff(CGRect rect);"));
24861+
EXPECT_EQ(FormatStyle::LK_ObjC,
24862+
guessLanguage("foo.h", "// clang-format Language: ObjC\n"
24863+
"int i;"));
24864+
}
24865+
2485124866
TEST_F(FormatTest, TypenameMacros) {
2485224867
std::vector<std::string> TypenameMacros = {"STACK_OF", "LIST", "TAILQ_ENTRY"};
2485324868

clang/unittests/Format/TokenAnnotatorTest.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3646,6 +3646,11 @@ TEST_F(TokenAnnotatorTest, CppAltOperatorKeywords) {
36463646
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
36473647
EXPECT_TOKEN(Tokens[3], tok::pipepipe, TT_BinaryOperator);
36483648

3649+
Tokens = annotate("return segment < *this or *this < segment;");
3650+
ASSERT_EQ(Tokens.size(), 12u) << Tokens;
3651+
EXPECT_TOKEN(Tokens[5], tok::pipepipe, TT_BinaryOperator);
3652+
EXPECT_TOKEN(Tokens[6], tok::star, TT_UnaryOperator);
3653+
36493654
Tokens = annotate("a = b or_eq c;");
36503655
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
36513656
EXPECT_TOKEN(Tokens[3], tok::pipeequal, TT_BinaryOperator);
@@ -3658,11 +3663,13 @@ TEST_F(TokenAnnotatorTest, CppAltOperatorKeywords) {
36583663
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
36593664
EXPECT_TOKEN(Tokens[3], tok::caretequal, TT_BinaryOperator);
36603665

3661-
Tokens = annotate("xor = foo;");
3666+
const auto StyleC = getLLVMStyle(FormatStyle::LK_C);
3667+
3668+
Tokens = annotate("xor = foo;", StyleC);
36623669
ASSERT_EQ(Tokens.size(), 5u) << Tokens;
36633670
EXPECT_TOKEN(Tokens[0], tok::identifier, TT_Unknown);
36643671

3665-
Tokens = annotate("int xor = foo;");
3672+
Tokens = annotate("int xor = foo;", StyleC);
36663673
ASSERT_EQ(Tokens.size(), 6u) << Tokens;
36673674
EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
36683675
}

0 commit comments

Comments
 (0)