Skip to content

Commit abd7a00

Browse files
Multi-decl overhaul (including inline struct fixes) (#657)
The most important changes: - Clean up the multi-decl detection and rewriting code, and unify the code paths for global variables, fields, and local variables. - Automatically generate names for unnamed inline structs if they need to be rewritten, and remove 3C's previous workarounds that tried to avoid needing to rewrite multi-decls with unnamed inline structs that 3C couldn't handle correctly. - Inside a struct, when rewriting a field multi-decl that contains an inline struct, correctly detect the presence of the inline struct to avoid losing it completely, and additionally move it to the top level to avoid a compiler warning. - Add support for typedef multi-decls on par with variable and field multi-decls. See #657 for more details.
1 parent 94cd56c commit abd7a00

30 files changed

+1531
-736
lines changed

clang/include/clang/3C/ConstraintVariables.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#define LLVM_CLANG_3C_CONSTRAINTVARIABLES_H
2727

2828
#include "clang/3C/Constraints.h"
29+
#include "clang/3C/MultiDecls.h"
2930
#include "clang/3C/OptionalParams.h"
3031
#include "clang/3C/ProgramVar.h"
3132
#include "clang/AST/ASTContext.h"
@@ -355,16 +356,17 @@ class PointerVariableConstraint : public ConstraintVariable {
355356
// declaration's base type. To preserve macros, this we first try to take
356357
// the type directly from source code. Where that is not possible, the type
357358
// is regenerated from the type in the clang AST.
358-
static std::string extractBaseType(DeclaratorDecl *D, TypeSourceInfo *TSI,
359-
QualType QT, const Type *Ty,
360-
const ASTContext &C);
359+
static std::string extractBaseType(MultiDeclMemberDecl *MMD,
360+
TypeSourceInfo *TSI, QualType QT,
361+
const Type *Ty, const ASTContext &C,
362+
ProgramInfo &Info);
361363

362364
// Try to extract string representation of the base type for a declaration
363365
// from the source code. If the base type cannot be extracted from source, an
364366
// empty string is returned instead.
365-
static std::string tryExtractBaseType(DeclaratorDecl *D, TypeSourceInfo *TSI,
366-
QualType QT, const Type *Ty,
367-
const ASTContext &C);
367+
static std::string tryExtractBaseType(MultiDeclMemberDecl *MMD,
368+
TypeSourceInfo *TSI, QualType QT,
369+
const Type *Ty, const ASTContext &C);
368370

369371
// Flag to indicate that this constraint is a part of function prototype
370372
// e.g., Parameters or Return.

clang/include/clang/3C/Constraints.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class ConstraintsGraph;
4343
#define SPECIAL_REASON(spec) (std::string("Special case for ") + (spec))
4444
#define STRING_LITERAL_REASON "The type of a string literal"
4545
#define MACRO_REASON "Pointer in Macro declaration."
46+
#define UNION_FIELD_REASON "Union field encountered"
4647

4748
template <typename T> struct PComp {
4849
bool operator()(const T Lhs, const T Rhs) const { return *Lhs < *Rhs; }

clang/include/clang/3C/DeclRewriter.h

Lines changed: 10 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ using namespace clang;
2626

2727
class DeclRewriter {
2828
public:
29-
DeclRewriter(Rewriter &R, ASTContext &A, GlobalVariableGroups &GP)
30-
: R(R), A(A), GP(GP) {}
29+
DeclRewriter(Rewriter &R, ProgramInfo &Info, ASTContext &A)
30+
: R(R), Info(Info), A(A) {}
3131

3232
// The publicly accessible interface for performing declaration rewriting.
3333
// All declarations for variables with checked types in the variable map of
@@ -40,41 +40,24 @@ class DeclRewriter {
4040
ArrayBoundsRewriter &ABR);
4141

4242
private:
43-
static RecordDecl *LastRecordDecl;
44-
static std::map<Decl *, Decl *> VDToRDMap;
45-
static std::set<Decl *> InlineVarDecls;
4643
Rewriter &R;
44+
ProgramInfo &Info;
4745
ASTContext &A;
48-
GlobalVariableGroups &GP;
4946

50-
// This set contains declarations that have already been rewritten as part of
51-
// a prior declaration that was in the same multi-declaration. It is checked
52-
// before rewriting in order to avoid rewriting a declaration more than once.
53-
// It is not used with individual declarations outside of multi-declarations
54-
// because these declarations are seen exactly once, rather than every time a
55-
// declaration in the containing multi-decl is visited.
56-
std::set<Decl *> VisitedMultiDeclMembers;
47+
// List of TagDecls that were split from multi-decls and should be moved out
48+
// of an enclosing RecordDecl to avoid a compiler warning. Filled during
49+
// multi-decl rewriting and processed by denestTagDecls.
50+
std::vector<TagDecl *> TagDeclsToDenest;
5751

5852
// Visit each Decl in ToRewrite and apply the appropriate pointer type
5953
// to that Decl. ToRewrite is the set of all declarations to rewrite.
6054
void rewrite(RSet &ToRewrite);
6155

62-
// Rewrite a specific variable declaration using the replacement string in the
63-
// DAndReplace structure. Each of these functions is specialized to handling
64-
// one subclass of declarations.
65-
template <typename DRType>
66-
void rewriteFieldOrVarDecl(DRType *N, RSet &ToRewrite);
67-
void rewriteMultiDecl(DeclReplacement *N, RSet &ToRewrite,
68-
std::vector<Decl *> SameLineDecls,
69-
bool ContainsInlineStruct);
70-
void rewriteSingleDecl(DeclReplacement *N, RSet &ToRewrite);
56+
void rewriteMultiDecl(MultiDeclInfo &MDI, RSet &ToRewrite);
7157
void doDeclRewrite(SourceRange &SR, DeclReplacement *N);
7258
void rewriteFunctionDecl(FunctionDeclReplacement *N);
73-
void rewriteTypedefDecl(TypedefDeclReplacement *TDT, RSet &ToRewrite);
74-
void getDeclsOnSameLine(DeclReplacement *N, std::vector<Decl *> &Decls);
75-
bool isSingleDeclaration(DeclReplacement *N);
76-
SourceRange getNextCommaOrSemicolon(SourceLocation L);
77-
static void detectInlineStruct(Decl *D, SourceManager &SM);
59+
SourceRange getNextComma(SourceLocation L);
60+
void denestTagDecls();
7861
};
7962

8063
// Visits function declarations and adds entries with their new rewritten
@@ -119,16 +102,4 @@ class FunctionDeclBuilder : public RecursiveASTVisitor<FunctionDeclBuilder> {
119102

120103
bool inParamMultiDecl(const ParmVarDecl *PVD);
121104
};
122-
123-
class FieldFinder : public RecursiveASTVisitor<FieldFinder> {
124-
public:
125-
FieldFinder(GlobalVariableGroups &GVG) : GVG(GVG) {}
126-
127-
bool VisitFieldDecl(FieldDecl *FD);
128-
129-
static void gatherSameLineFields(GlobalVariableGroups &GVG, Decl *D);
130-
131-
private:
132-
GlobalVariableGroups &GVG;
133-
};
134105
#endif // LLVM_CLANG_3C_DECLREWRITER_H

clang/include/clang/3C/MappingVisitor.h

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,37 +21,26 @@
2121
#include "clang/AST/ASTConsumer.h"
2222
#include "clang/AST/RecursiveASTVisitor.h"
2323

24-
typedef std::tuple<clang::Stmt *, clang::Decl *> StmtDecl;
25-
typedef std::map<PersistentSourceLoc, StmtDecl> SourceToDeclMapType;
26-
typedef std::pair<SourceToDeclMapType, VariableDecltoStmtMap>
27-
MappingResultsType;
24+
typedef std::map<PersistentSourceLoc, clang::Decl *> SourceToDeclMapType;
2825

2926
class MappingVisitor : public clang::RecursiveASTVisitor<MappingVisitor> {
3027
public:
3128
MappingVisitor(std::set<PersistentSourceLoc> S, clang::ASTContext &C)
3229
: SourceLocs(S), Context(C) {}
3330

34-
bool VisitDeclStmt(clang::DeclStmt *S);
35-
3631
bool VisitDecl(clang::Decl *D);
3732

38-
MappingResultsType getResults() {
39-
return std::pair<std::map<PersistentSourceLoc, StmtDecl>,
40-
VariableDecltoStmtMap>(PSLtoSDT, DeclToDeclStmt);
41-
}
33+
const SourceToDeclMapType &getResults() { return PSLtoSDT; }
4234

4335
private:
44-
// A map from a PersistentSourceLoc to a tuple describing a statement, decl
45-
// or type.
36+
// A map from a PersistentSourceLoc to a Decl at that location.
4637
SourceToDeclMapType PSLtoSDT;
4738
// The set of PersistentSourceLoc's this instance of MappingVisitor is tasked
48-
// with re-instantiating as either a Stmt, Decl or Type.
39+
// with re-instantiating as a Decl.
4940
std::set<PersistentSourceLoc> SourceLocs;
5041
// The ASTContext for the particular AST that the MappingVisitor is
5142
// traversing.
5243
clang::ASTContext &Context;
53-
// A mapping of individual Decls to the DeclStmt that contains them.
54-
VariableDecltoStmtMap DeclToDeclStmt;
5544
};
5645

5746
#endif

clang/include/clang/3C/MultiDecls.h

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
//=--MultiDecls.h-------------------------------------------------*- C++-*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// Code to deal with "multi-decls": constructs in which one or more identifiers
9+
// are declared in a comma-separated list based on a single type "on the left".
10+
// A simple example:
11+
//
12+
// struct my_struct x, *p;
13+
//===----------------------------------------------------------------------===//
14+
15+
#ifndef LLVM_CLANG_3C_MULTIDECLS_H
16+
#define LLVM_CLANG_3C_MULTIDECLS_H
17+
18+
#include "clang/3C/PersistentSourceLoc.h"
19+
#include "clang/AST/Decl.h"
20+
#include "clang/AST/RecursiveASTVisitor.h"
21+
#include "clang/AST/Type.h"
22+
#include "llvm/ADT/Optional.h"
23+
24+
using namespace clang;
25+
26+
// Some more information about multi-decls in the context of 3C:
27+
//
28+
// The "members" of a given multi-decl may be ordinary variables (VarDecls),
29+
// struct/union fields (FieldDecls), or typedefs (TypedefDecls), but all members
30+
// of a given multi-decl are of the same kind.
31+
//
32+
// If the "left type" of a multi-decl is a TagDecl, it may have an inline
33+
// definition; if it does, then the TagDecl may be unnamed. Examples:
34+
//
35+
// struct my_struct { int *y; } x, *p;
36+
// struct { int *y; } x, *p;
37+
//
38+
// Multi-decls (especially those with inline TagDecls) have historically been
39+
// tricky for 3C to rewrite. If the type of one member becomes a _Ptr (or
40+
// similar), then the left type of the members is no longer the same, so the
41+
// multi-decl must be broken up, for example:
42+
//
43+
// struct my_struct x;
44+
// _Ptr<struct my_struct> p;
45+
//
46+
// To keep the logic simpler, if 3C needs to change the type of at least one
47+
// member of a multi-decl, it breaks up all members of the multi-decl into
48+
// separate declarations. To preserve SourceLocations as much as possible and
49+
// avoid interfering with rewrites to any other constructs in the multi-decl
50+
// (e.g., within existing initializer expressions), this breakup is performed by
51+
// replacing the commas with semicolons in place and inserting additional
52+
// occurrences of the left type and any common qualifiers as needed.
53+
//
54+
// If there is an inline TagDecl, it is separated too and moved out of any
55+
// containing RecordDecl to avoid a compiler warning, and if the TagDecl is
56+
// unnamed, it is given an automatically generated name so that it can be
57+
// referenced by the new, separate declarations of the multi-decl members.
58+
// Example:
59+
//
60+
// static struct { int *y; } x, *p:
61+
//
62+
// ->
63+
//
64+
// struct x_struct_1 { _Ptr<int> y; };
65+
// static struct x_struct_1 x;
66+
// static _Ptr<struct x_struct_1> p;
67+
//
68+
// Exception: In a typedef multi-decl, if the _first_ member refers to the
69+
// TagDecl itself (not a pointer to it, etc.), then 3C uses that name for the
70+
// TagDecl rather than generating a new one. This produces nicer output for the
71+
// idiom:
72+
//
73+
// typedef struct { int *y; } FOO, *PFOO;
74+
//
75+
// ->
76+
//
77+
// typedef struct { _Ptr<int> y; } FOO;
78+
// typedef _Ptr<FOO> PFOO;
79+
//
80+
// The multi-decl code is used even for "multi-decls" of VarDecls, FieldDecls,
81+
// or TypedefDecls that have only a single member to avoid having to maintain a
82+
// separate code path for them. But a multi-decl always has at least one member;
83+
// a pure TagDecl such as `struct my_struct { int *y; };` is _not_ considered a
84+
// multi-decl. ParmVarDecls are handled differently. In fact, ParmVarDecls with
85+
// inline TagDecls are known to be handled poorly, but that's a rare and poor
86+
// practice and it's not easy to handle them better.
87+
88+
// Currently, we automatically generate a name for every unnamed TagDecl defined
89+
// in a multi-decl and use the name in ConstraintVariables, but we only insert
90+
// the name into the definition if the multi-decl gets rewritten for some other
91+
// reason. This solves the common case of allowing the types of all the
92+
// multi-decl members to refer to the TagDecl, but it doesn't address cases in
93+
// which 3C might need to insert a reference to the unnamed TagDecl elsewhere
94+
// even if the multi-decl isn't being rewritten. In these cases, 3C typically
95+
// uses the generated name even though it is not defined, causing a compile
96+
// error that the user has to correct manually. The problematic cases include:
97+
//
98+
// - Type argument insertion. TypeVariableEntry has a check for
99+
// `isTypeAnonymous`, but it has at least one bug (it misses double pointers).
100+
//
101+
// - Cast insertion, potentially. I was unable to find an example, but that
102+
// doesn't mean it will never happen, especially with future changes to the
103+
// code.
104+
//
105+
// - Typedef itype insertion.
106+
//
107+
// One approach to try to rule out all of these bugs at once would be to
108+
// preemptively rewrite all multi-decls containing unnamed TagDecls, but those
109+
// changes might be undesirable or could even cause errors in the presence of
110+
// macros, etc. Or we could try to add the necessary code so that insertion of a
111+
// reference to an unnamed TagDecl would trigger insertion of the name into the
112+
// definition. For now, we don't deal with the problem.
113+
114+
// Implementation note: The Clang AST does not represent multi-decls explicitly
115+
// (except in functions, where they are represented by DeclStmts). In other
116+
// contexts, we detect them based on the property that the beginning
117+
// SourceLocation of all the members is the same. And as long as we are making
118+
// this assumption, we use it in functions too rather than having a separate
119+
// code path that looks for DeclStmts.
120+
121+
// NamedDecl is the nearest common superclass of all Decl subtypes that can be
122+
// multi-decl members. There is no enforcement that a MultiDeclMemberDecl is
123+
// actually one of the allowed subtypes, so use of the MultiDeclMemberDecl
124+
// typedef serves as documentation only. (If we wanted to enforce it, we'd need
125+
// a wrapper object of some kind, which currently seems to be more trouble than
126+
// it's worth.)
127+
typedef NamedDecl MultiDeclMemberDecl;
128+
129+
// Returns D if it can be a multi-decl member, otherwise null.
130+
MultiDeclMemberDecl *getAsMultiDeclMember(Decl *D);
131+
132+
// Helpers to cope with the different APIs to do corresponding things with a
133+
// TypedefDecl or DeclaratorDecl.
134+
QualType getTypeOfMultiDeclMember(MultiDeclMemberDecl *MMD);
135+
TypeSourceInfo *getTypeSourceInfoOfMultiDeclMember(MultiDeclMemberDecl *MMD);
136+
137+
struct MultiDeclInfo {
138+
// The TagDecl that is defined inline in the multi-decl and needs to be split
139+
// from it during rewriting, if any, otherwise null. In a case like
140+
// `typedef struct { ... } T`, there is an inline tag definition but we don't
141+
// need to split it out, so this will be null.
142+
TagDecl *TagDefToSplit = nullptr;
143+
144+
// True if the base type was an unnamed TagDecl defined inline for which we
145+
// are using a new name. Note that TagDefToSplit can be nonnull and
146+
// BaseTypeRenamed can be false if the inline TagDecl was named, and the
147+
// reverse can occur in the `typedef struct { ... } T` case.
148+
bool BaseTypeRenamed = false;
149+
150+
// The members of the multi-decl in their original order.
151+
std::vector<MultiDeclMemberDecl *> Members;
152+
153+
// Set by DeclRewriter::rewriteMultiDecl after it rewrites the entire
154+
// multi-decl to ensure that it doesn't try to do so more than once if
155+
// multiple members needed changes.
156+
bool AlreadyRewritten = false;
157+
};
158+
159+
struct TUMultiDeclsInfo {
160+
// All multi-decls, keyed by the common beginning source location of their
161+
// members. Note that the beginning source location of TagDefToSplit may be
162+
// later if there is a keyword such as `static` or `typedef` in between.
163+
std::map<SourceLocation, MultiDeclInfo> MultiDeclsByBeginLoc;
164+
165+
// Map from a tag definition to its containing multi-decl (if it is part of
166+
// one). Note that the TagDefToSplit of the MultiDeclInfo is not guaranteed to
167+
// equal the TagDecl: it may be null in the `typedef struct { ... } T` case.
168+
//
169+
// Note that the MultiDeclInfo pointers remain valid for as long as the
170+
// MultiDeclInfo objects remain in MultiDeclsByBeginLoc: see
171+
// https://en.cppreference.com/w/cpp/container#Iterator_invalidation.
172+
std::map<TagDecl *, MultiDeclInfo *> ContainingMultiDeclOfTagDecl;
173+
};
174+
175+
class ProgramMultiDeclsInfo {
176+
private:
177+
// Set of TagDecl names already used at least once in the program, so we can
178+
// avoid colliding with them.
179+
std::set<std::string> UsedTagNames;
180+
181+
// Information about an originally unnamed tag definition in a multi-decl for
182+
// which we're using a new name.
183+
struct RenamedTagDefInfo {
184+
// The new string that should be used to refer to the type of the TagDecl.
185+
// Unlike UsedTagNames, this includes the tag kind keyword (such as
186+
// `struct`), except when we use an existing typedef (which doesn't require
187+
// a tag keyword).
188+
std::string AssignedTypeStr;
189+
// Whether the TagDecl should be split from the multi-decl. True except when
190+
// we use an existing typedef.
191+
bool ShouldSplit;
192+
};
193+
194+
// Map from PSL of a TagDecl to its RenamedTagDefInfo, to ensure that we
195+
// handle the TagDecl consistently when 3C naively rewrites the same header
196+
// file multiple times as part of different translation units (see
197+
// https://github.com/correctcomputation/checkedc-clang/issues/374#issuecomment-804283984).
198+
std::map<PersistentSourceLoc, RenamedTagDefInfo> RenamedTagDefs;
199+
200+
std::map<ASTContext *, TUMultiDeclsInfo> TUInfos;
201+
202+
// Recursive helpers.
203+
void findUsedTagNames(DeclContext *DC);
204+
void findMultiDecls(DeclContext *DC, ASTContext &Context);
205+
206+
public:
207+
void findUsedTagNames(ASTContext &Context);
208+
void findMultiDecls(ASTContext &Context);
209+
llvm::Optional<std::string> getTypeStrOverride(const Type *Ty,
210+
const ASTContext &C);
211+
MultiDeclInfo *findContainingMultiDecl(MultiDeclMemberDecl *MMD);
212+
MultiDeclInfo *findContainingMultiDecl(TagDecl *TD);
213+
bool wasBaseTypeRenamed(Decl *D);
214+
};
215+
216+
#endif // LLVM_CLANG_3C_MULTIDECLS_H

0 commit comments

Comments
 (0)