|
| 1 | +//=--MultiDecls.h-------------------------------------------------*- C++-*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// Code to deal with "multi-decls": constructs in which one or more identifiers |
| 9 | +// are declared in a comma-separated list based on a single type "on the left". |
| 10 | +// A simple example: |
| 11 | +// |
| 12 | +// struct my_struct x, *p; |
| 13 | +//===----------------------------------------------------------------------===// |
| 14 | + |
| 15 | +#ifndef LLVM_CLANG_3C_MULTIDECLS_H |
| 16 | +#define LLVM_CLANG_3C_MULTIDECLS_H |
| 17 | + |
| 18 | +#include "clang/3C/PersistentSourceLoc.h" |
| 19 | +#include "clang/AST/Decl.h" |
| 20 | +#include "clang/AST/RecursiveASTVisitor.h" |
| 21 | +#include "clang/AST/Type.h" |
| 22 | +#include "llvm/ADT/Optional.h" |
| 23 | + |
| 24 | +using namespace clang; |
| 25 | + |
| 26 | +// Some more information about multi-decls in the context of 3C: |
| 27 | +// |
| 28 | +// The "members" of a given multi-decl may be ordinary variables (VarDecls), |
| 29 | +// struct/union fields (FieldDecls), or typedefs (TypedefDecls), but all members |
| 30 | +// of a given multi-decl are of the same kind. |
| 31 | +// |
| 32 | +// If the "left type" of a multi-decl is a TagDecl, it may have an inline |
| 33 | +// definition; if it does, then the TagDecl may be unnamed. Examples: |
| 34 | +// |
| 35 | +// struct my_struct { int *y; } x, *p; |
| 36 | +// struct { int *y; } x, *p; |
| 37 | +// |
| 38 | +// Multi-decls (especially those with inline TagDecls) have historically been |
| 39 | +// tricky for 3C to rewrite. If the type of one member becomes a _Ptr (or |
| 40 | +// similar), then the left type of the members is no longer the same, so the |
| 41 | +// multi-decl must be broken up, for example: |
| 42 | +// |
| 43 | +// struct my_struct x; |
| 44 | +// _Ptr<struct my_struct> p; |
| 45 | +// |
| 46 | +// To keep the logic simpler, if 3C needs to change the type of at least one |
| 47 | +// member of a multi-decl, it breaks up all members of the multi-decl into |
| 48 | +// separate declarations. To preserve SourceLocations as much as possible and |
| 49 | +// avoid interfering with rewrites to any other constructs in the multi-decl |
| 50 | +// (e.g., within existing initializer expressions), this breakup is performed by |
| 51 | +// replacing the commas with semicolons in place and inserting additional |
| 52 | +// occurrences of the left type and any common qualifiers as needed. |
| 53 | +// |
| 54 | +// If there is an inline TagDecl, it is separated too and moved out of any |
| 55 | +// containing RecordDecl to avoid a compiler warning, and if the TagDecl is |
| 56 | +// unnamed, it is given an automatically generated name so that it can be |
| 57 | +// referenced by the new, separate declarations of the multi-decl members. |
| 58 | +// Example: |
| 59 | +// |
| 60 | +// static struct { int *y; } x, *p: |
| 61 | +// |
| 62 | +// -> |
| 63 | +// |
| 64 | +// struct x_struct_1 { _Ptr<int> y; }; |
| 65 | +// static struct x_struct_1 x; |
| 66 | +// static _Ptr<struct x_struct_1> p; |
| 67 | +// |
| 68 | +// Exception: In a typedef multi-decl, if the _first_ member refers to the |
| 69 | +// TagDecl itself (not a pointer to it, etc.), then 3C uses that name for the |
| 70 | +// TagDecl rather than generating a new one. This produces nicer output for the |
| 71 | +// idiom: |
| 72 | +// |
| 73 | +// typedef struct { int *y; } FOO, *PFOO; |
| 74 | +// |
| 75 | +// -> |
| 76 | +// |
| 77 | +// typedef struct { _Ptr<int> y; } FOO; |
| 78 | +// typedef _Ptr<FOO> PFOO; |
| 79 | +// |
| 80 | +// The multi-decl code is used even for "multi-decls" of VarDecls, FieldDecls, |
| 81 | +// or TypedefDecls that have only a single member to avoid having to maintain a |
| 82 | +// separate code path for them. But a multi-decl always has at least one member; |
| 83 | +// a pure TagDecl such as `struct my_struct { int *y; };` is _not_ considered a |
| 84 | +// multi-decl. ParmVarDecls are handled differently. In fact, ParmVarDecls with |
| 85 | +// inline TagDecls are known to be handled poorly, but that's a rare and poor |
| 86 | +// practice and it's not easy to handle them better. |
| 87 | + |
| 88 | +// Currently, we automatically generate a name for every unnamed TagDecl defined |
| 89 | +// in a multi-decl and use the name in ConstraintVariables, but we only insert |
| 90 | +// the name into the definition if the multi-decl gets rewritten for some other |
| 91 | +// reason. This solves the common case of allowing the types of all the |
| 92 | +// multi-decl members to refer to the TagDecl, but it doesn't address cases in |
| 93 | +// which 3C might need to insert a reference to the unnamed TagDecl elsewhere |
| 94 | +// even if the multi-decl isn't being rewritten. In these cases, 3C typically |
| 95 | +// uses the generated name even though it is not defined, causing a compile |
| 96 | +// error that the user has to correct manually. The problematic cases include: |
| 97 | +// |
| 98 | +// - Type argument insertion. TypeVariableEntry has a check for |
| 99 | +// `isTypeAnonymous`, but it has at least one bug (it misses double pointers). |
| 100 | +// |
| 101 | +// - Cast insertion, potentially. I was unable to find an example, but that |
| 102 | +// doesn't mean it will never happen, especially with future changes to the |
| 103 | +// code. |
| 104 | +// |
| 105 | +// - Typedef itype insertion. |
| 106 | +// |
| 107 | +// One approach to try to rule out all of these bugs at once would be to |
| 108 | +// preemptively rewrite all multi-decls containing unnamed TagDecls, but those |
| 109 | +// changes might be undesirable or could even cause errors in the presence of |
| 110 | +// macros, etc. Or we could try to add the necessary code so that insertion of a |
| 111 | +// reference to an unnamed TagDecl would trigger insertion of the name into the |
| 112 | +// definition. For now, we don't deal with the problem. |
| 113 | + |
| 114 | +// Implementation note: The Clang AST does not represent multi-decls explicitly |
| 115 | +// (except in functions, where they are represented by DeclStmts). In other |
| 116 | +// contexts, we detect them based on the property that the beginning |
| 117 | +// SourceLocation of all the members is the same. And as long as we are making |
| 118 | +// this assumption, we use it in functions too rather than having a separate |
| 119 | +// code path that looks for DeclStmts. |
| 120 | + |
| 121 | +// NamedDecl is the nearest common superclass of all Decl subtypes that can be |
| 122 | +// multi-decl members. There is no enforcement that a MultiDeclMemberDecl is |
| 123 | +// actually one of the allowed subtypes, so use of the MultiDeclMemberDecl |
| 124 | +// typedef serves as documentation only. (If we wanted to enforce it, we'd need |
| 125 | +// a wrapper object of some kind, which currently seems to be more trouble than |
| 126 | +// it's worth.) |
| 127 | +typedef NamedDecl MultiDeclMemberDecl; |
| 128 | + |
| 129 | +// Returns D if it can be a multi-decl member, otherwise null. |
| 130 | +MultiDeclMemberDecl *getAsMultiDeclMember(Decl *D); |
| 131 | + |
| 132 | +// Helpers to cope with the different APIs to do corresponding things with a |
| 133 | +// TypedefDecl or DeclaratorDecl. |
| 134 | +QualType getTypeOfMultiDeclMember(MultiDeclMemberDecl *MMD); |
| 135 | +TypeSourceInfo *getTypeSourceInfoOfMultiDeclMember(MultiDeclMemberDecl *MMD); |
| 136 | + |
| 137 | +struct MultiDeclInfo { |
| 138 | + // The TagDecl that is defined inline in the multi-decl and needs to be split |
| 139 | + // from it during rewriting, if any, otherwise null. In a case like |
| 140 | + // `typedef struct { ... } T`, there is an inline tag definition but we don't |
| 141 | + // need to split it out, so this will be null. |
| 142 | + TagDecl *TagDefToSplit = nullptr; |
| 143 | + |
| 144 | + // True if the base type was an unnamed TagDecl defined inline for which we |
| 145 | + // are using a new name. Note that TagDefToSplit can be nonnull and |
| 146 | + // BaseTypeRenamed can be false if the inline TagDecl was named, and the |
| 147 | + // reverse can occur in the `typedef struct { ... } T` case. |
| 148 | + bool BaseTypeRenamed = false; |
| 149 | + |
| 150 | + // The members of the multi-decl in their original order. |
| 151 | + std::vector<MultiDeclMemberDecl *> Members; |
| 152 | + |
| 153 | + // Set by DeclRewriter::rewriteMultiDecl after it rewrites the entire |
| 154 | + // multi-decl to ensure that it doesn't try to do so more than once if |
| 155 | + // multiple members needed changes. |
| 156 | + bool AlreadyRewritten = false; |
| 157 | +}; |
| 158 | + |
| 159 | +struct TUMultiDeclsInfo { |
| 160 | + // All multi-decls, keyed by the common beginning source location of their |
| 161 | + // members. Note that the beginning source location of TagDefToSplit may be |
| 162 | + // later if there is a keyword such as `static` or `typedef` in between. |
| 163 | + std::map<SourceLocation, MultiDeclInfo> MultiDeclsByBeginLoc; |
| 164 | + |
| 165 | + // Map from a tag definition to its containing multi-decl (if it is part of |
| 166 | + // one). Note that the TagDefToSplit of the MultiDeclInfo is not guaranteed to |
| 167 | + // equal the TagDecl: it may be null in the `typedef struct { ... } T` case. |
| 168 | + // |
| 169 | + // Note that the MultiDeclInfo pointers remain valid for as long as the |
| 170 | + // MultiDeclInfo objects remain in MultiDeclsByBeginLoc: see |
| 171 | + // https://en.cppreference.com/w/cpp/container#Iterator_invalidation. |
| 172 | + std::map<TagDecl *, MultiDeclInfo *> ContainingMultiDeclOfTagDecl; |
| 173 | +}; |
| 174 | + |
| 175 | +class ProgramMultiDeclsInfo { |
| 176 | +private: |
| 177 | + // Set of TagDecl names already used at least once in the program, so we can |
| 178 | + // avoid colliding with them. |
| 179 | + std::set<std::string> UsedTagNames; |
| 180 | + |
| 181 | + // Information about an originally unnamed tag definition in a multi-decl for |
| 182 | + // which we're using a new name. |
| 183 | + struct RenamedTagDefInfo { |
| 184 | + // The new string that should be used to refer to the type of the TagDecl. |
| 185 | + // Unlike UsedTagNames, this includes the tag kind keyword (such as |
| 186 | + // `struct`), except when we use an existing typedef (which doesn't require |
| 187 | + // a tag keyword). |
| 188 | + std::string AssignedTypeStr; |
| 189 | + // Whether the TagDecl should be split from the multi-decl. True except when |
| 190 | + // we use an existing typedef. |
| 191 | + bool ShouldSplit; |
| 192 | + }; |
| 193 | + |
| 194 | + // Map from PSL of a TagDecl to its RenamedTagDefInfo, to ensure that we |
| 195 | + // handle the TagDecl consistently when 3C naively rewrites the same header |
| 196 | + // file multiple times as part of different translation units (see |
| 197 | + // https://github.com/correctcomputation/checkedc-clang/issues/374#issuecomment-804283984). |
| 198 | + std::map<PersistentSourceLoc, RenamedTagDefInfo> RenamedTagDefs; |
| 199 | + |
| 200 | + std::map<ASTContext *, TUMultiDeclsInfo> TUInfos; |
| 201 | + |
| 202 | + // Recursive helpers. |
| 203 | + void findUsedTagNames(DeclContext *DC); |
| 204 | + void findMultiDecls(DeclContext *DC, ASTContext &Context); |
| 205 | + |
| 206 | +public: |
| 207 | + void findUsedTagNames(ASTContext &Context); |
| 208 | + void findMultiDecls(ASTContext &Context); |
| 209 | + llvm::Optional<std::string> getTypeStrOverride(const Type *Ty, |
| 210 | + const ASTContext &C); |
| 211 | + MultiDeclInfo *findContainingMultiDecl(MultiDeclMemberDecl *MMD); |
| 212 | + MultiDeclInfo *findContainingMultiDecl(TagDecl *TD); |
| 213 | + bool wasBaseTypeRenamed(Decl *D); |
| 214 | +}; |
| 215 | + |
| 216 | +#endif // LLVM_CLANG_3C_MULTIDECLS_H |
0 commit comments