diff --git a/clang/include/clang/3C/ConstraintVariables.h b/clang/include/clang/3C/ConstraintVariables.h index 65b0974decb7..fa1d9f14ef87 100644 --- a/clang/include/clang/3C/ConstraintVariables.h +++ b/clang/include/clang/3C/ConstraintVariables.h @@ -26,6 +26,7 @@ #define LLVM_CLANG_3C_CONSTRAINTVARIABLES_H #include "clang/3C/Constraints.h" +#include "clang/3C/MultiDecls.h" #include "clang/3C/OptionalParams.h" #include "clang/3C/ProgramVar.h" #include "clang/AST/ASTContext.h" @@ -355,16 +356,17 @@ class PointerVariableConstraint : public ConstraintVariable { // declaration's base type. To preserve macros, this we first try to take // the type directly from source code. Where that is not possible, the type // is regenerated from the type in the clang AST. - static std::string extractBaseType(DeclaratorDecl *D, TypeSourceInfo *TSI, - QualType QT, const Type *Ty, - const ASTContext &C); + static std::string extractBaseType(MultiDeclMemberDecl *MMD, + TypeSourceInfo *TSI, QualType QT, + const Type *Ty, const ASTContext &C, + ProgramInfo &Info); // Try to extract string representation of the base type for a declaration // from the source code. If the base type cannot be extracted from source, an // empty string is returned instead. - static std::string tryExtractBaseType(DeclaratorDecl *D, TypeSourceInfo *TSI, - QualType QT, const Type *Ty, - const ASTContext &C); + static std::string tryExtractBaseType(MultiDeclMemberDecl *MMD, + TypeSourceInfo *TSI, QualType QT, + const Type *Ty, const ASTContext &C); // Flag to indicate that this constraint is a part of function prototype // e.g., Parameters or Return. diff --git a/clang/include/clang/3C/Constraints.h b/clang/include/clang/3C/Constraints.h index 85f8274b0445..be46f093284a 100644 --- a/clang/include/clang/3C/Constraints.h +++ b/clang/include/clang/3C/Constraints.h @@ -43,6 +43,7 @@ class ConstraintsGraph; #define SPECIAL_REASON(spec) (std::string("Special case for ") + (spec)) #define STRING_LITERAL_REASON "The type of a string literal" #define MACRO_REASON "Pointer in Macro declaration." +#define UNION_FIELD_REASON "Union field encountered" template struct PComp { bool operator()(const T Lhs, const T Rhs) const { return *Lhs < *Rhs; } diff --git a/clang/include/clang/3C/DeclRewriter.h b/clang/include/clang/3C/DeclRewriter.h index 8ebc9f037f49..ed41d8feb952 100644 --- a/clang/include/clang/3C/DeclRewriter.h +++ b/clang/include/clang/3C/DeclRewriter.h @@ -26,8 +26,8 @@ using namespace clang; class DeclRewriter { public: - DeclRewriter(Rewriter &R, ASTContext &A, GlobalVariableGroups &GP) - : R(R), A(A), GP(GP) {} + DeclRewriter(Rewriter &R, ProgramInfo &Info, ASTContext &A) + : R(R), Info(Info), A(A) {} // The publicly accessible interface for performing declaration rewriting. // All declarations for variables with checked types in the variable map of @@ -40,41 +40,24 @@ class DeclRewriter { ArrayBoundsRewriter &ABR); private: - static RecordDecl *LastRecordDecl; - static std::map VDToRDMap; - static std::set InlineVarDecls; Rewriter &R; + ProgramInfo &Info; ASTContext &A; - GlobalVariableGroups &GP; - // This set contains declarations that have already been rewritten as part of - // a prior declaration that was in the same multi-declaration. It is checked - // before rewriting in order to avoid rewriting a declaration more than once. - // It is not used with individual declarations outside of multi-declarations - // because these declarations are seen exactly once, rather than every time a - // declaration in the containing multi-decl is visited. - std::set VisitedMultiDeclMembers; + // List of TagDecls that were split from multi-decls and should be moved out + // of an enclosing RecordDecl to avoid a compiler warning. Filled during + // multi-decl rewriting and processed by denestTagDecls. + std::vector TagDeclsToDenest; // Visit each Decl in ToRewrite and apply the appropriate pointer type // to that Decl. ToRewrite is the set of all declarations to rewrite. void rewrite(RSet &ToRewrite); - // Rewrite a specific variable declaration using the replacement string in the - // DAndReplace structure. Each of these functions is specialized to handling - // one subclass of declarations. - template - void rewriteFieldOrVarDecl(DRType *N, RSet &ToRewrite); - void rewriteMultiDecl(DeclReplacement *N, RSet &ToRewrite, - std::vector SameLineDecls, - bool ContainsInlineStruct); - void rewriteSingleDecl(DeclReplacement *N, RSet &ToRewrite); + void rewriteMultiDecl(MultiDeclInfo &MDI, RSet &ToRewrite); void doDeclRewrite(SourceRange &SR, DeclReplacement *N); void rewriteFunctionDecl(FunctionDeclReplacement *N); - void rewriteTypedefDecl(TypedefDeclReplacement *TDT, RSet &ToRewrite); - void getDeclsOnSameLine(DeclReplacement *N, std::vector &Decls); - bool isSingleDeclaration(DeclReplacement *N); - SourceRange getNextCommaOrSemicolon(SourceLocation L); - static void detectInlineStruct(Decl *D, SourceManager &SM); + SourceRange getNextComma(SourceLocation L); + void denestTagDecls(); }; // Visits function declarations and adds entries with their new rewritten @@ -119,16 +102,4 @@ class FunctionDeclBuilder : public RecursiveASTVisitor { bool inParamMultiDecl(const ParmVarDecl *PVD); }; - -class FieldFinder : public RecursiveASTVisitor { -public: - FieldFinder(GlobalVariableGroups &GVG) : GVG(GVG) {} - - bool VisitFieldDecl(FieldDecl *FD); - - static void gatherSameLineFields(GlobalVariableGroups &GVG, Decl *D); - -private: - GlobalVariableGroups &GVG; -}; #endif // LLVM_CLANG_3C_DECLREWRITER_H diff --git a/clang/include/clang/3C/MappingVisitor.h b/clang/include/clang/3C/MappingVisitor.h index 71c9e07cf3e9..da6c8dae449f 100644 --- a/clang/include/clang/3C/MappingVisitor.h +++ b/clang/include/clang/3C/MappingVisitor.h @@ -21,37 +21,26 @@ #include "clang/AST/ASTConsumer.h" #include "clang/AST/RecursiveASTVisitor.h" -typedef std::tuple StmtDecl; -typedef std::map SourceToDeclMapType; -typedef std::pair - MappingResultsType; +typedef std::map SourceToDeclMapType; class MappingVisitor : public clang::RecursiveASTVisitor { public: MappingVisitor(std::set S, clang::ASTContext &C) : SourceLocs(S), Context(C) {} - bool VisitDeclStmt(clang::DeclStmt *S); - bool VisitDecl(clang::Decl *D); - MappingResultsType getResults() { - return std::pair, - VariableDecltoStmtMap>(PSLtoSDT, DeclToDeclStmt); - } + const SourceToDeclMapType &getResults() { return PSLtoSDT; } private: - // A map from a PersistentSourceLoc to a tuple describing a statement, decl - // or type. + // A map from a PersistentSourceLoc to a Decl at that location. SourceToDeclMapType PSLtoSDT; // The set of PersistentSourceLoc's this instance of MappingVisitor is tasked - // with re-instantiating as either a Stmt, Decl or Type. + // with re-instantiating as a Decl. std::set SourceLocs; // The ASTContext for the particular AST that the MappingVisitor is // traversing. clang::ASTContext &Context; - // A mapping of individual Decls to the DeclStmt that contains them. - VariableDecltoStmtMap DeclToDeclStmt; }; #endif diff --git a/clang/include/clang/3C/MultiDecls.h b/clang/include/clang/3C/MultiDecls.h new file mode 100644 index 000000000000..b6f1e0eca4f3 --- /dev/null +++ b/clang/include/clang/3C/MultiDecls.h @@ -0,0 +1,216 @@ +//=--MultiDecls.h-------------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Code to deal with "multi-decls": constructs in which one or more identifiers +// are declared in a comma-separated list based on a single type "on the left". +// A simple example: +// +// struct my_struct x, *p; +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_3C_MULTIDECLS_H +#define LLVM_CLANG_3C_MULTIDECLS_H + +#include "clang/3C/PersistentSourceLoc.h" +#include "clang/AST/Decl.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Type.h" +#include "llvm/ADT/Optional.h" + +using namespace clang; + +// Some more information about multi-decls in the context of 3C: +// +// The "members" of a given multi-decl may be ordinary variables (VarDecls), +// struct/union fields (FieldDecls), or typedefs (TypedefDecls), but all members +// of a given multi-decl are of the same kind. +// +// If the "left type" of a multi-decl is a TagDecl, it may have an inline +// definition; if it does, then the TagDecl may be unnamed. Examples: +// +// struct my_struct { int *y; } x, *p; +// struct { int *y; } x, *p; +// +// Multi-decls (especially those with inline TagDecls) have historically been +// tricky for 3C to rewrite. If the type of one member becomes a _Ptr (or +// similar), then the left type of the members is no longer the same, so the +// multi-decl must be broken up, for example: +// +// struct my_struct x; +// _Ptr p; +// +// To keep the logic simpler, if 3C needs to change the type of at least one +// member of a multi-decl, it breaks up all members of the multi-decl into +// separate declarations. To preserve SourceLocations as much as possible and +// avoid interfering with rewrites to any other constructs in the multi-decl +// (e.g., within existing initializer expressions), this breakup is performed by +// replacing the commas with semicolons in place and inserting additional +// occurrences of the left type and any common qualifiers as needed. +// +// If there is an inline TagDecl, it is separated too and moved out of any +// containing RecordDecl to avoid a compiler warning, and if the TagDecl is +// unnamed, it is given an automatically generated name so that it can be +// referenced by the new, separate declarations of the multi-decl members. +// Example: +// +// static struct { int *y; } x, *p: +// +// -> +// +// struct x_struct_1 { _Ptr y; }; +// static struct x_struct_1 x; +// static _Ptr p; +// +// Exception: In a typedef multi-decl, if the _first_ member refers to the +// TagDecl itself (not a pointer to it, etc.), then 3C uses that name for the +// TagDecl rather than generating a new one. This produces nicer output for the +// idiom: +// +// typedef struct { int *y; } FOO, *PFOO; +// +// -> +// +// typedef struct { _Ptr y; } FOO; +// typedef _Ptr PFOO; +// +// The multi-decl code is used even for "multi-decls" of VarDecls, FieldDecls, +// or TypedefDecls that have only a single member to avoid having to maintain a +// separate code path for them. But a multi-decl always has at least one member; +// a pure TagDecl such as `struct my_struct { int *y; };` is _not_ considered a +// multi-decl. ParmVarDecls are handled differently. In fact, ParmVarDecls with +// inline TagDecls are known to be handled poorly, but that's a rare and poor +// practice and it's not easy to handle them better. + +// Currently, we automatically generate a name for every unnamed TagDecl defined +// in a multi-decl and use the name in ConstraintVariables, but we only insert +// the name into the definition if the multi-decl gets rewritten for some other +// reason. This solves the common case of allowing the types of all the +// multi-decl members to refer to the TagDecl, but it doesn't address cases in +// which 3C might need to insert a reference to the unnamed TagDecl elsewhere +// even if the multi-decl isn't being rewritten. In these cases, 3C typically +// uses the generated name even though it is not defined, causing a compile +// error that the user has to correct manually. The problematic cases include: +// +// - Type argument insertion. TypeVariableEntry has a check for +// `isTypeAnonymous`, but it has at least one bug (it misses double pointers). +// +// - Cast insertion, potentially. I was unable to find an example, but that +// doesn't mean it will never happen, especially with future changes to the +// code. +// +// - Typedef itype insertion. +// +// One approach to try to rule out all of these bugs at once would be to +// preemptively rewrite all multi-decls containing unnamed TagDecls, but those +// changes might be undesirable or could even cause errors in the presence of +// macros, etc. Or we could try to add the necessary code so that insertion of a +// reference to an unnamed TagDecl would trigger insertion of the name into the +// definition. For now, we don't deal with the problem. + +// Implementation note: The Clang AST does not represent multi-decls explicitly +// (except in functions, where they are represented by DeclStmts). In other +// contexts, we detect them based on the property that the beginning +// SourceLocation of all the members is the same. And as long as we are making +// this assumption, we use it in functions too rather than having a separate +// code path that looks for DeclStmts. + +// NamedDecl is the nearest common superclass of all Decl subtypes that can be +// multi-decl members. There is no enforcement that a MultiDeclMemberDecl is +// actually one of the allowed subtypes, so use of the MultiDeclMemberDecl +// typedef serves as documentation only. (If we wanted to enforce it, we'd need +// a wrapper object of some kind, which currently seems to be more trouble than +// it's worth.) +typedef NamedDecl MultiDeclMemberDecl; + +// Returns D if it can be a multi-decl member, otherwise null. +MultiDeclMemberDecl *getAsMultiDeclMember(Decl *D); + +// Helpers to cope with the different APIs to do corresponding things with a +// TypedefDecl or DeclaratorDecl. +QualType getTypeOfMultiDeclMember(MultiDeclMemberDecl *MMD); +TypeSourceInfo *getTypeSourceInfoOfMultiDeclMember(MultiDeclMemberDecl *MMD); + +struct MultiDeclInfo { + // The TagDecl that is defined inline in the multi-decl and needs to be split + // from it during rewriting, if any, otherwise null. In a case like + // `typedef struct { ... } T`, there is an inline tag definition but we don't + // need to split it out, so this will be null. + TagDecl *TagDefToSplit = nullptr; + + // True if the base type was an unnamed TagDecl defined inline for which we + // are using a new name. Note that TagDefToSplit can be nonnull and + // BaseTypeRenamed can be false if the inline TagDecl was named, and the + // reverse can occur in the `typedef struct { ... } T` case. + bool BaseTypeRenamed = false; + + // The members of the multi-decl in their original order. + std::vector Members; + + // Set by DeclRewriter::rewriteMultiDecl after it rewrites the entire + // multi-decl to ensure that it doesn't try to do so more than once if + // multiple members needed changes. + bool AlreadyRewritten = false; +}; + +struct TUMultiDeclsInfo { + // All multi-decls, keyed by the common beginning source location of their + // members. Note that the beginning source location of TagDefToSplit may be + // later if there is a keyword such as `static` or `typedef` in between. + std::map MultiDeclsByBeginLoc; + + // Map from a tag definition to its containing multi-decl (if it is part of + // one). Note that the TagDefToSplit of the MultiDeclInfo is not guaranteed to + // equal the TagDecl: it may be null in the `typedef struct { ... } T` case. + // + // Note that the MultiDeclInfo pointers remain valid for as long as the + // MultiDeclInfo objects remain in MultiDeclsByBeginLoc: see + // https://en.cppreference.com/w/cpp/container#Iterator_invalidation. + std::map ContainingMultiDeclOfTagDecl; +}; + +class ProgramMultiDeclsInfo { +private: + // Set of TagDecl names already used at least once in the program, so we can + // avoid colliding with them. + std::set UsedTagNames; + + // Information about an originally unnamed tag definition in a multi-decl for + // which we're using a new name. + struct RenamedTagDefInfo { + // The new string that should be used to refer to the type of the TagDecl. + // Unlike UsedTagNames, this includes the tag kind keyword (such as + // `struct`), except when we use an existing typedef (which doesn't require + // a tag keyword). + std::string AssignedTypeStr; + // Whether the TagDecl should be split from the multi-decl. True except when + // we use an existing typedef. + bool ShouldSplit; + }; + + // Map from PSL of a TagDecl to its RenamedTagDefInfo, to ensure that we + // handle the TagDecl consistently when 3C naively rewrites the same header + // file multiple times as part of different translation units (see + // https://github.com/correctcomputation/checkedc-clang/issues/374#issuecomment-804283984). + std::map RenamedTagDefs; + + std::map TUInfos; + + // Recursive helpers. + void findUsedTagNames(DeclContext *DC); + void findMultiDecls(DeclContext *DC, ASTContext &Context); + +public: + void findUsedTagNames(ASTContext &Context); + void findMultiDecls(ASTContext &Context); + llvm::Optional getTypeStrOverride(const Type *Ty, + const ASTContext &C); + MultiDeclInfo *findContainingMultiDecl(MultiDeclMemberDecl *MMD); + MultiDeclInfo *findContainingMultiDecl(TagDecl *TD); + bool wasBaseTypeRenamed(Decl *D); +}; + +#endif // LLVM_CLANG_3C_MULTIDECLS_H diff --git a/clang/include/clang/3C/ProgramInfo.h b/clang/include/clang/3C/ProgramInfo.h index a208458039ab..192fa56885cb 100644 --- a/clang/include/clang/3C/ProgramInfo.h +++ b/clang/include/clang/3C/ProgramInfo.h @@ -16,6 +16,7 @@ #include "clang/3C/3CStats.h" #include "clang/3C/AVarBoundsInfo.h" #include "clang/3C/ConstraintVariables.h" +#include "clang/3C/MultiDecls.h" #include "clang/3C/PersistentSourceLoc.h" #include "clang/3C/Utils.h" #include "clang/AST/ASTConsumer.h" @@ -34,8 +35,8 @@ class ProgramVariableAdder { virtual bool seenTypedef(PersistentSourceLoc PSL) = 0; - virtual void addTypedef(PersistentSourceLoc PSL, bool CanRewriteDef, - TypedefDecl *TD, ASTContext &C) = 0; + virtual void addTypedef(PersistentSourceLoc PSL, TypedefDecl *TD, + ASTContext &C) = 0; protected: virtual AVarBoundsInfo &getABoundsInfo() = 0; @@ -170,7 +171,7 @@ class ProgramInfo : public ProgramVariableAdder { bool seenTypedef(PersistentSourceLoc PSL) override; - void addTypedef(PersistentSourceLoc PSL, bool CanRewriteDef, TypedefDecl *TD, + void addTypedef(PersistentSourceLoc PSL, TypedefDecl *TD, ASTContext &C) override; // Store mapping from ASTContexts to a unique index in the ASTs vector in @@ -179,6 +180,8 @@ class ProgramInfo : public ProgramVariableAdder { void registerTranslationUnits( const std::vector> &ASTs); + ProgramMultiDeclsInfo TheMultiDeclsInfo; + private: // List of constraint variables for declarations, indexed by their location in // the source. This information persists across invocations of the constraint diff --git a/clang/include/clang/3C/RewriteUtils.h b/clang/include/clang/3C/RewriteUtils.h index a571ba27e026..8930327700f2 100644 --- a/clang/include/clang/3C/RewriteUtils.h +++ b/clang/include/clang/3C/RewriteUtils.h @@ -24,18 +24,14 @@ class DeclReplacement { public: virtual Decl *getDecl() const = 0; - DeclStmt *getStatement() const { return Statement; } - std::string getReplacement() const { return Replacement; } virtual SourceRange getSourceRange(SourceManager &SM) const; // Discriminator for LLVM-style RTTI (dyn_cast<> et al.). enum DRKind { - DRK_VarDecl, + DRK_MultiDeclMember, DRK_FunctionDecl, - DRK_FieldDecl, - DRK_TypedefDecl }; DRKind getKind() const { return Kind; } @@ -43,11 +39,7 @@ class DeclReplacement { virtual ~DeclReplacement() {} protected: - explicit DeclReplacement(DeclStmt *S, std::string R, DRKind K) - : Statement(S), Replacement(R), Kind(K) {} - - // The Stmt, if it exists (may be nullptr). - DeclStmt *Statement; + explicit DeclReplacement(std::string R, DRKind K) : Replacement(R), Kind(K) {} // The string to replace the declaration with. std::string Replacement; @@ -59,8 +51,8 @@ class DeclReplacement { template class DeclReplacementTempl : public DeclReplacement { public: - explicit DeclReplacementTempl(DeclT *D, DeclStmt *DS, std::string R) - : DeclReplacement(DS, R, K), Decl(D) {} + explicit DeclReplacementTempl(DeclT *D, std::string R) + : DeclReplacement(R, K), Decl(D) {} DeclT *getDecl() const override { return Decl; } @@ -70,12 +62,8 @@ class DeclReplacementTempl : public DeclReplacement { DeclT *Decl; }; -typedef DeclReplacementTempl - VarDeclReplacement; -typedef DeclReplacementTempl - FieldDeclReplacement; -typedef DeclReplacementTempl - TypedefDeclReplacement; +typedef DeclReplacementTempl + MultiDeclMemberReplacement; class FunctionDeclReplacement : public DeclReplacementTempl RSet; -// This class is used to figure out which global variables are part of -// multi-variable declarations. For local variables, all variables in a single -// multi declaration are grouped together in a DeclStmt object. This is not the -// case for global variables, so this class is required to correctly group -// global variable declarations. Declarations in the same multi-declarations -// have the same beginning source locations, so it is used to group variables. -class GlobalVariableGroups { -public: - GlobalVariableGroups(SourceManager &SourceMgr) : SM(SourceMgr) {} - void addGlobalDecl(Decl *VD, std::vector *VDVec = nullptr); - - std::vector &getVarsOnSameLine(Decl *VD); - - virtual ~GlobalVariableGroups(); - -private: - SourceManager &SM; - std::map *> GlobVarGroups; -}; +// Generate a string for the declaration based on the given PVConstraint. +// Includes the storage qualifier, type, name, and bounds string (as +// applicable), or generates an itype declaration if required due to +// ItypesForExtern. Does not include a trailing semicolon or an initializer, so +// it can be used in combination with getDeclSourceRangeWithAnnotations with +// IncludeInitializer = false to preserve an existing initializer. +std::string mkStringForPVDecl(MultiDeclMemberDecl *MMD, PVConstraint *PVC, + ProgramInfo &Info); + +// Generate a string like mkStringForPVDecl, but for a declaration whose type is +// known not to have changed (except possibly for a base type rename) and that +// may not have a PVConstraint if the type is not a pointer or array type. +// +// For similar reasons as in the comment in DeclRewriter::buildItypeDecl, this +// will get the string from Clang instead of mkString if the base type hasn't +// been renamed (hence the need to assume the rest of the type has not changed). +// Yet another possible approach would be to combine the new base type name with +// the original source for the rest of the declaration, but that may run into +// problems with macros and the like, so we might still need some fallback. For +// now, we don't implement this "original source" approach. +std::string mkStringForDeclWithUnchangedType(MultiDeclMemberDecl *D, + ProgramInfo &Info); // Class that handles rewriting bounds information for all the // detected array variables. diff --git a/clang/include/clang/3C/StructInit.h b/clang/include/clang/3C/StructInit.h index 7bf6974e3244..5ee4cc3c14a0 100644 --- a/clang/include/clang/3C/StructInit.h +++ b/clang/include/clang/3C/StructInit.h @@ -34,11 +34,10 @@ class StructVariableInitializer explicit StructVariableInitializer(ASTContext *C, ProgramInfo &I, RSet &R) : Context(C), I(I), RewriteThese(R), RecordsWithCPointers() {} - bool VisitDeclStmt(DeclStmt *S); + bool VisitVarDecl(VarDecl *VD); private: bool hasCheckedMembers(DeclaratorDecl *DD); - void insertVarDecl(VarDecl *VD, DeclStmt *S); ASTContext *Context; ProgramInfo &I; diff --git a/clang/include/clang/3C/Utils.h b/clang/include/clang/3C/Utils.h index b14041381605..8480a8b543c1 100644 --- a/clang/include/clang/3C/Utils.h +++ b/clang/include/clang/3C/Utils.h @@ -230,6 +230,14 @@ int64_t getStmtIdWorkaround(const clang::Stmt *St, clang::SourceLocation getCheckedCAnnotationsEnd(const clang::Decl *D); +// Get the source range for a declaration including Checked C annotations. +// Optionally, any initializer can be excluded from the range in order to avoid +// interfering with other rewrites inside an existing initializer +// (https://github.com/correctcomputation/checkedc-clang/issues/267). If the +// declaration has no initializer, then IncludeInitializer has no effect. +clang::SourceRange getDeclSourceRangeWithAnnotations(const clang::Decl *D, + bool IncludeInitializer); + // Shortcut for the getCustomDiagID + Report sequence to report a custom // diagnostic as we currently do in 3C. // @@ -257,4 +265,20 @@ inline const clang::DiagnosticBuilder &operator<<( return DB; } +// Marker for conditions that we might want to make into non-fatal assertions +// once we have an API design for them +// (https://github.com/correctcomputation/checkedc-clang/issues/745). An inline +// function would work just as well, but macros have an LLVM naming convention +// and syntax highlighting that make call sites easier to read, in Matt's +// opinion. +#define NONFATAL_ASSERT_PLACEHOLDER(_cond) (_cond) + +// Variant for conditions that the caller doesn't actually test because no +// separate recovery path is currently implemented. We want to check that the +// condition compiles but not evaluate it at runtime (until non-fatal assertions +// are actually implemented, and then only when assertions are enabled in the +// build configuration), and we don't want "unused code" compiler warnings. +// TODO: Is there a better way to achieve this? +#define NONFATAL_ASSERT_PLACEHOLDER_UNUSED(_cond) ((void)sizeof(_cond)) + #endif diff --git a/clang/lib/3C/3C.cpp b/clang/lib/3C/3C.cpp index 7eebc2edd660..267082ae0db8 100644 --- a/clang/lib/3C/3C.cpp +++ b/clang/lib/3C/3C.cpp @@ -516,6 +516,18 @@ bool _3CInterface::addVariables() { std::lock_guard Lock(InterfaceMutex); + // Find multi-decls and assign names to unnamed inline TagDecls now so that + // the assigned type names are available when we construct ConstraintVariables + // for the multi-decl members in the "Add Variables" step below. + for (auto &TU : ASTs) + GlobalProgramInfo.TheMultiDeclsInfo.findUsedTagNames(TU->getASTContext()); + if (!isSuccessfulSoFar()) + return false; + for (auto &TU : ASTs) + GlobalProgramInfo.TheMultiDeclsInfo.findMultiDecls(TU->getASTContext()); + if (!isSuccessfulSoFar()) + return false; + // 1. Add Variables. VariableAdderConsumer VA = VariableAdderConsumer(GlobalProgramInfo, nullptr); for (auto &TU : ASTs) diff --git a/clang/lib/3C/CMakeLists.txt b/clang/lib/3C/CMakeLists.txt index 91733f687ba9..7a57f8e93ff7 100644 --- a/clang/lib/3C/CMakeLists.txt +++ b/clang/lib/3C/CMakeLists.txt @@ -44,6 +44,7 @@ add_clang_library(clang3C DeclRewriter.cpp IntermediateToolHook.cpp MappingVisitor.cpp + MultiDecls.cpp PersistentSourceLoc.cpp ProgramInfo.cpp ProgramVar.cpp diff --git a/clang/lib/3C/ConstraintBuilder.cpp b/clang/lib/3C/ConstraintBuilder.cpp index 37442ca12546..902acb400c8a 100644 --- a/clang/lib/3C/ConstraintBuilder.cpp +++ b/clang/lib/3C/ConstraintBuilder.cpp @@ -21,106 +21,6 @@ using namespace llvm; using namespace clang; -// This class is intended to locate inline struct definitions -// in order to mark them wild or signal a warning as appropriate. -class InlineStructDetector { -public: - explicit InlineStructDetector() : LastRecordDecl(nullptr) {} - - void processRecordDecl(RecordDecl *Declaration, ProgramInfo &Info, - ASTContext *Context, ConstraintResolver CB) { - LastRecordDecl = Declaration; - if (RecordDecl *Definition = Declaration->getDefinition()) { - auto LastRecordLocation = Definition->getBeginLoc(); - FullSourceLoc FL = Context->getFullLoc(Definition->getBeginLoc()); - if (FL.isValid()) { - SourceManager &SM = Context->getSourceManager(); - FileID FID = FL.getFileID(); - const FileEntry *FE = SM.getFileEntryForID(FID); - - // Detect whether this RecordDecl is part of an inline struct. - bool IsInLineStruct = false; - Decl *D = Declaration->getNextDeclInContext(); - if (VarDecl *VD = dyn_cast_or_null(D)) { - auto VarTy = VD->getType(); - auto BeginLoc = VD->getBeginLoc(); - auto EndLoc = VD->getEndLoc(); - SourceManager &SM = Context->getSourceManager(); - IsInLineStruct = - !isPtrOrArrayType(VarTy) && !VD->hasInit() && - SM.isPointWithin(LastRecordLocation, BeginLoc, EndLoc); - } - - if (FE && FE->isValid()) { - // We only want to re-write a record if it contains - // any pointer types, to include array types. - for (const auto &F : Definition->fields()) { - auto FieldTy = F->getType(); - // If the RecordDecl is a union or in a system header - // and this field is a pointer, we need to mark it wild. - bool FieldInUnionOrSysHeader = - (FL.isInSystemHeader() || Definition->isUnion()); - // Mark field wild if the above is true and the field is a pointer. - if (isPtrOrArrayType(FieldTy) && - (FieldInUnionOrSysHeader || IsInLineStruct)) { - std::string Rsn = "Union or external struct field encountered"; - CVarOption CV = Info.getVariable(F, Context); - CB.constraintCVarToWild(CV, Rsn); - } - } - } - } - } - } - - void processVarDecl(VarDecl *VD, ProgramInfo &Info, ASTContext *Context, - ConstraintResolver CB) { - // If the last seen RecordDecl is non-null and coincides with the current - // VarDecl (i.e. via an inline struct), we proceed as follows: - // If the struct is named, do nothing. - // If the struct is anonymous: - // When alltypes is on, do nothing, but signal a warning to - // the user indicating its presence. - // When alltypes is off, mark the VarDecl WILD in order to - // ensure the converted program compiles. - if (LastRecordDecl != nullptr) { - auto LastRecordLocation = LastRecordDecl->getBeginLoc(); - auto BeginLoc = VD->getBeginLoc(); - auto EndLoc = VD->getEndLoc(); - auto VarTy = VD->getType(); - SourceManager &SM = Context->getSourceManager(); - bool IsInLineStruct = - SM.isPointWithin(LastRecordLocation, BeginLoc, EndLoc) && - isPtrOrArrayType(VarTy); - bool IsNamedInLineStruct = - IsInLineStruct && LastRecordDecl->getNameAsString() != ""; - if (IsInLineStruct && !IsNamedInLineStruct) { - if (!_3COpts.AllTypes) { - CVarOption CV = Info.getVariable(VD, Context); - CB.constraintCVarToWild(CV, "Inline struct encountered."); - } else { - reportCustomDiagnostic(Context->getDiagnostics(), - DiagnosticsEngine::Warning, - "\n Rewriting failed" - "for %q0 because an inline " - "or anonymous struct instance " - "was detected.\n Consider manually " - "rewriting by inserting the struct " - "definition inside the _Ptr " - "annotation.\n " - "EX. struct {int *a; int *b;} x; " - "_Ptr b;}>;", - VD->getLocation()) - << VD; - } - } - } - } - -private: - RecordDecl *LastRecordDecl; -}; - // This class visits functions and adds constraints to the // Constraints instance assigned to it. // Each VisitXXX method is responsible for looking inside statements @@ -128,27 +28,10 @@ class InlineStructDetector { class FunctionVisitor : public RecursiveASTVisitor { public: explicit FunctionVisitor(ASTContext *C, ProgramInfo &I, FunctionDecl *FD) - : Context(C), Info(I), Function(FD), CB(Info, Context), - ISD() {} + : Context(C), Info(I), Function(FD), CB(Info, Context) {} // T x = e bool VisitDeclStmt(DeclStmt *S) { - // Introduce variables as needed. - for (const auto &D : S->decls()) { - if (RecordDecl *RD = dyn_cast(D)) { - ISD.processRecordDecl(RD, Info, Context, CB); - } - if (VarDecl *VD = dyn_cast(D)) { - if (VD->isLocalVarDecl()) { - FullSourceLoc FL = Context->getFullLoc(VD->getBeginLoc()); - SourceRange SR = VD->getSourceRange(); - if (SR.isValid() && FL.isValid() && isPtrOrArrayType(VD->getType())) { - ISD.processVarDecl(VD, Info, Context, CB); - } - } - } - } - // Process inits even for non-pointers because structs and union values // can contain pointers for (const auto &D : S->decls()) { @@ -442,48 +325,6 @@ class FunctionVisitor : public RecursiveASTVisitor { ProgramInfo &Info; FunctionDecl *Function; ConstraintResolver CB; - InlineStructDetector ISD; -}; - -class PtrToStructDef : public RecursiveASTVisitor { -public: - explicit PtrToStructDef(TypedefDecl *TDT) : TDT(TDT) {} - - bool VisitPointerType(clang::PointerType *PT) { - IsPointer = true; - return true; - } - - bool VisitRecordType(RecordType *RT) { - auto *Decl = RT->getDecl(); - auto DeclRange = Decl->getSourceRange(); - auto TypedefRange = TDT->getSourceRange(); - bool DeclContained = (TypedefRange.getBegin() < DeclRange.getBegin()) && - !(TypedefRange.getEnd() < DeclRange.getEnd()); - if (DeclContained) { - StructDefInTD = true; - return false; - } - return true; - } - - bool VisitFunctionProtoType(FunctionProtoType *FPT) { - IsPointer = true; - return true; - } - - bool getResult(void) { return StructDefInTD; } - - static bool containsPtrToStructDef(TypedefDecl *TDT) { - PtrToStructDef Traverser(TDT); - Traverser.TraverseDecl(TDT); - return Traverser.getResult(); - } - -private: - TypedefDecl *TDT = nullptr; - bool IsPointer = false; - bool StructDefInTD = false; }; // This class visits a global declaration, generating constraints @@ -491,7 +332,7 @@ class PtrToStructDef : public RecursiveASTVisitor { class ConstraintGenVisitor : public RecursiveASTVisitor { public: explicit ConstraintGenVisitor(ASTContext *Context, ProgramInfo &I) - : Context(Context), Info(I), CB(Info, Context), ISD() {} + : Context(Context), Info(I), CB(Info, Context) {} bool VisitVarDecl(VarDecl *G) { @@ -499,7 +340,6 @@ class ConstraintGenVisitor : public RecursiveASTVisitor { if (G->hasInit()) { CB.constrainLocalAssign(nullptr, G, G->getInit(), Same_to_Same); } - ISD.processVarDecl(G, Info, Context, CB); } return true; } @@ -546,16 +386,10 @@ class ConstraintGenVisitor : public RecursiveASTVisitor { return true; } - bool VisitRecordDecl(RecordDecl *Declaration) { - ISD.processRecordDecl(Declaration, Info, Context, CB); - return true; - } - private: ASTContext *Context; ProgramInfo &Info; ConstraintResolver CB; - InlineStructDetector ISD; }; // Some information about variables in the program is required by the type @@ -579,10 +413,8 @@ class VariableAdderVisitor : public RecursiveASTVisitor { // typedef map. If we have seen it before, and we need to preserve the // constraints contained within it if (!VarAdder.seenTypedef(PSL)) - // Add this typedef to the program info, if it contains a ptr to - // an anonymous struct we mark as not being rewritable - VarAdder.addTypedef(PSL, !PtrToStructDef::containsPtrToStructDef(TD), TD, - *Context); + // Add this typedef to the program info. + VarAdder.addTypedef(PSL, TD, *Context); return true; } diff --git a/clang/lib/3C/ConstraintVariables.cpp b/clang/lib/3C/ConstraintVariables.cpp index da065bfe0c0b..c27f0df8076c 100644 --- a/clang/lib/3C/ConstraintVariables.cpp +++ b/clang/lib/3C/ConstraintVariables.cpp @@ -503,7 +503,7 @@ PointerVariableConstraint::PointerVariableConstraint( TSInfo); // Get a string representing the type without pointer and array indirection. - BaseType = extractBaseType(D, TSInfo, QT, Ty, C); + BaseType = extractBaseType(D, TSInfo, QT, Ty, C, I); // check if the type is some depth of pointers to void // TODO: is this what the field should mean? do we want to include other @@ -538,11 +538,9 @@ PointerVariableConstraint::PointerVariableConstraint( } } -std::string PointerVariableConstraint::tryExtractBaseType(DeclaratorDecl *D, - TypeSourceInfo *TSI, - QualType QT, - const Type *Ty, - const ASTContext &C) { +std::string PointerVariableConstraint::tryExtractBaseType( + MultiDeclMemberDecl *D, TypeSourceInfo *TSI, QualType QT, const Type *Ty, + const ASTContext &C) { // Implicit parameters declarations from typedef function declarations will // still have valid and non-empty source ranges, but implicit declarations // aren't written in the source, so extracting the base type from this range @@ -553,7 +551,7 @@ std::string PointerVariableConstraint::tryExtractBaseType(DeclaratorDecl *D, return ""; if (!TSI) - TSI = D->getTypeSourceInfo(); + TSI = getTypeSourceInfoOfMultiDeclMember(D); if (!QT->isOrContainsCheckedType() && !Ty->getAs() && TSI) { // Try to extract the type from original source to preserve defines TypeLoc TL = TSI->getTypeLoc(); @@ -566,7 +564,7 @@ std::string PointerVariableConstraint::tryExtractBaseType(DeclaratorDecl *D, return ""; TL = TL.getAs().getReturnLoc(); } else { - FoundBaseTypeInSrc = D->getType() == QT; + FoundBaseTypeInSrc = getTypeOfMultiDeclMember(D) == QT; } if (!TL.isNull()) { TypeLoc BaseLoc = getBaseTypeLoc(TL); @@ -583,11 +581,13 @@ std::string PointerVariableConstraint::tryExtractBaseType(DeclaratorDecl *D, return ""; } -std::string PointerVariableConstraint::extractBaseType(DeclaratorDecl *D, - TypeSourceInfo *TSI, - QualType QT, - const Type *Ty, - const ASTContext &C) { +std::string PointerVariableConstraint::extractBaseType( + MultiDeclMemberDecl *D, TypeSourceInfo *TSI, QualType QT, const Type *Ty, + const ASTContext &C, ProgramInfo &Info) { + if (llvm::Optional TypeStrOverride = + Info.TheMultiDeclsInfo.getTypeStrOverride(Ty, C)) + return *TypeStrOverride; + std::string BaseTypeStr = tryExtractBaseType(D, TSI, QT, Ty, C); // Fall back to rebuilding the base type based on type passed to constructor if (BaseTypeStr.empty()) diff --git a/clang/lib/3C/DeclRewriter.cpp b/clang/lib/3C/DeclRewriter.cpp index 72ae4289df83..07257ca1dd14 100644 --- a/clang/lib/3C/DeclRewriter.cpp +++ b/clang/lib/3C/DeclRewriter.cpp @@ -52,23 +52,35 @@ void DeclRewriter::buildItypeDecl(PVConstraint *Defn, DeclaratorDecl *Decl, // of IsUncheckedTypedef because both require the variable type be defined // by a typedef. The checked typedef is expanded using unchecked types in the // unchecked portion of the itype. The typedef is used directly in the checked - // portion of the itype. + // portion of the itype. TODO: Maybe we shouldn't do that if the solution for + // the typedef doesn't fully equal the solution for the variable + // (https://github.com/correctcomputation/checkedc-clang/issues/705)? bool IsCheckedTypedef = Defn->isTypedef() && !IsUncheckedTypedef; + bool BaseTypeRenamed = + Decl && Info.TheMultiDeclsInfo.wasBaseTypeRenamed(Decl); + // It should in principle be possible to always generate the unchecked portion - // of the itype by going through mkString. For practical reason, this doesn't - // always work, so we instead use the original type string as generated by - // clang so that we emit valid syntax in more cases. For examples and - // discussion refer to issue correctcomputation/checkedc-clang#703. - if (IsCheckedTypedef || Defn->getFV()) { - // Generate the type string from PVC if we need to unmask a typedef, this is - // a function pointer, or this is a constant size array. When unmasking a - // typedef, the expansion of the typedef does not exist in the original - // source, so it must be constructed. For function pointers, a function - // pointer appearing in the unchecked portion of an itype must contain an - // extra set of parenthesis (e.g. `void ((*f)())` instead of `void (f*)()`) - // for the declaration to parse correctly. qtyToStr (which is used by - // getOriginalTypeWithName) does not support adding these parentheses. + // of the itype by going through mkString. However, mkString has bugs that + // lead to incorrect output in some less common cases + // (https://github.com/correctcomputation/checkedc-clang/issues/703). So we + // use the original type string generated by Clang (via qtyToStr or + // getOriginalTypeWithName) unless we know we have a special requirement that + // it doesn't meet, in which case we use mkString. Those cases are: + // - Unmasking a typedef. The expansion of the typedef does not exist in the + // original source, so it must be constructed. (TODO: Couldn't we just get + // the underlying type with TypedefDecl::getUnderlyingType and then use + // qtyToStr?) + // - A function pointer. For a function pointer with an itype to parse + // correctly, it needs an extra set of parentheses (e.g., + // `void ((*f)()) : itype(...)` instead of `void (*f)() : itype(...)`), and + // Clang won't know to add them. + // - When the base type is an unnamed TagDecl that 3C has renamed, Clang won't + // know the new name. + // - Possible future change: if the internal PVConstraint is partially checked + // and we want to use it + // (https://github.com/correctcomputation/checkedc-clang/issues/704). + if (IsCheckedTypedef || Defn->getFV() || BaseTypeRenamed) { Type = Defn->mkString(Info.getConstraints(), MKSTRING_OPTS(UnmaskTypedef = IsCheckedTypedef, ForItypeBase = true)); @@ -132,8 +144,8 @@ void DeclRewriter::rewriteDecls(ASTContext &Context, ProgramInfo &Info, getStorageQualifierString(D) + Var.mkString(Info.getConstraints(), MKSTRING_OPTS(UnmaskTypedef = true)); - RewriteThese.insert(std::make_pair( - TD, new TypedefDeclReplacement(TD, nullptr, NewTy))); + RewriteThese.insert( + std::make_pair(TD, new MultiDeclMemberReplacement(TD, NewTy))); } } } @@ -147,21 +159,11 @@ void DeclRewriter::rewriteDecls(ASTContext &Context, ProgramInfo &Info, for (const auto &I : Info.getVarMap()) Keys.insert(I.first); MappingVisitor MV(Keys, Context); - LastRecordDecl = nullptr; for (const auto &D : TUD->decls()) { MV.TraverseDecl(D); - detectInlineStruct(D, Context.getSourceManager()); - if (FunctionDecl *FD = dyn_cast(D)) { - if (FD->hasBody() && FD->isThisDeclarationADefinition()) { - for (auto &D : FD->decls()) { - detectInlineStruct(D, Context.getSourceManager()); - } - } - } } SourceToDeclMapType PSLMap; - VariableDecltoStmtMap VDLToStmtMap; - std::tie(PSLMap, VDLToStmtMap) = MV.getResults(); + PSLMap = MV.getResults(); // Add declarations from this map into the rewriting set for (const auto &V : Info.getVarMap()) { @@ -173,7 +175,7 @@ void DeclRewriter::rewriteDecls(ASTContext &Context, ProgramInfo &Info, // the variable name, not the typedef or #define that creates the // name of the type. PersistentSourceLoc PLoc = V.first; - if (Decl *D = std::get<1>(PSLMap[PLoc])) { + if (Decl *D = PSLMap[PLoc]) { ConstraintVariable *CV = V.second; PVConstraint *PV = dyn_cast(CV); bool PVChanged = @@ -184,60 +186,23 @@ void DeclRewriter::rewriteDecls(ASTContext &Context, ProgramInfo &Info, assert(!isa(D) && "Got a PVConstraint for a ParmVarDecl where " "isPartOfFunctionPrototype returns false?"); - DeclStmt *DS = nullptr; - if (VDLToStmtMap.find(D) != VDLToStmtMap.end()) - DS = VDLToStmtMap[D]; - - std::string NewTy = getStorageQualifierString(D); - bool IsExternGlobalVar = - isa(D) && - cast(D)->getFormalLinkage() == Linkage::ExternalLinkage; - if (_3COpts.ItypesForExtern && - (isa(D) || IsExternGlobalVar)) { - // Give record fields and global variables itypes when using - // -itypes-for-extern. Note that we haven't properly implemented - // itypes for structures and globals. This just rewrites to an itype - // instead of a fully checked type when a checked type could have been - // used. This does provide most of the rewriting infrastructure that - // would be required to support these itypes if constraint generation - // is updated to handle structure/global itypes. - std::string Type, IType; - // VarDecl and FieldDecl subclass DeclaratorDecl, so the cast will - // always succeed. - DeclRewriter::buildItypeDecl(PV, cast(D), Type, IType, - Info, ABRewriter); - NewTy += Type + IType; - } else { - NewTy += PV->mkString(Info.getConstraints()) + - ABRewriter.getBoundsString(PV, D); - } - if (auto *VD = dyn_cast(D)) - RewriteThese.insert( - std::make_pair(VD, new VarDeclReplacement(VD, DS, NewTy))); - else if (auto *FD = dyn_cast(D)) - RewriteThese.insert( - std::make_pair(FD, new FieldDeclReplacement(FD, DS, NewTy))); - else - llvm_unreachable("Unrecognized declaration type."); + MultiDeclMemberDecl *MMD = getAsMultiDeclMember(D); + assert(MMD && "Unrecognized declaration type."); + std::string ReplacementText = mkStringForPVDecl(MMD, PV, Info); + RewriteThese.insert(std::make_pair( + MMD, new MultiDeclMemberReplacement(MMD, ReplacementText))); } } } - // Build sets of variables that are declared in the same statement so we can - // rewrite things like int x, *y, **z; - GlobalVariableGroups GVG(R.getSourceMgr()); - for (const auto &D : TUD->decls()) { - GVG.addGlobalDecl(dyn_cast(D)); - //Search through the AST for fields that occur on the same line - FieldFinder::gatherSameLineFields(GVG, D); - } - // Do the declaration rewriting - DeclRewriter DeclR(R, Context, GVG); + DeclRewriter DeclR(R, Info, Context); DeclR.rewrite(RewriteThese); for (auto Pair : RewriteThese) delete Pair.second; + + DeclR.denestTagDecls(); } void DeclRewriter::rewrite(RSet &ToRewrite) { @@ -252,181 +217,246 @@ void DeclRewriter::rewrite(RSet &ToRewrite) { } // Exact rewriting procedure depends on declaration type - if (auto *VR = dyn_cast(N)) { - rewriteFieldOrVarDecl(VR, ToRewrite); + if (auto *MR = dyn_cast(N)) { + MultiDeclInfo *MDI = + Info.TheMultiDeclsInfo.findContainingMultiDecl(MR->getDecl()); + assert("Missing MultiDeclInfo for multi-decl member" && MDI); + // A multi-decl can only be rewritten as a unit. If at least one member + // needs rewriting, then the first MultiDeclMemberReplacement in iteration + // order of ToRewrite (which need not have anything to do with member + // order of the multi-decl) triggers rewriting of the entire multi-decl, + // and rewriteMultiDecl checks ToRewrite for a MultiDeclMemberReplacement + // for each member of the multi-decl and applies it if found. + if (!MDI->AlreadyRewritten) + rewriteMultiDecl(*MDI, ToRewrite); } else if (auto *FR = dyn_cast(N)) { rewriteFunctionDecl(FR); - } else if (auto *FdR = dyn_cast(N)) { - rewriteFieldOrVarDecl(FdR, ToRewrite); - } else if (auto *TDR = dyn_cast(N)) { - rewriteTypedefDecl(TDR, ToRewrite); } else { assert(false && "Unknown replacement type"); } } } -void DeclRewriter::rewriteTypedefDecl(TypedefDeclReplacement *TDR, - RSet &ToRewrite) { - rewriteSingleDecl(TDR, ToRewrite); -} - -// In alltypes mode we need to handle inline structs inside functions specially. -// Because both the recorddecl and vardecl are inside one DeclStmt, the -// SourceLocations will be generated incorrectly if we rewrite it as a -// normal multidecl. -bool isInlineStruct(std::vector &InlineDecls) { - if (InlineDecls.size() >= 2 && _3COpts.AllTypes) - return isa(InlineDecls[0]) && - std::all_of(InlineDecls.begin() + 1, InlineDecls.end(), - [](Decl *D) { return isa(D); }); - return false; -} - -template -void DeclRewriter::rewriteFieldOrVarDecl(DRType *N, RSet &ToRewrite) { - static_assert(std::is_same::value || - std::is_same::value, - "Method expects variable or field declaration replacement."); - - bool IsVisitedMultiDeclMember = (VisitedMultiDeclMembers.find(N->getDecl()) != - VisitedMultiDeclMembers.end()); - if (InlineVarDecls.find(N->getDecl()) != InlineVarDecls.end() && - !IsVisitedMultiDeclMember) { - std::vector SameLineDecls; - getDeclsOnSameLine(N, SameLineDecls); - if (std::find(SameLineDecls.begin(), SameLineDecls.end(), - VDToRDMap[N->getDecl()]) == SameLineDecls.end()) - SameLineDecls.insert(SameLineDecls.begin(), VDToRDMap[N->getDecl()]); - rewriteMultiDecl(N, ToRewrite, SameLineDecls, true); - } else if (isSingleDeclaration(N)) { - rewriteSingleDecl(N, ToRewrite); - } else if (!IsVisitedMultiDeclMember) { - std::vector SameLineDecls; - getDeclsOnSameLine(N, SameLineDecls); - if (isInlineStruct(SameLineDecls)) - SameLineDecls.erase(SameLineDecls.begin()); - rewriteMultiDecl(N, ToRewrite, SameLineDecls, false); - } else { - // Anything that reaches this case should be a multi-declaration that has - // already been rewritten. - assert("Declaration should have been rewritten." && - !isSingleDeclaration(N) && IsVisitedMultiDeclMember); +void DeclRewriter::denestTagDecls() { + // When there are multiple levels of nested TagDecls, we need to process + // all the children of a TagDecl TD before TD itself so that (1) the + // definitions of the children end up before the definition of TD (since the + // rewriter preserves order of insertions) and (2) the definitions of the + // children have been removed from the body of TD before we read the body of + // TD to move it. In effect, we want to process the TagDecls in postorder. + // The easiest way to achieve this is to process them in order of their _end_ + // locations. + std::sort(TagDeclsToDenest.begin(), TagDeclsToDenest.end(), + [&](TagDecl *TD1, TagDecl *TD2) { + return A.getSourceManager().isBeforeInTranslationUnit( + TD1->getEndLoc(), TD2->getEndLoc()); + }); + for (TagDecl *TD : TagDeclsToDenest) { + // rewriteMultiDecl replaced the final "}" in the original source range with + // "};\n", so the new content of the source range should include the ";\n", + // which is what we want here. Except the rewriter has a bug where it + // adjusts the token range to include the final token _after_ mapping the + // offset to account for previous edits (it should be before). We work + // around the bug by adjusting the token range before calling the rewriter + // at all. + CharSourceRange CSR = Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(TD->getSourceRange()), R.getSourceMgr(), + R.getLangOpts()); + std::string DefinitionStr = R.getRewrittenText(CSR); + // Delete the definition from the old location. + rewriteSourceRange(R, CSR, ""); + // We want to find the highest ancestor DeclContext of TD that is a TagDecl + // (call it TopTagDecl) and insert TD just before TopTagDecl. + // + // As of this writing, it seems that if TD is named, `TD->getDeclContext()` + // returns the parent of TopTagDecl due to the code at + // https://github.com/correctcomputation/checkedc-clang/blob/fd4d8af4383d40af10ee8bc92b7bf88061a11035/clang/lib/Sema/SemaDecl.cpp#L16980-L16981, + // But that code doesn't run if TD is unnamed (which makes some sense + // because name visibility isn't an issue for TagDecls that have no name), + // and we want to de-nest TagDecls with names we assigned just like ones + // that were originally named, so we can't just use `TD->getDeclContext()`. + // In any event, maybe we wouldn't want to rely on this kind of internal + // Clang behavior. + TagDecl *TopTagDecl = TD; + while (TagDecl *ParentTagDecl = + dyn_cast(TopTagDecl->getLexicalDeclContext())) + TopTagDecl = ParentTagDecl; + // If TopTagDecl is preceded by qualifiers, ideally we'd like to insert TD + // before those qualifiers. If TopTagDecl is actually part of a multi-decl + // with at least one member, then we can just use the begin location of that + // multi-decl as the insertion point. + // + // If there are no members (so the qualifiers have no effect and generate a + // compiler warning), then there isn't an easy way for us to find the source + // location before the qualifiers. In that case, we just insert TD at the + // begin location of TopTagDecl (after the qualifiers) and hope for the + // best. In the cases we're aware of so far (storage qualifiers, including + // `typedef`), this just means that the qualifiers end up applying to the + // first TagDecl de-nested from TopTagDecl instead of to TopTagDecl itself, + // and they generate the same compiler warning as before but on a different + // TagDecl. However, we haven't confirmed that there aren't any obscure + // cases that could lead to an error, such as if a qualifier is valid on one + // kind of TagDecl but not another. + SourceLocation InsertLoc; + if (MultiDeclInfo *MDI = + Info.TheMultiDeclsInfo.findContainingMultiDecl(TopTagDecl)) + InsertLoc = MDI->Members[0]->getBeginLoc(); + else + InsertLoc = TopTagDecl->getBeginLoc(); + // TODO: Use a wrapper like rewriteSourceRange that tries harder with + // macros, reports failure, etc. + // (https://github.com/correctcomputation/checkedc-clang/issues/739) + R.InsertText(InsertLoc, DefinitionStr); } } -void DeclRewriter::rewriteSingleDecl(DeclReplacement *N, RSet &ToRewrite) { - bool IsSingleDecl = - dyn_cast(N->getDecl()) || isSingleDeclaration(N); - assert("Declaration is not a single declaration." && IsSingleDecl); - // This is the easy case, we can rewrite it locally, at the declaration. - SourceManager &SM = N->getDecl()->getASTContext().getSourceManager(); - SourceRange TR = N->getSourceRange(SM); - doDeclRewrite(TR, N); -} - -void DeclRewriter::rewriteMultiDecl(DeclReplacement *N, RSet &ToRewrite, - std::vector SameLineDecls, - bool ContainsInlineStruct) { - // Rewriting is more difficult when there are multiple variables declared in a - // single statement. When this happens, we need to find all the declaration - // replacement for this statement and apply them at the same time. We also - // need to avoid rewriting any of these declarations twice by updating the - // Skip set to include the processed declarations. - - // For each decl in the original, build up a new string. If the - // original decl was re-written, write that out instead. Existing - // initializers are preserved, any declarations that an initializer to - // be valid checked-c are given one. +void DeclRewriter::rewriteMultiDecl(MultiDeclInfo &MDI, RSet &ToRewrite) { + // Rewrite a "multi-decl" consisting of one or more variables, fields, or + // typedefs declared in a comma-separated list based on a single type "on the + // left". See the comment at the top of clang/include/clang/3C/MultiDecls.h + // for a detailed description of the design that is implemented here. As + // mentioned in MultiDecls.h, this code is used even for "multi-decls" that + // have only a single member to avoid having to maintain a separate code path + // for them. + // + // Due to the overlap between members, a multi-decl can only be rewritten as a + // unit, visiting the members in source code order from left to right. For + // each member, we check whether it has a replacement in ToRewrite. If so, we + // use it; if not, we generate a declaration equivalent to the original. + // Existing initializers are preserved, and declarations that need an + // initializer to be valid Checked C are given one. + SourceManager &SM = A.getSourceManager(); bool IsFirst = true; SourceLocation PrevEnd; - for (const auto &DL : SameLineDecls) { - std::string ReplaceText = ";\n"; - // Find the declaration replacement object for the current declaration. - DeclReplacement *SameLineReplacement; - bool Found = false; - auto It = ToRewrite.find(DL); - if (It != ToRewrite.end()) { - SameLineReplacement = It->second; - Found = true; - VisitedMultiDeclMembers.insert(DL); + + if (MDI.TagDefToSplit != nullptr) { + TagDecl *TD = MDI.TagDefToSplit; + // `static struct T { ... } x;` -> `struct T { ... }; static struct T x;` + // A storage qualifier such as `static` applies to the members but is not + // meaningful on TD after it is split, and we need to remove it to avoid a + // compiler warning. The beginning location of the first member should be + // the `static` and the beginning location of TD should be the `struct`, so + // we just remove anything between those locations. (Can other things appear + // there? We hope it makes sense to remove them too.) + // + // We use `getCharRange` to get a range that excludes the first token of TD, + // unlike the default conversion of a SourceRange to a "token range", which + // would include it. + rewriteSourceRange(R, + CharSourceRange::getCharRange( + MDI.Members[0]->getBeginLoc(), TD->getBeginLoc()), + ""); + if (TD->getName().empty()) { + // If the record is unnamed, insert the name that we assigned it: + // `struct {` -> `struct T {` + PersistentSourceLoc PSL = PersistentSourceLoc::mkPSL(TD, A); + // This will assert if we can't find the new name. Is that what we want? + std::string NewTypeStr = *Info.TheMultiDeclsInfo.getTypeStrOverride( + A.getTagDeclType(TD).getTypePtr(), A); + // This token should be the tag kind, e.g., `struct`. + std::string ExistingToken = + getSourceText(SourceRange(TD->getBeginLoc()), A); + if (NONFATAL_ASSERT_PLACEHOLDER(ExistingToken == TD->getKindName())) { + rewriteSourceRange(R, TD->getBeginLoc(), NewTypeStr); + } + } + // Make a note if the TagDecl needs to be de-nested later. + if (isa(TD->getLexicalDeclContext())) + TagDeclsToDenest.push_back(TD); + // `struct T { ... } foo;` -> `struct T { ... };\nfoo;` + rewriteSourceRange(R, TD->getEndLoc(), "};\n"); + IsFirst = false; + // Offset by one to skip past what we've just added so it isn't overwritten. + PrevEnd = TD->getEndLoc().getLocWithOffset(1); + } + + for (auto MIt = MDI.Members.begin(); MIt != MDI.Members.end(); MIt++) { + MultiDeclMemberDecl *DL = *MIt; + + // If we modify this member in any way, this is the original source range + // for the member that we expect to overwrite, before PrevEnd adjustment. + // + // We do want to overwrite existing Checked C annotations. Other than + // ItypesForExtern, 3C currently doesn't have real itype support for + // multi-decl members as opposed to function parameters and returns + // (https://github.com/correctcomputation/checkedc-clang/issues/744), but we + // are probably still better off overwriting the annotations with a + // complete, valid declaration than mixing and matching a declaration + // generated by 3C with existing annotations. + // + // If the variable has an initializer, we want this rewrite to end + // before the initializer to avoid interfering with any other rewrites + // that 3C needs to make inside the initializer expression + // (https://github.com/correctcomputation/checkedc-clang/issues/267). + SourceRange ReplaceSR = + getDeclSourceRangeWithAnnotations(DL, /*IncludeInitializer=*/false); + + // Look for a declaration replacement object for the current declaration. + MultiDeclMemberReplacement *Replacement = nullptr; + auto TRIt = ToRewrite.find(DL); + if (TRIt != ToRewrite.end()) { + Replacement = cast(TRIt->second); + // We can't expect multi-decl rewriting to work properly on a source range + // different from ReplaceSR above; for example, doDeclRewrite might insert + // an initializer in the wrong place. This assertion should pass as long + // as the implementation of DeclReplacement::getSourceRange matches + // ReplaceSR above. If someone changes DeclReplacement::getSourceRange, + // thinking that they can replace a different source range that way, we + // want to fail fast. + // + // This is awkward and makes me wonder if we should just remove + // DeclReplacement::getSourceRange since 3C currently only calls + // getSourceRange on an object already known to be a + // FunctionDeclReplacement. But after drafting that, I wasn't convinced + // that it was better than the status quo. + assert(Replacement->getSourceRange(SM) == ReplaceSR); } - if (IsFirst && ContainsInlineStruct) { - // If it is an inline struct, the first thing we have to do - // is separate the RecordDecl from the VarDecl. - ReplaceText = "};\n"; - } else if (IsFirst) { + if (IsFirst) { // Rewriting the first declaration is easy. Nothing should change if its - // type does not to be rewritten. When rewriting is required, it is - // essentially the same as the single declaration case. + // type does not to be rewritten. IsFirst = false; - if (Found) { - SourceRange SR(DL->getBeginLoc(), DL->getEndLoc()); - doDeclRewrite(SR, SameLineReplacement); + if (Replacement) { + doDeclRewrite(ReplaceSR, Replacement); } } else { + // ReplaceSR.getBegin() is the beginning of the whole multi-decl. We only + // want to replace the text starting after the previous multi-decl member, + // which is given by PrevEnd. + ReplaceSR.setBegin(PrevEnd); + // The subsequent decls are more complicated because we need to insert a // type string even if the variables type hasn't changed. - if (Found) { + if (Replacement) { // If the type has changed, the DeclReplacement object has a replacement // string stored in it that should be used. - SourceRange SR(PrevEnd, DL->getEndLoc()); - doDeclRewrite(SR, SameLineReplacement); + doDeclRewrite(ReplaceSR, Replacement); } else { // When the type hasn't changed, we still need to insert the original // type for the variable. - - // This is a bit of trickery needed to get a string representation of - // the declaration without the initializer. We don't want to rewrite to - // initializer because this causes problems when rewriting casts and - // generic function calls later on. (issue 267) - auto *VD = dyn_cast(DL); - Expr *Init = nullptr; - if (VD && VD->hasInit()) { - Init = VD->getInit(); - VD->setInit(nullptr); - } - - // Dump the declaration (without the initializer) to a string. Printing - // the AST node gives the full declaration including the base type which - // is not present in the multi-decl source code. - std::string DeclStr = ""; - raw_string_ostream DeclStream(DeclStr); - DL->print(DeclStream); - assert("Original decl string empty." && !DeclStream.str().empty()); - - // Do the replacement. PrevEnd is setup to be the source location of the - // comma after the previous declaration in the multi-decl. getEndLoc is - // either the end of the declaration or just before the initializer if - // one is present. - SourceRange SR(PrevEnd, DL->getEndLoc()); - rewriteSourceRange(R, SR, DeclStream.str()); - - // Undo prior trickery. This need to happen so that the PSL for the decl - // is not changed since the PSL is used as a map key in a few places. - if (VD && Init) - VD->setInit(Init); + std::string NewDeclStr = mkStringForDeclWithUnchangedType(DL, Info); + rewriteSourceRange(R, ReplaceSR, NewDeclStr); } } - SourceRange End; - // In the event that IsFirst was not set to false, that implies we are - // separating the RecordDecl and VarDecl, so instead of searching for - // the next comma, we simply specify the end of the RecordDecl. - if (IsFirst) { - IsFirst = false; - End = DL->getEndLoc(); - } - // Variables in a mutli-decl are delimited by commas. The rewritten decls + // Variables in a multi-decl are delimited by commas. The rewritten decls // are separate statements separated by a semicolon and a newline. - else - End = getNextCommaOrSemicolon(DL->getEndLoc()); - rewriteSourceRange(R, End, ReplaceText); - // Offset by one to skip past what we've just added so it isn't overwritten. - PrevEnd = End.getEnd().getLocWithOffset(1); + bool IsLast = (MIt + 1 == MDI.Members.end()); + if (!IsLast) { + // This differs from ReplaceSR in that we want to advance past the entire + // multi-decl member, _including_ any existing initializer. + SourceRange SkipSR = + getDeclSourceRangeWithAnnotations(DL, /*IncludeInitializer=*/true); + SourceRange Comma = getNextComma(SkipSR.getEnd()); + rewriteSourceRange(R, Comma, ";\n"); + // Offset by one to skip past what we've just added so it isn't + // overwritten. + PrevEnd = Comma.getEnd().getLocWithOffset(1); + } } + + MDI.AlreadyRewritten = true; } // Common rewriting logic used to replace a single decl either on its own or as @@ -434,17 +464,15 @@ void DeclRewriter::rewriteMultiDecl(DeclReplacement *N, RSet &ToRewrite, // invoking the rewriter) is to add any required initializer expression. void DeclRewriter::doDeclRewrite(SourceRange &SR, DeclReplacement *N) { std::string Replacement = N->getReplacement(); - if (isa(N->getDecl())) - Replacement = "typedef " + Replacement; if (auto *VD = dyn_cast(N->getDecl())) { - if (VD->hasInit()) { - // Make sure we preserve any existing initializer - SR.setEnd(VD->getInitializerStartLoc()); - Replacement += " ="; - } else { + if (!VD->hasInit()) { // There is no initializer. Add it if we need one. // MWH -- Solves issue 43. Should make it so we insert NULL if stdlib.h or // stdlib_checked.h is included + // TODO: Centralize initialization logic for all types: + // https://github.com/correctcomputation/checkedc-clang/issues/645#issuecomment-876474200 + // TODO: Don't add unnecessary initializers to global variables: + // https://github.com/correctcomputation/checkedc-clang/issues/741 if (VD->getStorageClass() != StorageClass::SC_Extern) { const std::string NullPtrStr = "((void *)0)"; if (isPointerType(VD)) { @@ -466,79 +494,19 @@ void DeclRewriter::rewriteFunctionDecl(FunctionDeclReplacement *N) { N->getReplacement()); } -// A function to detect the presence of inline struct declarations -// by tracking VarDecls and RecordDecls and populating data structures -// later used in rewriting. - -// These variables are duplicated in the header file and here because static -// vars need to be initialized in the cpp file where the class is defined. -/*static*/ RecordDecl *DeclRewriter::LastRecordDecl = nullptr; -/*static*/ std::map DeclRewriter::VDToRDMap; -/*static*/ std::set DeclRewriter::InlineVarDecls; -void DeclRewriter::detectInlineStruct(Decl *D, SourceManager &SM) { - RecordDecl *RD = dyn_cast(D); - if (RD != nullptr && - // With -fms-extensions (default on Windows), Clang injects an implicit - // `struct _GUID` with an invalid location, which would cause an assertion - // failure in SM.isPointWithin below. - RD->getBeginLoc().isValid()) { - LastRecordDecl = RD; - } - if (VarDecl *VD = dyn_cast(D)) { - if (LastRecordDecl != nullptr) { - auto LastRecordLocation = LastRecordDecl->getBeginLoc(); - auto Begin = VD->getBeginLoc(); - auto End = VD->getEndLoc(); - bool IsInLineStruct = SM.isPointWithin(LastRecordLocation, Begin, End); - bool IsNamedInLineStruct = - IsInLineStruct && LastRecordDecl->getNameAsString() != ""; - if (IsNamedInLineStruct) { - VDToRDMap[VD] = LastRecordDecl; - InlineVarDecls.insert(VD); - } - } - } -} - -// Uses clangs lexer to find the location of the next comma or semicolon after +// Uses clangs lexer to find the location of the next comma after // the given source location. This is used to find the end of each declaration // within a multi-declaration. -SourceRange DeclRewriter::getNextCommaOrSemicolon(SourceLocation L) { +SourceRange DeclRewriter::getNextComma(SourceLocation L) { SourceManager &SM = A.getSourceManager(); auto Tok = Lexer::findNextToken(L, SM, A.getLangOpts()); while (Tok.hasValue() && !Tok->is(clang::tok::eof)) { - if (Tok->is(clang::tok::comma) || Tok->is(clang::tok::semi)) + if (Tok->is(clang::tok::comma)) return SourceRange(Tok->getLocation(), Tok->getLocation()); Tok = Lexer::findNextToken(Tok->getEndLoc(), A.getSourceManager(), A.getLangOpts()); } - llvm_unreachable("Unable to find comma or semicolon at source location."); -} - -bool DeclRewriter::isSingleDeclaration(DeclReplacement *N) { - DeclStmt *Stmt = N->getStatement(); - if (Stmt == nullptr) { - auto &VDGroup = GP.getVarsOnSameLine(N->getDecl()); - return VDGroup.size() == 1; - } - return Stmt->isSingleDecl(); -} - -void DeclRewriter::getDeclsOnSameLine(DeclReplacement *N, - std::vector &Decls) { - if (N->getStatement() != nullptr) { - Decls.insert(Decls.begin(), N->getStatement()->decls().begin(), - N->getStatement()->decls().end()); - } else { - std::vector GlobalLine = GP.getVarsOnSameLine(N->getDecl()); - Decls.insert(Decls.begin(), GlobalLine.begin(), GlobalLine.end()); - } - - assert("Invalid ordering in same line decls" && - std::is_sorted(Decls.begin(), Decls.end(), [&](Decl *D0, Decl *D1) { - return A.getSourceManager().isBeforeInTranslationUnit( - D0->getEndLoc(), D1->getEndLoc()); - })); + llvm_unreachable("Unable to find comma at source location."); } // This function checks how to re-write a function declaration. @@ -881,13 +849,3 @@ bool FunctionDeclBuilder::inParamMultiDecl(const ParmVarDecl *PVD) { } return false; } - -bool FieldFinder::VisitFieldDecl(FieldDecl *FD) { - GVG.addGlobalDecl(FD); - return true; -} - -void FieldFinder::gatherSameLineFields(GlobalVariableGroups &GVG, Decl *D) { - FieldFinder FF(GVG); - FF.TraverseDecl(D); -} diff --git a/clang/lib/3C/MappingVisitor.cpp b/clang/lib/3C/MappingVisitor.cpp index 3d1f6219fc4a..febf3ec7c7d2 100644 --- a/clang/lib/3C/MappingVisitor.cpp +++ b/clang/lib/3C/MappingVisitor.cpp @@ -14,57 +14,12 @@ using namespace clang; -bool MappingVisitor::VisitDeclStmt(DeclStmt *S) { - PersistentSourceLoc PSL = PersistentSourceLoc::mkPSL(S, Context); - - if (PSL.valid()) { - - // Check to see if the source location as described by the current location - // of S appears in the set of PersistentSourceLocs we are tasked to - // resolve. If it is, then create a mapping mapping the current - // PersistentSourceLocation to the Stmt object S. - std::set::iterator I = SourceLocs.find(PSL); - if (I != SourceLocs.end()) { - Decl *D = nullptr; - Stmt *So = nullptr; - std::tie(So, D) = PSLtoSDT[PSL]; - if (So != nullptr && _3COpts.Verbose) { - llvm::errs() << "\nOverriding "; - S->dump(); - llvm::errs() << "\n"; - llvm::errs() << "With "; - So->dump(); - llvm::errs() << "\n"; - llvm::errs() << " at "; - PSL.dump(); - llvm::errs() << "\n"; - } - - if (So == nullptr) - PSLtoSDT[PSL] = StmtDecl(S, D); - } - - if (DeclStmt *DL = dyn_cast(S)) { - if (DL->isSingleDecl()) { - if (VarDecl *VD = dyn_cast(DL->getSingleDecl())) - DeclToDeclStmt[VD] = DL; - } else - for (auto *I : DL->decls()) - DeclToDeclStmt[I] = DL; - } - } - - return true; -} - bool MappingVisitor::VisitDecl(Decl *D) { PersistentSourceLoc PSL = PersistentSourceLoc::mkPSL(D, Context); if (PSL.valid()) { std::set::iterator I = SourceLocs.find(PSL); if (I != SourceLocs.end()) { - Decl *Do = nullptr; - Stmt *S = nullptr; - std::tie(S, Do) = PSLtoSDT[PSL]; + Decl *Do = PSLtoSDT[PSL]; if (Do != nullptr && _3COpts.Verbose) { llvm::errs() << "Overriding "; Do->dump(); @@ -75,7 +30,7 @@ bool MappingVisitor::VisitDecl(Decl *D) { } if (Do == nullptr) - PSLtoSDT[PSL] = StmtDecl(S, D); + PSLtoSDT[PSL] = D; } } diff --git a/clang/lib/3C/MultiDecls.cpp b/clang/lib/3C/MultiDecls.cpp new file mode 100644 index 000000000000..26a76a7a92da --- /dev/null +++ b/clang/lib/3C/MultiDecls.cpp @@ -0,0 +1,296 @@ +//=--MultiDecls.cpp-----------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/3C/MultiDecls.h" +#include "clang/3C/Utils.h" + +MultiDeclMemberDecl *getAsMultiDeclMember(Decl *D) { + // XXX: Is this the best place for this check? + if (D->getLocation().isInvalid()) + return nullptr; + + // A FunctionDecl can be part of a multi-decl in C, but 3C currently doesn't + // handle this + // (https://github.com/correctcomputation/checkedc-clang/issues/659 part (a)). + + // While K&R parameter declarations can be in multi-decls in the input + // program, we don't use any of the regular multi-decl infrastructure for + // them; the rewriter blows them away and generates a prototype. + if (isa(D)) + return nullptr; + + if (VarDecl *VD = dyn_cast(D)) + return VD; + if (FieldDecl *FD = dyn_cast(D)) + return FD; + if (TypedefDecl *TD = dyn_cast(D)) + return TD; + return nullptr; +} + +QualType getTypeOfMultiDeclMember(MultiDeclMemberDecl *MMD) { + if (DeclaratorDecl *DD = dyn_cast(MMD)) + return DD->getType(); + if (TypedefDecl *TD = dyn_cast(MMD)) + return TD->getUnderlyingType(); + llvm_unreachable("Unexpected declaration type"); +} + +TypeSourceInfo *getTypeSourceInfoOfMultiDeclMember(MultiDeclMemberDecl *MMD) { + if (DeclaratorDecl *DD = dyn_cast(MMD)) + return DD->getTypeSourceInfo(); + if (TypedefDecl *TD = dyn_cast(MMD)) + return TD->getTypeSourceInfo(); + llvm_unreachable("Unexpected declaration type"); +} + +void ProgramMultiDeclsInfo::findUsedTagNames(DeclContext *DC) { + // We do our own traversal via `decls` rather than using RecursiveASTVisitor. + // This has the advantage of visiting TagDecls in function parameters, which + // RecursiveASTVisitor doesn't do by default, though such TagDecls are + // potentially problematic for 3C anyway. + for (Decl *D : DC->decls()) { + if (TagDecl *TD = dyn_cast(D)) { + if (!TD->getName().empty()) { + // Multiple TagDecls may have the same name if the same physical + // declaration is seen in multiple translation units or different + // TagDecls with the same name are used in different scopes. That is not + // a problem for us here: we're simply making a list of all the names we + // don't want to collide with. + UsedTagNames.insert(std::string(TD->getName())); + } + } + if (DeclContext *NestedDC = dyn_cast(D)) { + findUsedTagNames(NestedDC); + } + } +} + +void ProgramMultiDeclsInfo::findUsedTagNames(ASTContext &Context) { + findUsedTagNames(Context.getTranslationUnitDecl()); +} + +static const Type *unelaborateType(const Type *Ty) { + if (const ElaboratedType *ETy = dyn_cast(Ty)) { + QualType QT = ETy->getNamedType(); + Ty = QT.getTypePtr(); + // Can an ElaboratedType add qualifiers to its underlying type in C? I don't + // think so, but if it does, we don't want to silently lose them. + NONFATAL_ASSERT_PLACEHOLDER_UNUSED(QualType(Ty, 0) == QT); + } + return Ty; +} + +void ProgramMultiDeclsInfo::findMultiDecls(DeclContext *DC, + ASTContext &Context) { + // This will automatically create a new, empty map for the TU if needed. + TUMultiDeclsInfo &TUInfo = TUInfos[&Context]; + TagDecl *LastTagDef = nullptr; + + // Variables related to the current multi-decl. + MultiDeclInfo *CurrentMultiDecl = nullptr; + SourceLocation CurrentBeginLoc; + PersistentSourceLoc TagDefPSL; + bool TagDefNeedsName; + llvm::Optional RenameInfo; + bool AppliedRenameInfo; + + for (Decl *D : DC->decls()) { + TagDecl *TagD = dyn_cast(D); + if (TagD && TagD->isCompleteDefinition() && + // With -fms-extensions (default on Windows), Clang injects an implicit + // `struct _GUID` with an invalid location. + TagD->getBeginLoc().isValid()) { + LastTagDef = TagD; + } + if (MultiDeclMemberDecl *MMD = getAsMultiDeclMember(D)) { + if (CurrentMultiDecl == nullptr || + MMD->getBeginLoc() != CurrentBeginLoc) { + // We are starting a new multi-decl. + CurrentBeginLoc = MMD->getBeginLoc(); + CurrentMultiDecl = &TUInfo.MultiDeclsByBeginLoc[CurrentBeginLoc]; + assert(CurrentMultiDecl->Members.empty() && + "Multi-decl members are not consecutive in traversal order"); + TagDefNeedsName = false; + RenameInfo = llvm::None; + AppliedRenameInfo = false; + + // Check for an inline tag definition. + // Wanted: CurrentBeginLoc <= LastTagDef->getBeginLoc(). + // Implemented as: !(LastTagDef->getBeginLoc() < CurrentBeginLoc). + if (LastTagDef != nullptr && + !Context.getSourceManager().isBeforeInTranslationUnit( + LastTagDef->getBeginLoc(), CurrentBeginLoc)) { + CurrentMultiDecl->TagDefToSplit = LastTagDef; + TUInfo.ContainingMultiDeclOfTagDecl[LastTagDef] = CurrentMultiDecl; + + // Do we need to automatically name the TagDefToSplit? + if (LastTagDef->getName().empty()) { + // A RecordDecl that is declared as the type of one or more + // variables shouldn't be "anonymous", but if it somehow is, we + // don't want to try to give it a name. + NONFATAL_ASSERT_PLACEHOLDER_UNUSED( + !(isa(LastTagDef) && + cast(LastTagDef)->isAnonymousStructOrUnion())); + TagDefPSL = PersistentSourceLoc::mkPSL(LastTagDef, Context); + auto Iter = RenamedTagDefs.find(TagDefPSL); + if (Iter != RenamedTagDefs.end()) + RenameInfo = Iter->second; + else if (canWrite(TagDefPSL.getFileName())) + TagDefNeedsName = true; + } + } + } else { + // Adding another member to an existing multi-decl. + assert(Context.getSourceManager().isBeforeInTranslationUnit( + CurrentMultiDecl->Members.back()->getEndLoc(), + MMD->getEndLoc()) && + "Multi-decl traversal order inconsistent " + "with source location order"); + } + + CurrentMultiDecl->Members.push_back(MMD); + + std::string MemberName; + if (TagDefNeedsName && + NONFATAL_ASSERT_PLACEHOLDER( + !(MemberName = std::string(MMD->getName())).empty())) { + // Special case: If the first member of the multi-decl is a typedef + // whose type is exactly the TagDecl type (`typedef struct { ... } T`), + // then we refer to the TagDecl via that typedef. (The typedef must be + // the first member so that it is defined in time for other members to + // refer to it.) + // + // An argument could be made for using the typedef name in the types of + // other multi-decl members even if the TagDecl has a name: + // `typedef struct T_struct { ... } T, *PT;` would convert to + // `typedef struct T_struct { ... } T; typedef _Ptr PT;` instead of + // `struct T_struct { ... }; typedef struct T_struct T; + // typedef _Ptr PT;`. But it would be tricky to ensure + // that any existing references to `struct T_struct` aren't accidentally + // replaced with `T`, so absent a decision that this feature is + // important enough to justify either solving or ignoring this problem, + // we don't try to implement the feature. + TypedefDecl *TyD; + QualType Underlying; + if (CurrentMultiDecl->Members.size() == 1 && + (TyD = dyn_cast(MMD)) != nullptr && + // XXX: This is a terrible mess. Figure out how we should be + // handling the difference between Type and QualType. + !(Underlying = TyD->getUnderlyingType()).hasLocalQualifiers() && + QualType(unelaborateType(Underlying.getTypePtr()), 0) == + Context.getTagDeclType(LastTagDef)) { + // In this case, ShouldSplit = false: the tag definition should not be + // moved out of the typedef. + RenameInfo = RenamedTagDefInfo{MemberName, false}; + } else { + // Otherwise, just generate a new tag name based on the member name. + // Example: `struct { ... } foo;` -> + // `struct foo_struct_1 { ... }; struct foo_struct_1 foo;` + // If `foo_struct_1` is already taken, use `foo_struct_2`, etc. + std::string KindName = std::string(LastTagDef->getKindName()); + std::string NewName; + for (int Num = 1;; Num++) { + NewName = MemberName + "_" + KindName + "_" + std::to_string(Num); + if (UsedTagNames.find(NewName) == UsedTagNames.end()) + break; + } + RenameInfo = RenamedTagDefInfo{KindName + " " + NewName, true}; + // Consider this name taken and ensure that other automatically + // generated names do not collide with it. + // + // If the multi-decl doesn't end up getting rewritten, this name + // ultimately may not be used, creating a gap in the numbering in 3C's + // output. But this cosmetic inconsistency is a small price to pay for + // the architectural convenience of being able to store the assigned + // names in the PointerVariableConstraints when they are constructed + // rather than trying to assign and store the names after we know + // which multi-decls will be rewritten. + UsedTagNames.insert(NewName); + } + RenamedTagDefs.insert(std::make_pair(TagDefPSL, *RenameInfo)); + TagDefNeedsName = false; + } + + // To help avoid bugs, use the same code whether the RenameInfo was just + // assigned or was saved from a previous translation unit. + if (RenameInfo && !AppliedRenameInfo) { + CurrentMultiDecl->BaseTypeRenamed = true; + if (!RenameInfo->ShouldSplit) + CurrentMultiDecl->TagDefToSplit = nullptr; + AppliedRenameInfo = true; + } + } + + if (DeclContext *NestedDC = dyn_cast(D)) { + findMultiDecls(NestedDC, Context); + } + } +} + +void ProgramMultiDeclsInfo::findMultiDecls(ASTContext &Context) { + findMultiDecls(Context.getTranslationUnitDecl(), Context); +} + +llvm::Optional +ProgramMultiDeclsInfo::getTypeStrOverride(const Type *Ty, const ASTContext &C) { + Ty = unelaborateType(Ty); + if (const TagType *TTy = dyn_cast(Ty)) { + TagDecl *TD = TTy->getDecl(); + if (TD->getName().empty()) { + PersistentSourceLoc PSL = PersistentSourceLoc::mkPSL(TD, C); + auto Iter = RenamedTagDefs.find(PSL); + if (Iter != RenamedTagDefs.end()) + return Iter->second.AssignedTypeStr; + // We should have named all unnamed TagDecls in writable code. + NONFATAL_ASSERT_PLACEHOLDER_UNUSED(!canWrite(PSL.getFileName())); + } + } + return llvm::None; +} + +MultiDeclInfo * +ProgramMultiDeclsInfo::findContainingMultiDecl(MultiDeclMemberDecl *MMD) { + auto &MultiDeclsByLoc = TUInfos[&MMD->getASTContext()].MultiDeclsByBeginLoc; + // Look for a MultiDeclInfo for the beginning location of MMD, then check that + // the MultiDeclInfo actually contains MMD. + auto It = MultiDeclsByLoc.find(MMD->getBeginLoc()); + if (It == MultiDeclsByLoc.end()) + return nullptr; + MultiDeclInfo &MDI = It->second; + // Hope we don't have multi-decls with so many members that this becomes a + // performance problem. + if (std::find(MDI.Members.begin(), MDI.Members.end(), MMD) != + MDI.Members.end()) + return &MDI; + return nullptr; +} + +MultiDeclInfo *ProgramMultiDeclsInfo::findContainingMultiDecl(TagDecl *TD) { + auto &MDOfTD = TUInfos[&TD->getASTContext()].ContainingMultiDeclOfTagDecl; + auto It = MDOfTD.find(TD); + if (It == MDOfTD.end()) + return nullptr; + return It->second; +} + +bool ProgramMultiDeclsInfo::wasBaseTypeRenamed(Decl *D) { + // We assume that the base type was renamed if and only if D belongs to a + // multi-decl marked as having the base type renamed. It might be better to + // actually extract the base type from D and look it up in RenamedTagDefs, + // but that's more work. + MultiDeclMemberDecl *MMD = getAsMultiDeclMember(D); + if (!MMD) + return false; + MultiDeclInfo *MDI = findContainingMultiDecl(MMD); + // We expect to have a MultiDeclInfo for every MultiDeclMemberDecl in the + // program. + if (!NONFATAL_ASSERT_PLACEHOLDER(MDI)) + return false; + return MDI->BaseTypeRenamed; +} diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index f89d2a5bd4b7..ce93d0bf0ff6 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -695,6 +695,11 @@ void ProgramInfo::addVariable(clang::DeclaratorDecl *D, unifyIfTypedef(QT, *AstContext, P); NewCV = P; NewCV->setValidDecl(); + if (FlD->getParent()->isUnion()) { + auto Rsn = ReasonLoc(UNION_FIELD_REASON, PLoc); + NewCV->equateWithItype(*this, Rsn); + NewCV->constrainToWild(CS, Rsn); + } } } else llvm_unreachable("unknown decl type"); @@ -1156,18 +1161,14 @@ bool ProgramInfo::seenTypedef(PersistentSourceLoc PSL) { return TypedefVars.count(PSL) != 0; } -void ProgramInfo::addTypedef(PersistentSourceLoc PSL, bool CanRewriteDef, - TypedefDecl *TD, ASTContext &C) { +void ProgramInfo::addTypedef(PersistentSourceLoc PSL, TypedefDecl *TD, + ASTContext &C) { ConstraintVariable *V = nullptr; if (isa(TD->getUnderlyingType())) V = new FunctionVariableConstraint(TD, *this, C); else V = new PointerVariableConstraint(TD, *this, C); - if (!CanRewriteDef) - V->constrainToWild(this->getConstraints(), - ReasonLoc("Unable to rewrite a typedef with multiple names", PSL)); - if (!canWrite(PSL.getFileName())) V->constrainToWild(this->getConstraints(), ReasonLoc(UNWRITABLE_REASON, PSL)); diff --git a/clang/lib/3C/RewriteUtils.cpp b/clang/lib/3C/RewriteUtils.cpp index 90ac3561f5eb..83fd8815dd08 100644 --- a/clang/lib/3C/RewriteUtils.cpp +++ b/clang/lib/3C/RewriteUtils.cpp @@ -21,41 +21,94 @@ using namespace llvm; using namespace clang; -void GlobalVariableGroups::addGlobalDecl(Decl *VD, std::vector *VDVec) { - if (VD && GlobVarGroups.find(VD) == GlobVarGroups.end()) { - if (VDVec == nullptr) - VDVec = new std::vector(); - assert("Decls in group are not ordered correctly." && - (VDVec->empty() || - SM.isBeforeInTranslationUnit(VDVec->back()->getEndLoc(), - VD->getEndLoc()))); - VDVec->push_back(VD); - GlobVarGroups[VD] = VDVec; - // Process the next decl. - Decl *NDecl = VD->getNextDeclInContext(); - if (isa_and_nonnull(NDecl) || isa_and_nonnull(NDecl)) - if (VD->getBeginLoc() == NDecl->getBeginLoc()) - addGlobalDecl(dyn_cast(NDecl), VDVec); +std::string mkStringForPVDecl(MultiDeclMemberDecl *MMD, PVConstraint *PVC, + ProgramInfo &Info) { + // Currently, it's cheap to keep recreating the ArrayBoundsRewriter. If that + // ceases to be true, we should pass it along as another argument. + ArrayBoundsRewriter ABRewriter{Info}; + std::string NewDecl = getStorageQualifierString(MMD); + bool IsExternGlobalVar = + isa(MMD) && + cast(MMD)->getFormalLinkage() == Linkage::ExternalLinkage; + if (_3COpts.ItypesForExtern && (isa(MMD) || IsExternGlobalVar) && + // isSolutionChecked can return false here when splitting out an unchanged + // multi-decl member. + PVC->isSolutionChecked(Info.getConstraints().getVariables())) { + // Give record fields and global variables itypes when using + // -itypes-for-extern. Note that we haven't properly implemented itypes for + // structures and globals + // (https://github.com/correctcomputation/checkedc-clang/issues/744). This + // just rewrites to an itype instead of a fully checked type when a checked + // type could have been used. This does provide most of the rewriting + // infrastructure that would be required to support these itypes if + // constraint generation is updated to handle structure/global itypes. + std::string Type, IType; + // VarDecl and FieldDecl subclass DeclaratorDecl, so the cast will + // always succeed. + DeclRewriter::buildItypeDecl(PVC, cast(MMD), Type, IType, + Info, ABRewriter); + NewDecl += Type + IType; + } else { + NewDecl += PVC->mkString(Info.getConstraints()) + + ABRewriter.getBoundsString(PVC, MMD); } + return NewDecl; } -std::vector &GlobalVariableGroups::getVarsOnSameLine(Decl *D) { - assert(GlobVarGroups.find(D) != GlobVarGroups.end() && - "Expected to find the group."); - return *(GlobVarGroups[D]); -} +std::string mkStringForDeclWithUnchangedType(MultiDeclMemberDecl *MMD, + ProgramInfo &Info) { + ASTContext &Context = MMD->getASTContext(); + + bool BaseTypeRenamed = Info.TheMultiDeclsInfo.wasBaseTypeRenamed(MMD); + if (!BaseTypeRenamed) { + // As far as we know, we can let Clang generate the declaration string. + PrintingPolicy Policy = Context.getPrintingPolicy(); + Policy.SuppressInitializers = true; + std::string DeclStr = ""; + raw_string_ostream DeclStream(DeclStr); + MMD->print(DeclStream, Policy); + assert("Original decl string empty." && !DeclStr.empty()); + return DeclStr; + } -GlobalVariableGroups::~GlobalVariableGroups() { - std::set *> VVisited; - // Free each of the group. - for (auto &CurrV : GlobVarGroups) { - // Avoid double free by caching deleted sets. - if (VVisited.find(CurrV.second) != VVisited.end()) - continue; - VVisited.insert(CurrV.second); - delete (CurrV.second); + // OK, we have to use mkString. + QualType DType = getTypeOfMultiDeclMember(MMD); + if (isPtrOrArrayType(DType)) { + CVarOption CVO = + (isa(MMD) + ? Info.lookupTypedef(PersistentSourceLoc::mkPSL(MMD, Context)) + : Info.getVariable(MMD, &Context)); + assert(CVO.hasValue() && + "Missing ConstraintVariable for unchanged multi-decl member"); + // A function currently can't be a multi-decl member, so this should always + // be a PointerVariableConstraint. + PVConstraint *PVC = cast(&CVO.getValue()); + // Currently, we benefit from the ItypesForExtern handling in + // mkStringForPVDecl in one very unusual case: an unchanged multi-decl + // member with a renamed TagDecl and an existing implicit itype coming from + // a bounds annotation will keep the itype and not be changed to a fully + // checked type. DeclRewriter::buildItypeDecl will detect the base type + // rename and generate the unchecked side using mkString instead of + // Decl::print in order to pick up the new name. + // + // As long as 3C lacks real support for itypes on variables, this is + // probably the behavior we want with -itypes-for-extern. If we don't care + // about this case, we could alternatively inline the few lines of + // mkStringForPVDecl that would still be relevant. + return mkStringForPVDecl(MMD, PVC, Info); } - GlobVarGroups.clear(); + + // If the type is not a pointer or array, then it should just equal the base + // type except for top-level qualifiers, and it can't have itypes or bounds. + llvm::Optional BaseTypeNewNameOpt = + Info.TheMultiDeclsInfo.getTypeStrOverride(DType.getTypePtr(), Context); + assert(BaseTypeNewNameOpt && + "BaseTypeRenamed is true but we couldn't get the new name"); + std::string QualifierPrefix = DType.getQualifiers().getAsString(); + if (!QualifierPrefix.empty()) + QualifierPrefix += " "; + return getStorageQualifierString(MMD) + QualifierPrefix + + *BaseTypeNewNameOpt + " " + std::string(MMD->getName()); } // Test to see if we can rewrite a given SourceRange. @@ -429,14 +482,8 @@ class TypeArgumentAdder : public clang::RecursiveASTVisitor { }; SourceRange DeclReplacement::getSourceRange(SourceManager &SM) const { - SourceRange SR = getDecl()->getSourceRange(); - SourceLocation OldEnd = SR.getEnd(); - SourceLocation NewEnd = getCheckedCAnnotationsEnd(getDecl()); - if (NewEnd.isValid() && - (!OldEnd.isValid() || SM.isBeforeInTranslationUnit(OldEnd, NewEnd))) - SR.setEnd(NewEnd); - - return SR; + return getDeclSourceRangeWithAnnotations(getDecl(), + /*IncludeInitializer=*/false); } SourceRange FunctionDeclReplacement::getSourceRange(SourceManager &SM) const { diff --git a/clang/lib/3C/StructInit.cpp b/clang/lib/3C/StructInit.cpp index 886b584d26d4..fcc53f641f23 100644 --- a/clang/lib/3C/StructInit.cpp +++ b/clang/lib/3C/StructInit.cpp @@ -11,6 +11,7 @@ #include "clang/3C/StructInit.h" #include "clang/3C/MappingVisitor.h" +#include "clang/3C/RewriteUtils.h" #include "clang/Tooling/Transformer/SourceCode.h" #include @@ -53,31 +54,18 @@ bool StructVariableInitializer::hasCheckedMembers(DeclaratorDecl *DD) { // Insert the declaration and correct replacement text for the declaration into // the set of required rewritings. -void StructVariableInitializer::insertVarDecl(VarDecl *VD, DeclStmt *S) { +bool StructVariableInitializer::VisitVarDecl(VarDecl *VD) { // Check if we need to add an initializer. - bool IsVarExtern = VD->getStorageClass() == StorageClass::SC_Extern; - if (!IsVarExtern && !VD->hasInit() && hasCheckedMembers(VD)) { + // TODO: Centralize initialization logic for all types: + // https://github.com/correctcomputation/checkedc-clang/issues/645#issuecomment-876474200 + + // The first two conditions are the same as in Sema::ActOnUninitializedDecl. + if (VD->hasLocalStorage() && !isa(VD) && !VD->hasInit() && + hasCheckedMembers(VD)) { // Create replacement declaration text with an initializer. - const clang::Type *Ty = VD->getType().getTypePtr(); - std::string TQ = VD->getType().getQualifiers().getAsString(); - if (!TQ.empty()) - TQ += " "; - std::string ToReplace = getStorageQualifierString(VD) + TQ + tyToStr(Ty) + - " " + VD->getName().str() + " = {}"; + std::string ToReplace = mkStringForDeclWithUnchangedType(VD, I) + " = {}"; RewriteThese.insert( - std::make_pair(VD, new VarDeclReplacement(VD, S, ToReplace))); - } -} - -// Check to see if this variable require an initialization. -bool StructVariableInitializer::VisitDeclStmt(DeclStmt *S) { - if (S->isSingleDecl()) { - if (VarDecl *VD = dyn_cast(S->getSingleDecl())) - insertVarDecl(VD, S); - } else { - for (const auto &D : S->decls()) - if (VarDecl *VD = dyn_cast(D)) - insertVarDecl(VD, S); + std::make_pair(VD, new MultiDeclMemberReplacement(VD, ToReplace))); } return true; } diff --git a/clang/lib/3C/Utils.cpp b/clang/lib/3C/Utils.cpp index 84ede9a2f98e..7da01da04cb6 100644 --- a/clang/lib/3C/Utils.cpp +++ b/clang/lib/3C/Utils.cpp @@ -120,7 +120,8 @@ static std::string storageClassToString(StorageClass SC) { } // This method gets the storage qualifier for the -// provided declaration i.e., static, extern, etc. +// provided declaration including any trailing space, i.e., "static ", +// "extern ", etc., or "" if none. std::string getStorageQualifierString(Decl *D) { if (FunctionDecl *FD = dyn_cast(D)) { return storageClassToString(FD->getStorageClass()); @@ -128,6 +129,12 @@ std::string getStorageQualifierString(Decl *D) { if (VarDecl *VD = dyn_cast(D)) { return storageClassToString(VD->getStorageClass()); } + if (isa(D)) { + // `typedef` goes in the same syntactic position as a storage qualifier and + // needs to be inserted when breaking up a multi-decl, just like a real + // storage qualifier. + return "typedef "; + } return ""; } @@ -598,3 +605,42 @@ SourceLocation getCheckedCAnnotationsEnd(const Decl *D) { return End; } + +SourceRange getDeclSourceRangeWithAnnotations(const clang::Decl *D, + bool IncludeInitializer) { + SourceManager &SM = D->getASTContext().getSourceManager(); + SourceRange SR; + const VarDecl *VD; + // Only a VarDecl can have an initializer. VarDecl's implementation of the + // getSourceRange virtual method includes the initializer, but we can manually + // call DeclaratorDecl's implementation, which excludes the initializer. + if (!IncludeInitializer && (VD = dyn_cast(D)) != nullptr) + SR = VD->DeclaratorDecl::getSourceRange(); + else + SR = D->getSourceRange(); + if (!SR.isValid()) + return SR; + SourceLocation DeclEnd = SR.getEnd(); + + // Partial workaround for a compiler bug where if D has certain checked + // pointer types such as `_Ptr` (seen in the partial_checked.c + // regression test), D->getSourceRange() returns only the _Ptr token. (As of + // this writing on 2021-11-18, no bug report has been filed against the + // compiler, but https://github.com/correctcomputation/checkedc-clang/pull/723 + // tracks our work on the bug.) + // + // Always extend the range at least through the name (given by + // D->getLocation()). That fixes the `_Ptr x` case but not cases + // with additional syntax after the name, such as `_Ptr x[10]`. + SourceLocation DeclLoc = D->getLocation(); + if (SM.isBeforeInTranslationUnit(DeclEnd, DeclLoc)) + DeclEnd = DeclLoc; + + SourceLocation AnnotationsEnd = getCheckedCAnnotationsEnd(D); + if (AnnotationsEnd.isValid() && + SM.isBeforeInTranslationUnit(DeclEnd, AnnotationsEnd)) + DeclEnd = AnnotationsEnd; + + SR.setEnd(DeclEnd); + return SR; +} diff --git a/clang/test/3C/basic_checks.c b/clang/test/3C/basic_checks.c index b94ec1e49916..7fcec48335ed 100644 --- a/clang/test/3C/basic_checks.c +++ b/clang/test/3C/basic_checks.c @@ -33,12 +33,13 @@ typedef struct _A { int a; int b; } A, *PA; +//CHECK: typedef _Ptr PA; void mut_pa(PA p) { p->a = 0; p->b = 1; } -//CHECK: void mut_pa(PA p : itype(_Ptr)) { +//CHECK: void mut_pa(PA p) { void pa_driver(void) { A a = {0}; diff --git a/clang/test/3C/inline_anon_structs.c b/clang/test/3C/inline_anon_structs.c index 9b1406157285..e26e64403e65 100644 --- a/clang/test/3C/inline_anon_structs.c +++ b/clang/test/3C/inline_anon_structs.c @@ -1,5 +1,6 @@ // RUN: rm -rf %t* // RUN: 3c -base-dir=%S -alltypes -addcr %s -- | FileCheck -match-full-lines -check-prefixes="CHECK_ALL","CHECK" %s +// RUN: 3c -base-dir=%S -alltypes -addcr %s -- | %clang -c -fcheckedc-extension -x c -o /dev/null - // RUN: 3c -base-dir=%S -addcr %s -- | FileCheck -match-full-lines -check-prefixes="CHECK_NOALL","CHECK" %s // RUN: 3c -base-dir=%S -addcr %s -- | %clang -c -fcheckedc-extension -x c -o /dev/null - // RUN: 3c -base-dir=%S -output-dir=%t.checked %s -- @@ -11,13 +12,29 @@ an inlinestruct and its associated VarDecl have different locations*/ int valuable; +// When -alltypes is on, the addition of _Checked to the array triggers +// rewriting of the multi-decl, including splitting of the struct definition. +// When it is off, the multi-decl as such is not rewritten, but in either case, +// the fields of the struct are rewritten as appropriate. static struct foo { + // When an inline struct definition is separated from variable declarations, + // if there was a `static` keyword that applied to the variables, we should + // remove it from the separated struct (where it is not meaningful). + //CHECK_NOALL: static struct foo { + //CHECK_ALL: struct foo { const char *name; + // See https://github.com/correctcomputation/checkedc-clang/issues/470. //CHECK_NOALL: const char *name; //CHECK_ALL: _Ptr name; int *p_valuable; //CHECK: _Ptr p_valuable; } array[] = {{"mystery", &valuable}}; +// {{...}} in a CHECK directive delimits a regular expression +// (https://llvm.org/docs/CommandGuide/FileCheck.html#filecheck-regex-matching-syntax) +// and there isn't a way to escape that construct itself, so we use a regular +// expression and escape the contents as needed. +//CHECK_NOALL: {{\} array\[\] = \{\{"mystery", &valuable\}\};}} +//CHECK_ALL: {{static struct foo array _Checked\[\] = \{\{"mystery", &valuable\}\};}} /*This code is a series of more complex tests for inline structs*/ /* a, b, c below all stay as WILD pointers; d can be a _Ptr<...>*/ @@ -67,15 +84,14 @@ void foo(void) { /*This code tests anonymous structs */ struct { + //CHECK: struct x_struct_1 { /*the fields of the anonymous struct are free to be marked checked*/ int *data; //CHECK_NOALL: int *data; - - /* but the actual pointer can't be when alltypes is disabled */ - /* when alltypes is enabled, this whole structure is rewritten - improperly, but that's OK, because we signal a warning to the user*/ + //CHECK_ALL: _Array_ptr data : count(4); } * x; -//CHECK_ALL: _Ptr x = ((void *)0); +//CHECK: }; +//CHECK-NEXT: _Ptr x = ((void *)0); /*ensure trivial conversion*/ void foo1(int *w) { @@ -93,15 +109,25 @@ struct alpha *al[4]; //CHECK_NOALL: struct alpha *al[4]; //CHECK_ALL: _Ptr al _Checked[4] = {((void *)0)}; -/*be should be made wild, whereas a should be converted*/ +// A similar test with an unnamed struct. struct { int *a; - //CHECK_NOALL: _Ptr a; + //CHECK: _Ptr a; } * be[4]; +//CHECK_NOALL: } * be[4]; +//CHECK_ALL: }; +//CHECK_ALL-NEXT: _Ptr be _Checked[4] = {((void *)0)}; + +// The following is explained in second_tu_fn in inline_anon_structs_cross_tu.c. +struct { int x; } *cross_tu_numbering_test; +//CHECK_AB: struct cross_tu_numbering_test_struct_1 { int x; }; +//CHECK_AB-NEXT: _Ptr cross_tu_numbering_test = ((void *)0); +//CHECK_BA: struct cross_tu_numbering_test_struct_2 { int x; }; +//CHECK_BA-NEXT: _Ptr cross_tu_numbering_test = ((void *)0); /*this code checks inline structs withiin functions*/ void foo2(int *x) { - //CHECK: void foo2(_Ptr x) { + //CHECK: void foo2(_Ptr x) _Checked { struct bar { int *x; } *y = 0; @@ -110,29 +136,35 @@ void foo2(int *x) { //CHECK-NEXT: }; //CHECK-NEXT: _Ptr y = 0; - /*A non-pointer struct without an init will be marked wild*/ + // A similar test with an automatically added struct initializer. struct something { int *x; } z; //CHECK: struct something { - //CHECK-NEXT: int *x; - //CHECK-NEXT: } z; + //CHECK-NEXT: _Ptr x; + //CHECK-NEXT: }; + //CHECK-NEXT: struct something z = {}; - /*so will ones that are anonymous*/ + // Ditto with an anonymous struct. struct { int *x; } a; - //CHECK: struct { - //CHECK-NEXT: int *x; - //CHECK-NEXT: } a; + //CHECK: struct a_struct_1 { + //CHECK-NEXT: _Ptr x; + //CHECK-NEXT: }; + //CHECK-NEXT: struct a_struct_1 a = {}; - /*if it have an initializer, the rewriter won't have trouble*/ + // If the variable already has an initializer, then there is no initializer + // addition to trigger rewriting and splitting of the struct. struct { int *c; } b = {}; //CHECK: struct { //CHECK-NEXT: _Ptr c; //CHECK-NEXT: } b = {}; + + // Additional regression tests (only checking compilation with no crash) from + // https://github.com/correctcomputation/checkedc-clang/pull/497. struct { int *i; } * f; @@ -140,3 +172,184 @@ void foo2(int *x) { int *il } * g, *h, *i; } + +// Tests of new functionality from +// https://github.com/correctcomputation/checkedc-clang/pull/657. + +// Test that 3C doesn't mangle the code by attempting to split a forward +// declaration of a struct out of a multi-decl as if it were a definition +// (https://github.com/correctcomputation/checkedc-clang/issues/644). +struct fwd *p; +//CHECK: _Ptr p = ((void *)0); + +// Test the handling of inline TagDecls when the containing multi-decl is +// rewritten: +// - TagDecls are de-nested in postorder whether or not they were originally +// named, and this does not interfere with rewrites inside those TagDecls +// (https://github.com/correctcomputation/checkedc-clang/issues/531). +// - Storage and type qualifiers preceding an inline TagDecl are not copied to +// the split TypeDecl (where they wouldn't be meaningful) but remain +// associated with the fields to which they originally applied +// (https://github.com/correctcomputation/checkedc-clang/issues/647). +// - Unnamed TagDecls are automatically named +// (https://github.com/correctcomputation/checkedc-clang/issues/542). +// This big combined test may be a bit hard to understand, but it may provide +// better coverage of any interactions among these features than separate tests +// would. + +// Use up the c_struct_1 name. +struct c_struct_1 {}; + +static struct A { + const struct B { + struct { + int *c2i; + } *c; + } *ab, ab_arr[2]; + volatile struct D { + struct { + int *c3i; + } *c; + struct E { + int *ei; + } *de; + const enum F { F_0, F_1 } *df; + enum { G_0, G_1 } *dg; + struct H { + int *hi; + } *dh; + union U { + int *ui; + } *du; + union { + int *vi; + } *dv; + } *ad; +} *global_a, global_a_arr[2]; + +void constrain_dh(void) { + struct D d; + d.dh = (struct H *)1; +} + +// Points of note: +// - We have two unnamed inline structs that get automatically named after a +// field `c`, but the name `c_struct_1` is taken, so the names `c_struct_2` +// and `c_struct_3` are assigned. +// - All kinds of TagDecls (structs, unions, and enums) can be moved out of a +// containing struct and automatically named if needed. In principle, 3C +// should be able to move TagDecls out of a union, but I couldn't find any way +// to force a union field to be rewritten in order to demonstrate this, since +// 3C constrains union fields to wild. As far as I know, TagDecls can't be +// nested in an enum in C. +// - `struct H` does not get moved because there is nothing to trigger rewriting +// of the containing multi-decl since `dh` is constrained wild by +// `constrain_dh`. + +//CHECK: struct c_struct_2 { +//CHECK-NEXT: _Ptr c2i; +//CHECK-NEXT: }; +//CHECK-NEXT: struct B { +//CHECK-NEXT: _Ptr c; +//CHECK-NEXT: }; +//CHECK-NEXT: struct c_struct_3 { +//CHECK-NEXT: _Ptr c3i; +//CHECK-NEXT: }; +//CHECK-NEXT: struct E { +//CHECK-NEXT: _Ptr ei; +//CHECK-NEXT: }; +//CHECK-NEXT: enum F { F_0, F_1 }; +//CHECK-NEXT: enum dg_enum_1 { G_0, G_1 }; +//CHECK-NEXT: union U { +//CHECK-NEXT: int *ui; +//CHECK-NEXT: }; +//CHECK-NEXT: union dv_union_1 { +//CHECK-NEXT: int *vi; +//CHECK-NEXT: }; +//CHECK-NEXT: struct D { +//CHECK-NEXT: _Ptr c; +//CHECK-NEXT: _Ptr de; +//CHECK-NEXT: _Ptr df; +//CHECK-NEXT: _Ptr dg; +//CHECK-NEXT: struct H { +//CHECK-NEXT: _Ptr hi; +//CHECK-NEXT: } *dh; +//CHECK-NEXT: _Ptr du; +//CHECK-NEXT: _Ptr dv; +//CHECK-NEXT: }; +//CHECK-NEXT: struct A { +//CHECK-NEXT: _Ptr ab; +//CHECK_NOALL-NEXT: const struct B ab_arr[2]; +//CHECK_ALL-NEXT: const struct B ab_arr _Checked[2]; +//CHECK-NEXT: _Ptr ad; +//CHECK-NEXT: }; +//CHECK-NEXT: static _Ptr global_a = ((void *)0); +//CHECK_NOALL-NEXT: static struct A global_a_arr[2]; +//CHECK_ALL-NEXT: static struct A global_a_arr _Checked[2]; + +// This case is not intentionally supported but works "by accident" because 3C +// deletes everything between the start location of the first member and the +// start of the TagDecl (here, `_Ptr<`) and replaces everything between the end +// location of the TagDecl and the end of the first member (here, +// `> *chkptr_struct_var`) with the new text of the first member. It may well be +// the right decision to break this case in the future, but it would be nice to +// be aware that we're doing so, and we might want to keep a test that this case +// merely produces wrong output and doesn't crash the rewriter. +_Ptr *chkptr_struct_var; +//CHECK: struct chkptr_struct { _Ptr x; }; +//CHECK-NEXT: _Ptr<_Ptr> chkptr_struct_var = ((void *)0); + +// Tests of the special case where we use the first member of a typedef +// multi-decl as the name of the inline TagDecl. + +typedef struct { int *x; } SFOO, *PSFOO; +//CHECK: typedef struct { _Ptr x; } SFOO; +//CHECK-NEXT: typedef _Ptr PSFOO; + +// The other way around, we can't do it because SBAR would be defined too late +// to use it in the definition of PSBAR. +typedef struct { int *x; } *PSBAR, SBAR; +//CHECK: struct PSBAR_struct_1 { _Ptr x; }; +//CHECK-NEXT: typedef _Ptr PSBAR; +//CHECK-NEXT: typedef struct PSBAR_struct_1 SBAR; + +// Borderline case: Since SFOO_CONST has a qualifier, we don't use it as the +// name of the inline struct. If we did, we'd end up with `typedef +// _Ptr PSFOO_CONST`, which still expands to +// `_Ptr`, but the duplicate `const` may be a bit +// confusing to the reader, so we don't do that. +typedef const struct { int *x; } SFOO_CONST, *PSFOO_CONST; +//CHECK: struct SFOO_CONST_struct_1 { _Ptr x; }; +//CHECK-NEXT: typedef const struct SFOO_CONST_struct_1 SFOO_CONST; +//CHECK-NEXT: typedef _Ptr PSFOO_CONST; + +// Test that when the outer struct is preceded by a qualifier, de-nesting +// inserts inner structs before the qualifier, not between the qualifier and the +// outer `struct` keyword. This is moot if the outer struct is split as part of +// multi-decl rewriting because multi-decl rewriting will delete the qualifier +// and add it before the first member, but the problem can happen if the +// multi-decl isn't rewritten or the outer struct isn't split because we have a +// typedef for it. + +typedef struct { struct q_not_rewritten_inner {} *x; } *q_not_rewritten_outer; +q_not_rewritten_outer onr = (q_not_rewritten_outer)1; +//CHECK: struct q_not_rewritten_inner {}; +//CHECK-NEXT: typedef struct { _Ptr x; } *q_not_rewritten_outer; + +typedef struct { + struct q_typedef_inner {} *x; +} q_typedef_outer, *q_typedef_outer_trigger_rewrite; +//CHECK: struct q_typedef_inner {}; +//CHECK-NEXT: typedef struct { +//CHECK-NEXT: _Ptr x; +//CHECK-NEXT: } q_typedef_outer; +//CHECK-NEXT: typedef _Ptr q_typedef_outer_trigger_rewrite; + +// As noted in the comment in DeclRewriter::denestTagDecls, when the outer +// struct isn't part of a multi-decl, we don't have an easy way to find the +// location before the (useless) qualifier, so the output is a bit weird but +// still has only a compiler warning, though in a different place than it +// should. +typedef struct { struct q_pointless_inner {} *x; }; +//CHECK: typedef struct q_pointless_inner {}; +//CHECK-NEXT: struct { _Ptr x; }; diff --git a/clang/test/3C/inline_anon_structs_cross_tu.c b/clang/test/3C/inline_anon_structs_cross_tu.c new file mode 100644 index 000000000000..b83d2027c932 --- /dev/null +++ b/clang/test/3C/inline_anon_structs_cross_tu.c @@ -0,0 +1,69 @@ +// Regression tests for two bugs seen during the development of +// https://github.com/correctcomputation/checkedc-clang/pull/657 with multiple +// translation units: some information about automatically named TagDecls was +// not propagated across translation units. +// +// Our translation units are inline_anon_structs.c and +// inline_anon_structs_cross_tu.c; the latter `#include`s the former and has +// some code of its own. So inline_anon_structs.c effectively plays the role of +// a shared header file, but we just use the .c file instead of moving the code +// to a .h file and making another .c file that does nothing but `#include` the +// .h file. +// +// We test 3C on both orders of the translation units. Remember that when the +// same file is rewritten as part of multiple translation units, the last +// translation unit wins +// (https://github.com/correctcomputation/checkedc-clang/issues/374#issuecomment-804283984). +// So the "inline_anon_structs.c, inline_anon_structs_cross_tu.c" order should +// catch most problems that occur when 3C makes the decisions in one translation +// unit but does the rewriting in another. We still test the other order for +// completeness. + +// RUN: rm -rf %t* + +// Tests of the "inline_anon_structs.c, inline_anon_structs_cross_tu.c" order +// (called "AB" for short). +// +// Note about check prefixes: We currently don't bother with a Cartesian product +// of {ALL,NOALL} x {AB,BA} because we don't have any tests whose results depend +// on both variables. We do pass CHECK_NOALL and CHECK_ALL to +// inline_anon_structs_cross_tu.c for uniformity, even though that file +// currently doesn't use either. +// +// RUN: 3c -base-dir=%S -addcr -alltypes -output-dir=%t.checkedALL_AB %S/inline_anon_structs.c %s -- +// RUN: FileCheck -match-full-lines -check-prefixes="CHECK_ALL","CHECK","CHECK_AB" --input-file %t.checkedALL_AB/inline_anon_structs.c %S/inline_anon_structs.c +// RUN: FileCheck -match-full-lines -check-prefixes="CHECK_ALL","CHECK","CHECK_AB" --input-file %t.checkedALL_AB/inline_anon_structs_cross_tu.c %s +// RUN: 3c -base-dir=%S -addcr -output-dir=%t.checkedNOALL_AB %S/inline_anon_structs.c %s -- +// RUN: FileCheck -match-full-lines -check-prefixes="CHECK_NOALL","CHECK","CHECK_AB" --input-file %t.checkedNOALL_AB/inline_anon_structs.c %S/inline_anon_structs.c +// RUN: FileCheck -match-full-lines -check-prefixes="CHECK_NOALL","CHECK","CHECK_AB" --input-file %t.checkedNOALL_AB/inline_anon_structs_cross_tu.c %s + +// Tests of the "inline_anon_structs_cross_tu.c, inline_anon_structs.c" order +// (called "BA"). +// +// RUN: 3c -base-dir=%S -addcr -alltypes -output-dir=%t.checkedALL_BA %s %S/inline_anon_structs.c -- +// RUN: FileCheck -match-full-lines -check-prefixes="CHECK_ALL","CHECK","CHECK_BA" --input-file %t.checkedALL_BA/inline_anon_structs.c %S/inline_anon_structs.c +// RUN: FileCheck -match-full-lines -check-prefixes="CHECK_ALL","CHECK","CHECK_BA" --input-file %t.checkedALL_BA/inline_anon_structs_cross_tu.c %s +// RUN: 3c -base-dir=%S -addcr -output-dir=%t.checkedNOALL_BA %s %S/inline_anon_structs.c -- +// RUN: FileCheck -match-full-lines -check-prefixes="CHECK_NOALL","CHECK","CHECK_BA" --input-file %t.checkedNOALL_BA/inline_anon_structs.c %S/inline_anon_structs.c +// RUN: FileCheck -match-full-lines -check-prefixes="CHECK_NOALL","CHECK","CHECK_BA" --input-file %t.checkedNOALL_BA/inline_anon_structs_cross_tu.c %s + +void second_tu_fn(void) { + // In the AB order, the global `cross_tu_numbering_test` variable in + // inline_anon_structs.c will be seen first and its struct will be named + // cross_tu_numbering_test_struct_1, and this struct will be named + // cross_tu_numbering_test_struct_2. In the BA order, this code is seen before + // the `#include "inline_anon_structs.c"`, so this struct will take the + // cross_tu_numbering_test_struct_1 name and the inline_anon_structs.c one + // will be cross_tu_numbering_test_struct_2. + // + // Note that in order to have two different variables with the same name (in + // order to produce a collision in struct names), we have to put the variables + // in different scopes: in this case, global and function-local. + struct { int y; } *cross_tu_numbering_test; + //CHECK_AB: struct cross_tu_numbering_test_struct_2 { int y; }; + //CHECK_AB-NEXT: _Ptr cross_tu_numbering_test = ((void *)0); + //CHECK_BA: struct cross_tu_numbering_test_struct_1 { int y; }; + //CHECK_BA-NEXT: _Ptr cross_tu_numbering_test = ((void *)0); +} + +#include "inline_anon_structs.c" diff --git a/clang/test/3C/itypes_for_extern.c b/clang/test/3C/itypes_for_extern.c index 6566cc1e25bb..650b926b8c31 100644 --- a/clang/test/3C/itypes_for_extern.c +++ b/clang/test/3C/itypes_for_extern.c @@ -46,9 +46,12 @@ void fn_typedef_test(fn f) {} struct foo { int *a; void (*fn)(int *); + int *b, **c; }; //CHECK: int *a : itype(_Ptr); //CHECK: void ((*fn)(int *)) : itype(_Ptr)>); +//CHECK: int *b : itype(_Ptr); +//CHECK: int **c : itype(_Ptr<_Ptr>); int *glob = 0; extern int *extern_glob = 0; @@ -91,6 +94,9 @@ void has_itype1(int *a : itype(_Ptr)) { a = 0; } // part of the type (the array size) occurs after the name of the variable // being declared. This complicates rewriting. These examples caused errors in // libjpeg. +// +// multivardecls_complex_types.c has tests of some similar cases as part of +// multi-decls, with and without -itypes-for-extern. int const_arr0[10]; //CHECK_ALL: int const_arr0[10] : itype(int _Checked[10]); @@ -117,7 +123,8 @@ void const_arr_fn(int a[10]) {} // Rewriting an existing itype or bounds expression on a global variable. Doing // this correctly requires replacing text until the end of the Checked C -// annotation expression. +// annotation expression. The m_* and s_* tests in multivardecls_complex_types.c +// test some similar cases in combination with multi-decls. int *a : itype(_Ptr); int **b : itype(_Ptr); int *c : count(2); diff --git a/clang/test/3C/macro_end_of_decl.c b/clang/test/3C/macro_end_of_decl.c index 8cc7790ef82d..d4e7daa0efd7 100644 --- a/clang/test/3C/macro_end_of_decl.c +++ b/clang/test/3C/macro_end_of_decl.c @@ -34,13 +34,13 @@ int e SIZE[1]; #define EQ = int *f EQ 0; -//CHECK: _Ptr f = 0; +//CHECK: _Ptr f EQ 0; int(*g0) ARGS, g1 SIZE, *g2 EQ 0; //CHECK: _Ptr g0 = ((void *)0); //CHECK_NOALL: int g1[1]; //CHECK_ALL: int g1 _Checked SIZE; -//CHECK: _Ptr g2 = 0; +//CHECK: _Ptr g2 EQ 0; #define RPAREN ) int * h ( int *a RPAREN { diff --git a/clang/test/3C/multivardecls.c b/clang/test/3C/multivardecls.c index 629554c10c87..476ea1840676 100644 --- a/clang/test/3C/multivardecls.c +++ b/clang/test/3C/multivardecls.c @@ -130,7 +130,22 @@ int *d, e, **f; // CHECK: int e; // CHECK: _Ptr<_Ptr> f = ((void *)0); +// Simple test that storage and type qualifiers are preserved on both global and +// function-scope variables. + +static const int *sd, se, **sf; +// CHECK: static _Ptr sd = ((void *)0); +// CHECK: static const int se; +// CHECK: static _Ptr<_Ptr> sf = ((void *)0); + void test5() { + static const int *fsd, fse, **fsf; + // CHECK: static _Ptr fsd = ((void *)0); + // CHECK: static const int fse; + // CHECK: static _Ptr<_Ptr> fsf = ((void *)0); +} + +void test6() { int *a, *b; int *c, *e; struct foo { @@ -148,3 +163,54 @@ void test5() { // CHECK: _Ptr c; // CHECK: _Ptr d; // CHECK: }; + +void test7() { + // Test that variables that require struct initialization honor base type + // renames the same way as global variables. + struct { int *x; } s7; + //CHECK: struct s7_struct_1 { _Ptr x; }; + //CHECK: struct s7_struct_1 s7 = {}; +} + +// Test that getNextComma doesn't falsely trigger on commas inside a bounds +// annotation. The scan shouldn't start until after the declaration source +// range, which should include the bounds annotation, and it's unlikely that a +// change to 3C could break that without also breaking other tests, but it +// doesn't hurt to have a specific test for commas too. The extra nested comma +// expression `(0, lo)` was needed to trigger the bug in older versions of 3C: +// the lexer didn't seem to report the comma that is part of the `bounds` +// construct to getNextComma as a comma token. +// +// `p3` is needed to trigger the multi-decl to be broken up at all. +_Array_ptr lo, hi; +_Array_ptr p1 : bounds((0, lo), hi), p2 : bounds(lo, (0, hi)), *p3; +//CHECK: _Array_ptr p1 : bounds((0, lo), hi); +// The extra space after `0` seems to be because Decl::print treats the comma +// operator like any other binary operator such as `+` and adds spaces both +// before and after it. (TODO: Research whether this has already been discussed +// in upstream Clang and if not, file a bug there?) +//CHECK: _Array_ptr p2 : bounds(lo, (0 , hi)); +//CHECK: _Ptr<_Array_ptr> p3 = ((void *)0); + +// Simple tests of typedef multi-decls from +// https://github.com/correctcomputation/checkedc-clang/issues/651. +// inline_anon_structs.c has a few additional tests of typedef multi-decls +// involving inline structs. + +typedef int *A, *B; +// CHECK: typedef _Ptr A; +// CHECK: typedef _Ptr B; + +void foo(void) { + A a; + B b; +} + +typedef int *C, *D; +// CHECK: typedef _Ptr C; +// CHECK: typedef int *D; + +void bar(void) { + C c; + D d = (D)1; +} diff --git a/clang/test/3C/multivardecls_complex_types.c b/clang/test/3C/multivardecls_complex_types.c new file mode 100644 index 000000000000..2c5527a23a87 --- /dev/null +++ b/clang/test/3C/multivardecls_complex_types.c @@ -0,0 +1,105 @@ +// A few tests of multi-decls with complex types, with and without +// -itypes-for-extern. These tests cannot be included in multivardecls.c because +// compiling that entire file with -itypes-for-extern -addcr would produce an +// unrelated error in the typedef multi-decl test: see the first example in +// https://github.com/correctcomputation/checkedc-clang/issues/740. + +// RUN: rm -rf %t* + +// RUN: 3c -base-dir=%S -addcr -alltypes %s -- | FileCheck -match-full-lines -check-prefixes="CHECK_ALL","CHECK" %s +// RUN: 3c -base-dir=%S -addcr %s -- | FileCheck -match-full-lines -check-prefixes="CHECK_NOALL","CHECK" %s +// RUN: 3c -base-dir=%S -addcr %s -- | %clang -c -fcheckedc-extension -x c -o /dev/null - +// RUN: 3c -base-dir=%S -alltypes -output-dir=%t.checked %s -- +// RUN: 3c -base-dir=%t.checked -alltypes %t.checked/multivardecls_complex_types.c -- | diff %t.checked/multivardecls_complex_types.c - + +// RUN: 3c -base-dir=%S -itypes-for-extern -alltypes -addcr %s -- | FileCheck -match-full-lines -check-prefixes="CHECK_ITE_ALL","CHECK_ITE" %s +// RUN: 3c -base-dir=%S -itypes-for-extern -addcr %s -- | FileCheck -match-full-lines -check-prefixes="CHECK_ITE_NOALL","CHECK_ITE" %s +// RUN: 3c -base-dir=%S -itypes-for-extern -alltypes -addcr %s -- | %clang -c -fcheckedc-extension -x c -o /dev/null - +// RUN: 3c -base-dir=%S -itypes-for-extern -alltypes -output-dir=%t.checked_ITE %s -- +// RUN: 3c -base-dir=%t.checked_ITE -itypes-for-extern -alltypes %t.checked_ITE/multivardecls_complex_types.c -- | diff %t.checked_ITE/multivardecls_complex_types.c - + +// A multi-decl with a mix of pointers and arrays, including an "unchecked +// pointer to constant size array" member that would trigger a known bug in +// mkString (item 5 of +// https://github.com/correctcomputation/checkedc-clang/issues/703), +// demonstrating that unchanged multi-decl members whose base type wasn't +// renamed use Decl::print (which doesn't have this bug). +// +// `m_implicit_itype` and `m_change_with_bounds` together test that 3C includes +// Checked C annotations in the range to be replaced (for both changed and +// unchanged multi-decl members) rather than leaving them to duplicate the +// annotations in the newly inserted declaration. +int m_const_arr0[10], *m_const_arr1[10], (*m_p_const_arr_wild)[10] = 1, + (*m_p_const_arr_chk)[10], *m_implicit_itype : count(2), + **m_change_with_bounds : count(2); +//CHECK_ALL: int m_const_arr0 _Checked[10]; +//CHECK_NOALL: int m_const_arr0[10]; +//CHECK_ALL: _Ptr m_const_arr1 _Checked[10] = {((void *)0)}; +// The reason this isn't `_Ptr m_const_arr1[10]` is probably the "outer +// wild -> inner wild" constraint +// (https://github.com/correctcomputation/checkedc-clang/issues/656). +//CHECK_NOALL: int *m_const_arr1[10]; +//CHECK: int (*m_p_const_arr_wild)[10] = 1; +//CHECK_ALL: _Ptr m_p_const_arr_chk = ((void *)0); +//CHECK_NOALL: _Ptr m_p_const_arr_chk = ((void *)0); +// 3C doesn't have proper support for itypes on variables: if a variable has an +// existing itype, 3C uses the checked side as the variable's original type. So +// 3C treats m_implicit_itype as having original type _Array_ptr, but since +// the solved type is the same, 3C uses Decl::print for the unchanged multi-decl +// member and preserves the original declaration with the itype. When 3C gains +// proper itype support for variables, it should generate an actual rewrite to +// the fully checked type if nothing else in the program prevents it from doing +// so. +//CHECK: int *m_implicit_itype : count(2); +// In this case, the solved type changes and shows up in the output. +//CHECK: _Array_ptr<_Ptr> m_change_with_bounds : count(2) = ((void *)0); + +// Test the same multi-decl with -itypes-for-extern. +//CHECK_ITE_ALL: int m_const_arr0[10] : itype(int _Checked[10]); +//CHECK_ITE_NOALL: int m_const_arr0[10]; +//CHECK_ITE_ALL: int *m_const_arr1[10] : itype(_Ptr _Checked[10]) = {((void *)0)}; +//CHECK_ITE_NOALL: int *m_const_arr1[10]; +//CHECK_ITE: int (*m_p_const_arr_wild)[10] = 1; +//CHECK_ITE_ALL: int (*m_p_const_arr_chk)[10] : itype(_Ptr) = ((void *)0); +//CHECK_ITE_NOALL: int (*m_p_const_arr_chk)[10] : itype(_Ptr) = ((void *)0); +//CHECK_ITE: int *m_implicit_itype : count(2); +//CHECK_ITE: int **m_change_with_bounds : itype(_Array_ptr<_Ptr>) count(2) = ((void *)0); + +// A similar multi-decl with an unnamed inline struct, which forces the use of +// mkString. We can't include (*s_p_const_arr_*)[10] because it would trigger +// the previously mentioned mkString bug and produce output that doesn't +// compile. `s` serves just to give the struct a shorter generated name. +struct { int *x; } s, s_const_arr0[10], *s_const_arr1[10], + // The only way a variable of unnamed struct type can have an itype is if it + // comes implicitly from a bounds annotation, since we have no name to refer + // to the struct in a written itype. + *s_implicit_itype : count(2), **s_change_with_bounds : count(2); +//CHECK: struct s_struct_1 { _Ptr x; }; +//CHECK: struct s_struct_1 s; +//CHECK_ALL: struct s_struct_1 s_const_arr0 _Checked[10]; +//CHECK_NOALL: struct s_struct_1 s_const_arr0[10]; +//CHECK_ALL: _Ptr s_const_arr1 _Checked[10] = {((void *)0)}; +//CHECK_NOALL: struct s_struct_1 *s_const_arr1[10]; +// Like with m_implicit_itype above, 3C treats s_implicit_itype as having type +// _Array_ptr, but now 3C uses mkString and that type +// actually shows up in the output: not a great result, but at least we test +// that it isn't any worse and that the bounds annotation is preserved. Since +// s_implicit_itype is now the only member of its "multi-decl", if the user +// manually edits it back to an itype, there won't be another multi-decl breakup +// to cause 3C to mess it up again. +//CHECK: _Array_ptr s_implicit_itype : count(2); +//CHECK: _Array_ptr<_Ptr> s_change_with_bounds : count(2) = ((void *)0); + +// Test the same multi-decl with -itypes-for-extern. +//CHECK_ITE: struct s_struct_1 { int *x : itype(_Ptr); }; +//CHECK_ITE: struct s_struct_1 s; +//CHECK_ITE_ALL: struct s_struct_1 s_const_arr0[10] : itype(struct s_struct_1 _Checked[10]); +//CHECK_ITE_NOALL: struct s_struct_1 s_const_arr0[10]; +//CHECK_ITE_ALL: struct s_struct_1 *s_const_arr1[10] : itype(_Ptr _Checked[10]) = {((void *)0)}; +//CHECK_ITE_NOALL: struct s_struct_1 *s_const_arr1[10]; +// The type of s_implicit_type is still loaded as _Array_ptr, +// but it is downgraded back to an itype by -itypes-for-extern. As long as 3C +// lacks real support for itypes on variables, this is probably the behavior we +// want with -itypes-for-extern in this very unusual case. +//CHECK_ITE: struct s_struct_1 *s_implicit_itype : itype(_Array_ptr) count(2); +//CHECK_ITE: struct s_struct_1 **s_change_with_bounds : itype(_Array_ptr<_Ptr>) count(2) = ((void *)0); diff --git a/clang/test/3C/partial_checked.c b/clang/test/3C/partial_checked.c index 30ffca85563c..952ca2768f6b 100644 --- a/clang/test/3C/partial_checked.c +++ b/clang/test/3C/partial_checked.c @@ -62,6 +62,21 @@ void test4() { // CHECK: _Ptr<_Ptr (void)> n = 0; } +// Test the partial workaround in getDeclSourceRangeWithAnnotations for a +// compiler bug where DeclaratorDecl::getSourceRange gives the wrong answer for +// certain checked pointer types. Previously, if the variable had an +// initializer, 3C used the start of the initializer as the end location of the +// rewrite, which had the side effect of working around the bug for all +// variables with an initializer (such as `m` above). Any variable with an +// affected type and no initializer would trigger the bug; apparently we never +// noticed because 3C unnecessarily adds initializers to global variables +// (https://github.com/correctcomputation/checkedc-clang/issues/741). Now, for +// uniformity, 3C always uses DeclaratorDecl::getSourceRange to get the range +// excluding any initializer, so it needs a workaround specifically for the bug. +// See getDeclSourceRangeWithAnnotations for more information. +_Ptr gm; +// CHECK: _Ptr<_Ptr (void)> gm = ((void *)0); + void test5(_Ptr a, _Ptr b, _Ptr<_Ptr> c, int **d) { // CHECK: void test5(_Ptr<_Ptr> a, _Ptr b : itype(_Ptr<_Ptr>), _Ptr<_Ptr> c, _Ptr<_Ptr> d) { *b = 1; diff --git a/clang/test/3C/qualifiers.c b/clang/test/3C/qualifiers.c index b85b77e2b2a2..4d3ccde7d976 100644 --- a/clang/test/3C/qualifiers.c +++ b/clang/test/3C/qualifiers.c @@ -64,7 +64,7 @@ void structs() { const extern struct qualifier_struct d; } //CHECK: struct qualifier_struct a0 = {}; -//CHECK: static struct qualifier_struct a = {}; -//CHECK: static volatile struct qualifier_struct b = {}; +//CHECK: static struct qualifier_struct a; +//CHECK: volatile static struct qualifier_struct b; //CHECK: static _Ptr c = ((void *)0); //CHECK: const extern struct qualifier_struct d; diff --git a/clang/test/3C/root_cause.c b/clang/test/3C/root_cause.c index 17811a8c7820..74c5f86ffe03 100644 --- a/clang/test/3C/root_cause.c +++ b/clang/test/3C/root_cause.c @@ -46,9 +46,9 @@ void test1() { union u { // unwritable-expected-warning@+1 {{0 unchecked pointers: Source code in non-writable file}} - int *a; // expected-warning {{1 unchecked pointer: Union or external struct field encountered}} + int *a; // expected-warning {{1 unchecked pointer: Union field encountered}} // unwritable-expected-warning@+1 {{0 unchecked pointers: Source code in non-writable file}} - int *b; // expected-warning {{1 unchecked pointer: Union or external struct field encountered}} + int *b; // expected-warning {{1 unchecked pointer: Union field encountered}} }; void (*c)(void); // unwritable-expected-warning {{0 unchecked pointers: Source code in non-writable file}} @@ -75,19 +75,6 @@ void (*void_star_fptr)(void *); // expected-warning {{1 unchecked pointer: Defau // unwritable-expected-warning@+1 {{ 0 unchecked pointers: Source code in non-writable file}} void void_star_fn(void *p); // expected-warning {{1 unchecked pointer: Default void* type}} -typedef struct { - int x; - float f; -// unwritable-expected-warning@+1 {{0 unchecked pointers: Source code in non-writable file}} -} A, *PA; -// expected-warning@-1 {{2 unchecked pointers: Unable to rewrite a typedef with multiple names}} -// Two pointers affected by the above root cause. Do not count the typedef -// itself as an affected pointer even though that's where the star is written. -// Count each of the variables below even though no star is actually written. -// unwritable-expected-warning@+2 {{0 unchecked pointers: Source code in non-writable file}} -// unwritable-expected-warning@+1 {{0 unchecked pointers: Source code in non-writable file}} -PA pa_test0, pa_test1; - // unwritable-expected-warning@+2 {{0 unchecked pointers: Source code in non-writable file}} // expected-warning@+1 {{1 unchecked pointer: Internal constraint for generic function declaration, for which 3C currently does not support re-solving.}} _Itype_for_any(T) void remember(void *p : itype(_Ptr)) {}