From 863c402bf71165f371f6483c45338e0e85d36e08 Mon Sep 17 00:00:00 2001 From: Aravind Machiry Date: Fri, 2 Oct 2020 20:39:07 -0700 Subject: [PATCH 1/6] Major refactoring of array bounds code and some performance improvements --- clang/include/clang/CConv/ABounds.h | 8 +- clang/include/clang/CConv/AVarBoundsInfo.h | 63 +- .../CConv/ArrayBoundsInferenceConsumer.h | 12 +- clang/include/clang/CConv/ConstraintsGraph.h | 21 +- clang/lib/CConv/ABounds.cpp | 16 +- clang/lib/CConv/AVarBoundsInfo.cpp | 584 +++++++++++------- .../CConv/ArrayBoundsInferenceConsumer.cpp | 29 +- clang/lib/CConv/CConv.cpp | 29 +- clang/lib/CConv/IntermediateToolHook.cpp | 8 +- clang/lib/CConv/ProgramVar.cpp | 12 +- .../test/CheckedCRewriter/arrboundsmerging.c | 55 ++ 11 files changed, 572 insertions(+), 265 deletions(-) create mode 100644 clang/test/CheckedCRewriter/arrboundsmerging.c diff --git a/clang/include/clang/CConv/ABounds.h b/clang/include/clang/CConv/ABounds.h index b70adf4b9305..712ebea780f9 100644 --- a/clang/include/clang/CConv/ABounds.h +++ b/clang/include/clang/CConv/ABounds.h @@ -44,7 +44,7 @@ class ABounds { virtual ~ABounds() { } virtual std::string mkString(AVarBoundsInfo *) = 0; - virtual bool areSame(ABounds *) = 0; + virtual bool areSame(ABounds *, AVarBoundsInfo *) = 0; virtual BoundsKey getBKey() = 0; virtual ABounds* makeCopy(BoundsKey NK) = 0; @@ -66,7 +66,7 @@ class CountBound : public ABounds { virtual ~CountBound() { } std::string mkString(AVarBoundsInfo *ABI) override ; - bool areSame(ABounds *O) override; + bool areSame(ABounds *O, AVarBoundsInfo *ABI) override; BoundsKey getBKey() override; ABounds* makeCopy(BoundsKey NK) override; @@ -88,7 +88,7 @@ class ByteBound : public ABounds { virtual ~ByteBound() { } std::string mkString(AVarBoundsInfo *ABI) override ; - bool areSame(ABounds *O) override; + bool areSame(ABounds *O, AVarBoundsInfo *ABI) override; BoundsKey getBKey() override; ABounds* makeCopy(BoundsKey NK) override; @@ -111,7 +111,7 @@ class RangeBound : public ABounds { virtual ~RangeBound() { } std::string mkString(AVarBoundsInfo *ABI) override ; - bool areSame(ABounds *O) override; + bool areSame(ABounds *O, AVarBoundsInfo *ABI) override; BoundsKey getBKey() override { assert (false && "Not implemented."); diff --git a/clang/include/clang/CConv/AVarBoundsInfo.h b/clang/include/clang/CConv/AVarBoundsInfo.h index 4e6f847f6400..ff25e344d6b3 100644 --- a/clang/include/clang/CConv/AVarBoundsInfo.h +++ b/clang/include/clang/CConv/AVarBoundsInfo.h @@ -89,35 +89,60 @@ enum BoundsPriority { }; class AVarBoundsInfo; - +typedef std::map> BndsKindMap; // The main class that handles figuring out bounds of arr variables. class AvarBoundsInference { public: - AvarBoundsInference(AVarBoundsInfo *BoundsInfo) : BI(BoundsInfo) { } + AvarBoundsInference(AVarBoundsInfo *BoundsInfo) : BI(BoundsInfo) { + clearInferredBounds(); + } + + // Clear all possible inferred bounds for all the BoundsKeys + void clearInferredBounds() { + CurrIterInferBounds.clear(); + } // Infer bounds for the given key from the set of given ARR atoms. // The flag FromPB requests the inference to use potential length variables. bool inferBounds(BoundsKey K, AVarGraph &BKGraph, bool FromPB = false); + + // Get a consistent bound for all the arrays whose bounds have been + // inferred. + bool convergeInferredBounds(); private: - bool inferPossibleBounds(BoundsKey K, ABounds *SB, - AVarGraph &BKGraph, - std::set &EB); + // Find all the reachable variables form FromVarK that are visible + // in DstScope + bool getReachableBoundKeys(const ProgramVarScope *DstScope, + BoundsKey FromVarK, + std::set &PotK, + AVarGraph &BKGraph, + bool CheckImmediate = false); bool intersectBounds(std::set &ProgVars, ABounds::BoundsKind BK, std::set &CurrB); - bool getRelevantBounds(std::set &RBKeys, - std::set &ResBounds); + // Check if bounds specified by Bnds are declared bounds of K. + bool areDeclaredBounds(BoundsKey K, + const std::pair> &Bnds); - bool predictBounds(BoundsKey K, std::set &Neighbours, - AVarGraph &BKGraph, - ABounds **KB); + // Get all the bounds of the given array i.e., BK + bool getRelevantBounds(BoundsKey BK, + BndsKindMap &ResBounds); + // Predict possible bounds for DstArrK from the bounds of Neighbours. + // Return true if there is any change in the captured bounds information. + bool predictBounds(BoundsKey DstArrK, std::set &Neighbours, + AVarGraph &BKGraph); - void mergeReachableProgramVars(std::set &AllVars); + + void mergeReachableProgramVars(std::set &AllVars); AVarBoundsInfo *BI; + + // Potential Bounds for each bounds key inferred for the current iteration. + std::map CurrIterInferBounds; }; class AVarBoundsInfo { @@ -163,6 +188,7 @@ class AVarBoundsInfo { BoundsKey getVariable(clang::FieldDecl *FD); BoundsKey getVariable(clang::FunctionDecl *FD); BoundsKey getConstKey(uint64_t value); + bool fetchAllConstKeys(uint64_t value, std::set &AllKeys); // Generate a random bounds key to be used for inference. BoundsKey getRandomBKey(); @@ -227,6 +253,8 @@ class AVarBoundsInfo { const CVarSet &SrcCVarSet, bool JsonFormat = false) const; + bool areSameProgramVar(BoundsKey B1, BoundsKey B2); + private: friend class AvarBoundsInference; @@ -238,8 +266,8 @@ class AVarBoundsInfo { BoundsKey BCount; // Map of VarKeys and corresponding program variables. std::map PVarInfo; - // Map of APSInt (constants) and corresponding VarKeys. - std::map ConstVarKeys; + // Map of APSInt (constants) and set of BoundKeys that correspond to it. + std::map> ConstVarKeys; // Map of BoundsKey and corresponding prioritized bounds information. // Note that although each PSL could have multiple ConstraintKeys Ex: **p. // Only the outer most pointer can have bounds. @@ -305,9 +333,13 @@ class AVarBoundsInfo { // Check if the provided bounds key corresponds to function return. bool isFunctionReturn(BoundsKey BK); - // Of all teh pointer bounds key, find arr pointers. + // Of all the pointer bounds key, find arr pointers. void computerArrPointers(ProgramInfo *PI, std::set &Ret); + // Get all the array pointers that need bounds. + void getBoundsNeededArrPointers(const std::set &ArrPtrs, + std::set &AB); + // Keep only highest priority bounds for all the provided BoundsKeys // returns true if any thing changed, else false. bool keepHighestPriorityBounds(std::set &ArrPtrs); @@ -315,8 +347,9 @@ class AVarBoundsInfo { // Perform worklist based inference on the requested array variables using // the provided graph. // The flag FromPB requests the algorithm to use potential length variables. - bool performWorkListInference(std::set &ArrNeededBounds, + bool performWorkListInference(const std::set &ArrNeededBounds, AVarGraph &BKGraph, + AvarBoundsInference &BI, bool FromPB = false); void insertParamKey(ParamDeclType ParamDecl, BoundsKey NK); diff --git a/clang/include/clang/CConv/ArrayBoundsInferenceConsumer.h b/clang/include/clang/CConv/ArrayBoundsInferenceConsumer.h index 65872e14d16f..70ce5c8e6758 100644 --- a/clang/include/clang/CConv/ArrayBoundsInferenceConsumer.h +++ b/clang/include/clang/CConv/ArrayBoundsInferenceConsumer.h @@ -22,6 +22,15 @@ class LocalVarABVisitor; class ConstraintResolver; +class AllocBasedBoundsInference : public ASTConsumer { +public: + explicit AllocBasedBoundsInference(ProgramInfo &I, clang::ASTContext *C) : Info(I) { } + virtual void HandleTranslationUnit(ASTContext &Context); + +private: + ProgramInfo &Info; +}; + // This class handles determining bounds of global array variables. // i.e., function parameters, structure fields and global variables. class GlobalABVisitor: public clang::RecursiveASTVisitor { @@ -96,7 +105,8 @@ class LengthVarInference : public StmtVisitor { std::unique_ptr Cfg; }; -void HandleArrayVariablesBoundsDetection(ASTContext *C, ProgramInfo &I); +void HandleArrayVariablesBoundsDetection(ASTContext *C, ProgramInfo &I, + bool UseHeuristics = true); // Add constraints based on heuristics to the parameters of the // provided function. diff --git a/clang/include/clang/CConv/ConstraintsGraph.h b/clang/include/clang/CConv/ConstraintsGraph.h index 65ab5e7e4fcf..142b6d3b2433 100644 --- a/clang/include/clang/CConv/ConstraintsGraph.h +++ b/clang/include/clang/CConv/ConstraintsGraph.h @@ -114,6 +114,7 @@ class DataGraph : N = nullptr; } this->Nodes.clear(); + invalidateBFSCache(); } void removeEdge(Data Src, Data Dst) { @@ -126,12 +127,14 @@ class DataGraph : (*NDst)->removeEdge(*E); delete E; } + invalidateBFSCache(); } void addEdge(Data L, Data R) { NodeType *BL = this->findOrCreateNode(L); NodeType *BR = this->findOrCreateNode(R); BL->connectTo(*BR); + invalidateBFSCache(); } bool getNeighbors(Data D, std::set &DataSet, bool Succ){ @@ -161,8 +164,17 @@ class DataGraph : auto *N = this->findNode(NodeType(Start)); if (N == this->end()) return; - for (auto TNode : llvm::breadth_first(*N)) - Fn(TNode->getData()); + // Insert into BFS cache. + if (BFSCache.find(Start) == BFSCache.end()) { + std::set ReachableNodes; + ReachableNodes.clear(); + for (auto TNode : llvm::breadth_first(*N)) { + ReachableNodes.insert(TNode->getData()); + } + BFSCache[Start] = ReachableNodes; + } + for (auto SN : BFSCache[Start]) + Fn(SN); } protected: @@ -183,6 +195,11 @@ class DataGraph : template friend struct llvm::GraphTraits; friend class GraphVizOutputGraph; + std::map> BFSCache; + + void invalidateBFSCache() { + BFSCache.clear(); + } }; // Specialize the graph for the checked and pointer type constraint graphs. This diff --git a/clang/lib/CConv/ABounds.cpp b/clang/lib/CConv/ABounds.cpp index f89fb302b601..ff6f4eed05eb 100644 --- a/clang/lib/CConv/ABounds.cpp +++ b/clang/lib/CConv/ABounds.cpp @@ -63,11 +63,10 @@ std::string CountBound::mkString(AVarBoundsInfo *ABI) { return "count(" + PV->mkString() + ")"; } -bool CountBound::areSame(ABounds *O) { +bool CountBound::areSame(ABounds *O, AVarBoundsInfo *ABI) { if (O != nullptr) { - if (CountBound *OT = dyn_cast(O)) { - return OT->CountVar == CountVar; - } + if (CountBound *OT = dyn_cast(O)) + return ABI->areSameProgramVar(this->CountVar, OT->CountVar); } return false; } @@ -86,10 +85,10 @@ std::string ByteBound::mkString(AVarBoundsInfo *ABI) { return "byte_count(" + PV->mkString() + ")"; } -bool ByteBound::areSame(ABounds *O) { +bool ByteBound::areSame(ABounds *O, AVarBoundsInfo *ABI) { if (O != nullptr) { if (ByteBound *BB = dyn_cast(O)) { - return BB->ByteVar == ByteVar; + return ABI->areSameProgramVar(this->ByteVar, BB->ByteVar); } } return false; @@ -111,10 +110,11 @@ std::string RangeBound::mkString(AVarBoundsInfo *ABI) { return "bounds(" + LBVar->mkString() + ", " + UBVar->mkString() + ")"; } -bool RangeBound::areSame(ABounds *O) { +bool RangeBound::areSame(ABounds *O, AVarBoundsInfo *ABI) { if (O != nullptr) { if (RangeBound *RB = dyn_cast(O)) { - return RB->LB == LB && RB->UB == UB; + return ABI->areSameProgramVar(this->LB, RB->LB) && + ABI->areSameProgramVar(this->UB, RB->UB); } } return false; diff --git a/clang/lib/CConv/AVarBoundsInfo.cpp b/clang/lib/CConv/AVarBoundsInfo.cpp index aa9e97412e8b..069455927b20 100644 --- a/clang/lib/CConv/AVarBoundsInfo.cpp +++ b/clang/lib/CConv/AVarBoundsInfo.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include #include "clang/CConv/AVarBoundsInfo.h" #include "clang/CConv/ProgramInfo.h" #include "clang/CConv/ConstraintResolver.h" @@ -17,6 +18,13 @@ std::vector AVarBoundsInfo::PrioList {Declared, Allocator, FlowInferred, Heuristics}; +extern cl::OptionCategory ArrBoundsInferCat; +static cl::opt DisableInfDecls("disable-arr-missd", + cl::desc("Disable ignoring of missed " + "bounds from declarations."), + cl::init(false), + cl::cat(ArrBoundsInferCat)); + void AVarBoundsStats::print(llvm::raw_ostream &O, const std::set *InSrcArrs, bool JsonFormat) const { @@ -54,14 +62,12 @@ void AVarBoundsStats::print(llvm::raw_ostream &O, } bool AVarBoundsInfo::isValidBoundVariable(clang::Decl *D) { - if (VarDecl *VD = dyn_cast(D)) { - return !VD->getNameAsString().empty(); - } if (isa(D) || isa(D)) { // All parameters and return values are valid bound variables. return true; - } - if(FieldDecl *FD = dyn_cast(D)) { + } else if (VarDecl *VD = dyn_cast(D)) { + return !VD->getNameAsString().empty(); + } else if(FieldDecl *FD = dyn_cast(D)) { return !FD->getNameAsString().empty(); } return false; @@ -84,17 +90,16 @@ void AVarBoundsInfo::insertDeclaredBounds(clang::Decl *D, ABounds *B) { bool AVarBoundsInfo::tryGetVariable(clang::Decl *D, BoundsKey &R) { if (isValidBoundVariable(D)) { - if (VarDecl *VD = dyn_cast(D)) { - R = getVariable(VD); - } if (ParmVarDecl *PD = dyn_cast(D)) { R = getVariable(PD); - } - if (FieldDecl *FD = dyn_cast(D)) { + } else if (VarDecl *VD = dyn_cast(D)) { + R = getVariable(VD); + } else if (FieldDecl *FD = dyn_cast(D)) { R = getVariable(FD); - } - if (FunctionDecl *FD = dyn_cast(D)) { + } else if (FunctionDecl *FD = dyn_cast(D)) { R = getVariable(FD); + } else { + assert(false && "Invalid Declaration\n"); } return true; } @@ -132,7 +137,7 @@ bool AVarBoundsInfo::mergeBounds(BoundsKey L, BoundsPriority P, ABounds *B) { bool RetVal = false; if (BInfo.find(L) != BInfo.end() && BInfo[L].find(P) != BInfo[L].end()) { // If previous computed bounds are not same? Then release the old bounds. - if (!BInfo[L][P]->areSame(B)) { + if (!BInfo[L][P]->areSame(B, this)) { InvalidBounds.insert(L); // TODO: Should we keep bounds for other priorities? removeBounds(L); @@ -505,18 +510,25 @@ BoundsKey AVarBoundsInfo::getVarKey(PersistentSourceLoc &PSL) { } BoundsKey AVarBoundsInfo::getConstKey(uint64_t value) { - if (ConstVarKeys.find(value) == ConstVarKeys.end()) { - BoundsKey NK = ++BCount; - ConstVarKeys[value] = NK; - std::string ConsString = std::to_string(value); - ProgramVar *NPV = - ProgramVar::createNewProgramVar(NK, - ConsString, - GlobalScope::getGlobalScope(), - true); - insertProgramVar(NK, NPV); + BoundsKey NK = ++BCount; + std::string ConsString = std::to_string(value); + ProgramVar *NPV = + ProgramVar::createNewProgramVar(NK, + ConsString, + GlobalScope::getGlobalScope(), + true); + insertProgramVar(NK, NPV); + ConstVarKeys[value].insert(NK); + return NK; +} + +bool AVarBoundsInfo::fetchAllConstKeys(uint64_t value, + std::set &AllKeys) { + if (ConstVarKeys.find(value) != ConstVarKeys.end()) { + AllKeys.insert(ConstVarKeys[value].begin(), ConstVarKeys[value].end()); + return true; } - return ConstVarKeys[value]; + return false; } BoundsKey AVarBoundsInfo::getVarKey(llvm::APSInt &API) { @@ -561,7 +573,7 @@ bool isInSrcArray(ConstraintVariable *CK, Constraints &CS) { // This class picks variables that are in the same scope as the provided scope. class ScopeVisitor { public: - ScopeVisitor(const ProgramVarScope *S, std::set &R, + ScopeVisitor(const ProgramVarScope *S, std::set &R, std::map &VarM, std::set &P): TS(S), Res(R), VM(VarM) , PtrAtoms(P) { } @@ -572,7 +584,7 @@ class ScopeVisitor { // If the variable is constant or in the same scope? if (S->IsNumConstant() || (*(TS) == *(S->getScope()))) { - Res.insert(S); + Res.insert(V); } } } @@ -584,72 +596,38 @@ class ScopeVisitor { auto *S = VM[BK]; // If the variable is constant or in the same scope? if (S->IsNumConstant() || (*(TS) == *(S->getScope()))) { - Res.insert(S); + Res.insert(BK); } } } } const ProgramVarScope *TS; - std::set &Res; + std::set &Res; std::map &VM; std::set &PtrAtoms; }; -bool AvarBoundsInference::intersectBounds(std::set &ProgVars, - ABounds::BoundsKind BK, - std::set &CurrB) { - std::set CommonNewBounds; - for (auto *PVar : ProgVars) { - ABounds *NewB = nullptr; - if (BK == ABounds::CountBoundKind) { - NewB = new CountBound(PVar->getKey()); - } else if (BK == ABounds::ByteBoundKind) { - NewB = new ByteBound(PVar->getKey()); - } else { - continue; - } - assert(NewB != nullptr && "New Bounds cannot be nullptr"); - if (CurrB.empty()) { - CommonNewBounds.insert(NewB); - } else { - bool found = false; - for (auto *OB : CurrB) { - if (OB->areSame(NewB)) { - found = true; - CommonNewBounds.insert(NewB); - break; - } - } - if (!found) { - delete (NewB); - } - } - } - - for (auto *D : CurrB) { - delete(D); - } - CurrB.clear(); - CurrB.insert(CommonNewBounds.begin(), CommonNewBounds.end()); - return !CurrB.empty(); -} - void AvarBoundsInference:: - mergeReachableProgramVars(std::set &AllVars) { + mergeReachableProgramVars(std::set &AllVars) { if (AllVars.size() > 1) { + // Convert the bounds key to corresponding program var. + std::set AllProgVars; + for (auto AV : AllVars) { + AllProgVars.insert(BI->getProgramVar(AV)); + } ProgramVar *BVar = nullptr; // We want to merge all bounds vars. We give preference to // non-constants if there are multiple non-constant variables, // we give up. - for (auto *TmpB : AllVars) { + for (auto *TmpB : AllProgVars) { if (BVar == nullptr) { BVar = TmpB; } else if (BVar->IsNumConstant()) { if (!TmpB->IsNumConstant()) { // We give preference to non-constant lengths. BVar = TmpB; - } else if (BVar->getKey() != TmpB->getKey()) { + } else if (!this->BI->areSameProgramVar(BVar->getKey(), TmpB->getKey())) { // If both are different constants? BVar = nullptr; break; @@ -662,183 +640,320 @@ AvarBoundsInference:: } AllVars.clear(); if (BVar) - AllVars.insert(BVar); + AllVars.insert(BVar->getKey()); } } -bool AvarBoundsInference::inferPossibleBounds(BoundsKey K, ABounds *SB, - AVarGraph &BKGraph, - std::set &EB) { - bool RetVal = false; - if (SB != nullptr) { - auto *Kvar = BI->getProgramVar(K); - bool ValidB = false; - auto BKind = SB->getKind(); - BoundsKey SBKey; - if (CountBound *CB = dyn_cast(SB)) { - ValidB = true; - SBKey = CB->getCountVar(); - } else if (ByteBound *BB = dyn_cast(SB)) { - ValidB = true; - SBKey = BB->getByteVar(); - } - - std::set PotentialB; - // If we can handle the bounds? - if (ValidB) { - // First, find all the in-scope variable to which the SBKey flow to. - auto *SBVar = BI->getProgramVar(SBKey); - if (SBVar->IsNumConstant()) { - PotentialB.insert(SBVar); +bool +AvarBoundsInference::convergeInferredBounds() { + bool FoundSome = false; + for (auto &InfABnds : CurrIterInferBounds) { + auto *AB = BI->getBounds(InfABnds.first); + // If there are no bounds? + if (AB == nullptr) { + auto &BTypeMap = InfABnds.second; + for (auto &TySet : BTypeMap) { + mergeReachableProgramVars(TySet.second); } - // Find all the in scope variables reachable from the current - // bounds variable. - ScopeVisitor TV(Kvar->getScope(), PotentialB, BI->PVarInfo, - BI->PointerBoundsKey); - BKGraph.visitBreadthFirst(SBKey, [&TV](BoundsKey BK) { - TV.visitBoundsKey(BK); - }); - - if (*Kvar->getScope() == *SBVar->getScope()) { - PotentialB.insert(SBVar); + // Order of preference: Count and Byte + if (BTypeMap.find(ABounds::CountBoundKind) != BTypeMap.end() && + !BTypeMap[ABounds::CountBoundKind].empty()) { + AB = new CountBound(*BTypeMap[ABounds::CountBoundKind].begin()); + } else if (BTypeMap.find(ABounds::ByteBoundKind) != BTypeMap.end() && + !BTypeMap[ABounds::ByteBoundKind].empty()) { + AB = new ByteBound(*BTypeMap[ABounds::ByteBoundKind].begin()); } - mergeReachableProgramVars(PotentialB); + // If we found any bounds? + if (AB != nullptr) { + // Record that we inferred bounds using data-flow. + BI->BoundsInferStats.DataflowMatch.insert(InfABnds.first); + BI->replaceBounds(InfABnds.first, BoundsPriority::FlowInferred, AB); + FoundSome = true; + } + } + } + return FoundSome; +} + +bool AvarBoundsInference::getReachableBoundKeys(const ProgramVarScope *DstScope, + BoundsKey FromVarK, + std::set &PotK, + AVarGraph &BKGraph, + bool CheckImmediate) { + + // First, find all the in-scope variable to which the SBKey flow to. + auto *SBVar = BI->getProgramVar(FromVarK); - // Are there are other in-scope variables where the bounds variable - // has been assigned to? - if (!PotentialB.empty()) - RetVal = intersectBounds(PotentialB, BKind, EB); + // If both are in the same scope? + if (*DstScope == *SBVar->getScope()) { + PotK.insert(FromVarK); + if (CheckImmediate) { + return true; } } - return RetVal; + // All constants are reachable! + if (SBVar->IsNumConstant()) { + PotK.insert(FromVarK); + } + + // Get all bounds key that are equivalent to FromVarK + std::set AllFKeys; + AllFKeys.clear(); + AllFKeys.insert(FromVarK); + + // If this is a constant? Then get all bounds keys that + // correspond to the same constant + if (SBVar->IsNumConstant()) { + uint64_t ConsVal; + std::istringstream IS(SBVar->getVarName()); + IS >> ConsVal; + BI->fetchAllConstKeys(ConsVal, AllFKeys); + } + + for (auto CurrVarK : AllFKeys) { + // Find all the in scope variables reachable from the CurrVarK + // bounds variable. + ScopeVisitor TV(DstScope, PotK, BI->PVarInfo, + BI->PointerBoundsKey); + BKGraph.visitBreadthFirst(CurrVarK, [&TV](BoundsKey BK) { + TV.visitBoundsKey(BK); + }); + } + + return !PotK.empty(); } -bool AvarBoundsInference::getRelevantBounds(std::set &RBKeys, - std::set &ResBounds) { +bool AvarBoundsInference::getRelevantBounds(BoundsKey BK, + BndsKindMap &ResBounds) { // Try to get the bounds of all RBKeys. - bool ValidB = true; - for (auto PrevBKey : RBKeys) { - // If this pointer is used in pointer arithmetic then there - // are no relevant bounds for this pointer. - if (BI->hasPointerArithmetic(PrevBKey)) { - continue; - } - auto *PrevBounds = BI->getBounds(PrevBKey); + bool HasBounds = false; + // If this pointer is used in pointer arithmetic then there + // are no relevant bounds for this pointer. + if (!BI->hasPointerArithmetic(BK)) { + auto *PrevBounds = BI->getBounds(BK); // Does the parent arr has bounds? - if (PrevBounds != nullptr) - ResBounds.insert(PrevBounds); + if (PrevBounds != nullptr) { + ResBounds[PrevBounds->getKind()].insert(PrevBounds->getBKey()); + HasBounds = true; + } else if (CurrIterInferBounds.find(BK) != CurrIterInferBounds.end()) { + // get the bounds inferred from the current iteration + ResBounds = CurrIterInferBounds[BK]; + HasBounds = true; + } } - return ValidB; + return HasBounds; +} + +// Variable comparison. Comparator implementation: where given two BoundsKey +// they are checked to see if they correspond to the same program variable. +struct BVarCmp { +public: + BVarCmp(AVarBoundsInfo *ABI) { + this->ABInfo = ABI; + } + bool operator()(BoundsKey a, BoundsKey b) const { + if (this->ABInfo != nullptr && + this->ABInfo->areSameProgramVar(a, b)) { + return false; + } + return a < b; + }; +private: + AVarBoundsInfo *ABInfo; +}; + +bool +AvarBoundsInference::areDeclaredBounds(BoundsKey K, + const std::pair> &Bnds) { + bool IsDeclaredB = false; + // Get declared bounds and check that Bnds are same as the declared + // bounds. + ABounds *DeclB = this->BI->getBounds(K, + BoundsPriority::Declared, + nullptr); + if (DeclB && DeclB->getKind() == Bnds.first) { + IsDeclaredB = true; + for (auto TmpNBK : Bnds.second) { + if (!this->BI->areSameProgramVar(TmpNBK, DeclB->getBKey())) { + IsDeclaredB = false; + break; + } + } + } + return IsDeclaredB; } bool AvarBoundsInference::predictBounds(BoundsKey K, std::set &Neighbours, - AVarGraph &BKGraph, - ABounds **KB) { - std::set ResBounds; - std::set KBounds; - *KB = nullptr; - bool IsValid = false; - // Get all the relevant bounds from the neighbour ARRs - if (getRelevantBounds(Neighbours, ResBounds)) { - bool IsFuncRet = BI->isFunctionReturn(K); - // For function returns, we want all the predecessors to have bounds. - if (IsFuncRet && ResBounds.size() != Neighbours.size()) { - return IsValid; + AVarGraph &BKGraph) { + BndsKindMap NeighboursBnds, InferredKBnds; + // Bounds inferred from each of the neighbours. + std::map InferredNBnds; + bool IsChanged = false; + bool ErrorOccurred = false; + bool IsFuncRet = BI->isFunctionReturn(K); + ProgramVar *KVar = this->BI->getProgramVar(K); + + InferredNBnds.clear(); + // For reach of the Neighbour, try to infer possible bounds. + for (auto NBK : Neighbours) { + NeighboursBnds.clear(); + ErrorOccurred = false; + if (getRelevantBounds(NBK, NeighboursBnds) && !NeighboursBnds.empty()) { + std::set InfBK; + for (auto &NKBChoice : NeighboursBnds) { + InfBK.clear(); + for (auto TmpNBK : NKBChoice.second) { + getReachableBoundKeys(KVar->getScope(), TmpNBK, InfBK, BKGraph); + } + if (!InfBK.empty()) { + InferredNBnds[NBK][NKBChoice.first] = InfBK; + } else { + bool IsDeclaredB = areDeclaredBounds(NBK, NKBChoice); + + if (!IsDeclaredB || DisableInfDecls) { + // Oh, there are bounds for neighbour NBK but no bounds + // can be inferred for K from it. + InferredNBnds.clear(); + ErrorOccurred = true; + break; + } + } + } + } else if (IsFuncRet) { + // If this is a function return we should have bounds from all + // neighbours. + ErrorOccurred = true; } - if (!ResBounds.empty()) { - IsValid = true; - // Find the intersection? - for (auto *B : ResBounds) { - //TODO: check this - // This is stricter version i.e., there should be at least one common - // bounds information from an incoming ARR. - // Should we follow same for all the pointers? - - // For function returns we should have atleast one common bound - // from all the return values. - if (!inferPossibleBounds(K, B, BKGraph, KBounds) && IsFuncRet) { - IsValid = false; - break; + if (ErrorOccurred) { + // If an error occurred while processing bounds from neighbours/ + // clear the inferred bounds and break. + InferredNBnds.clear(); + break; + } + } + + if (!InferredNBnds.empty()) { + // All the possible inferred bounds for K + InferredKBnds.clear(); + std::set TmpBKeys; + // TODO: Figure out if there is a discrepency and try to implement + // root-cause analysis. + + // Find intersection of all bounds from neighbours. + for (auto &IN : InferredNBnds) { + for (auto &INB : IN.second) { + if (InferredKBnds.find(INB.first) == InferredKBnds.end()) { + InferredKBnds[INB.first] = INB.second; + } else { + TmpBKeys.clear(); + // Here, we should use intersection by taking care of comparing + // bounds key that correspond to the same constant. + // Note, DO NOT use findIntersection here, as we need to take + // care of comparing bounds key that correspond to the same + // constant. + auto &S1 = InferredKBnds[INB.first]; + auto &S2 = INB.second; + std::set_intersection(S1.begin(), S1.end(), + S2.begin(), S2.end(), + std::inserter(TmpBKeys, TmpBKeys.begin()), + BVarCmp(this->BI)); + InferredKBnds[INB.first] = TmpBKeys; } } - // If we converge to single bounds information? We found the bounds. - if (KBounds.size() == 1) { - *KB = *KBounds.begin(); - KBounds.clear(); - } else { - IsValid = false; - // TODO: Should we give up when we have multiple bounds? - for (auto *T : KBounds) { - delete(T); + } + + // Now from the newly inferred bounds i.e., InferredKBnds, check + // if is is different from previously known bounds of K + for (auto &IKB : InferredKBnds) { + bool Handled = false; + if (CurrIterInferBounds.find(K) != CurrIterInferBounds.end()) { + auto &BM = CurrIterInferBounds[K]; + if (BM.find(IKB.first) != BM.end()) { + Handled = true; + if (BM[IKB.first] != IKB.second) { + BM[IKB.first] = IKB.second; + if (IKB.second.empty()) + BM.erase(IKB.first); + IsChanged = true; + } + } + } + if (!Handled) { + CurrIterInferBounds[K][IKB.first] = IKB.second; + if (IKB.second.empty()) { + CurrIterInferBounds[K].erase(IKB.first); + } else { + IsChanged = true; } } } + } else if (ErrorOccurred) { + // If any error occurred during inferring bounds then + // remove any previously inferred bounds for K. + IsChanged = CurrIterInferBounds.erase(K) != 0; } - return IsValid; + return IsChanged; } bool AvarBoundsInference::inferBounds(BoundsKey K, AVarGraph &BKGraph, bool FromPB) { bool IsChanged = false; if (BI->InvalidBounds.find(K) == BI->InvalidBounds.end()) { - ABounds *KB = nullptr; // Infer from potential bounds? if (FromPB) { auto &PotBDs = BI->PotentialCntBounds; if (PotBDs.find(K) != PotBDs.end()) { ProgramVar *Kvar = BI->getProgramVar(K); - std::set PotentialB; + std::set PotentialB; PotentialB.clear(); for (auto TK : PotBDs[K]) { ProgramVar *TKVar = BI->getProgramVar(TK); - // If the count var is of different scope? Then try to find variables - // that are in scope. - if (*Kvar->getScope() != *TKVar->getScope()) { - // Find all the in scope variables reachable from the current - // bounds variable. - ScopeVisitor TV(Kvar->getScope(), PotentialB, BI->PVarInfo, - BI->PointerBoundsKey); - BKGraph.visitBreadthFirst(TK, [&TV](BoundsKey BK) { - TV.visitBoundsKey(BK); - }); - } else { - PotentialB.insert(TKVar); + getReachableBoundKeys(Kvar->getScope(), TK, PotentialB, BKGraph, true); + } + + if (!PotentialB.empty()) { + bool Handled = false; + // Potential bounds are always count bounds. + // We use potential bounds + ABounds::BoundsKind PotKind = ABounds::CountBoundKind; + if (CurrIterInferBounds.find(K) != CurrIterInferBounds.end()) { + auto &BM = CurrIterInferBounds[K]; + // If we have any inferred bounds for K then ignore potential + // bounds. + for (auto &PosB : BM) { + if (!PosB.second.empty()) { + Handled = true; + break; + } + } + } + if (!Handled) { + CurrIterInferBounds[K][PotKind] = PotentialB; + IsChanged = true; } } - ProgramVar *BVar = nullptr; - mergeReachableProgramVars(PotentialB); - if (!PotentialB.empty()) - BVar = *(PotentialB.begin()); - if (BVar != nullptr) - KB = new CountBound(BVar->getKey()); } } else { // Infer from the flow-graph. std::set TmpBkeys; // Try to predict bounds from predecessors. BKGraph.getPredecessors(K, TmpBkeys); - if (!predictBounds(K, TmpBkeys, BKGraph, &KB)) { - KB = nullptr; - } - } - - if (KB != nullptr) { - BI->replaceBounds(K, FlowInferred, KB); - IsChanged = true; + IsChanged = predictBounds(K, TmpBkeys, BKGraph); } } return IsChanged; } -bool AVarBoundsInfo::performWorkListInference(std::set &ArrNeededBounds, +bool AVarBoundsInfo::performWorkListInference(const std::set &ArrNeededBounds, AVarGraph &BKGraph, + AvarBoundsInference &BI, bool FromPB) { bool RetVal = false; std::set WorkList; WorkList.insert(ArrNeededBounds.begin(), ArrNeededBounds.end()); - AvarBoundsInference BI(this); std::set NextIterArrs; bool Changed = true; while (Changed) { @@ -851,10 +966,6 @@ bool AVarBoundsInfo::performWorkListInference(std::set &ArrNeededBoun WorkList.erase(CurrArrKey); // Can we find bounds for this Arr? if (BI.inferBounds(CurrArrKey, BKGraph, FromPB)) { - // Record the stats. - BoundsInferStats.DataflowMatch.insert(CurrArrKey); - // We found the bounds. - ArrNeededBounds.erase(CurrArrKey); RetVal = true; Changed = true; // Get all the successors of the ARR whose bounds we just found. @@ -1051,8 +1162,27 @@ void AVarBoundsInfo::computerArrPointers(ProgramInfo *PI, ArrPointers.insert(CtxSensBKeys.begin(), CtxSensBKeys.end()); } +void AVarBoundsInfo::getBoundsNeededArrPointers(const std::set &ArrPtrs, + std::set &AB) { + // Next, get the ARR pointers that has bounds. + // These are pointers with bounds. + std::set ArrWithBounds; + for (auto &T : BInfo) { + ArrWithBounds.insert(T.first); + } + // Also add arrays with invalid bounds. + ArrWithBounds.insert(InvalidBounds.begin(), InvalidBounds.end()); + + // This are the array atoms that need bounds. + // i.e., AB = ArrPtrs - ArrPtrsWithBounds. + std::set_difference(ArrPtrs.begin(), ArrPtrs.end(), + ArrWithBounds.begin(), ArrWithBounds.end(), + std::inserter(AB, AB.end())); +} + bool AVarBoundsInfo::performFlowAnalysis(ProgramInfo *PI) { bool RetVal = false; + AvarBoundsInference ABI(this); // First get all the pointer vars which are ARRs std::set ArrPointers; computerArrPointers(PI, ArrPointers); @@ -1071,36 +1201,49 @@ bool AVarBoundsInfo::performFlowAnalysis(ProgramInfo *PI) { removeBounds(TBK, FlowInferred); } - // Next, get the ARR pointers that has bounds. - // These are pointers with bounds. - std::set ArrWithBounds; - for (auto &T : BInfo) { - ArrWithBounds.insert(T.first); - } - // Also add arrays with invalid bounds. - ArrWithBounds.insert(InvalidBounds.begin(), InvalidBounds.end()); + std::set ArrNeededBounds, ArrNeededBoundsNew; + ArrNeededBounds.clear(); - // This are the array atoms that need bounds. - // i.e., ArrNeededBounds = ArrPtrs - ArrPtrsWithBounds. - std::set ArrNeededBounds; - std::set_difference(ArrPointers.begin(), ArrPointers.end(), - ArrWithBounds.begin(), ArrWithBounds.end(), - std::inserter(ArrNeededBounds, ArrNeededBounds.end())); + + getBoundsNeededArrPointers(ArrPointers, ArrNeededBounds); bool Changed = !ArrNeededBounds.empty(); // Now compute the bounds information of all the ARR pointers that need it. + // We iterate until there are no new array variables whose bounds are found. + // The expectation is every iteration we will find bounds for at least one array + // variable. while (Changed) { - Changed = false; - // Use flow-graph. - Changed = performWorkListInference(ArrNeededBounds, this->ProgVarGraph) || Changed; - // Use potential length variables. - Changed = performWorkListInference(ArrNeededBounds, this->ProgVarGraph, true) || Changed; - // Try solving using context-sensitive BoundsKey. - Changed = performWorkListInference(ArrNeededBounds, this->CtxSensProgVarGraph) || Changed; - Changed = performWorkListInference(ArrNeededBounds, this->CtxSensProgVarGraph, true) || Changed; + // Clear all inferred bounds. + ABI.clearInferredBounds(); + // Regular flow inference. + performWorkListInference(ArrNeededBounds, this->ProgVarGraph, ABI); + // Flow inference using potential bounds. + performWorkListInference(ArrNeededBounds, this->ProgVarGraph, ABI, true); + // Flow inference from context sensitive keys to original keys. + performWorkListInference(ArrNeededBounds, this->CtxSensProgVarGraph, ABI); + // Now, by using potential bounds. + performWorkListInference(ArrNeededBounds, this->CtxSensProgVarGraph, ABI, true); + + // From all the sets of bounds computed for various array variables. Intersect them + // and find the common bound variable. + ABI.convergeInferredBounds(); + + ArrNeededBoundsNew.clear(); + // Get array variables that still need bounds. + getBoundsNeededArrPointers(ArrPointers, ArrNeededBoundsNew); + // Did we find bounds for new array variables? + Changed = ArrNeededBounds != ArrNeededBoundsNew; + if (ArrNeededBounds.size() == ArrNeededBoundsNew.size()) { + assert (!Changed && "New arrays needed bounds after inference"); + } + assert (ArrNeededBoundsNew.size() <= ArrNeededBounds.size() && + "We should always have less number of arrays whose bounds needs " + "to be inferred after each round."); + ArrNeededBounds = ArrNeededBoundsNew; } + return RetVal; } @@ -1161,6 +1304,15 @@ void AVarBoundsInfo::print_stats(llvm::raw_ostream &O, } } +bool AVarBoundsInfo::areSameProgramVar(BoundsKey B1, BoundsKey B2) { + if (B1 != B2) { + ProgramVar *P1 = getProgramVar(B1); + ProgramVar *P2 = getProgramVar(B2); + return P1->IsNumConstant() && P2->IsNumConstant() && P1->getVarName() == P2->getVarName(); + } + return B1 == B2; +} + ContextSensitiveBoundsKeyVisitor::ContextSensitiveBoundsKeyVisitor(ASTContext *C, ProgramInfo &I, ConstraintResolver *CResolver) diff --git a/clang/lib/CConv/ArrayBoundsInferenceConsumer.cpp b/clang/lib/CConv/ArrayBoundsInferenceConsumer.cpp index a4ce38f7bf2b..7314c8b0e2b3 100644 --- a/clang/lib/CConv/ArrayBoundsInferenceConsumer.cpp +++ b/clang/lib/CConv/ArrayBoundsInferenceConsumer.cpp @@ -350,6 +350,12 @@ bool isExpressionStructField(Expr *ToCheck, FieldDecl **TargetDecl) { return false; } +void AllocBasedBoundsInference::HandleTranslationUnit(ASTContext &Context) { + Info.enterCompilationUnit(Context); + HandleArrayVariablesBoundsDetection(&Context, Info, false); + Info.exitCompilationUnit(); +} + // This visitor handles the bounds of function local array variables. void GlobalABVisitor::SetParamHeuristicInfo(LocalVarABVisitor *LAB) { @@ -891,7 +897,8 @@ void LengthVarInference::VisitArraySubscriptExpr(ArraySubscriptExpr *ASE) { } -void HandleArrayVariablesBoundsDetection(ASTContext *C, ProgramInfo &I) { +void HandleArrayVariablesBoundsDetection(ASTContext *C, ProgramInfo &I, + bool UseHeuristics) { // Run array bounds for (auto FuncName : AllocatorFunctions) { AllocatorSizeAssoc[FuncName] = {0}; @@ -908,16 +915,22 @@ void HandleArrayVariablesBoundsDetection(ASTContext *C, ProgramInfo &I) { // Try to guess the bounds information for function locals. Stmt *Body = FD->getBody(); LFV.TraverseStmt(Body); - // Set information collected after analyzing the function body. - GlobABV.SetParamHeuristicInfo(&LFV); - GlobABV.TraverseDecl(D); + + if (UseHeuristics) { + // Set information collected after analyzing the function body. + GlobABV.SetParamHeuristicInfo(&LFV); + GlobABV.TraverseDecl(D); + } AddMainFuncHeuristic(C, I, FD); GlobalTraversed = true; } } - // If this is not already traversed? - if (!GlobalTraversed) - GlobABV.TraverseDecl(D); - GlobABV.SetParamHeuristicInfo(nullptr); + + if (UseHeuristics) { + // If this is not already traversed? + if (!GlobalTraversed) + GlobABV.TraverseDecl(D); + GlobABV.SetParamHeuristicInfo(nullptr); + } } } diff --git a/clang/lib/CConv/CConv.cpp b/clang/lib/CConv/CConv.cpp index 1623412946ac..d693a20745ad 100644 --- a/clang/lib/CConv/CConv.cpp +++ b/clang/lib/CConv/CConv.cpp @@ -9,7 +9,7 @@ // Implementation of various method in CConv.h // //===----------------------------------------------------------------------===// - +#include "clang/CConv/ArrayBoundsInferenceConsumer.h" #include "clang/CConv/CConv.h" #include "clang/CConv/ConstraintBuilder.h" #include "clang/CConv/IntermediateToolHook.h" @@ -30,7 +30,7 @@ using namespace llvm; #define BEFORE_SOLVING_SUFFIX "_before_solving_" #define AFTER_SUBTYPING_SUFFIX "_after_subtyping_" -static cl::OptionCategory ArrBoundsInferCat("Array bounds inference options"); +cl::OptionCategory ArrBoundsInferCat("Array bounds inference options"); static cl::opt DebugArrSolver("debug-arr-solver", cl::desc("Dump array bounds inference graph"), cl::init(false), @@ -277,21 +277,38 @@ bool CConvInterface::SolveConstraints(bool ComputeInterimState) { if (DumpIntermediate) dumpConstraintOutputJson(FINAL_OUTPUT_SUFFIX, GlobalProgramInfo); + + ClangTool &Tool = getGlobalClangTool(); if (AllTypes) { if (DebugArrSolver) GlobalProgramInfo.getABoundsInfo().dumpAVarGraph( "arr_bounds_initial.dot"); - // Propagate initial data-flow information for Array pointers. + // Propagate initial data-flow information for Array pointers from + // bounds declarations. + GlobalProgramInfo.getABoundsInfo().performFlowAnalysis(&GlobalProgramInfo); + + // 3. Infer the bounds based on calls to malloc and calloc + std::unique_ptr ABInfTool = + newFrontendActionFactoryA + >(GlobalProgramInfo); + if (ABInfTool) + Tool.run(ABInfTool.get()); + else + llvm_unreachable("No Action"); + + // Propagate the information from allocator bounds. GlobalProgramInfo.getABoundsInfo().performFlowAnalysis(&GlobalProgramInfo); + } - // 3. Run intermediate tool hook to run visitors that need to be executed + // 4. Run intermediate tool hook to run visitors that need to be executed // after constraint solving but before rewriting. - ClangTool &Tool = getGlobalClangTool(); std::unique_ptr IMTool = newFrontendActionFactoryA - >(GlobalProgramInfo); + >(GlobalProgramInfo); if (IMTool) Tool.run(IMTool.get()); else diff --git a/clang/lib/CConv/IntermediateToolHook.cpp b/clang/lib/CConv/IntermediateToolHook.cpp index 7cf6b7d14e2c..4c5055782b4a 100644 --- a/clang/lib/CConv/IntermediateToolHook.cpp +++ b/clang/lib/CConv/IntermediateToolHook.cpp @@ -15,8 +15,14 @@ using namespace llvm; using namespace clang; +extern cl::OptionCategory ArrBoundsInferCat; +static cl::opt DisableArrH("disable-arr-hu", + cl::desc("Disable Array Bounds Inference Heuristics."), + cl::init(false), + cl::cat(ArrBoundsInferCat)); + void IntermediateToolHook::HandleTranslationUnit(ASTContext &Context) { Info.enterCompilationUnit(Context); - HandleArrayVariablesBoundsDetection(&Context, Info); + HandleArrayVariablesBoundsDetection(&Context, Info, !DisableArrH); Info.exitCompilationUnit(); } diff --git a/clang/lib/CConv/ProgramVar.cpp b/clang/lib/CConv/ProgramVar.cpp index 071514d14f1f..6d0960985835 100644 --- a/clang/lib/CConv/ProgramVar.cpp +++ b/clang/lib/CConv/ProgramVar.cpp @@ -32,7 +32,8 @@ const StructScope *StructScope::getStructScope(std::string StName) { if (AllStScopes.find(TmpS) == AllStScopes.end()) { AllStScopes.insert(TmpS); } - return &(*AllStScopes.find(TmpS)); + const auto &SS = *AllStScopes.find(TmpS); + return &SS; } const FunctionParamScope *FunctionParamScope::getFunctionParamScope( @@ -41,7 +42,8 @@ const FunctionParamScope *FunctionParamScope::getFunctionParamScope( if (AllFnParamScopes.find(TmpFPS) == AllFnParamScopes.end()) { AllFnParamScopes.insert(TmpFPS); } - return &(*AllFnParamScopes.find(TmpFPS)); + const auto &FPS = *AllFnParamScopes.find(TmpFPS); + return &FPS; } const CtxFunctionArgScope *CtxFunctionArgScope::getCtxFunctionParamScope( @@ -50,7 +52,8 @@ const CtxFunctionArgScope *CtxFunctionArgScope::getCtxFunctionParamScope( if (AllCtxFnArgScopes.find(TmpAS) == AllCtxFnArgScopes.end()) { AllCtxFnArgScopes.insert(TmpAS); } - return &(*AllCtxFnArgScopes.find(TmpAS)); + const auto &CFAS = *AllCtxFnArgScopes.find(TmpAS); + return &CFAS; } const FunctionScope *FunctionScope::getFunctionScope(std::string FnName, @@ -59,7 +62,8 @@ const FunctionScope *FunctionScope::getFunctionScope(std::string FnName, if (AllFnScopes.find(TmpFS) == AllFnScopes.end()) { AllFnScopes.insert(TmpFS); } - return &(*AllFnScopes.find(TmpFS)); + const auto &FS = *AllFnScopes.find(TmpFS); + return &FS; } std::set ProgramVar::AllProgramVars; diff --git a/clang/test/CheckedCRewriter/arrboundsmerging.c b/clang/test/CheckedCRewriter/arrboundsmerging.c new file mode 100644 index 000000000000..c99a3387fd99 --- /dev/null +++ b/clang/test/CheckedCRewriter/arrboundsmerging.c @@ -0,0 +1,55 @@ +// RUN: cconv-standalone -disable-arr-hu -alltypes %s -- | FileCheck -match-full-lines %s + + +/* +Advanced array-bounds inference (based on control-dependencies). +*/ + +typedef unsigned long size_t; +extern _Itype_for_any(T) void *malloc(size_t size) : itype(_Array_ptr) byte_count(size); + +// Here x bounds will be c +void foo(int *x, int c) { +//CHECK: void foo(_Array_ptr x : count(c), int c) { + + x[3] = c; +} + +// Here x will be of constant size +void foo2(int *x, int c) { +//CHECK: void foo2(_Array_ptr x : count(8), int c) { + x[3] = c; +} + +// Here x bounds is c but the violates bounds. +void foo3(int *x, int c) { +//CHECK: void foo3(_Array_ptr x, int c) { + x[0] = c; +} + +void bar(void) { + int *p = malloc(sizeof(int)*8); + int *q = malloc(sizeof(int)*8); + int *p1 = malloc(sizeof(int)*8); + int *q1 = malloc(sizeof(int)*8); +//CHECK: _Array_ptr p : count(8) = malloc(sizeof(int)*8); +//CHECK: _Array_ptr q : count(8) = malloc(sizeof(int)*8); +//CHECK: _Array_ptr p1 : count(8) = malloc(sizeof(int)*8); +//CHECK: _Array_ptr q1 : count(8) = malloc(sizeof(int)*8); + + int n = 8; + int l; + int *q2 = malloc(sizeof(int)*l); + + // Variation 1 + foo(p,n); + foo(q,8); + + // Variation 2 + foo2(p1,8); + foo2(q1,28); + + // Variation 3 + foo3(q2,l); + foo3(q1,28); +} From dd73a007b21dfefa035290954112f169a90383e3 Mon Sep 17 00:00:00 2001 From: Aravind Machiry Date: Mon, 5 Oct 2020 16:14:55 -0700 Subject: [PATCH 2/6] Handling comments --- clang/include/clang/CConv/ConstraintsGraph.h | 1 - clang/test/CheckedCRewriter/arrboundsmerging.c | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/CConv/ConstraintsGraph.h b/clang/include/clang/CConv/ConstraintsGraph.h index 142b6d3b2433..fe86cef37283 100644 --- a/clang/include/clang/CConv/ConstraintsGraph.h +++ b/clang/include/clang/CConv/ConstraintsGraph.h @@ -167,7 +167,6 @@ class DataGraph : // Insert into BFS cache. if (BFSCache.find(Start) == BFSCache.end()) { std::set ReachableNodes; - ReachableNodes.clear(); for (auto TNode : llvm::breadth_first(*N)) { ReachableNodes.insert(TNode->getData()); } diff --git a/clang/test/CheckedCRewriter/arrboundsmerging.c b/clang/test/CheckedCRewriter/arrboundsmerging.c index c99a3387fd99..b98066cfae63 100644 --- a/clang/test/CheckedCRewriter/arrboundsmerging.c +++ b/clang/test/CheckedCRewriter/arrboundsmerging.c @@ -27,6 +27,11 @@ void foo3(int *x, int c) { x[0] = c; } +void foo4(int *x, int c) { +//CHECK: void foo4(_Array_ptr x, int c) { + x[0] = c; +} + void bar(void) { int *p = malloc(sizeof(int)*8); int *q = malloc(sizeof(int)*8); @@ -38,18 +43,27 @@ void bar(void) { //CHECK: _Array_ptr q1 : count(8) = malloc(sizeof(int)*8); int n = 8; - int l; + int l = 4; int *q2 = malloc(sizeof(int)*l); + int *q4 = malloc(sizeof(int)*n); // Variation 1 + // Correct size association: second argument is indeed the size. foo(p,n); foo(q,8); // Variation 2 + // passing fixed size array: No size association. foo2(p1,8); foo2(q1,28); // Variation 3 + // Variable sized arrays: One correct and one wrong bounds. foo3(q2,l); foo3(q1,28); + + // Variation 4 + // Passing wrong lengths to check bounds. + foo4(q2,n); + foo4(q4,l); } From 472bc27695de1a9d56ef8c1dbb930f38651feafd Mon Sep 17 00:00:00 2001 From: Aravind Machiry Date: Fri, 9 Oct 2020 15:39:15 -0700 Subject: [PATCH 3/6] Adding ctx sensitive keys --- clang/include/clang/CConv/ProgramVar.h | 15 +++++++++++++-- clang/lib/CConv/AVarBoundsInfo.cpp | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/CConv/ProgramVar.h b/clang/include/clang/CConv/ProgramVar.h index 30d0392a0fb0..0d760cccf70f 100644 --- a/clang/include/clang/CConv/ProgramVar.h +++ b/clang/include/clang/CConv/ProgramVar.h @@ -215,6 +215,17 @@ class CtxFunctionArgScope : public FunctionParamScope { const PersistentSourceLoc &CtxPSL) : FunctionParamScope(FN, IsSt) { PSL = CtxPSL; + std::string FileName = PSL.getFileName(); + CtxIDStr = ""; + if (!FileName.empty()) { + llvm::sys::fs::UniqueID UId; + if (llvm::sys::fs::getUniqueID(FileName, UId)) { + CtxIDStr = std::to_string(UId.getDevice()) + ":" + + std::to_string(UId.getFile()) + ":"; + } + } + CtxIDStr += std::to_string(PSL.getLineNo()) + ":" + + std::to_string(PSL.getColSNo()); this->Kind = CtxFunctionArgScopeKind; } @@ -261,7 +272,7 @@ class CtxFunctionArgScope : public FunctionParamScope { } std::string getStr() const { - return "CtxFuncArg_" + FName; + return FName + "_Ctx_" + CtxIDStr; } static const CtxFunctionArgScope * @@ -270,7 +281,7 @@ class CtxFunctionArgScope : public FunctionParamScope { private: PersistentSourceLoc PSL; - + std::string CtxIDStr; static std::set AllCtxFnArgScopes; }; diff --git a/clang/lib/CConv/AVarBoundsInfo.cpp b/clang/lib/CConv/AVarBoundsInfo.cpp index 069455927b20..4bf932adb26a 100644 --- a/clang/lib/CConv/AVarBoundsInfo.cpp +++ b/clang/lib/CConv/AVarBoundsInfo.cpp @@ -823,7 +823,7 @@ bool AvarBoundsInference::predictBounds(BoundsKey K, } } } - } else if (IsFuncRet) { + } else { // If this is a function return we should have bounds from all // neighbours. ErrorOccurred = true; From 3e3fa86c362605f0a1b8816ebd413049d770e3bf Mon Sep 17 00:00:00 2001 From: Aravind Machiry Date: Sat, 10 Oct 2020 00:29:45 -0700 Subject: [PATCH 4/6] Minor refactoring --- clang/include/clang/CConv/AVarBoundsInfo.h | 18 +-- clang/include/clang/CConv/Utils.h | 3 + clang/lib/CConv/AVarBoundsInfo.cpp | 128 +++++++++++++-------- clang/lib/CConv/ConstraintBuilder.cpp | 11 +- clang/lib/CConv/Utils.cpp | 4 + 5 files changed, 101 insertions(+), 63 deletions(-) diff --git a/clang/include/clang/CConv/AVarBoundsInfo.h b/clang/include/clang/CConv/AVarBoundsInfo.h index ff25e344d6b3..54948a6f4dca 100644 --- a/clang/include/clang/CConv/AVarBoundsInfo.h +++ b/clang/include/clang/CConv/AVarBoundsInfo.h @@ -100,6 +100,7 @@ class AvarBoundsInference { // Clear all possible inferred bounds for all the BoundsKeys void clearInferredBounds() { CurrIterInferBounds.clear(); + BKsFailedFlowInference.clear(); } // Infer bounds for the given key from the set of given ARR atoms. @@ -143,11 +144,14 @@ class AvarBoundsInference { // Potential Bounds for each bounds key inferred for the current iteration. std::map CurrIterInferBounds; + // BoundsKey that failed the flow inference. + std::set BKsFailedFlowInference; }; class AVarBoundsInfo { public: - AVarBoundsInfo() : ProgVarGraph(this), CtxSensProgVarGraph(this) { + AVarBoundsInfo() : ProgVarGraph(this), CtxSensProgVarGraph(this), + RevCtxSensProgVarGraph(this) { BCount = 1; PVarInfo.clear(); InProgramArrPtrBoundsKeys.clear(); @@ -300,9 +304,11 @@ class AVarBoundsInfo { // Graph of all program variables. AVarGraph ProgVarGraph; - // Graph that contains only edges between context-sensitive - // BoundsKey and corresponding original BoundsKey. + // Graph that contains only edges from normal BoundsKey to + // context-sensitive BoundsKey. AVarGraph CtxSensProgVarGraph; + // Same as above but in the reverse direction. + AVarGraph RevCtxSensProgVarGraph; // Stats on techniques used to find length for various variables. AVarBoundsStats BoundsInferStats; // This is the map of pointer variable bounds key and set of bounds key @@ -345,12 +351,10 @@ class AVarBoundsInfo { bool keepHighestPriorityBounds(std::set &ArrPtrs); // Perform worklist based inference on the requested array variables using - // the provided graph. - // The flag FromPB requests the algorithm to use potential length variables. + // the provided graph and potential length variables. bool performWorkListInference(const std::set &ArrNeededBounds, AVarGraph &BKGraph, - AvarBoundsInference &BI, - bool FromPB = false); + AvarBoundsInference &BI); void insertParamKey(ParamDeclType ParamDecl, BoundsKey NK); }; diff --git a/clang/include/clang/CConv/Utils.h b/clang/include/clang/CConv/Utils.h index 2747f7bf46a5..9eb95ce8c9a3 100644 --- a/clang/include/clang/CConv/Utils.h +++ b/clang/include/clang/CConv/Utils.h @@ -124,6 +124,9 @@ bool isFunctionAllocator(std::string FuncName); // Is the given variable built in type? bool isPointerType(clang::ValueDecl *VD); +// Is this a pointer or array type? +bool isPtrOrArrayType(const clang::QualType &QT); + // Check if provided type is a var arg type? bool isVarArgType(const std::string &TypeName); diff --git a/clang/lib/CConv/AVarBoundsInfo.cpp b/clang/lib/CConv/AVarBoundsInfo.cpp index 4bf932adb26a..e9514527c4dd 100644 --- a/clang/lib/CConv/AVarBoundsInfo.cpp +++ b/clang/lib/CConv/AVarBoundsInfo.cpp @@ -240,7 +240,7 @@ BoundsKey AVarBoundsInfo::getVariable(clang::VarDecl *VD) { assert(PVS != nullptr && "Context not null"); auto *PVar = ProgramVar::createNewProgramVar(NK, VD->getNameAsString(), PVS); insertProgramVar(NK, PVar); - if (VD->getType()->isPointerType()) + if (isPtrOrArrayType(VD->getType())) PointerBoundsKey.insert(NK); } return getVarKey(PSL); @@ -268,7 +268,7 @@ BoundsKey AVarBoundsInfo::getVariable(clang::ParmVarDecl *PVD) { auto *PVar = ProgramVar::createNewProgramVar(NK, ParamName, FPS); insertProgramVar(NK, PVar); insertParamKey(ParamKey, NK); - if (PVD->getType()->isPointerType()) + if (isPtrOrArrayType(PVD->getType())) PointerBoundsKey.insert(NK); } return ParamDeclVarMap.left().at(ParamKey); @@ -289,7 +289,7 @@ BoundsKey AVarBoundsInfo::getVariable(clang::FunctionDecl *FD) { auto *PVar = ProgramVar::createNewProgramVar(NK, FD->getNameAsString(), FPS); insertProgramVar(NK, PVar); FuncDeclVarMap.insert(FuncKey, NK); - if (FD->getReturnType()->isPointerType()) + if (isPtrOrArrayType(FD->getReturnType())) PointerBoundsKey.insert(NK); } return FuncDeclVarMap.left().at(FuncKey); @@ -305,7 +305,7 @@ BoundsKey AVarBoundsInfo::getVariable(clang::FieldDecl *FD) { const StructScope *SS = StructScope::getStructScope(StName); auto *PVar = ProgramVar::createNewProgramVar(NK, FD->getNameAsString(), SS); insertProgramVar(NK, PVar); - if (FD->getType()->isPointerType()) + if (isPtrOrArrayType(FD->getType())) PointerBoundsKey.insert(NK); } return getVarKey(PSL); @@ -551,8 +551,7 @@ void AVarBoundsInfo::insertProgramVar(BoundsKey NK, ProgramVar *PV) { bool hasArray(ConstraintVariable *CK, Constraints &CS) { auto &E = CS.getVariables(); if (PVConstraint *PV = dyn_cast(CK)) { - if ((PV->hasArr(E, 0) || PV->hasNtArr(E, 0)) && - PV->isTopCvarUnsizedArr()) { + if (PV->hasArr(E, 0) || PV->hasNtArr(E, 0)) { return true; } } @@ -563,7 +562,7 @@ bool isInSrcArray(ConstraintVariable *CK, Constraints &CS) { auto &E = CS.getVariables(); if (PVConstraint *PV = dyn_cast(CK)) { if ((PV->hasArr(E, 0) || PV->hasNtArr(E, 0)) && - PV->isTopCvarUnsizedArr() && PV->isForValidDecl()) { + PV->isForValidDecl()) { return true; } } @@ -647,11 +646,11 @@ AvarBoundsInference:: bool AvarBoundsInference::convergeInferredBounds() { bool FoundSome = false; - for (auto &InfABnds : CurrIterInferBounds) { - auto *AB = BI->getBounds(InfABnds.first); + for (auto &CInfABnds : CurrIterInferBounds) { + auto *AB = BI->getBounds(CInfABnds.first); // If there are no bounds? if (AB == nullptr) { - auto &BTypeMap = InfABnds.second; + auto BTypeMap = CInfABnds.second; for (auto &TySet : BTypeMap) { mergeReachableProgramVars(TySet.second); } @@ -667,9 +666,11 @@ AvarBoundsInference::convergeInferredBounds() { // If we found any bounds? if (AB != nullptr) { // Record that we inferred bounds using data-flow. - BI->BoundsInferStats.DataflowMatch.insert(InfABnds.first); - BI->replaceBounds(InfABnds.first, BoundsPriority::FlowInferred, AB); + BI->BoundsInferStats.DataflowMatch.insert(CInfABnds.first); + BI->replaceBounds(CInfABnds.first, BoundsPriority::FlowInferred, AB); FoundSome = true; + } else { + BKsFailedFlowInference.insert(CInfABnds.first); } } } @@ -732,15 +733,17 @@ bool AvarBoundsInference::getRelevantBounds(BoundsKey BK, // If this pointer is used in pointer arithmetic then there // are no relevant bounds for this pointer. if (!BI->hasPointerArithmetic(BK)) { - auto *PrevBounds = BI->getBounds(BK); - // Does the parent arr has bounds? - if (PrevBounds != nullptr) { - ResBounds[PrevBounds->getKind()].insert(PrevBounds->getBKey()); - HasBounds = true; - } else if (CurrIterInferBounds.find(BK) != CurrIterInferBounds.end()) { + if (CurrIterInferBounds.find(BK) != CurrIterInferBounds.end()) { // get the bounds inferred from the current iteration ResBounds = CurrIterInferBounds[BK]; HasBounds = true; + } else { + // Get the computed bounds? + auto *PrevBounds = BI->getBounds(BK); + if (PrevBounds != nullptr) { + ResBounds[PrevBounds->getKind()].insert(PrevBounds->getBKey()); + HasBounds = true; + } } } return HasBounds; @@ -823,7 +826,9 @@ bool AvarBoundsInference::predictBounds(BoundsKey K, } } } - } else { + } else if (IsFuncRet || + (BKsFailedFlowInference.find(NBK) != BKsFailedFlowInference.end())) { + // If this is a function return we should have bounds from all // neighbours. ErrorOccurred = true; @@ -949,31 +954,39 @@ bool AvarBoundsInference::inferBounds(BoundsKey K, AVarGraph &BKGraph, bool From bool AVarBoundsInfo::performWorkListInference(const std::set &ArrNeededBounds, AVarGraph &BKGraph, - AvarBoundsInference &BI, - bool FromPB) { + AvarBoundsInference &BI) { bool RetVal = false; std::set WorkList; - WorkList.insert(ArrNeededBounds.begin(), ArrNeededBounds.end()); std::set NextIterArrs; - bool Changed = true; - while (Changed) { - Changed = false; - NextIterArrs.clear(); - // Are there any ARR atoms that need bounds? - while (!WorkList.empty()) { - BoundsKey CurrArrKey = *WorkList.begin(); - // Remove the bounds key from the worklist. - WorkList.erase(CurrArrKey); - // Can we find bounds for this Arr? - if (BI.inferBounds(CurrArrKey, BKGraph, FromPB)) { - RetVal = true; - Changed = true; - // Get all the successors of the ARR whose bounds we just found. - BKGraph.getSuccessors(CurrArrKey, NextIterArrs); + std::vector FromBVals; + // We first infer with using only flow information + // i.e., without using any potential bounds. + FromBVals.push_back(false); + // Next, we try using potential bounds. + FromBVals.push_back(true); + for (auto FromPB: FromBVals) { + WorkList.clear(); + WorkList.insert(ArrNeededBounds.begin(), ArrNeededBounds.end()); + bool Changed = true; + while (Changed) { + Changed = false; + NextIterArrs.clear(); + // Are there any ARR atoms that need bounds? + while (!WorkList.empty()) { + BoundsKey CurrArrKey = *WorkList.begin(); + // Remove the bounds key from the worklist. + WorkList.erase(CurrArrKey); + // Can we find bounds for this Arr? + if (BI.inferBounds(CurrArrKey, BKGraph, FromPB)) { + RetVal = true; + Changed = true; + // Get all the successors of the ARR whose bounds we just found. + BKGraph.getSuccessors(CurrArrKey, NextIterArrs); + } + } + if (Changed) { + findIntersection(ArrNeededBounds, NextIterArrs, WorkList); } - } - if (Changed) { - findIntersection(ArrNeededBounds, NextIterArrs, WorkList); } } return RetVal; @@ -986,7 +999,7 @@ AVarBoundsInfo::insertCtxSensBoundsKey(ProgramVar *OldPV, ProgramVar *NKVar = OldPV->makeCopy(NK); NKVar->setScope(CFAS); insertProgramVar(NK, NKVar); - ProgVarGraph.addEdge(OldPV->getKey(), NKVar->getKey()); + RevCtxSensProgVarGraph.addEdge(OldPV->getKey(), NKVar->getKey()); CtxSensProgVarGraph.addEdge(NKVar->getKey(), OldPV->getKey()); } @@ -1218,20 +1231,35 @@ bool AVarBoundsInfo::performFlowAnalysis(ProgramInfo *PI) { ABI.clearInferredBounds(); // Regular flow inference. performWorkListInference(ArrNeededBounds, this->ProgVarGraph, ABI); - // Flow inference using potential bounds. - performWorkListInference(ArrNeededBounds, this->ProgVarGraph, ABI, true); - // Flow inference from context sensitive keys to original keys. - performWorkListInference(ArrNeededBounds, this->CtxSensProgVarGraph, ABI); - // Now, by using potential bounds. - performWorkListInference(ArrNeededBounds, this->CtxSensProgVarGraph, ABI, true); - - // From all the sets of bounds computed for various array variables. Intersect them - // and find the common bound variable. + + // Converge using local bounds. + // From all the sets of bounds computed for various array variables. + // Intersect them and find the common bound variable. ABI.convergeInferredBounds(); + ArrNeededBoundsNew.clear(); + getBoundsNeededArrPointers(ArrPointers, ArrNeededBoundsNew); + // Now propagate the bounds information from context-sensitive keys + // to original keys. + performWorkListInference(ArrNeededBoundsNew, this->CtxSensProgVarGraph, ABI); + + ABI.convergeInferredBounds(); + // Now clear all inferred bounds so that context-sensitive nodes do not + // interfere with each other. + ABI.clearInferredBounds(); + ArrNeededBoundsNew.clear(); + // Get array variables that still need bounds. + getBoundsNeededArrPointers(ArrPointers, ArrNeededBoundsNew); + + // Now propagate the bounds information from normal keys to + // context-sensitive keys. + performWorkListInference(ArrNeededBoundsNew, this->RevCtxSensProgVarGraph, ABI); + + ABI.convergeInferredBounds(); ArrNeededBoundsNew.clear(); // Get array variables that still need bounds. getBoundsNeededArrPointers(ArrPointers, ArrNeededBoundsNew); + // Did we find bounds for new array variables? Changed = ArrNeededBounds != ArrNeededBoundsNew; if (ArrNeededBounds.size() == ArrNeededBoundsNew.size()) { diff --git a/clang/lib/CConv/ConstraintBuilder.cpp b/clang/lib/CConv/ConstraintBuilder.cpp index 96b09c697d7f..a499f44816af 100644 --- a/clang/lib/CConv/ConstraintBuilder.cpp +++ b/clang/lib/CConv/ConstraintBuilder.cpp @@ -41,7 +41,7 @@ void processRecordDecl(RecordDecl *Declaration, ProgramInfo &Info, auto VarTy = VD->getType(); unsigned int BeginLoc = VD->getBeginLoc().getRawEncoding(); unsigned int EndLoc = VD->getEndLoc().getRawEncoding(); - IsInLineStruct = !(VarTy->isPointerType() || VarTy->isArrayType()) && + IsInLineStruct = !isPtrOrArrayType(VarTy) && !VD->hasInit() && lastRecordLocation >= BeginLoc && lastRecordLocation <= EndLoc; @@ -56,7 +56,7 @@ void processRecordDecl(RecordDecl *Declaration, ProgramInfo &Info, bool FieldInUnionOrSysHeader = (FL.isInSystemHeader() || Definition->isUnion()); // mark field wild if the above is true and the field is a pointer - if ((FieldTy->isPointerType() || FieldTy->isArrayType()) && + if (isPtrOrArrayType(FieldTy) && (FieldInUnionOrSysHeader || IsInLineStruct)) { std::string Rsn = "External struct field or union encountered"; CVarOption CV = Info.getVariable(F, Context); @@ -92,8 +92,7 @@ class FunctionVisitor : public RecursiveASTVisitor { FullSourceLoc FL = Context->getFullLoc(VD->getBeginLoc()); SourceRange SR = VD->getSourceRange(); if (SR.isValid() && FL.isValid() && - (VD->getType()->isPointerType() || - VD->getType()->isArrayType())) { + isPtrOrArrayType(VD->getType())) { if (lastRecordLocation == VD->getBeginLoc().getRawEncoding()) { CVarOption CV = Info.getVariable(VD, Context); CB.constraintCVarToWild(CV, "Inline struct encountered."); @@ -423,7 +422,7 @@ class ConstraintGenVisitor : public RecursiveASTVisitor { bool VisitVarDecl(VarDecl *G) { if (G->hasGlobalStorage() && - (G->getType()->isPointerType() || G->getType()->isArrayType())) { + isPtrOrArrayType(G->getType())) { if (G->hasInit()) { CB.constrainLocalAssign(nullptr, G, G->getInit()); } @@ -539,7 +538,7 @@ class VariableAdderVisitor : public RecursiveASTVisitor { void addVariable(DeclaratorDecl *D) { VarAdder.addABoundsVariable(D); - if (D->getType()->isPointerType() || D->getType()->isArrayType()) + if (isPtrOrArrayType(D->getType())) VarAdder.addVariable(D, Context); } }; diff --git a/clang/lib/CConv/Utils.cpp b/clang/lib/CConv/Utils.cpp index d32cdd89f0e8..b0e8f44a7a05 100644 --- a/clang/lib/CConv/Utils.cpp +++ b/clang/lib/CConv/Utils.cpp @@ -177,6 +177,10 @@ bool isPointerType(clang::ValueDecl *VD) { return VD->getType().getTypePtr()->isPointerType(); } +bool isPtrOrArrayType(const clang::QualType &QT) { + return QT->isPointerType() || QT->isArrayType(); +} + bool isStructOrUnionType(clang::VarDecl *VD) { return VD->getType().getTypePtr()->isStructureType() || VD->getType().getTypePtr()->isUnionType(); From 058bd2bedf9fab75e3d147a93747d2144e089749 Mon Sep 17 00:00:00 2001 From: Michael Hicks Date: Sat, 17 Oct 2020 20:08:03 -0400 Subject: [PATCH 5/6] Comments; renaming. --- clang/include/clang/CConv/ABounds.h | 2 +- clang/include/clang/CConv/ProgramVar.h | 10 +++++---- clang/lib/CConv/AVarBoundsInfo.cpp | 12 +++++++--- .../CConv/ArrayBoundsInferenceConsumer.cpp | 22 +++++++++++-------- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/CConv/ABounds.h b/clang/include/clang/CConv/ABounds.h index 712ebea780f9..1be86725974a 100644 --- a/clang/include/clang/CConv/ABounds.h +++ b/clang/include/clang/CConv/ABounds.h @@ -49,8 +49,8 @@ class ABounds { virtual ABounds* makeCopy(BoundsKey NK) = 0; // Set that maintains all the bound keys that are used inin + // TODO: Is this still needed? static std::set KeysUsedInBounds; - static bool isKeyUsedInBounds(BoundsKey ToCheck); static ABounds *getBoundsInfo(AVarBoundsInfo *AVBInfo, diff --git a/clang/include/clang/CConv/ProgramVar.h b/clang/include/clang/CConv/ProgramVar.h index 0d760cccf70f..9da539211793 100644 --- a/clang/include/clang/CConv/ProgramVar.h +++ b/clang/include/clang/CConv/ProgramVar.h @@ -19,6 +19,8 @@ #include "clang/AST/ASTContext.h" #include "clang/CConv/PersistentSourceLoc.h" +// Unique ID for a program variable or constant literal, both of +// which could serve as bounds typedef uint32_t BoundsKey; // Class representing scope of a program variable. @@ -27,9 +29,9 @@ class ProgramVarScope { enum ScopeKind { // Function scope. FunctionScopeKind, - // Function parameter scope. + // Parameters of a particular function. FunctionParamScopeKind, - // Context sensitive argument scope. + // All arguments to a particular call CtxFunctionArgScopeKind, // Struct scope. StructScopeKind, @@ -280,7 +282,7 @@ class CtxFunctionArgScope : public FunctionParamScope { const PersistentSourceLoc &PSL); private: - PersistentSourceLoc PSL; + PersistentSourceLoc PSL; // source code location of this function call std::string CtxIDStr; static std::set AllCtxFnArgScopes; }; @@ -365,7 +367,7 @@ class ProgramVar { BoundsKey K; std::string VarName; const ProgramVarScope *VScope; - bool IsConstant; + bool IsConstant; // is a literal integer, not a variable // TODO: All the ProgramVars may not be used. We should try to figure out // a way to free unused program vars. static std::set AllProgramVars; diff --git a/clang/lib/CConv/AVarBoundsInfo.cpp b/clang/lib/CConv/AVarBoundsInfo.cpp index e9514527c4dd..7a716840b44d 100644 --- a/clang/lib/CConv/AVarBoundsInfo.cpp +++ b/clang/lib/CConv/AVarBoundsInfo.cpp @@ -133,6 +133,8 @@ bool AVarBoundsInfo::tryGetVariable(clang::Expr *E, return Ret; } +// Merging bounds B with the present bounds of key L at the same priority P +// Returns true if we update the bounds for L (with B) bool AVarBoundsInfo::mergeBounds(BoundsKey L, BoundsPriority P, ABounds *B) { bool RetVal = false; if (BInfo.find(L) != BInfo.end() && BInfo[L].find(P) != BInfo[L].end()) { @@ -643,6 +645,9 @@ AvarBoundsInference:: } } +// Consider all pointers, each of which may have multiple bounds, +// and intersect these. If they all converge to one possibility, +// use that. If not, give up (no bounds). bool AvarBoundsInference::convergeInferredBounds() { bool FoundSome = false; @@ -1229,10 +1234,10 @@ bool AVarBoundsInfo::performFlowAnalysis(ProgramInfo *PI) { while (Changed) { // Clear all inferred bounds. ABI.clearInferredBounds(); - // Regular flow inference. + // Regular flow inference (with no edges between callers and callees). performWorkListInference(ArrNeededBounds, this->ProgVarGraph, ABI); - // Converge using local bounds. + // Converge using local bounds (i.e., within each function). // From all the sets of bounds computed for various array variables. // Intersect them and find the common bound variable. ABI.convergeInferredBounds(); @@ -1240,7 +1245,8 @@ bool AVarBoundsInfo::performFlowAnalysis(ProgramInfo *PI) { ArrNeededBoundsNew.clear(); getBoundsNeededArrPointers(ArrPointers, ArrNeededBoundsNew); // Now propagate the bounds information from context-sensitive keys - // to original keys. + // to original keys (i.e., edges from callers to callees are present, + // but no local edges) performWorkListInference(ArrNeededBoundsNew, this->CtxSensProgVarGraph, ABI); ABI.convergeInferredBounds(); diff --git a/clang/lib/CConv/ArrayBoundsInferenceConsumer.cpp b/clang/lib/CConv/ArrayBoundsInferenceConsumer.cpp index 7314c8b0e2b3..c07f703df52d 100644 --- a/clang/lib/CConv/ArrayBoundsInferenceConsumer.cpp +++ b/clang/lib/CConv/ArrayBoundsInferenceConsumer.cpp @@ -185,8 +185,10 @@ bool tryGetBoundsKeyVar(Decl *D, BoundsKey &BK, ProgramInfo &Info, return CR.resolveBoundsKey(CV, BK) || ABInfo.tryGetVariable(D, BK); } -// Check if the provided expression is a call to one of the known -// memory allocators. +// Check if the provided expression E is a call to one of the known +// memory allocators. Will only return true if the argument to the call +// is a simple expression, and then organizes the ArgVals for determining +// a possible bound static bool isAllocatorCall(Expr *E, std::string &FName, ProgramInfo &I, ASTContext *C, std::vector &ArgVals) { @@ -251,7 +253,7 @@ static void handleAllocatorCall(QualType LHSType, BoundsKey LK, Expr *E, // is the RHS expression a call to allocator function? if (isAllocatorCall(E, FnName, Info, Context, ArgVals)) { BoundsKey RK; - bool FoundKey = false; + bool FoundSingleKeyInAllocExpr = false; // We consider everything as byte_count unless we see a sizeof // expression in which case if the type matches we use count bounds. bool IsByteBound = true; @@ -263,26 +265,26 @@ static void handleAllocatorCall(QualType LHSType, BoundsKey LK, Expr *E, if (LHSType == STy) { IsByteBound = false; } else { - FoundKey = false; + FoundSingleKeyInAllocExpr = false; break; } } else if (tryGetBoundsKeyVar(TmpE, RK, Info, Context)) { // Is this variable? - if (!FoundKey) { - FoundKey = true; + if (!FoundSingleKeyInAllocExpr) { + FoundSingleKeyInAllocExpr = true; } else { // Multiple variables found. - FoundKey = false; + FoundSingleKeyInAllocExpr = false; break; } } else { // Unrecognized expression. - FoundKey = false; + FoundSingleKeyInAllocExpr = false; break; } } - if (FoundKey) { + if (FoundSingleKeyInAllocExpr) { // If we found BoundsKey from the allocate expression? auto *PrgLVar = AVarBInfo.getProgramVar(LK); auto *PrgRVar = AVarBInfo.getProgramVar(RK); @@ -315,6 +317,8 @@ static void handleAllocatorCall(QualType LHSType, BoundsKey LK, Expr *E, // --- which gets translated to: // tmp <- bounds(count) // p->arr <- tmp + // TODO: This trick of introducing bounds for RHS expressions + // could be useful in other places (e.g., &{1,2,3} would have bounds 3) BoundsKey TmpKey = AVarBInfo.getRandomBKey(); AVarBInfo.replaceBounds(TmpKey, Declared, LBounds); AVarBInfo.addAssignment(LK, TmpKey); From 5d4d5ead396e20e283cf24219dde0168eea42189 Mon Sep 17 00:00:00 2001 From: Aravind Machiry Date: Mon, 26 Oct 2020 22:35:46 -0700 Subject: [PATCH 6/6] Added comments --- clang/lib/CConv/AVarBoundsInfo.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/clang/lib/CConv/AVarBoundsInfo.cpp b/clang/lib/CConv/AVarBoundsInfo.cpp index 7a716840b44d..21a24b30489f 100644 --- a/clang/lib/CConv/AVarBoundsInfo.cpp +++ b/clang/lib/CConv/AVarBoundsInfo.cpp @@ -682,6 +682,9 @@ AvarBoundsInference::convergeInferredBounds() { return FoundSome; } +// This function finds all the BoundsKeys (i.e., variables) in +// scope `DstScope` that are reachable from `FromVarK` in the +// graph `BKGraph`. All the reachable bounds key will be stored in `PotK`. bool AvarBoundsInference::getReachableBoundKeys(const ProgramVarScope *DstScope, BoundsKey FromVarK, std::set &PotK, @@ -1198,6 +1201,19 @@ void AVarBoundsInfo::getBoundsNeededArrPointers(const std::set &ArrPt std::inserter(AB, AB.end())); } +// We first propagate all the bounds information from explicit +// declarations and mallocs. +// For other variables that do not have any choice of bounds, +// we use potential bounds choices (FromPB), these are the variables +// that are upper bounds to an index variable used in an array indexing +// operation. +// For example: +// if (i < n) { +// ...arr[i]... +// } +// In the above case, we use n as a potential count bounds for arr. +// Note: we only use potential bounds for a variable when none of its +// predecessors have bounds. bool AVarBoundsInfo::performFlowAnalysis(ProgramInfo *PI) { bool RetVal = false; AvarBoundsInference ABI(this);