diff --git a/.gitignore b/.gitignore index 3ea38b6e0054..d64c90a8f582 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,7 @@ test/debuginfo-tests .vscode .vs +# Pycharm IDE files. +.idea/ +.cmake-build-debug/ + diff --git a/tools/checked-c-convert/ArrayBoundsInferenceConsumer.cpp b/tools/checked-c-convert/ArrayBoundsInferenceConsumer.cpp new file mode 100644 index 000000000000..99db12608095 --- /dev/null +++ b/tools/checked-c-convert/ArrayBoundsInferenceConsumer.cpp @@ -0,0 +1,153 @@ +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Implementation of all the methods of the class ArrayBoundsInferenceConsumer. +//===----------------------------------------------------------------------===// + +#include "ArrayBoundsInferenceConsumer.h" +#include "clang/AST/RecursiveASTVisitor.h" + +// This visitor handles the bounds of function local array variables. + +bool LocalVarABVisitor::VisitBinAssign(BinaryOperator *O) { + Expr *LHS = removeImpCasts(O->getLHS()); + Expr *RHS = removeImpCasts(O->getRHS()); + + Expr *sizeExpression; + // is the RHS expression a call to allocator function? + if(isAllocatorCall(RHS, &sizeExpression)) { + // if this an allocator function then + // sizeExpression contains the argument + // used for size argument + + // if LHS is just a variable? + // i.e., ptr = .. + // if yes, get the AST node of the target variable + Decl *targetVar; + if(isExpressionSimpleLocalVar(LHS, &targetVar)) { + if(Info.isIdentifiedArrayVar(targetVar)) { + Info.addAllocationBasedSizeExpr(targetVar, sizeExpression); + } else { + dumpNotArrayIdentifiedVariable(targetVar, RHS, llvm::dbgs()); + } + } + } + + return true; +} + +bool LocalVarABVisitor::VisitDeclStmt(DeclStmt *S) { + // Build rules based on initializers. 
+ for (const auto &D : S->decls()) { + if (VarDecl *VD = dyn_cast(D)) { + Expr *InitE = VD->getInit(); + Expr *sizeArg; + if(isAllocatorCall(InitE, &sizeArg)) { + if(Info.isIdentifiedArrayVar(D)) { + Info.addAllocationBasedSizeExpr(D, sizeArg); + } else { + dumpNotArrayIdentifiedVariable(D, InitE, llvm::dbgs()); + } + } + } + } + + return true; +} + +// check if the provided expression is a call +// to known memory allocators. +// if yes, return true along with the argument used as size +// assigned to the second paramter i.e., sizeArgument +bool LocalVarABVisitor::isAllocatorCall(Expr *currExpr, Expr **sizeArgument) { + if(currExpr != nullptr) { + currExpr = removeAuxillaryCasts(currExpr); + // check if this is a call expression. + if (CallExpr *CA = dyn_cast(currExpr)) { + // Is this a call to a named function? + FunctionDecl *calleeDecl = dyn_cast(CA->getCalleeDecl()); + if (calleeDecl) { + StringRef funcName = calleeDecl->getName(); + // check if the called function is a known allocator? + if (LocalVarABVisitor::AllocatorFunctionNames.find(funcName) != + LocalVarABVisitor::AllocatorFunctionNames.end()) { + if (sizeArgument != nullptr) { + *sizeArgument = CA->getArg(0); + } + return true; + } + } + } + } + return false; +} + +// check if expression is a simple local variable +// i.e., ptr = . +// if yes, return the referenced local variable as the return +// value of the argument. 
+bool LocalVarABVisitor::isExpressionSimpleLocalVar(Expr *toCheck, Decl **targetDecl) { + if (DeclRefExpr *DRE = dyn_cast(toCheck)) { + if (DeclaratorDecl *FD = dyn_cast(DRE->getDecl())) { + if (Decl *V = dyn_cast(FD)) { + *targetDecl = V; + return true; + } + } + } + return false; +} + +Expr *LocalVarABVisitor::removeImpCasts(Expr *toConvert) { + if(ImplicitCastExpr *impCast =dyn_cast(toConvert)) { + return impCast->getSubExpr(); + } + return toConvert; +} + +Expr *LocalVarABVisitor::removeCHKCBindTempExpr(Expr *toVeri) { + if(CHKCBindTemporaryExpr *toChkExpr = dyn_cast(toVeri)) { + return toChkExpr->getSubExpr(); + } + return toVeri; +} + +void LocalVarABVisitor::dumpNotArrayIdentifiedVariable(Decl *LHS, Expr *RHS, raw_ostream &O) { +#ifdef DEBUG + O << "Not identified as a array variable.\n RHS:"; + RHS->dump(O); + O << "\n LHS:"; + LHS->dump(O); + O << "\n"; +#endif +} + +Expr *LocalVarABVisitor::removeAuxillaryCasts(Expr *srcExpr) { + srcExpr = removeCHKCBindTempExpr(srcExpr); + if (CStyleCastExpr *C = dyn_cast(srcExpr)) { + srcExpr = C->getSubExpr(); + } + srcExpr = removeCHKCBindTempExpr(srcExpr); + srcExpr = removeImpCasts(srcExpr); + return srcExpr; +} + +std::set LocalVarABVisitor::AllocatorFunctionNames = {"malloc", "calloc"}; + +void HandleArrayVariablesBoundsDetection(ASTContext *C, ProgramInfo &I) { + // Run array bounds + LocalVarABVisitor LVAB(C, I); + TranslationUnitDecl *TUD = C->getTranslationUnitDecl(); + for (const auto &D : TUD->decls()) { + if (dyn_cast(D)) { + FunctionDecl *fb = dyn_cast(D); +#ifdef DEBUG + llvm::dbgs() << "Analyzing function:" << fb->getName() << "\n"; +#endif + } + LVAB.TraverseDecl(D); + } +} \ No newline at end of file diff --git a/tools/checked-c-convert/ArrayBoundsInferenceConsumer.h b/tools/checked-c-convert/ArrayBoundsInferenceConsumer.h new file mode 100644 index 000000000000..6bbb82ec322a --- /dev/null +++ b/tools/checked-c-convert/ArrayBoundsInferenceConsumer.h @@ -0,0 +1,59 @@ +// The LLVM Compiler Infrastructure 
+// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is an ASTConsumer that tries to infer the CheckedC style bounds +// for identified array variables. +//===----------------------------------------------------------------------===// + +#ifndef _ARRAYBOUNDSINFERENCECONSUMER_H +#define _ARRAYBOUNDSINFERENCECONSUMER_H + +#include "clang/AST/ASTConsumer.h" + +#include "ProgramInfo.h" + + +class LocalVarABVisitor: public clang::RecursiveASTVisitor { +public: + explicit LocalVarABVisitor(ASTContext *C, ProgramInfo &I) + : Context(C), Info(I) {} + + // handles assignment expression. + bool VisitBinAssign(BinaryOperator *O); + + + bool VisitDeclStmt(DeclStmt *S); +private: + // check if the provided expression is a call + // to known memory allocators. + // if yes, return true along with the argument used as size + // assigned to the second paramter i.e., sizeArgument + bool isAllocatorCall(Expr *currExpr, Expr **sizeArgument); + + // check if expression is a simple local variable + // i.e., ptr = . + // if yes, return the referenced local variable as the return + // value of the argument. + bool isExpressionSimpleLocalVar(Expr *toCheck, Decl **targetDecl); + + Expr *removeCHKCBindTempExpr(Expr *toVeri); + + // remove implicit casts added by clang to the AST + Expr *removeImpCasts(Expr *toConvert); + + Expr *removeAuxillaryCasts(Expr *srcExpr); + + // print variables that should have been detected as arrays but not. 
+ void dumpNotArrayIdentifiedVariable(Decl *LHS, Expr *RHS, raw_ostream &O); + + ASTContext *Context; + ProgramInfo &Info; + static std::set AllocatorFunctionNames; +}; + +void HandleArrayVariablesBoundsDetection(ASTContext *C, ProgramInfo &I); + +#endif //_ARRAYBOUNDSINFERENCECONSUMER_H diff --git a/tools/checked-c-convert/CMakeLists.txt b/tools/checked-c-convert/CMakeLists.txt index e7b6f852d608..4c675ac8af0c 100644 --- a/tools/checked-c-convert/CMakeLists.txt +++ b/tools/checked-c-convert/CMakeLists.txt @@ -11,6 +11,10 @@ add_clang_executable(checked-c-convert ConstraintBuilder.cpp PersistentSourceLoc.cpp Constraints.cpp + ConstraintVariables.cpp + ArrayBoundsInferenceConsumer.cpp + RewriteUtils.cpp + Utils.cpp ) target_link_libraries(checked-c-convert diff --git a/tools/checked-c-convert/CheckedCConvert.cpp b/tools/checked-c-convert/CheckedCConvert.cpp index dd505a60c807..2049837a6aec 100644 --- a/tools/checked-c-convert/CheckedCConvert.cpp +++ b/tools/checked-c-convert/CheckedCConvert.cpp @@ -31,6 +31,8 @@ #include "PersistentSourceLoc.h" #include "ProgramInfo.h" #include "MappingVisitor.h" +#include "RewriteUtils.h" +#include "ArrayBoundsInferenceConsumer.h" using namespace clang::driver; using namespace clang::tooling; @@ -51,886 +53,43 @@ cl::opt Verbose("verbose", cl::init(false), cl::cat(ConvertCategory)); +cl::opt mergeMultipleFuncDecls("mergefds", + cl::desc("Merge multiple declarations of functions."), + cl::init(false), + cl::cat(ConvertCategory)); + static cl::opt OutputPostfix("output-postfix", cl::desc("Postfix to add to the names of rewritten files, if " "not supplied writes to STDOUT"), cl::init("-"), cl::cat(ConvertCategory)); +static cl::opt + ConstraintOutputJson("constraint-output", + cl::desc("Path to the file where all the analysis information will be dumped as json"), + cl::init("constraint_output.json"), cl::cat(ConvertCategory)); + static cl::opt DumpStats( "dump-stats", cl::desc("Dump statistics"), cl::init(false), 
cl::cat(ConvertCategory)); +cl::opt handleVARARGS( "handle-varargs", + cl::desc("Enable handling of varargs in a sound manner"), + cl::init(false), + cl::cat(ConvertCategory)); + +cl::opt enablePropThruIType( "enable-itypeprop", + cl::desc("Enable propagation of constraints through ityped parameters/returns."), + cl::init(false), + cl::cat(ConvertCategory)); + static cl::opt BaseDir("base-dir", cl::desc("Base directory for the code we're translating"), cl::init(""), cl::cat(ConvertCategory)); -const clang::Type *getNextTy(const clang::Type *Ty) { - if(Ty->isPointerType()) { - // TODO: how to keep the qualifiers around, and what qualifiers do - // we want to keep? - QualType qtmp = Ty->getLocallyUnqualifiedSingleStepDesugaredType(); - return qtmp.getTypePtr()->getPointeeType().getTypePtr(); - } - else - return Ty; -} - -// Test to see if we can rewrite a given SourceRange. -// Note that R.getRangeSize will return -1 if SR is within -// a macro as well. This means that we can't re-write any -// text that occurs within a macro. -bool canRewrite(Rewriter &R, SourceRange &SR) { - return SR.isValid() && (R.getRangeSize(SR) != -1); -} - -ConstraintVariable *getHighest(std::set Vs, ProgramInfo &Info) { - if (Vs.size() == 0) - return nullptr; - - ConstraintVariable *V = nullptr; - - for (auto &P : Vs) { - if (V) { - if (V->isLt(*P, Info)) - V = P; - } else { - V = P; - } - } - - return V; -} - -// Walk the list of declarations and find a declaration accompanied by -// a definition and a function body. -FunctionDecl *getDefinition(FunctionDecl *FD) { - for (const auto &D : FD->redecls()) - if (FunctionDecl *tFD = dyn_cast(D)) - if (tFD->isThisDeclarationADefinition() && tFD->hasBody()) - return tFD; - - return nullptr; -} - -// Walk the list of declarations and find a declaration that is NOT -// a definition and does NOT have a body. 
-FunctionDecl *getDeclaration(FunctionDecl *FD) { - for (const auto &D : FD->redecls()) - if (FunctionDecl *tFD = dyn_cast(D)) - if (!tFD->isThisDeclarationADefinition()) - return tFD; - - return FD; -} - -// A Declaration, optional DeclStmt, and a replacement string -// for that Declaration. -struct DAndReplace -{ - Decl *Declaration; // The declaration to replace. - DeclStmt *Statement; // The DeclStmt, if it exists. - std::string Replacement; // The string to replace the declaration with. - bool fullDecl; // If the declaration is a function, true if - // replace the entire declaration or just the - // return declaration. - DAndReplace() : Declaration(nullptr), - Statement(nullptr), - Replacement(""), - fullDecl(false) { } - - DAndReplace(Decl *D, std::string R) : Declaration(D), - Statement(nullptr), - Replacement(R), - fullDecl(false) {} - - DAndReplace(Decl *D, std::string R, bool F) : Declaration(D), - Statement(nullptr), - Replacement(R), - fullDecl(F) {} - - - DAndReplace(Decl *D, DeclStmt *S, std::string R) : Declaration(D), - Statement(S), - Replacement(R), - fullDecl(false) { } -}; - -SourceLocation -getFunctionDeclarationEnd(FunctionDecl *FD, SourceManager &S) -{ - const FunctionDecl *oFD = nullptr; - - if (FD->hasBody(oFD) && oFD == FD) { - // Replace everything up to the beginning of the body. - const Stmt *Body = FD->getBody(oFD); - - int Offset = 0; - const char *Buf = S.getCharacterData(Body->getSourceRange().getBegin()); - - while (*Buf != ')') { - Buf--; - Offset--; - } - - return Body->getSourceRange().getBegin().getLocWithOffset(Offset); - } else { - return FD->getSourceRange().getEnd(); - } -} - -// Compare two DAndReplace values. The algorithm for comparing them relates -// their source positions. If two DAndReplace values refer to overlapping -// source positions, then they are the same. Otherwise, they are ordered -// by their placement in the input file. -// -// There are two special cases: Function declarations, and DeclStmts. 
In turn: -// -// - Function declarations might either be a DAndReplace describing the entire -// declaration, i.e. replacing "int *foo(void)" -// with "int *foo(void) : itype(_Ptr)". Or, it might describe just -// replacing only the return type, i.e. "_Ptr foo(void)". This is -// discriminated against with the 'fullDecl' field of the DAndReplace type -// and the comparison function first checks if the operands are -// FunctionDecls and if the 'fullDecl' field is set. -// - A DeclStmt of mupltiple Decls, i.e. 'int *a = 0, *b = 0'. In this case, -// we want the DAndReplace to refer only to the specific sub-region that -// would be replaced, i.e. '*a = 0' and '*b = 0'. To do that, we traverse -// the Decls contained in a DeclStmt and figure out what the appropriate -// source locations are to describe the positions of the independent -// declarations. -struct DComp -{ - SourceManager &SM; - DComp(SourceManager &S) : SM(S) { } - - SourceRange getWholeSR(SourceRange orig, DAndReplace dr) const { - SourceRange newSourceRange(orig); - - if (FunctionDecl *FD = dyn_cast(dr.Declaration)) { - newSourceRange.setEnd(getFunctionDeclarationEnd(FD, SM)); - if (dr.fullDecl == false) - newSourceRange = FD->getReturnTypeSourceRange(); - } - - return newSourceRange; - } - - bool operator()(const DAndReplace lhs, const DAndReplace rhs) const { - // Does the source location of the Decl in lhs overlap at all with - // the source location of rhs? - SourceRange srLHS = lhs.Declaration->getSourceRange(); - SourceRange srRHS = rhs.Declaration->getSourceRange(); - - // Take into account whether or not a FunctionDeclaration specifies - // the "whole" declaration or not. If it does not, it just specifies - // the return position. - srLHS = getWholeSR(srLHS, lhs); - srRHS = getWholeSR(srRHS, rhs); - - // Also take into account whether or not there is a multi-statement - // decl, because the generated ranges will overlap. 
- - if (lhs.Statement && !lhs.Statement->isSingleDecl()) { - SourceLocation newBegin = (*lhs.Statement->decls().begin())->getSourceRange().getBegin(); - bool found; - for (const auto &DT : lhs.Statement->decls()) { - if (DT == lhs.Declaration) { - found = true; - break; - } - newBegin = DT->getSourceRange().getEnd(); - } - assert (found); - srLHS.setBegin(newBegin); - // This is needed to make the subsequent test inclusive. - srLHS.setEnd(srLHS.getEnd().getLocWithOffset(-1)); - } - - if (rhs.Statement && !rhs.Statement->isSingleDecl()) { - SourceLocation newBegin = (*rhs.Statement->decls().begin())->getSourceRange().getBegin(); - bool found; - for (const auto &DT : rhs.Statement->decls()) { - if (DT == rhs.Declaration) { - found = true; - break; - } - newBegin = DT->getSourceRange().getEnd(); - } - assert (found); - srRHS.setBegin(newBegin); - // This is needed to make the subsequent test inclusive. - srRHS.setEnd(srRHS.getEnd().getLocWithOffset(-1)); - } - - SourceLocation x1 = srLHS.getBegin(); - SourceLocation x2 = srLHS.getEnd(); - SourceLocation y1 = srRHS.getBegin(); - SourceLocation y2 = srRHS.getEnd(); - - bool contained = SM.isBeforeInTranslationUnit(x1, y2) && - SM.isBeforeInTranslationUnit(y1, x2); - - if (contained) - return false; - else - return SM.isBeforeInTranslationUnit(x2, y1); - } -}; - -typedef std::set RSet; - -void rewrite(ParmVarDecl *PV, Rewriter &R, std::string sRewrite) { - // First, find all the declarations of the containing function. - DeclContext *DF = PV->getParentFunctionOrMethod(); - assert(DF != nullptr && "no parent function or method for decl"); - FunctionDecl *FD = cast(DF); - - // For each function, determine which parameter in the declaration - // matches PV, then, get the type location of that parameter - // declaration and re-write. - - // This is kind of hacky, maybe we should record the index of the - // parameter when we find it, instead of re-discovering it here. 
- int parmIndex = -1; - int c = 0; - for (const auto &I : FD->parameters()) { - if (I == PV) { - parmIndex = c; - break; - } - c++; - } - assert(parmIndex >= 0); - - for (FunctionDecl *toRewrite = FD; toRewrite != NULL; - toRewrite = toRewrite->getPreviousDecl()) { - int U = toRewrite->getNumParams(); - if (parmIndex < U) { - // TODO these declarations could get us into deeper - // header files. - ParmVarDecl *Rewrite = toRewrite->getParamDecl(parmIndex); - assert(Rewrite != NULL); - SourceRange TR = Rewrite->getSourceRange(); - - if (canRewrite(R, TR)) - R.ReplaceText(TR, sRewrite); - } - } -} - -void rewrite( VarDecl *VD, - Rewriter &R, - std::string sRewrite, - DeclStmt *Where, - RSet &skip, - const DAndReplace &N, - RSet &toRewrite, - ASTContext &A) -{ - if (Where != NULL) { - if (Verbose) { - errs() << "VarDecl at:\n"; - Where->dump(); - } - SourceRange TR = VD->getSourceRange(); - - // Is there an initializer? If there is, change TR so that it points - // to the START of the SourceRange of the initializer text, and drop - // an '=' token into sRewrite. - if (VD->hasInit()) { - SourceLocation eqLoc = VD->getInitializerStartLoc(); - TR.setEnd(eqLoc); - sRewrite = sRewrite + " = "; - } - - // Is it a variable type? This is the easy case, we can re-write it - // locally, at the site of the declaration. - if (Where->isSingleDecl()) { - if (canRewrite(R, TR)) { - R.ReplaceText(TR, sRewrite); - } else { - // This can happen if SR is within a macro. If that is the case, - // maybe there is still something we can do because Decl refers - // to a non-macro line. 
- - SourceRange possible(R.getSourceMgr().getExpansionLoc(TR.getBegin()), - VD->getLocation()); - - if (canRewrite(R, possible)) { - R.ReplaceText(possible, sRewrite); - std::string newStr = " " + VD->getName().str(); - R.InsertTextAfter(VD->getLocation(), newStr); - } else { - if (Verbose) { - errs() << "Still don't know how to re-write VarDecl\n"; - VD->dump(); - errs() << "at\n"; - Where->dump(); - errs() << "with " << sRewrite << "\n"; - } - } - } - } else if (!(Where->isSingleDecl()) && skip.find(N) == skip.end()) { - // Hack time! - // Sometimes, like in the case of a decl on a single line, we'll need to - // do multiple NewTyps at once. In that case, in the inner loop, we'll - // re-scan and find all of the NewTyps related to that line and do - // everything at once. That means sometimes we'll get NewTyps that - // we don't want to process twice. We'll skip them here. - - // Step 1: get the re-written types. - RSet rewritesForThisDecl(DComp(R.getSourceMgr())); - auto I = toRewrite.find(N); - while (I != toRewrite.end()) { - DAndReplace tmp = *I; - if (tmp.Statement == Where) - rewritesForThisDecl.insert(tmp); - ++I; - } - - // Step 2: remove the original line from the program. - SourceRange DR = Where->getSourceRange(); - R.RemoveText(DR); - - // Step 3: for each decl in the original, build up a new string - // and if the original decl was re-written, write that - // out instead (WITH the initializer). 
- std::string newMultiLineDeclS = ""; - raw_string_ostream newMLDecl(newMultiLineDeclS); - for (const auto &DL : Where->decls()) { - DAndReplace N; - bool found = false; - VarDecl *VDL = dyn_cast(DL); - assert(VDL != NULL); - - for (const auto &NLT : rewritesForThisDecl) - if (NLT.Declaration == DL) { - N = NLT; - found = true; - break; - } - - if (found) { - newMLDecl << N.Replacement; - if (Expr *E = VDL->getInit()) { - newMLDecl << " = "; - E->printPretty(newMLDecl, nullptr, A.getPrintingPolicy()); - } - newMLDecl << ";\n"; - } - else { - DL->print(newMLDecl); - newMLDecl << ";\n"; - } - } - - // Step 4: Write out the string built up in step 3. - R.InsertTextAfter(DR.getEnd(), newMLDecl.str()); - - // Step 5: Be sure and skip all of the NewTyps that we dealt with - // during this time of hacking, by adding them to the - // skip set. - - for (const auto &TN : rewritesForThisDecl) - skip.insert(TN); - } else { - if (Verbose) { - errs() << "Don't know how to re-write VarDecl\n"; - VD->dump(); - errs() << "at\n"; - Where->dump(); - errs() << "with " << N.Replacement << "\n"; - } - } - } else { - if (Verbose) { - errs() << "Don't know where to rewrite a VarDecl! "; - VD->dump(); - errs() << "\n"; - } - } -} - -// Visit each Decl in toRewrite and apply the appropriate pointer type -// to that Decl. The state of the rewrite is contained within R, which -// is both input and output. R is initialized to point to the 'main' -// source file for this transformation. toRewrite contains the set of -// declarations to rewrite. S is passed for source-level information -// about the current compilation unit. skip indicates some rewrites that -// we should skip because we already applied them, for example, as part -// of turning a single line declaration into a multi-line declaration. 
-void rewrite( Rewriter &R, - RSet &toRewrite, - RSet &skip, - SourceManager &S, - ASTContext &A, - std::set &Files) -{ - for (const auto &N : toRewrite) { - Decl *D = N.Declaration; - DeclStmt *Where = N.Statement; - assert(D != nullptr); - - if (Verbose) { - errs() << "Replacing type of decl:\n"; - D->dump(); - errs() << "with " << N.Replacement << "\n"; - } - - // Get a FullSourceLoc for the start location and add it to the - // list of file ID's we've touched. - SourceRange tTR = D->getSourceRange(); - FullSourceLoc tFSL(tTR.getBegin(), S); - Files.insert(tFSL.getFileID()); - - // Is it a parameter type? - if (ParmVarDecl *PV = dyn_cast(D)) { - assert(Where == NULL); - rewrite(PV, R, N.Replacement); - } else if (VarDecl *VD = dyn_cast(D)) { - rewrite(VD, R, N.Replacement, Where, skip, N, toRewrite, A); - } else if (FunctionDecl *UD = dyn_cast(D)) { - // TODO: If the return type is a fully-specified function pointer, - // then clang will give back an invalid source range for the - // return type source range. For now, check that the source - // range is valid. - // Additionally, a source range can be (mis) identified as - // spanning multiple files. We don't know how to re-write that, - // so don't. - - if (N.fullDecl) { - SourceRange SR = UD->getSourceRange(); - SR.setEnd(getFunctionDeclarationEnd(UD, S)); - - if (canRewrite(R, SR)) - R.ReplaceText(SR, N.Replacement); - } else { - SourceRange SR = UD->getReturnTypeSourceRange(); - if (canRewrite(R, SR)) - R.ReplaceText(SR, N.Replacement); - } - } else if (FieldDecl *FD = dyn_cast(D)) { - SourceRange SR = FD->getSourceRange(); - std::string sRewrite = N.Replacement; - - if (canRewrite(R, SR)) - R.ReplaceText(SR, sRewrite); - } - } -} - -static -bool -canWrite(std::string filePath, std::set &iof, std::string b) { - // Was this file explicitly provided on the command line? - if (iof.count(filePath) > 0) - return true; - // Is this file contained within the base directory? 
- - sys::path::const_iterator baseIt = sys::path::begin(b); - sys::path::const_iterator pathIt = sys::path::begin(filePath); - sys::path::const_iterator baseEnd = sys::path::end(b); - sys::path::const_iterator pathEnd = sys::path::end(filePath); - std::string baseSoFar = (*baseIt).str() + sys::path::get_separator().str(); - std::string pathSoFar = (*pathIt).str() + sys::path::get_separator().str(); - ++baseIt; - ++pathIt; - - while ((baseIt != baseEnd) && (pathIt != pathEnd)) { - sys::fs::file_status baseStatus; - sys::fs::file_status pathStatus; - std::string s1 = (*baseIt).str(); - std::string s2 = (*pathIt).str(); - - if (std::error_code ec = sys::fs::status(baseSoFar, baseStatus)) - return false; - - if (std::error_code ec = sys::fs::status(pathSoFar, pathStatus)) - return false; - - if (!sys::fs::equivalent(baseStatus, pathStatus)) - break; - - if (s1 != sys::path::get_separator().str()) - baseSoFar += (s1 + sys::path::get_separator().str()); - if (s2 != sys::path::get_separator().str()) - pathSoFar += (s2 + sys::path::get_separator().str()); - - ++baseIt; - ++pathIt; - } - - if (baseIt == baseEnd && baseSoFar == pathSoFar) - return true; - else - return false; -} - -void emit(Rewriter &R, ASTContext &C, std::set &Files, - std::set &InOutFiles) { - - // Check if we are outputing to stdout or not, if we are, just output the - // main file ID to stdout. 
- if (Verbose) - errs() << "Writing files out\n"; - - SmallString<254> baseAbs(BaseDir); - std::error_code ec = sys::fs::make_absolute(baseAbs); - assert(!ec); - sys::path::remove_filename(baseAbs); - std::string base = baseAbs.str(); - - SourceManager &SM = C.getSourceManager(); - if (OutputPostfix == "-") { - if (const RewriteBuffer *B = R.getRewriteBufferFor(SM.getMainFileID())) - B->write(outs()); - } else - for (const auto &F : Files) - if (const RewriteBuffer *B = R.getRewriteBufferFor(F)) - if (const FileEntry *FE = SM.getFileEntryForID(F)) { - assert(FE->isValid()); - - // Produce a path/file name for the rewritten source file. - // That path should be the same as the old one, with a - // suffix added between the file name and the extension. - // For example \foo\bar\a.c should become \foo\bar\a.checked.c - // if the OutputPostfix parameter is "checked" . - - std::string pfName = sys::path::filename(FE->getName()).str(); - std::string dirName = sys::path::parent_path(FE->getName()).str(); - std::string fileName = sys::path::remove_leading_dotslash(pfName).str(); - std::string ext = sys::path::extension(fileName).str(); - std::string stem = sys::path::stem(fileName).str(); - std::string nFileName = stem + "." + OutputPostfix + ext; - std::string nFile = nFileName; - if (dirName.size() > 0) - nFile = dirName + sys::path::get_separator().str() + nFileName; - - // Write this file out if it was specified as a file on the command - // line. 
- SmallString<254> feAbs(FE->getName()); - std::string feAbsS = ""; - if (std::error_code ec = sys::fs::make_absolute(feAbs)) { - if (Verbose) - errs() << "could not make path absolote\n"; - } else - feAbsS = sys::path::remove_leading_dotslash(feAbs.str()); - - if(canWrite(feAbsS, InOutFiles, base)) { - std::error_code EC; - raw_fd_ostream out(nFile, EC, sys::fs::F_None); - - if (!EC) { - if (Verbose) - outs() << "writing out " << nFile << "\n"; - B->write(out); - } - else - errs() << "could not open file " << nFile << "\n"; - // This is awkward. What to do? Since we're iterating, - // we could have created other files successfully. Do we go back - // and erase them? Is that surprising? For now, let's just keep - // going. - } - } -} - -// Class for visiting declarations during re-writing to find locations to -// insert casts. Right now, it looks specifically for 'free'. -class CastPlacementVisitor : public RecursiveASTVisitor { -public: - explicit CastPlacementVisitor(ASTContext *C, ProgramInfo &I, Rewriter &R, - RSet &DR, std::set &Files, std::set &V) - : Context(C), R(R), Info(I), rewriteThese(DR), Files(Files), VisitedSet(V) {} - - bool VisitCallExpr(CallExpr *); - bool VisitFunctionDecl(FunctionDecl *); -private: - std::set getParamsForExtern(std::string); - bool anyTop(std::set); - ASTContext *Context; - Rewriter &R; - ProgramInfo &Info; - RSet &rewriteThese; - std::set &Files; - std::set &VisitedSet; -}; - -// For a given function name, what are the argument positions for that function -// that we would want to treat specially and insert a cast into? -std::set CastPlacementVisitor::getParamsForExtern(std::string E) { - return StringSwitch>(E) - .Case("free", {0}) - .Default(std::set()); -} - -// Checks the bindings in the environment for all of the constraints -// associated with C and returns true if any of those constraints -// are WILD. 
-bool CastPlacementVisitor::anyTop(std::set C) { - bool anyTopFound = false; - Constraints &CS = Info.getConstraints(); - Constraints::EnvironmentMap &env = CS.getVariables(); - for (ConstraintVariable *c : C) { - if (PointerVariableConstraint *pvc = dyn_cast(c)) { - for (uint32_t v : pvc->getCvars()) { - ConstAtom *CK = env[CS.getVar(v)]; - if (CK->getKind() == Atom::A_Wild) { - anyTopFound = true; - } - } - } - } - return anyTopFound; -} - -// This function checks how to re-write a function declaration. -bool CastPlacementVisitor::VisitFunctionDecl(FunctionDecl *FD) { - - // Get all of the constraint variables for the function. - // Check and see if we have a definition in scope. If we do, then: - // For the return value and each of the parameters, do the following: - // 1. Get a constraint variable representing the definition (def) and the - // declaration (dec). - // 2. Check if def < dec, dec < def, or dec = def. - // 3. Only if def < dec, we insert a bounds-safe interface. - // If we don't have a definition in scope, we can assert that all of - // the constraint variables are equal. - // Finally, we need to note that we've visited this particular function, and - // that we shouldn't make one of these visits again. - - auto funcName = FD->getNameAsString(); - - // Make sure we haven't visited this function name before, and that we - // only visit it once. - if (VisitedSet.find(funcName) != VisitedSet.end()) - return true; - else - VisitedSet.insert(funcName); - - // Do we have a definition for this declaration? - FunctionDecl *Definition = getDefinition(FD); - FunctionDecl *Declaration = getDeclaration(FD); - - if(Definition == nullptr) - return true; - - assert (Declaration != nullptr); - - // Get constraint variables for the declaration and the definition. - // Those constraints should be function constraints. 
- auto cDecl = dyn_cast( - getHighest(Info.getVariable(Declaration, Context, false), Info)); - auto cDefn = dyn_cast( - getHighest(Info.getVariable(Definition, Context, true), Info)); - assert(cDecl != nullptr); - assert(cDefn != nullptr); - - if (cDecl->numParams() == cDefn->numParams()) { - // Track whether we did any work and need to make a substitution or not. - bool didAny = cDecl->numParams() > 0; - std::string s = ""; - std::vector parmStrs; - // Compare parameters. - for (unsigned i = 0; i < cDecl->numParams(); ++i) { - auto Decl = getHighest(cDecl->getParamVar(i), Info); - auto Defn = getHighest(cDefn->getParamVar(i), Info); - assert(Decl); - assert(Defn); - - // If this holds, then we want to insert a bounds safe interface. - bool anyConstrained = Defn->anyChanges(Info.getConstraints().getVariables()); - if (Defn->isLt(*Decl, Info) && anyConstrained) { - std::string scratch = ""; - raw_string_ostream declText(scratch); - Definition->getParamDecl(i)->print(declText); - std::string ctype = Defn->mkString(Info.getConstraints().getVariables(), false); - std::string bi = declText.str() + " : itype("+ctype+") "; - parmStrs.push_back(bi); - } else { - // Do what we used to do. - if (anyConstrained) { - std::string v = Defn->mkString(Info.getConstraints().getVariables()); - if (PVConstraint *PVC = dyn_cast(Defn)) { - if (PVC->getItypePresent()) { - v = v + " : " + PVC->getItype(); - } - } - parmStrs.push_back(v); - } else { - std::string scratch = ""; - raw_string_ostream declText(scratch); - Definition->getParamDecl(i)->print(declText); - parmStrs.push_back(declText.str()); - } - } - } - - // Compare returns. - auto Decl = getHighest(cDecl->getReturnVars(), Info); - auto Defn = getHighest(cDefn->getReturnVars(), Info); - - // Insert a bounds safe interface for the return. 
- std::string returnVar = ""; - std::string endStuff = ""; - bool anyConstrained = Defn->anyChanges(Info.getConstraints().getVariables()); - if (Defn->isLt(*Decl, Info) && anyConstrained) { - std::string ctype = Defn->mkString(Info.getConstraints().getVariables()); - returnVar = Defn->getTy(); - endStuff = " : itype("+ctype+") "; - didAny = true; - } else { - // If we used to implement a bounds-safe interface, continue to do that. - returnVar = Decl->mkString(Info.getConstraints().getVariables()); - - if (PVConstraint *PVC = dyn_cast(Decl)) { - if (PVC->getItypePresent()) { - assert(PVC->getItype().size() > 0); - endStuff = " : " + PVC->getItype(); - didAny = true; - } - } - } - - s = returnVar + cDecl->getName() + "("; - if (parmStrs.size() > 0) { - std::ostringstream ss; - - std::copy(parmStrs.begin(), parmStrs.end() - 1, - std::ostream_iterator(ss, ", ")); - ss << parmStrs.back(); - - s = s + ss.str() + ")"; - } else { - s = s + "void)"; - } - - if (endStuff.size() > 0) - s = s + endStuff; - - if (didAny) - // Do all of the declarations. - for (const auto &RD : Definition->redecls()) - rewriteThese.insert(DAndReplace(RD, s, true)); - } - - return true; -} - -bool CastPlacementVisitor::VisitCallExpr(CallExpr *E) { - return true; -} - -class RewriteConsumer : public ASTConsumer { -public: - explicit RewriteConsumer(ProgramInfo &I, - std::set &F, ASTContext *Context) : Info(I), InOutFiles(F) {} - - virtual void HandleTranslationUnit(ASTContext &Context) { - Info.enterCompilationUnit(Context); - - Rewriter R(Context.getSourceManager(), Context.getLangOpts()); - std::set Files; - - std::set v; - RSet rewriteThese(DComp(Context.getSourceManager())); - // Unification is done, so visit and see if we need to place any casts - // in the program. 
- CastPlacementVisitor CPV = CastPlacementVisitor(&Context, Info, R, rewriteThese, Files, v); - for (const auto &D : Context.getTranslationUnitDecl()->decls()) - CPV.TraverseDecl(D); - - // Build a map of all of the PersistentSourceLoc's back to some kind of - // Stmt, Decl, or Type. - VariableMap &VarMap = Info.getVarMap(); - std::set keys; - - for (const auto &I : VarMap) - keys.insert(I.first); - std::map PSLMap; - VariableDecltoStmtMap VDLToStmtMap; - - RSet skip(DComp(Context.getSourceManager())); - MappingVisitor V(keys, Context); - TranslationUnitDecl *TUD = Context.getTranslationUnitDecl(); - for (const auto &D : TUD->decls()) - V.TraverseDecl(D); - - std::tie(PSLMap, VDLToStmtMap) = V.getResults(); - - for (const auto &V : Info.getVarMap()) { - PersistentSourceLoc PLoc = V.first; - std::set Vars = V.second; - // I don't think it's important that Vars have any especial size, but - // at one point I did so I'm keeping this comment here. It's possible - // that what we really need to do is to ensure that when we work with - // either PV or FV below, that they are the LUB of what is in Vars. - // assert(Vars.size() > 0 && Vars.size() <= 2); - - // PLoc specifies the location of the variable whose type it is to - // re-write, but not where the actual type storage is. To get that, we - // need to turn PLoc into a Decl and then get the SourceRange for the - // type of the Decl. Note that what we need to get is the ExpansionLoc - // of the type specifier, since we want where the text is printed before - // the variable name, not the typedef or #define that creates the - // name of the type. - - Stmt *S = nullptr; - Decl *D = nullptr; - DeclStmt *DS = nullptr; - clang::Type *T = nullptr; - - std::tie(S, D, T) = PSLMap[PLoc]; - - if (D) { - // We might have one Decl for multiple Vars, however, one will be a - // PointerVar so we'll use that. 
- VariableDecltoStmtMap::iterator K = VDLToStmtMap.find(D); - if (K != VDLToStmtMap.end()) - DS = K->second; - - PVConstraint *PV = nullptr; - FVConstraint *FV = nullptr; - for (const auto &V : Vars) { - if (PVConstraint *T = dyn_cast(V)) - PV = T; - else if (FVConstraint *T = dyn_cast(V)) - FV = T; - } - - if (PV && PV->anyChanges(Info.getConstraints().getVariables())) { - // Rewrite a declaration. - std::string newTy = PV->mkString(Info.getConstraints().getVariables()); - rewriteThese.insert(DAndReplace(D, DS, newTy));; - } else if (FV && FV->anyChanges(Info.getConstraints().getVariables())) { - // Rewrite a function variables return value. - std::set V = FV->getReturnVars(); - if (V.size() > 0) { - std::string newTy = - (*V.begin())->mkString(Info.getConstraints().getVariables()); - rewriteThese.insert(DAndReplace(D, DS, newTy)); - } - } - } - } - - rewrite(R, rewriteThese, skip, Context.getSourceManager(), Context, Files); - - // Output files. - emit(R, Context, Files, InOutFiles); - - Info.exitCompilationUnit(); - return; - } - -private: - ProgramInfo &Info; - std::set &InOutFiles; -}; template class GenericAction : public ASTFrontendAction { @@ -947,14 +106,14 @@ class GenericAction : public ASTFrontendAction { }; template -class GenericAction2 : public ASTFrontendAction { +class RewriteAction : public ASTFrontendAction { public: - GenericAction2(V &I, U &P) : Info(I),Files(P) {} + RewriteAction(V &I, U &P) : Info(I),Files(P) {} virtual std::unique_ptr CreateASTConsumer(CompilerInstance &Compiler, StringRef InFile) { return std::unique_ptr - (new T(Info, Files, &Compiler.getASTContext())); + (new T(Info, Files, &Compiler.getASTContext(), OutputPostfix, BaseDir)); } private: @@ -1025,11 +184,10 @@ int main(int argc, const char **argv) { std::set inoutPaths; for (const auto &S : args) { - SmallString<255> abs_path(S); - if (std::error_code ec = sys::fs::make_absolute(abs_path)) - errs() << "could not make absolute\n"; - else - inoutPaths.insert(abs_path.str()); 
+ std::string abs_path; + if(getAbsoluteFilePath(S, abs_path)) { + inoutPaths.insert(abs_path); + } } if (OutputPostfix == "-" && inoutPaths.size() > 1) { @@ -1063,13 +221,23 @@ int main(int argc, const char **argv) { assert(R.second == true); if (Verbose) outs() << "Constraints solved\n"; - if (DumpIntermediate) + if (DumpIntermediate) { Info.dump(); + outs() << "Writing json output to:" << ConstraintOutputJson << "\n"; + std::error_code ec; + llvm::raw_fd_ostream output_json(ConstraintOutputJson, ec); + if(!output_json.has_error()) { + Info.dump_json(output_json); + output_json.close(); + } else { + Info.dump_json(llvm::errs()); + } + } // 3. Re-write based on constraints. std::unique_ptr RewriteTool = newFrontendActionFactoryB - >>( + >>( Info, inoutPaths); if (RewriteTool) diff --git a/tools/checked-c-convert/ConstraintBuilder.cpp b/tools/checked-c-convert/ConstraintBuilder.cpp index ec831c6ff114..dc601ff23360 100644 --- a/tools/checked-c-convert/ConstraintBuilder.cpp +++ b/tools/checked-c-convert/ConstraintBuilder.cpp @@ -12,6 +12,12 @@ using namespace llvm; using namespace clang; + +// flags +// constraint all the arguments to a function +// accepting var args to be wild. +#define CONSTRAINT_ARGS_TO_VARGS_WILD + // Special-case handling for decl introductions. For the moment this covers: // * void-typed variables // * va_list-typed variables @@ -150,9 +156,16 @@ class FunctionVisitor : public RecursiveASTVisitor { if (SR.isValid() && FL.isValid() && !FL.isInSystemHeader() && (D->getType()->isPointerType() || D->getType()->isArrayType())) { + // add the variable with in the function body context. Info.addVariable(D, S, Context); specialCaseVarIntros(D, Info, Context); + // if this is a static array declaration. + // make this an array. 
+ if(D->getType()->isArrayType()) { + Constraints &CS = Info.getConstraints(); + constraintInBodyVariable(D, CS.getArr()); + } } } @@ -174,6 +187,7 @@ class FunctionVisitor : public RecursiveASTVisitor { // int ** b = &(*(a)); // and the & * cancel each other out. // 4. Assignments from casts. Here, we use the implication rule. + // 5. Assignments from call expressions i.e., a = foo(..) // // In any of these cases, due to conditional expressions, the number of // variables on the RHS could be 0 or more. We just do the same rule @@ -183,130 +197,168 @@ class FunctionVisitor : public RecursiveASTVisitor { // assigning to. V represents constraints on a pointer variable. RHS is // an expression which might produce constraint variables, or, it might // be some expression like NULL, an integer constant or a cast. - void constrainAssign( std::set V, + void constrainLocalAssign( std::set V, QualType lhsType, Expr *RHS) { if (!RHS || V.size() == 0) return; + std::set RHSConstraints; + RHSConstraints.clear(); + Constraints &CS = Info.getConstraints(); - std::set W = Info.getVariable(RHS, Context); - if (W.size() > 0) { - // Case 1. - // There are constraint variables for the RHS, so, use those over - // anything else we could infer. - constrainEq(V, W, Info); + RHS = getNormalizedExpr(RHS); + // if this is a call expression? + if (CallExpr *CE = dyn_cast(RHS)) { + // case 5 + // if this is a call expression? + // is this functions return type an itype + FunctionDecl *Calle = CE->getDirectCallee(); + if(Calle) { + // get the function declaration and look for + // itype in the return + if(getDeclaration(Calle) != nullptr) { + Calle = getDeclaration(Calle); + } + bool itypeHandled = false; + // if this function return an itype? + if(Calle->hasInteropTypeExpr()) { + itypeHandled = handleITypeAssignment(V, Calle->getInteropTypeExpr()); + } + // if this is not an itype + if(!itypeHandled) { + // get the constraint variable corresponding + // to the declaration. 
+ RHSConstraints = Info.getVariable(RHS, Context, false); + if (RHSConstraints.size() > 0) { + constrainEq(V, RHSConstraints, Info); + } + } + } } else { - // Remove the parens from the RHS expression, this makes it easier for - // us to look at the semantics. RHS = RHS->IgnoreParens(); - // Cases 2-4. - if (RHS->isIntegerConstantExpr(*Context)) { - // Case 2. - if (!RHS->isNullPointerConstant(*Context, - Expr::NPC_ValueDependentIsNotNull)) - for (const auto &U : V) - if (PVConstraint *PVC = dyn_cast(U)) - for (const auto &J : PVC->getCvars()) - CS.addConstraint( - CS.createEq(CS.getOrCreateVar(J), CS.getWild())); - } else { + // Cases 2 + if(isNULLExpression(RHS, *Context)) { + // Do Nothing. + } else if (RHS->isIntegerConstantExpr(*Context) && + !RHS->isNullPointerConstant(*Context, Expr::NPC_ValueDependentIsNotNull)) { + // Case 2, Special handling. If this is an assignment of non-zero + // integer constraint, then make the pointer WILD. + for (const auto &U : V) { + if (PVConstraint *PVC = dyn_cast(U)) + for (const auto &J : PVC->getCvars()) { + CS.addConstraint( + CS.createEq(CS.getOrCreateVar(J), CS.getWild())); + } + } + } else if (UnaryOperator *UO = dyn_cast(RHS)) { // Cases 3-4. - if (UnaryOperator *UO = dyn_cast(RHS)) { - if (UO->getOpcode() == UO_AddrOf) { - // Case 3. - // Is there anything to do here, or is it implicitly handled? - } + if (UO->getOpcode() == UO_AddrOf) { + // Case 3. + // Is there anything to do here, or is it implicitly handled? } - else if (CStyleCastExpr *C = dyn_cast(RHS)) { - // Case 4. - Expr *SE = C->getSubExpr(); - // Remove any binding of a Checked C temporary variable. - if (CHKCBindTemporaryExpr *Temp = dyn_cast(SE)) - SE = Temp->getSubExpr(); - W = Info.getVariable(SE, Context); - QualType rhsTy = RHS->getType(); - bool rulesFired = false; - if (Info.checkStructuralEquality(V, W, lhsType, rhsTy)) { - // This has become a little stickier to think about. 
- // What do you do here if we determine that two things with - // very different arity are structurally equal? Is that even - // possible? - - // We apply a few rules here to determine if there are any - // finer-grained constraints we can add. One of them is if the - // value being cast from on the RHS is a call to malloc, and if - // the type passed to malloc is equal to both lhsType and rhsTy. - // If it is, we can do something less conservative. - if (CallExpr *CA = dyn_cast(SE)) { - // Is this a call to malloc? Can we coerce the callee - // to a NamedDecl? - FunctionDecl *calleeDecl = - dyn_cast(CA->getCalleeDecl()); - if (calleeDecl && calleeDecl->getName() == "malloc") { - // It's a call to malloc. What about the parameter to the call? - if (CA->getNumArgs() > 0) { - UnaryExprOrTypeTraitExpr *arg = - dyn_cast(CA->getArg(0)); - if (arg && arg->isArgumentType()) { - // Check that the argument is a sizeof. - if (arg->getKind() == UETT_SizeOf) { - QualType argTy = arg->getArgumentType(); - // argTy should be made a pointer, then compared for - // equality to lhsType and rhsTy. - QualType argPTy = Context->getPointerType(argTy); - - if (Info.checkStructuralEquality(V, W, argPTy, lhsType) && - Info.checkStructuralEquality(V, W, argPTy, rhsTy)) - { - rulesFired = true; - // At present, I don't think we need to add an - // implication based constraint since this rule - // only fires if there is a cast from a call to malloc. - // Since malloc is an external, there's no point in - // adding constraints to it. - } + } else if (CStyleCastExpr *C = dyn_cast(RHS)) { + // Case 4. + Expr *SE = C->getSubExpr(); + // Remove any binding of a Checked C temporary variable. 
+ if (CHKCBindTemporaryExpr *Temp = dyn_cast(SE)) + SE = Temp->getSubExpr(); + RHSConstraints = Info.getVariable(SE, Context); + QualType rhsTy = RHS->getType(); + bool rulesFired = false; + if (Info.checkStructuralEquality(V, RHSConstraints, lhsType, rhsTy)) { + // This has become a little stickier to think about. + // What do you do here if we determine that two things with + // very different arity are structurally equal? Is that even + // possible? + + // We apply a few rules here to determine if there are any + // finer-grained constraints we can add. One of them is if the + // value being cast from on the RHS is a call to malloc, and if + // the type passed to malloc is equal to both lhsType and rhsTy. + // If it is, we can do something less conservative. + if (CallExpr *CA = dyn_cast(SE)) { + // get the declaration constraints of the callee. + RHSConstraints = Info.getVariable(SE, Context); + // Is this a call to malloc? Can we coerce the callee + // to a NamedDecl? + FunctionDecl *calleeDecl = + dyn_cast(CA->getCalleeDecl()); + if (calleeDecl && calleeDecl->getName() == "malloc") { + // It's a call to malloc. What about the parameter to the call? + if (CA->getNumArgs() > 0) { + UnaryExprOrTypeTraitExpr *arg = + dyn_cast(CA->getArg(0)); + if (arg && arg->isArgumentType()) { + // Check that the argument is a sizeof. + if (arg->getKind() == UETT_SizeOf) { + QualType argTy = arg->getArgumentType(); + // argTy should be made a pointer, then compared for + // equality to lhsType and rhsTy. + QualType argPTy = Context->getPointerType(argTy); + + if (Info.checkStructuralEquality(V, RHSConstraints, argPTy, lhsType) && + Info.checkStructuralEquality(V, RHSConstraints, argPTy, rhsTy)) { + rulesFired = true; + // At present, I don't think we need to add an + // implication based constraint since this rule + // only fires if there is a cast from a call to malloc. + // Since malloc is an external, there's no point in + // adding constraints to it. 
} } } } } - } - - // If none of the above rules for cast behavior fired, then - // we need to fall back to doing something conservative. - if (rulesFired == false) { - // Constrain everything in both to top. - // Remove the casts from RHS and try again to get a variable - // from it. We want to constrain that side to wild as well. - RHS = RHS->IgnoreCasts(); - W = Info.getVariable(RHS, Context); - for (const auto &A : W) - if (PVConstraint *PVC = dyn_cast(A)) - for (const auto &B : PVC->getCvars()) - CS.addConstraint( - CS.createEq(CS.getOrCreateVar(B), CS.getWild())); - - for (const auto &A : V) - if (PVConstraint *PVC = dyn_cast(A)) - for (const auto &B : PVC->getCvars()) - CS.addConstraint( - CS.createEq(CS.getOrCreateVar(B), CS.getWild())); } } + + // If none of the above rules for cast behavior fired, then + // we need to fall back to doing something conservative. + if (rulesFired == false) { + // Constrain everything in both to top. + // Remove the casts from RHS and try again to get a variable + // from it. We want to constrain that side to wild as well. + RHSConstraints = Info.getVariable(SE, Context, true); + for (const auto &A : RHSConstraints) { + if (PVConstraint *PVC = dyn_cast(A)) + for (const auto &B : PVC->getCvars()) + CS.addConstraint( + CS.createEq(CS.getOrCreateVar(B), CS.getWild())); + } + + for (const auto &A : V) { + if (PVConstraint *PVC = dyn_cast(A)) + for (const auto &B : PVC->getCvars()) + CS.addConstraint( + CS.createEq(CS.getOrCreateVar(B), CS.getWild())); + } + } + } else { + // get the constraint variables of the + // expression from RHS side. + RHSConstraints = Info.getVariable(RHS, Context, true); + if(RHSConstraints.size() > 0) { + // Case 1. + // There are constraint variables for the RHS, so, use those over + // anything else we could infer. 
+ constrainEq(V, RHSConstraints, Info); + } } } } - void constrainAssign(Expr *LHS, Expr *RHS) { - std::set V = Info.getVariable(LHS, Context); - constrainAssign(V, LHS->getType(), RHS); + void constrainLocalAssign(Expr *LHS, Expr *RHS) { + // get the in-context local constraints. + std::set V = Info.getVariable(LHS, Context, true); + constrainLocalAssign(V, LHS->getType(), RHS); } - void constrainAssign(DeclaratorDecl *D, Expr *RHS) { - std::set V = Info.getVariable(D, Context); - constrainAssign(V, D->getType(), RHS); + void constrainLocalAssign(DeclaratorDecl *D, Expr *RHS) { + // get the in-context local constraints. + std::set V = Info.getVariable(D, Context, true); + constrainLocalAssign(V, D->getType(), RHS); } bool VisitDeclStmt(DeclStmt *S) { @@ -324,7 +376,7 @@ class FunctionVisitor : public RecursiveASTVisitor { if (VarDecl *VD = dyn_cast(D)) { std::set V; Expr *InitE = VD->getInit(); - constrainAssign(VD, InitE); + constrainLocalAssign(VD, InitE); } } @@ -362,8 +414,7 @@ class FunctionVisitor : public RecursiveASTVisitor { bool VisitBinAssign(BinaryOperator *O) { Expr *LHS = O->getLHS(); Expr *RHS = O->getRHS(); - constrainAssign(LHS, RHS); - + constrainLocalAssign(LHS, RHS); return true; } @@ -373,26 +424,60 @@ class FunctionVisitor : public RecursiveASTVisitor { return true; if (FunctionDecl *FD = dyn_cast(D)) { + // get the function declaration, + // if exists, this is needed to check + // for itype + if(getDeclaration(FD) != nullptr) { + FD = getDeclaration(FD); + } // Call of a function directly. 
unsigned i = 0; for (const auto &A : E->arguments()) { - std::set ParameterEC = - Info.getVariable(A, Context, false); + // get constraint variables for the argument + // from with in the context of the caller body + std::set ArgumentConstraints = + Info.getVariable(A, Context, true); if (i < FD->getNumParams()) { - constrainAssign(FD->getParamDecl(i), A); + bool handled = false; + if(FD->getParamDecl(i)->hasInteropTypeExpr()) { + // try handling interop parameters. + handled = handleITypeAssignment(ArgumentConstraints, + FD->getParamDecl(i)->getInteropTypeExpr()); + } + if(!handled) { + // Here, we need to get the constraints of the + // parameter from the callee's declaration. + std::set ParameterConstraints = + Info.getVariable(FD->getParamDecl(i), Context, false); + // add constraint that the arguments are equal to the + // parameters. + //assert(!ParameterConstraints.empty() && "Unable to get parameter constraints"); + // the constrains could be empty for builtin functions. + constrainEq(ParameterConstraints, ArgumentConstraints, Info); + } } else { - // Constrain ParameterEC to wild if it is a pointer type. - Constraints &CS = Info.getConstraints(); - for (const auto &C : ParameterEC) - C->constrainTo(CS, CS.getWild()); + // this is the case of an argument passed to a function + // with varargs. + // Constrain this parameter to be wild. + if(handleVARARGS) { + Constraints &CS = Info.getConstraints(); + assignType(ArgumentConstraints, CS.getWild()); + } else { + if(Verbose) { + std::string funcName = FD->getName(); + errs() << "Ignoring function as it contains varargs:" << funcName << "\n"; + } + } } i++; } } else if (DeclaratorDecl *DD = dyn_cast(D)){ - // This could be a function pointer. - std::set V = Info.getVariable(DD, Context, false); + // This could be a function pointer, + // get the declaration of the function pointer variable + // with in the caller context. 
+ std::set V = Info.getVariable(DD, Context, true); if (V.size() > 0) { for (const auto &C : V) { FVConstraint *FV = nullptr; @@ -405,21 +490,22 @@ class FunctionVisitor : public RecursiveASTVisitor { } if (FV) { - // Constrain parameters, like in the case above. + // Constrain arguments to be of the same type + // as the corresponding parameters. unsigned i = 0; for (const auto &A : E->arguments()) { - std::set ParameterEC = - Info.getVariable(A, Context, false); + std::set ArgumentConstraints = + Info.getVariable(A, Context, true); if (i < FV->numParams()) { std::set ParameterDC = FV->getParamVar(i); - constrainEq(ParameterEC, ParameterDC, Info); + constrainEq(ArgumentConstraints, ParameterDC, Info); } else { - // Constrain parameter to wild since we can't match it + // Constrain argument to wild since we can't match it // to a parameter from the type. Constraints &CS = Info.getConstraints(); - for (const auto &V : ParameterEC) { + for (const auto &V : ArgumentConstraints) { V->constrainTo(CS, CS.getWild()); } } @@ -428,116 +514,78 @@ class FunctionVisitor : public RecursiveASTVisitor { } else { // This can happen when someone does something really wacky, like // cast a char* to a function pointer, then call it. Constrain - // everything. + // everything. + // what we do is, constraint all arguments to wild. + constraintAllArgumentsToWild(E); Constraints &CS = Info.getConstraints(); - for (const auto &A : E->arguments()) - for (const auto &Ct : Info.getVariable(A, Context, false)) - Ct->constrainTo(CS, CS.getWild()); + // also constraint C->constrainTo(CS, CS.getWild()); } } } else { - // Constrain everything to wild. - for (const auto &A : E->arguments()) { - std::set ParameterEC = - Info.getVariable(A, Context, false); - - Constraints &CS = Info.getConstraints(); - for (const auto &C : ParameterEC) - C->constrainTo(CS, CS.getWild()); - } + // Constrain all arguments to wild. + constraintAllArgumentsToWild(E); } } else { - // Constrain everything to wild. 
- for (const auto &A : E->arguments()) { - std::set ParameterEC = - Info.getVariable(A, Context, false); - - Constraints &CS = Info.getConstraints(); - for (const auto &C : ParameterEC) - C->constrainTo(CS, CS.getWild()); - } + // Constrain all arguments to wild. + constraintAllArgumentsToWild(E); } return true; } + // this will add the constraint that + // variable is an array i.e., (V=ARR) bool VisitArraySubscriptExpr(ArraySubscriptExpr *E) { - constrainExprFirstArr(E->getBase()); + Constraints &CS = Info.getConstraints(); + constraintInBodyVariable(E->getBase(), CS.getArr()); return true; } bool VisitReturnStmt(ReturnStmt *S) { + // Here, we should constrain the return type + // of the function body with the type of the + // return expression. + + // get function variable constraint of the body + // we need to call getVariableOnDemand to avoid auto-correct. std::set Fun = - Info.getVariable(Function, Context); + Info.getVariableOnDemand(Function, Context, true); + // get the constraint of the return variable (again with in the context of the body) std::set Var = - Info.getVariable(S->getRetValue(), Context); - - // We also want to look up the declaration of Function, if it exists. - FunctionDecl *Declaration = getDeclaration(Function); - if (Declaration) { - std::set Fun2 = - Info.getVariable(Declaration, Context); - Fun.insert(Fun2.begin(), Fun2.end()); - } + Info.getVariable(S->getRetValue(), Context, true); // Constrain the value returned (if present) against the return value // of the function. 
- for (const auto &F : Fun ) - if (FVConstraint *FV = dyn_cast(F)) - constrainEq(FV->getReturnVars(), Var, Info); - - return true; - } - - // Apply ~(V = Ptr) to the first 'level' constraint variable associated with - // 'E' - void constrainExprFirst(Expr *E) { - std::set Var = - Info.getVariable(E, Context); - Constraints &CS = Info.getConstraints(); - for (const auto &I : Var) - if (PVConstraint *PVC = dyn_cast(I)) { - if (PVC->getCvars().size() > 0) - CS.addConstraint( - CS.createNot( - CS.createEq( - CS.getOrCreateVar(*(PVC->getCvars().begin())), CS.getPtr()))); - } - } - - void constrainExprFirstArr(Expr *E) { - std::set Var = - Info.getVariable(E, Context, true); - Constraints &CS = Info.getConstraints(); - for (const auto &I : Var) - if (PVConstraint *PVC = dyn_cast(I)) { - if (PVC->getCvars().size() > 0) { - CS.addConstraint( - CS.createEq( - CS.getOrCreateVar(*(PVC->getCvars().begin())), CS.getArr())); - } + for (const auto &F : Fun ) { + if (FVConstraint *FV = dyn_cast(F)) { + constrainEq(FV->getReturnVars(), Var, Info); } + } + return true; } + // these are the expressions, that will + // add the constraint ~(V = Ptr) and ~(V = NTArr) + // i.e., the variable is not a pointer or nt array bool VisitUnaryPreInc(UnaryOperator *O) { - constrainExprFirst(O->getSubExpr()); + constrainInBodyExprNotPtrNotNt(O->getSubExpr()); return true; } bool VisitUnaryPostInc(UnaryOperator *O) { - constrainExprFirst(O->getSubExpr()); + constrainInBodyExprNotPtrNotNt(O->getSubExpr()); return true; } bool VisitUnaryPreDec(UnaryOperator *O) { - constrainExprFirst(O->getSubExpr()); + constrainInBodyExprNotPtrNotNt(O->getSubExpr()); return true; } bool VisitUnaryPostDec(UnaryOperator *O) { - constrainExprFirst(O->getSubExpr()); + constrainInBodyExprNotPtrNotNt(O->getSubExpr()); return true; } @@ -553,9 +601,132 @@ class FunctionVisitor : public RecursiveASTVisitor { private: + // handle the assignment of constraint variables to an itype expression. 
+ bool handleITypeAssignment(std::set &Vars, InteropTypeExpr *expr) { + bool isHandled = false; + CheckedPointerKind ptrKind = getCheckedPointerKind(expr); + // currently we only handle NT arrays. + if (ptrKind == CheckedPointerKind::NtArray) { + isHandled = true; + // assign the corresponding checked type to + // all teh constraint vars. + assignType(Vars, getCheckedPointerConstraint(ptrKind)); + } + // is this handled or propagation through itype + // has been disabled. In which case, all itypes + // values will be handled. + return isHandled || !enablePropThruIType; + } + + // constraint all the provided vars to be + // not equal to the provided type i.e., ~(V = type) + void constrainVarsNotEq(std::set &Vars, ConstAtom *type) { + Constraints &CS = Info.getConstraints(); + for (const auto &I : Vars) + if (PVConstraint *PVC = dyn_cast(I)) { + if (PVC->getCvars().size() > 0) + CS.addConstraint( + CS.createNot( + CS.createEq( + CS.getOrCreateVar(*(PVC->getCvars().begin())), type))); + } + } + + // constraint all the provided vars to be + // equal to the provided type i.e., (V = type) + void constrainVarsEq(std::set &Vars, ConstAtom *type) { + Constraints &CS = Info.getConstraints(); + for (const auto &I : Vars) + if (PVConstraint *PVC = dyn_cast(I)) { + if (PVC->getCvars().size() > 0) + CS.addConstraint( + CS.createEq( + CS.getOrCreateVar(*(PVC->getCvars().begin())), type)); + } + } + + // Apply ~(V = Ptr) and ~(V = NTArr) to the + // first 'level' constraint variable associated with + // 'E' for in-body variables + void constrainInBodyExprNotPtrNotNt(Expr *E) { + // get the constrain variables + // with in the body context + std::set Var = + Info.getVariable(E, Context, true); + Constraints &CS = Info.getConstraints(); + constrainVarsNotEq(Var, CS.getPtr()); + constrainVarsNotEq(Var, CS.getNTArr()); + } + + // constraint helpers. 
+ void constraintInBodyVariable(Expr *e, ConstAtom *target) { + std::set Var = + Info.getVariable(e, Context, true); + Constraints &CS = Info.getConstraints(); + constrainVarsEq(Var, target); + } + + void constraintInBodyVariable(Decl *d, ConstAtom *target) { + std::set Var = + Info.getVariable(d, Context, true); + Constraints &CS = Info.getConstraints(); + constrainVarsEq(Var, target); + } + + // assign the provided type (target) + // to all the constraint variables (CVars). + void assignType(std::set &CVars, + ConstAtom *target) { + Constraints &CS = Info.getConstraints(); + for (const auto &C : CVars) { + C->constrainTo(CS, target); + } + } + + // constraint all the argument of the provided + // call expression to be WILD + void constraintAllArgumentsToWild(CallExpr *E) { + for (const auto &A : E->arguments()) { + // get constraint from within the function body + // of the caller + std::set ParameterEC = + Info.getVariable(A, Context, true); + + Constraints &CS = Info.getConstraints(); + // assign WILD to each of the constraint + // variables. + assignType(ParameterEC, CS.getWild()); + } + } + void arithBinop(BinaryOperator *O) { - constrainExprFirst(O->getLHS()); - constrainExprFirst(O->getRHS()); + constrainInBodyExprNotPtrNotNt(O->getLHS()); + constrainInBodyExprNotPtrNotNt(O->getRHS()); + } + + ConstAtom* getCheckedPointerConstraint(CheckedPointerKind ptrKind) { + Constraints &CS = Info.getConstraints(); + switch(ptrKind) { + case CheckedPointerKind::NtArray: + return CS.getNTArr(); + case CheckedPointerKind::Array: + return CS.getArr(); + case CheckedPointerKind::Ptr: + return CS.getPtr(); + case CheckedPointerKind::Unchecked: + llvm_unreachable("Unchecked type inside an itype. 
This should be impossible."); + } + assert(false && "Invalid Pointer kind."); + } + + Expr* getNormalizedExpr(Expr *CE) { + if(dyn_cast(CE)) { + CE = (dyn_cast(CE))->getSubExpr(); + } + if(dyn_cast(CE)) { + CE = (dyn_cast(CE))->getSubExpr(); + } + return CE; } ASTContext *Context; diff --git a/tools/checked-c-convert/ConstraintVariables.cpp b/tools/checked-c-convert/ConstraintVariables.cpp new file mode 100644 index 000000000000..9eccc0bb54cd --- /dev/null +++ b/tools/checked-c-convert/ConstraintVariables.cpp @@ -0,0 +1,782 @@ +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Implementation of ConstraintVariables methods. +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringSwitch.h" +#include "clang/Lex/Lexer.h" +#include + +#include "ConstraintVariables.h" +#include "ProgramInfo.h" + +using namespace clang; + +// Helper method to print a Type in a way that can be represented in the source. +static +std::string +tyToStr(const Type *T) { + QualType QT(T, 0); + + return QT.getAsString(); +} + +PointerVariableConstraint::PointerVariableConstraint(DeclaratorDecl *D, + ConstraintKey &K, Constraints &CS, const ASTContext &C) : + PointerVariableConstraint(D->getType(), K, D, D->getName(), CS, C) { } + +PointerVariableConstraint::PointerVariableConstraint(const QualType &QT, ConstraintKey &K, + DeclaratorDecl *D, std::string N, Constraints &CS, const ASTContext &C) : + ConstraintVariable(ConstraintVariable::PointerVariable, + tyToStr(QT.getTypePtr()),N),FV(nullptr) +{ + QualType QTy = QT; + const Type *Ty = QTy.getTypePtr(); + OriginalType = tyToStr(Ty); + // If the type is a decayed type, then maybe this is the result of + // decaying an array to a pointer. 
If the original type is some + // kind of array type, we want to use that instead. + if (const DecayedType *DC = dyn_cast(Ty)) { + QualType QTytmp = DC->getOriginalType(); + if (QTytmp->isArrayType() || QTytmp->isIncompleteArrayType()) { + QTy = QTytmp; + Ty = QTy.getTypePtr(); + } + } + + bool isTypedef = false; + + if (Ty->getAs()) + isTypedef = true; + + arrPresent = false; + + if (InteropTypeExpr *ITE = D->getInteropTypeExpr()) { + // external variables can also have itype. + // check if the provided declaration is an external + // variable. + if(!dyn_cast(D) && !dyn_cast(D)) { + QualType InteropType = C.getInteropTypeAndAdjust(ITE, false); + // TODO: handle array_ptr types. + if (InteropType->isCheckedPointerPtrType()) { + QTy = InteropType; + Ty = QTy.getTypePtr(); + } + } + + SourceRange R = ITE->getSourceRange(); + if (R.isValid()) { + auto &SM = C.getSourceManager(); + auto LO = C.getLangOpts(); + llvm::StringRef txt = + Lexer::getSourceText(CharSourceRange::getTokenRange(R), SM, LO); + itypeStr = txt.str(); + assert(itypeStr.size() > 0); + } + } + + while (Ty->isPointerType() || Ty->isArrayType()) { + if (Ty->isArrayType() || Ty->isIncompleteArrayType()) { + arrPresent = true; + // If it's an array, then we need both a constraint variable + // for each level of the array, and a constraint variable for + // values stored in the array. + vars.insert(K); + assert(CS.getVar(K) == nullptr); + CS.getOrCreateVar(K); + + // See if there is a constant size to this array type at this position. + if (const ConstantArrayType *CAT = dyn_cast(Ty)) { + arrSizes[K] = std::pair( + O_SizedArray,CAT->getSize().getZExtValue()); + } else { + arrSizes[K] = std::pair( + O_UnSizedArray,0); + } + + K++; + + // Boil off the typedefs in the array case. + while(const TypedefType *tydTy = dyn_cast(Ty)) { + QTy = tydTy->desugar(); + Ty = QTy.getTypePtr(); + } + + // Iterate. 
+ if(const ArrayType *arrTy = dyn_cast(Ty)) { + QTy = arrTy->getElementType(); + Ty = QTy.getTypePtr(); + } else { + llvm_unreachable("unknown array type"); + } + } else { + // Allocate a new constraint variable for this level of pointer. + vars.insert(K); + assert(CS.getVar(K) == nullptr); + VarAtom * V = CS.getOrCreateVar(K); + + if (Ty->isCheckedPointerType()) { + if (Ty->isCheckedPointerNtArrayType()) { + // this is an NT array type + // Constrain V to be not equal to Arr, Ptr or Wild + CS.addConstraint(CS.createNot(CS.createEq(V, CS.getArr()))); + CS.addConstraint(CS.createNot(CS.createEq(V, CS.getPtr()))); + CS.addConstraint(CS.createNot(CS.createEq(V, CS.getWild()))); + ConstrainedVars.insert(K); + } else if (Ty->isCheckedPointerArrayType()) { + // this is an array type + // Constrain V to be not equal to NTArr, Ptr or Wild + CS.addConstraint(CS.createNot(CS.createEq(V, CS.getNTArr()))); + CS.addConstraint(CS.createNot(CS.createEq(V, CS.getPtr()))); + CS.addConstraint(CS.createNot(CS.createEq(V, CS.getWild()))); + ConstrainedVars.insert(K); + } else if (Ty->isCheckedPointerPtrType()) { + // Constrain V so that it can't be either wild or an array or an NTArray + CS.addConstraint(CS.createNot(CS.createEq(V, CS.getArr()))); + CS.addConstraint(CS.createNot(CS.createEq(V, CS.getNTArr()))); + CS.addConstraint(CS.createNot(CS.createEq(V, CS.getWild()))); + ConstrainedVars.insert(K); + } + } + + // Save here if QTy is qualified or not into a map that + // indexes K to the qualification of QTy, if any. + if (QTy.isConstQualified()) + QualMap.insert( + std::pair(K, ConstQualification)); + + arrSizes[K] = std::pair(O_Pointer,0); + + K++; + std::string TyName = tyToStr(Ty); + // TODO: Github issue #61: improve handling of types for + // // variable arguments. + if (TyName == "struct __va_list_tag *" || TyName == "va_list") + break; + + // Iterate. 
+ QTy = QTy.getSingleStepDesugaredType(C); + QTy = QTy.getTypePtr()->getPointeeType(); + Ty = QTy.getTypePtr(); + } + } + + // If, after boiling off the pointer-ness from this type, we hit a + // function, then create a base-level FVConstraint that we carry + // around too. + if (Ty->isFunctionType()) + // C function-pointer type declarator syntax embeds the variable + // name within the function-like syntax. For example: + // void (*fname)(int, int) = ...; + // If a typedef'ed type name is used, the name can be omitted + // because it is not embedded like that. Instead, it has the form + // tn fname = ..., + // where tn is the typedef'ed type name. + // There is possibly something more elegant to do in the code here. + FV = new FVConstraint(Ty, K, D, (isTypedef ? "" : N), CS, C); + + BaseType = tyToStr(Ty); + + if (QTy.isConstQualified()) { + BaseType = "const " + BaseType; + } + + // TODO: Github issue #61: improve handling of types for + // variable arguments. + if (BaseType == "struct __va_list_tag *" || BaseType == "va_list" || + BaseType == "struct __va_list_tag") + for (const auto &V : vars) + CS.addConstraint(CS.createEq(CS.getOrCreateVar(V), CS.getWild())); +} + +bool PVConstraint::liftedOnCVars(const ConstraintVariable &O, + ProgramInfo &Info, + llvm::function_ref Op) const +{ + // If these aren't both PVConstraints, incomparable. + if (!isa(O)) + return false; + + const PVConstraint *P = cast(&O); + const CVars &OC = P->getCvars(); + + // If they don't have the same number of cvars, incomparable. + if (OC.size() != getCvars().size()) + return false; + + auto I = getCvars().begin(); + auto J = OC.begin(); + Constraints &CS = Info.getConstraints(); + auto env = CS.getVariables(); + + while(I != getCvars().end() && J != OC.end()) { + // Look up the valuation for I and J. 
+ ConstAtom *CI = env[CS.getVar(*I)]; + ConstAtom *CJ = env[CS.getVar(*J)]; + + if (!Op(CI, CJ)) + return false; + + ++I; + ++J; + } + + return true; +} + +bool PVConstraint::isLt(const ConstraintVariable &Other, + ProgramInfo &Info) const +{ + if (isEmpty() || Other.isEmpty()) + return false; + + return liftedOnCVars(Other, Info, [](ConstAtom *A, ConstAtom *B) { + return *A < *B; + }); +} + +bool PVConstraint::isEq(const ConstraintVariable &Other, + ProgramInfo &Info) const +{ + if (isEmpty() && Other.isEmpty()) + return true; + + if (isEmpty() || Other.isEmpty()) + return false; + + return liftedOnCVars(Other, Info, [](ConstAtom *A, ConstAtom *B) { + return *A == *B; + }); +} + +void PointerVariableConstraint::print(raw_ostream &O) const { + O << "{ "; + for (const auto &I : vars) + O << "q_" << I << " "; + O << " }"; + + if (FV) { + O << "("; + FV->print(O); + O << ")"; + } +} + +void PointerVariableConstraint::dump_json(llvm::raw_ostream &O) const { + O << "{\"PointerVar\":{"; + O << "\"Vars\":["; + bool addComma = false; + for (const auto &I : vars) { + if(addComma) { + O << ","; + } + O << "\"q_" << I << "\""; + addComma = true; + } + O << "], \"name\":\"" << getName() << "\""; + if(FV) { + O << ", \"FunctionVariable\":"; + FV->dump_json(O); + } + O << "}}"; + +} + +void PointerVariableConstraint::getQualString(ConstraintKey targetCVar, std::ostringstream &ss) { + std::map::iterator q = QualMap.find(targetCVar); + if (q != QualMap.end()) + if (q->second == ConstQualification) + ss << "const "; +} + +// Mesh resolved constraints with the PointerVariableConstraints set of +// variables and potentially nested function pointer declaration. Produces a +// string that can be replaced in the source code. 
+std::string +PointerVariableConstraint::mkString(Constraints::EnvironmentMap &E, bool emitName, bool forItype) { + std::ostringstream ss; + std::ostringstream pss; + unsigned caratsToAdd = 0; + bool emittedBase = false; + bool emittedName = false; + if (emitName == false && getItypePresent() == false) + emittedName = true; + for (const auto &V : vars) { + VarAtom VA(V); + ConstAtom *C = E[&VA]; + assert(C != nullptr); + + Atom::AtomKind K = C->getKind(); + + // if this is not an itype + // make this wild as it can hold any pointer type + if (!forItype && BaseType == "void") + K = Atom::A_Wild; + + switch (K) { + case Atom::A_Ptr: + getQualString(V, ss); + + // We need to check and see if this level of variable + // is constrained by a bounds safe interface. If it is, + // then we shouldn't re-write it. + if (getItypePresent() == false) { + emittedBase = false; + ss << "_Ptr<"; + caratsToAdd++; + break; + } + case Atom::A_Arr: + // If it's an Arr, then the character we substitute should + // be [] instead of *, IF, the original type was an array. + // And, if the original type was a sized array of size K, + // we should substitute [K]. + if (arrPresent) { + auto i = arrSizes.find(V); + assert(i != arrSizes.end()); + OriginalArrType oat = i->second.first; + uint64_t oas = i->second.second; + + if (emittedName == false) { + emittedName = true; + pss << getName(); + } + + switch(oat) { + case O_Pointer: + pss << "*"; + break; + case O_SizedArray: + pss << "[" << oas << "]"; + break; + case O_UnSizedArray: + pss << "[]"; + break; + } + + break; + } + case Atom::A_NTArr: + // this additional check is to prevent fall-through from the array. + if(K == Atom::A_NTArr) { + // if this is an NTArray + getQualString(V, ss); + + // We need to check and see if this level of variable + // is constrained by a bounds safe interface. If it is, + // then we shouldn't re-write it. 
+ if (getItypePresent() == false) { + emittedBase = false; + ss << "_Nt_array_ptr<"; + caratsToAdd++; + break; + } + } + // If there is no array in the original program, then we fall through to + // the case where we write a pointer value. + case Atom::A_Wild: + if (emittedBase) { + ss << "*"; + } else { + assert(BaseType.size() > 0); + emittedBase = true; + if (FV) { + ss << FV->mkString(E); + } else { + ss << BaseType << "*"; + } + } + + getQualString(V, ss); + break; + case Atom::A_Const: + case Atom::A_Var: + llvm_unreachable("impossible"); + break; + } + } + + if(emittedBase == false) { + // If we have a FV pointer, then our "base" type is a function pointer + // type. + if (FV) { + ss << FV->mkString(E); + } else { + ss << BaseType; + } + } + + // Push carats onto the end of the string + for (unsigned i = 0; i < caratsToAdd; i++) { + ss << ">"; + } + + ss << " "; + + std::string finalDec; + if (emittedName == false) { + ss << getName(); + finalDec = ss.str(); + } else { + finalDec = ss.str() + pss.str(); + } + + return finalDec; +} + +// This describes a function, either a function pointer or a function +// declaration itself. Either kind requires constraint variables for any +// pointer types that are either return values or parameters for the function. +FunctionVariableConstraint::FunctionVariableConstraint(DeclaratorDecl *D, + ConstraintKey &K, Constraints &CS, const ASTContext &C) : + FunctionVariableConstraint(D->getType().getTypePtr(), K, D, + (D->getDeclName().isIdentifier() ? 
D->getName() : ""), CS, C) +{ } + +FunctionVariableConstraint::FunctionVariableConstraint(const Type *Ty, + ConstraintKey &K, DeclaratorDecl *D, std::string N, Constraints &CS, const ASTContext &Ctx) : + ConstraintVariable(ConstraintVariable::FunctionVariable, tyToStr(Ty), N),name(N) +{ + QualType returnType; + hasproto = false; + hasbody = false; + + if (FunctionDecl *FD = dyn_cast(D)) { + // FunctionDecl::hasBody will return true if *any* declaration in the + // declaration chain has a body, which is not what we want to record. + // We want to record if *this* declaration has a body. To do that, + // we'll check if the declaration that has the body is different + // from the current declaration. + const FunctionDecl *oFD = nullptr; + if (FD->hasBody(oFD) && oFD == FD) + hasbody = true; + } + + if (Ty->isFunctionPointerType()) { + // Is this a function pointer definition? + llvm_unreachable("should not hit this case"); + } else if (Ty->isFunctionProtoType()) { + // Is this a function? + const FunctionProtoType *FT = Ty->getAs(); + FunctionDecl *FD = dyn_cast(D); + assert(FT != nullptr); + returnType = FT->getReturnType(); + + // Extract the types for the parameters to this function. If the parameter + // has a bounds expression associated with it, substitute the type of that + // bounds expression for the other type. + for (unsigned i = 0; i < FT->getNumParams(); i++) { + QualType QT = FT->getParamType(i); + + if (InteropTypeExpr *BA = FT->getParamAnnots(i).getInteropTypeExpr()) { + QualType InteropType= Ctx.getInteropTypeAndAdjust(BA, true); + // TODO: handle array_ptr types. 
+ if (InteropType->isCheckedPointerPtrType()) + QT = InteropType; + } + + std::string paramName = ""; + DeclaratorDecl *tmpD = D; + if (FD && i < FD->getNumParams()) { + ParmVarDecl *PVD = FD->getParamDecl(i); + if (PVD) { + tmpD = PVD; + paramName = PVD->getName(); + } + } + + std::set C; + C.insert(new PVConstraint(QT, K, tmpD, paramName, CS, Ctx)); + paramVars.push_back(C); + } + + if (InteropTypeExpr *BA = FT->getReturnAnnots().getInteropTypeExpr()) { + QualType InteropType = Ctx.getInteropTypeAndAdjust(BA, false); + // TODO: handle array_ptr types. + if (InteropType->isCheckedPointerPtrType()) + returnType = InteropType; + } + hasproto = true; + } else if (Ty->isFunctionNoProtoType()) { + const FunctionNoProtoType *FT = Ty->getAs(); + assert(FT != nullptr); + returnType = FT->getReturnType(); + } else { + llvm_unreachable("don't know what to do"); + } + // This has to be a mapping for all parameter/return types, even those that + // aren't pointer types. If we need to re-emit the function signature + // as a type, then we will need the types for all the parameters and the + // return values + + returnVars.insert(new PVConstraint(returnType, K, D, "", CS, Ctx)); + for ( const auto &V : returnVars) { + if (PVConstraint *PVC = dyn_cast(V)) { + if (PVC->getFV()) + PVC->constrainTo(CS, CS.getWild()); + } else if (FVConstraint *FVC = dyn_cast(V)) { + FVC->constrainTo(CS, CS.getWild()); + } + } +} + +bool FVConstraint::liftedOnCVars(const ConstraintVariable &Other, + ProgramInfo &Info, + llvm::function_ref Op) const +{ + if (!isa(Other)) + return false; + + const FVConstraint *F = cast(&Other); + + if (paramVars.size() != F->paramVars.size()) { + if (paramVars.size() < F->paramVars.size()) { + return true; + } else { + return false; + } + } + + // Consider the return variables. 
+ ConstraintVariable *U = getHighest(returnVars, Info); + ConstraintVariable *V = getHighest(F->returnVars, Info); + + if (!U->liftedOnCVars(*V, Info, Op)) + return false; + + // Consider the parameters. + auto I = paramVars.begin(); + auto J = F->paramVars.begin(); + + while ((I != paramVars.end()) && (J != F->paramVars.end())) { + U = getHighest(*I, Info); + V = getHighest(*J, Info); + + if (!U->liftedOnCVars(*V, Info, Op)) + return false; + + ++I; + ++J; + } + + return true; +} + +bool FVConstraint::isLt(const ConstraintVariable &Other, + ProgramInfo &Info) const +{ + if (isEmpty() || Other.isEmpty()) + return false; + + return liftedOnCVars(Other, Info, [](ConstAtom *A, ConstAtom *B) { + return *A < *B; + }); +} + +bool FVConstraint::isEq(const ConstraintVariable &Other, + ProgramInfo &Info) const +{ + if (isEmpty() && Other.isEmpty()) + return true; + + if (isEmpty() || Other.isEmpty()) + return false; + + return liftedOnCVars(Other, Info, [](ConstAtom *A, ConstAtom *B) { + return *A == *B; + }); +} + +void FunctionVariableConstraint::constrainTo(Constraints &CS, ConstAtom *A, bool checkSkip) { + for (const auto &V : returnVars) + V->constrainTo(CS, A, checkSkip); + + for (const auto &V : paramVars) + for (const auto &U : V) + U->constrainTo(CS, A, checkSkip); +} + +bool FunctionVariableConstraint::anyChanges(Constraints::EnvironmentMap &E) { + bool f = false; + + for (const auto &C : returnVars) + f |= C->anyChanges(E); + + return f; +} + +bool FunctionVariableConstraint::hasWild(Constraints::EnvironmentMap &E) +{ + for (const auto& C: returnVars) + if (C->hasWild(E)) + return true; + + return false; +} + +bool FunctionVariableConstraint::hasArr(Constraints::EnvironmentMap &E) +{ + for (const auto& C: returnVars) + if (C->hasArr(E)) + return true; + + return false; +} + +void PointerVariableConstraint::constrainTo(Constraints &CS, ConstAtom *A, bool checkSkip) { + for (const auto &V : vars) { + // Check and see if we've already constrained this variable. 
This is currently + // only done when the bounds-safe interface has refined a type for an external + // function, and we don't want the linking phase to un-refine it by introducing + // a conflicting constraint. + bool doAdd = true; + // this will ensure that we do not make an itype constraint + // variable to be WILD (which should be impossible)!! + if (checkSkip || dyn_cast(A)) { + if (ConstrainedVars.find(V) != ConstrainedVars.end()) + doAdd = false; + } + + if (doAdd) { + CS.addConstraint(CS.createEq(CS.getOrCreateVar(V), A)); + } + } + + if (FV) + FV->constrainTo(CS, A, checkSkip); +} + +bool PointerVariableConstraint::anyChanges(Constraints::EnvironmentMap &E) { + bool f = false; + + for (const auto &C : vars) { + VarAtom V(C); + ConstAtom *CS = E[&V]; + assert(CS != nullptr); + f |= isa(CS); + f |= isa(CS); + } + + if (FV) + f |= FV->anyChanges(E); + + return f; +} +// True if any level in vars passes the check below, else whatever FV (if set) reports for hasWild. +bool PointerVariableConstraint::hasWild(Constraints::EnvironmentMap &E) +{ + for (const auto& C: vars) { + VarAtom V(C); + ConstAtom *CS = E[&V]; + assert(CS != nullptr); + if (isa(CS)) + return true; + } + + if (FV) + return FV->hasWild(E); + + return false; +} +// True if any level in vars passes the check below, else whatever FV (if set) reports for hasArr. +bool PointerVariableConstraint::hasArr(Constraints::EnvironmentMap &E) +{ + for (const auto& C: vars) { + VarAtom V(C); + ConstAtom *CS = E[&V]; + assert(CS != nullptr); + if (isa(CS)) + return true; + } + + if (FV) + return FV->hasArr(E); + + return false; +} + +void FunctionVariableConstraint::print(raw_ostream &O) const { + O << "( "; + for (const auto &I : returnVars) + I->print(O); + O << " )"; + O << " " << name << " "; + for (const auto &I : paramVars) { + O << "( "; + for (const auto &J : I) + J->print(O); + O << " )"; + } +} +// Writes this constraint as JSON: the ReturnVar array (comma-separated), then the name and Parameters. +void FunctionVariableConstraint::dump_json(raw_ostream &O) const { + O << "{\"FunctionVar\":{\"ReturnVar\":["; + bool addComma = false; + for (const auto &I : returnVars) { + if(addComma) + O << ","; + I->dump_json(O); + addComma = true; + } + O << "], \"name\":\"" << name << "\", "; + O << 
"\"Parameters\":["; + addComma = false; + for (const auto &I : paramVars) { + if(I.size() > 0) { + if (addComma) { + O << ",\n"; + } + O << "["; + bool innerComma = false; + for (const auto &J : I) { + if(innerComma) { + O << ","; + } + J->dump_json(O); + innerComma = true; + } + O << "]"; + addComma = true; + } + } + O << "]"; + O << "}}"; +} + +std::string +FunctionVariableConstraint::mkString(Constraints::EnvironmentMap &E, bool emitName, bool forItype) { + std::string s = ""; + // TODO punting on what to do here. The right thing to do is to figure out + // the LUB of all of the V in returnVars. + assert(returnVars.size() > 0); + ConstraintVariable *V = *returnVars.begin(); + assert(V != nullptr); + s = V->mkString(E); + s = s + "("; + std::vector parmStrs; + for (const auto &I : this->paramVars) { + // TODO likewise punting here. + assert(I.size() > 0); + ConstraintVariable *U = *(I.begin()); + assert(U != nullptr); + parmStrs.push_back(U->mkString(E)); + } + + if (parmStrs.size() > 0) { + std::ostringstream ss; + + std::copy(parmStrs.begin(), parmStrs.end() - 1, + std::ostream_iterator(ss, ", ")); + ss << parmStrs.back(); + + s = s + ss.str() + ")"; + } else { + s = s + ")"; + } + + return s; +} \ No newline at end of file diff --git a/tools/checked-c-convert/ConstraintVariables.h b/tools/checked-c-convert/ConstraintVariables.h new file mode 100644 index 000000000000..0e2476cd4823 --- /dev/null +++ b/tools/checked-c-convert/ConstraintVariables.h @@ -0,0 +1,288 @@ +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// The class allocates constraint variables and maps program locations +// (specified by PersistentSourceLocs) to constraint variables. +// +// The allocation of constraint variables is a little nuanced. 
For a given +// variable, there might be multiple constraint variables. For example, some +// declaration of the form: +// +// int **p = ... ; +// +// would be given two constraint variables, visualized like this: +// +// int * q_(i+1) * q_i p = ... ; +// +// The constraint variable at the "highest" or outer-most level of the type +// is the lowest numbered constraint variable for a given declaration. +//===----------------------------------------------------------------------===// + +#ifndef _CONSTRAINTVARIABLES_H +#define _CONSTRAINTVARIABLES_H + +#include "llvm/ADT/StringSwitch.h" +#include "clang/Lex/Lexer.h" +#include "clang/AST/ASTContext.h" + +#include "Constraints.h" + +using namespace clang; + +class ProgramInfo; + +// Holds integers representing constraint variables, with semantics as +// defined in the text above +typedef std::set CVars; + +// Base class for ConstraintVariables. A ConstraintVariable can either be a +// PointerVariableConstraint or a FunctionVariableConstraint. The difference +// is that FunctionVariableConstraints have constraints on the return value +// and on each parameter. +class ConstraintVariable { +public: + enum ConstraintVariableKind { + PointerVariable, + FunctionVariable + }; + + ConstraintVariableKind getKind() const { return Kind; } + +private: + ConstraintVariableKind Kind; +protected: + std::string BaseType; + std::string OriginalType; + // Underlying name of the C variable this ConstraintVariable represents. + std::string Name; + // Set of constraint variables that have been constrained due to a + // bounds-safe interface (itype). They are remembered as being constrained + // so that later on we do not introduce a spurious constraint + // making those variables WILD. + std::set ConstrainedVars; + +public: + ConstraintVariable(ConstraintVariableKind K, std::string T, std::string N) : + Kind(K),BaseType(T),Name(N) {} + + // Create a "for-rewriting" representation of this ConstraintVariable. 
+ // The 'emitName' parameter is true when the generated string should include + // the name of the variable, false for just the type. + // The 'forIType' parameter is true when the generated string is expected + // to be used inside an itype + virtual std::string mkString(Constraints::EnvironmentMap &E, bool emitName=true, bool forItype=false) = 0; + + // Debug printing of the constraint variable. + virtual void print(llvm::raw_ostream &O) const = 0; + virtual void dump() const = 0; + virtual void dump_json(llvm::raw_ostream &O) const = 0; + + // Constrain everything 'within' this ConstraintVariable to be equal to C. + // Set checkSkip to true if you would like constrainTo to consider the + // ConstrainedVars when applying constraints. This should be set when + // applying constraints due to external symbols, during linking. + virtual void constrainTo(Constraints &CS, ConstAtom *C, bool checkSkip=false) = 0; + + // Returns true if any of the constraint variables 'within' this instance + // have a binding in E other than top. E should be the EnvironmentMap that + // results from running unification on the set of constraints and the + // environment. + virtual bool anyChanges(Constraints::EnvironmentMap &E) = 0; + virtual bool hasWild(Constraints::EnvironmentMap &E) = 0; + virtual bool hasArr(Constraints::EnvironmentMap &E) = 0; + + std::string getTy() { return BaseType; } + std::string getOriginalTy() { return OriginalType; } + std::string getName() const { return Name; } + + virtual ~ConstraintVariable() {}; + + // Constraint atoms may be either constants or variables. The constants are + // trivial to compare, but the variables can only really be compared under + // a specific valuation. That valuation is stored in the ProgramInfo data + // structure, so these functions (isLt, isEq) compare two ConstraintVariables + // with a specific assignment to the variables in mind. 
+ virtual bool isLt(const ConstraintVariable &other, ProgramInfo &I) const = 0; + virtual bool isEq(const ConstraintVariable &other, ProgramInfo &I) const = 0; + // Sometimes, constraint variables can be produced that are empty. This + // tests for the existence of those constraint variables. + virtual bool isEmpty(void) const = 0; + + // A helper function for isLt and isEq where the last parameter is a lambda + // for the specific comparison operation to perform. + virtual bool liftedOnCVars(const ConstraintVariable &O, + ProgramInfo &Info, + llvm::function_ref) const = 0; + +}; + +class PointerVariableConstraint; +class FunctionVariableConstraint; + +// Represents an individual constraint on a pointer variable. +// This could contain a reference to a FunctionVariableConstraint +// in the case of a function pointer declaration. +class PointerVariableConstraint : public ConstraintVariable { +public: + enum Qualification { + ConstQualification, + StaticQualification + }; +private: + CVars vars; + FunctionVariableConstraint *FV; + std::map QualMap; + enum OriginalArrType { + O_Pointer, + O_SizedArray, + O_UnSizedArray + }; + // Map from constraint variable to original type and size. + // If the original variable U was: + // * A pointer, then U -> (a,b) , a = O_Pointer, b has no meaning. + // * A sized array, then U -> (a,b) , a = O_SizedArray, b is static size. + // * An unsized array, then U -(a,b) , a = O_UnSizedArray, b has no meaning. + std::map> arrSizes; + // If for all U in arrSizes, any U -> (a,b) where a = O_SizedArray or + // O_UnSizedArray, arrPresent is true. + bool arrPresent; + // Is there an itype associated with this constraint? If there is, how was it + // originally stored in the program? 
+ std::string itypeStr; + // get the qualifier string (e.g., const, etc) for the provided constraint var (targetCvar) + // into the provided string stream (ss) + void getQualString(ConstraintKey targetCVar, std::ostringstream &ss); +public: + // Constructor for when we know a CVars and a type string. + PointerVariableConstraint(CVars V, std::string T, std::string Name, + FunctionVariableConstraint *F, bool isArr, bool isItype, std::string is) : + ConstraintVariable(PointerVariable, T, Name) + ,vars(V),FV(F),arrPresent(isArr), itypeStr(is) {} + + bool getArrPresent() { return arrPresent; } + + // Is an itype present for this constraint? If yes, what is the text of that itype? + bool getItypePresent() { return itypeStr.size() > 0; } + std::string getItype() { return itypeStr; } + + // Constructor for when we have a Decl. K is the current free + // constraint variable index. We don't need to explicitly pass + // the name because it's available in 'D'. + PointerVariableConstraint(clang::DeclaratorDecl *D, ConstraintKey &K, + Constraints &CS, const clang::ASTContext &C); + + // Constructor for when we only have a Type. Needs a string name + // N for the name of the variable that this represents. 
+ PointerVariableConstraint(const clang::QualType &QT, ConstraintKey &K, + clang::DeclaratorDecl *D, std::string N, Constraints &CS, const clang::ASTContext &C); + + const CVars &getCvars() const { return vars; } + + static bool classof(const ConstraintVariable *S) { + return S->getKind() == PointerVariable; + } + + std::string mkString(Constraints::EnvironmentMap &E, bool emitName=true, bool forItype=false); + + FunctionVariableConstraint *getFV() { return FV; } + + void print(llvm::raw_ostream &O) const ; + void dump() const { print(llvm::errs()); } + void dump_json(llvm::raw_ostream &O) const; + void constrainTo(Constraints &CS, ConstAtom *C, bool checkSkip=false); + bool anyChanges(Constraints::EnvironmentMap &E); + bool hasWild(Constraints::EnvironmentMap &E); + bool hasArr(Constraints::EnvironmentMap &E); + + bool isLt(const ConstraintVariable &other, ProgramInfo &P) const; + bool isEq(const ConstraintVariable &other, ProgramInfo &P) const; + bool isEmpty(void) const { return vars.size() == 0; } + + bool liftedOnCVars(const ConstraintVariable &O, + ProgramInfo &Info, + llvm::function_ref) const; + + virtual ~PointerVariableConstraint() {}; +}; + +typedef PointerVariableConstraint PVConstraint; + +// Constraints on a function type. Also contains a 'name' parameter for +// when a re-write of a function pointer is needed. +class FunctionVariableConstraint : public ConstraintVariable { +private: + // N constraints on the return value of the function. + std::set returnVars; + // A vector of K sets of N constraints on the parameter values, for + // K parameters accepted by the function. + std::vector> paramVars; + // Name of the function or function variable. Used by mkString. 
+ std::string name; + bool hasproto; + bool hasbody; +public: + FunctionVariableConstraint() : + ConstraintVariable(FunctionVariable, "", ""),name(""),hasproto(false),hasbody(false) { } + + FunctionVariableConstraint(clang::DeclaratorDecl *D, ConstraintKey &K, + Constraints &CS, const clang::ASTContext &C); + FunctionVariableConstraint(const clang::Type *Ty, ConstraintKey &K, + clang::DeclaratorDecl *D, std::string N, Constraints &CS, const clang::ASTContext &C); + + std::set & + getReturnVars() { return returnVars; } + + size_t numParams() { return paramVars.size(); } + std::string getName() { return name; } + + bool hasProtoType() { return hasproto; } + bool hasBody() { return hasbody; } + + static bool classof(const ConstraintVariable *S) { + return S->getKind() == FunctionVariable; + } + + std::set & + getParamVar(unsigned i) { + assert(i < paramVars.size()); + return paramVars.at(i); + } + + std::string mkString(Constraints::EnvironmentMap &E, bool emitName=true, bool forItype=false); + void print(llvm::raw_ostream &O) const; + void dump() const { print(llvm::errs()); } + void dump_json(llvm::raw_ostream &O) const; + void constrainTo(Constraints &CS, ConstAtom *C, bool checkSkip=false); + bool anyChanges(Constraints::EnvironmentMap &E); + bool hasWild(Constraints::EnvironmentMap &E); + bool hasArr(Constraints::EnvironmentMap &E); + + bool isLt(const ConstraintVariable &other, ProgramInfo &P) const; + bool isEq(const ConstraintVariable &other, ProgramInfo &P) const; + // An FVConstraint is empty if every constraint associated is empty. 
+ bool isEmpty(void) const { + + if (returnVars.size() > 0) + return false; + + for (const auto &u : paramVars) + for (const auto &v : u) + if (!v->isEmpty()) + return false; + + return true; + } + + bool liftedOnCVars(const ConstraintVariable &O, + ProgramInfo &Info, + llvm::function_ref) const; + + virtual ~FunctionVariableConstraint() {}; +}; + +typedef FunctionVariableConstraint FVConstraint; + +#endif //_CONSTRAINTVARIABLES_H diff --git a/tools/checked-c-convert/Constraints.cpp b/tools/checked-c-convert/Constraints.cpp index 5dda0ed388a0..202dad4a700f 100644 --- a/tools/checked-c-convert/Constraints.cpp +++ b/tools/checked-c-convert/Constraints.cpp @@ -157,6 +157,26 @@ Constraints::propImp(Implies *Imp, T *A, ConstraintSet &R, ConstAtom *V) { return changedEnvironment; } +// This method checks if the template +// const atom can be assigned to the provided (src) +// variable. +template +bool Constraints::canAssignConst(VarAtom *src) { + + for (const auto &C : src->Constraints) { + // check if there is a non-equality constraint + // of the provided type. + if (Not *N = dyn_cast(C)) { + if (Eq *E = dyn_cast(N->getBody())) { + if(dyn_cast(E->getRHS())) { + return false; + } + } + } + } + return true; +} + // Takes one iteration to solve the system of constraints. Each step // involves the propagation of quantifiers and the potential firing of // implications. Accepts a single parameter, _env_, that is a map of @@ -198,19 +218,25 @@ bool Constraints::step_solve(EnvironmentMap &env) { for (const auto &C : Var->Constraints) { // Propagate the Neg constraint. if (Not *N = dyn_cast(C)) { - if (Eq *E = dyn_cast(N->getBody())) - // If this is Not ( q == Ptr ) and the current value - // of q is Ptr ( < *getArr() ) then bump q up to Arr. 
- if (isa(E->getRHS())) - if (*Val < *getArr()) { + if (Eq *E = dyn_cast(N->getBody())) { + // If this is Not ( q == Ptr ) or Not ( q == NTArr) + // and the current value + // of q is Ptr ( < *getArr() ) and ARR can be assigned then bump q up to Arr. + if (isa(E->getRHS()) || isa(E->getRHS())) { + if (*Val < *getArr() && canAssignConst(Var)) { VI->second = getArr(); changedEnvironment = true; } + } + } } - else if (Eq *E = dyn_cast(C)) + else if (Eq *E = dyn_cast(C)) { + changedEnvironment |= propEq(env, E, getNTArr(), rmConstraints, VI); changedEnvironment |= propEq(env, E, getArr(), rmConstraints, VI); - else if (Implies *Imp = dyn_cast(C)) + } else if (Implies *Imp = dyn_cast(C)) { + changedEnvironment |= propImp(Imp, getNTArr(), rmConstraints, Val); changedEnvironment |= propImp(Imp, getArr(), rmConstraints, Val); + } } for (const auto &RC : rmConstraints) @@ -273,6 +299,36 @@ void Constraints::dump(void) const { print(errs()); } +void Constraints::dump_json(llvm::raw_ostream &O) const { + O << "{\"Constraints\":["; + bool addComma = false; + for (const auto &C : constraints) { + if(addComma) { + O << ",\n"; + } + C->dump_json(O); + addComma = true; + } + O << "],\n"; + + addComma = false; + + O << "\"Environment\":["; + for (const auto &V : environment) { + if(addComma) { + O << ",\n"; + } + O << "{\"var\":"; + V.first->dump_json(O); + O << ", \"value:\":"; + V.second->dump_json(O); + O << "}"; + addComma = true; + } + O << "]}"; + +} + VarAtom *Constraints::getOrCreateVar(uint32_t v) { VarAtom tv(v); EnvironmentMap::iterator I = environment.find(&tv); @@ -302,6 +358,9 @@ PtrAtom *Constraints::getPtr() const { ArrAtom *Constraints::getArr() const { return prebuiltArr; } +NTArrAtom *Constraints::getNTArr() const { + return prebuiltNTArr; +} WildAtom *Constraints::getWild() const { return prebuiltWild; } @@ -321,11 +380,13 @@ Implies *Constraints::createImplies(Constraint *premise, Constraint *conclusion) Constraints::Constraints() { prebuiltPtr = new PtrAtom(); 
prebuiltArr = new ArrAtom(); + prebuiltNTArr = new NTArrAtom(); prebuiltWild = new WildAtom(); } Constraints::~Constraints() { delete prebuiltPtr; delete prebuiltArr; + delete prebuiltNTArr; delete prebuiltWild; } diff --git a/tools/checked-c-convert/Constraints.h b/tools/checked-c-convert/Constraints.h index 9e02c21fc93c..74d0a7fb4475 100644 --- a/tools/checked-c-convert/Constraints.h +++ b/tools/checked-c-convert/Constraints.h @@ -39,6 +39,7 @@ class Atom { enum AtomKind { A_Var, A_Ptr, + A_NTArr, A_Arr, A_Wild, A_Const @@ -53,6 +54,7 @@ class Atom { virtual void print(llvm::raw_ostream &) const = 0; virtual void dump(void) const = 0; + virtual void dump_json(llvm::raw_ostream &) const = 0; virtual bool operator==(const Atom &) const = 0; virtual bool operator!=(const Atom &) const = 0; virtual bool operator<(const Atom &other) const = 0; @@ -76,6 +78,10 @@ class VarAtom : public Atom { print(llvm::errs()); } + void dump_json(llvm::raw_ostream &O) const { + O << "\"q_" << Loc << "\""; + } + bool operator==(const Atom &other) const { if (const VarAtom *V = llvm::dyn_cast(&other)) return V->Loc == Loc; @@ -133,6 +139,10 @@ class PtrAtom : public ConstAtom { print(llvm::errs()); } + void dump_json(llvm::raw_ostream &O) const { + O << "\"PTR\""; + } + bool operator==(const Atom &other) const { return llvm::isa (&other); } @@ -146,6 +156,43 @@ class PtrAtom : public ConstAtom { } }; +// This refers to the constant NTARR. 
+class NTArrAtom : public ConstAtom { +public: + NTArrAtom() : ConstAtom(A_NTArr) {} + + static bool classof(const Atom *S) { + return S->getKind() == A_NTArr; + } + + void print(llvm::raw_ostream &O) const { + O << "NTARR"; + } + + void dump(void) const { + print(llvm::errs()); + } + + void dump_json(llvm::raw_ostream &O) const { + O << "\"NTARR\""; + } + + bool operator==(const Atom &other) const { + return llvm::isa(&other); + } + + bool operator!=(const Atom &other) const { + return !(*this == other); + } + + bool operator<(const Atom &other) const { + if (llvm::isa(&other) || *this == other) + return false; + else + return true; + } +}; + // This refers to the constant ARR. class ArrAtom : public ConstAtom { public: @@ -163,6 +210,10 @@ class ArrAtom : public ConstAtom { print(llvm::errs()); } + void dump_json(llvm::raw_ostream &O) const { + O << "\"ARR\""; + } + bool operator==(const Atom &other) const { return llvm::isa(&other); } @@ -172,7 +223,7 @@ class ArrAtom : public ConstAtom { } bool operator<(const Atom &other) const { - if (llvm::isa(&other) || *this == other) + if (llvm::isa(&other) || llvm::isa(&other) || *this == other) return false; else return true; @@ -196,10 +247,14 @@ class WildAtom : public ConstAtom { print(llvm::errs()); } + void dump_json(llvm::raw_ostream &O) const { + O << "\"WILD\""; + } + bool operator==(const Atom &other) const { - if (llvm::isa(&other)) + if (llvm::isa(&other)) return true; - else + else return false; } @@ -208,7 +263,8 @@ class WildAtom : public ConstAtom { } bool operator<(const Atom &other) const { - if (llvm::isa(&other) || llvm::isa(&other) || *this == other) + if (llvm::isa(&other) || llvm::isa(&other) || + llvm::isa(&other) || *this == other) return false; else return true; @@ -236,6 +292,7 @@ class Constraint { virtual void print(llvm::raw_ostream &) const = 0; virtual void dump(void) const = 0; + virtual void dump_json(llvm::raw_ostream &) const = 0; virtual bool operator==(const Constraint &other) const 
= 0; virtual bool operator!=(const Constraint &other) const = 0; virtual bool operator<(const Constraint &other) const = 0; @@ -262,6 +319,14 @@ class Eq : public Constraint { print(llvm::errs()); } + void dump_json(llvm::raw_ostream &O) const { + O << "{\"Eq\":{\"Atom1\":"; + lhs->dump_json(O); + O << ", \"Atom2\":"; + rhs->dump_json(O); + O << "}}"; + } + Atom *getLHS(void) const { return lhs; } Atom *getRHS(void) const { return rhs; } @@ -318,6 +383,12 @@ class Not : public Constraint { print(llvm::errs()); } + void dump_json(llvm::raw_ostream &O) const { + O << "{\"Not\":"; + body->dump_json(O); + O << "}"; + } + bool operator==(const Constraint &other) const { if (const Not *N = llvm::dyn_cast(&other)) return *body == *N->body; @@ -373,6 +444,14 @@ class Implies : public Constraint { print(llvm::errs()); } + void dump_json(llvm::raw_ostream &O) const { + O << "{\"Implies\":{\"Premise\":"; + premise->dump_json(O); + O << ", \"Conclusion\":"; + conclusion->dump_json(O); + O << "}}"; + } + bool operator==(const Constraint &other) const { if (const Implies *I = llvm::dyn_cast(&other)) return *premise == *I->premise && *conclusion == *I->conclusion; @@ -431,6 +510,7 @@ class Constraints { std::pair solve(void); void dump() const; void print(llvm::raw_ostream &) const; + void dump_json(llvm::raw_ostream &) const; Eq *createEq(Atom *lhs, Atom *rhs); Not *createNot(Constraint *body); @@ -440,12 +520,15 @@ class Constraints { VarAtom *getVar(uint32_t v) const; PtrAtom *getPtr() const; ArrAtom *getArr() const; + NTArrAtom *getNTArr() const; WildAtom *getWild() const; private: ConstraintSet constraints; EnvironmentMap environment; + template + bool canAssignConst(VarAtom *src); bool step_solve(EnvironmentMap &); bool check(Constraint *C); @@ -462,7 +545,10 @@ class Constraints { // Constraints class. 
PtrAtom *prebuiltPtr; ArrAtom *prebuiltArr; + NTArrAtom *prebuiltNTArr; WildAtom *prebuiltWild; }; +typedef uint32_t ConstraintKey; + #endif diff --git a/tools/checked-c-convert/MappingVisitor.cpp b/tools/checked-c-convert/MappingVisitor.cpp index 8f0dae35506a..6bb9f96d4eb0 100644 --- a/tools/checked-c-convert/MappingVisitor.cpp +++ b/tools/checked-c-convert/MappingVisitor.cpp @@ -6,9 +6,10 @@ //===----------------------------------------------------------------------===// // Implementations of the MappingVisitor functions for VisitStmt and VisitDecl. //===----------------------------------------------------------------------===// -#include "MappingVisitor.h" #include "llvm/Support/Path.h" +#include "MappingVisitor.h" + using namespace clang; bool MappingVisitor::VisitDeclStmt(DeclStmt *S) { diff --git a/tools/checked-c-convert/MappingVisitor.h b/tools/checked-c-convert/MappingVisitor.h index 55005da7318e..4b93f2ad9ee9 100644 --- a/tools/checked-c-convert/MappingVisitor.h +++ b/tools/checked-c-convert/MappingVisitor.h @@ -16,9 +16,15 @@ #include "clang/AST/ASTConsumer.h" #include "clang/AST/RecursiveASTVisitor.h" -#include "utils.h" +#include "Utils.h" #include "PersistentSourceLoc.h" +typedef std::tuple + StmtDeclOrType; +typedef std::map SourceToDeclMapType; +typedef std::pair MappingResultsType; + class MappingVisitor : public clang::RecursiveASTVisitor { public: @@ -26,17 +32,12 @@ class MappingVisitor SourceLocs(S),Context(C) {} // TODO: It's possible the Type field in this tuple isn't needed. - typedef std::tuple - StmtDeclOrType; bool VisitDeclStmt(clang::DeclStmt *S); bool VisitDecl(clang::Decl *D); - std::pair, - VariableDecltoStmtMap> - getResults() - { + MappingResultsType getResults() { return std::pair, VariableDecltoStmtMap>(PSLtoSDT, DeclToDeclStmt); } @@ -44,7 +45,7 @@ class MappingVisitor private: // A map from a PersistentSourceLoc to a tuple describing a statement, decl // or type. 
- std::map PSLtoSDT; + SourceToDeclMapType PSLtoSDT; // The set of PersistentSourceLoc's this instance of MappingVisitor is tasked // with re-instantiating as either a Stmt, Decl or Type. std::set SourceLocs; diff --git a/tools/checked-c-convert/PersistentSourceLoc.cpp b/tools/checked-c-convert/PersistentSourceLoc.cpp index a32930ac5779..6f6573abd778 100644 --- a/tools/checked-c-convert/PersistentSourceLoc.cpp +++ b/tools/checked-c-convert/PersistentSourceLoc.cpp @@ -7,6 +7,7 @@ // Implementation of the PersistentSourceLoc infrastructure. //===----------------------------------------------------------------------===// #include "PersistentSourceLoc.h" +#include "Utils.h" using namespace clang; using namespace llvm; @@ -23,35 +24,43 @@ PersistentSourceLoc::mkPSL(const Decl *D, ASTContext &C) { SL = C.getSourceManager().getSpellingLoc(FD->getLocation()); else if (const ParmVarDecl *PV = dyn_cast(D)) SL = C.getSourceManager().getSpellingLoc(PV->getLocation()); - else if(const VarDecl *V = dyn_cast(D)) + else if(const VarDecl *V = dyn_cast(D)) SL = C.getSourceManager().getExpansionLoc(V->getLocation()); - return mkPSL(SL, C); + return mkPSL(D->getSourceRange(), SL, C); } // Create a PersistentSourceLoc for a Stmt. PersistentSourceLoc PersistentSourceLoc::mkPSL(const Stmt *S, ASTContext &Context) { - return mkPSL(S->getBeginLoc(), Context); + return mkPSL(S->getSourceRange(), S->getBeginLoc(), Context); } // Use the PresumedLoc infrastructure to get a file name and expansion // line and column numbers for a SourceLocation. PersistentSourceLoc -PersistentSourceLoc::mkPSL(SourceLocation SL, ASTContext &Context) { - PresumedLoc PL = Context.getSourceManager().getPresumedLoc(SL); +PersistentSourceLoc::mkPSL(clang::SourceRange SR, SourceLocation SL, ASTContext &Context) { + SourceManager &SM = Context.getSourceManager(); + PresumedLoc PL = SM.getPresumedLoc(SL); // If there is no PresumedLoc, create a nullary PersistentSourceLoc. 
if (!PL.isValid()) return PersistentSourceLoc(); - SourceLocation ESL = Context.getSourceManager().getExpansionLoc(SL); + SourceLocation ESL = SM.getExpansionLoc(SL); FullSourceLoc FESL = Context.getFullLoc(ESL); assert(FESL.isValid()); - std::string fn = PL.getFilename(); + // get the absolute filename of the file + FullSourceLoc tFSL(SR.getBegin(), SM); + const FileEntry *fe = SM.getFileEntryForID(tFSL.getFileID()); + std::string feAbsS = ""; + if(fe != nullptr && getAbsoluteFilePath(fe->getName(), feAbsS)) { + fn = sys::path::remove_leading_dotslash(feAbsS); + } + PersistentSourceLoc PSL(fn, FESL.getExpansionLineNumber(), FESL.getExpansionColumnNumber()); diff --git a/tools/checked-c-convert/PersistentSourceLoc.h b/tools/checked-c-convert/PersistentSourceLoc.h index 57a029144779..ab969567d008 100644 --- a/tools/checked-c-convert/PersistentSourceLoc.h +++ b/tools/checked-c-convert/PersistentSourceLoc.h @@ -62,7 +62,7 @@ class PersistentSourceLoc { private: static - PersistentSourceLoc mkPSL(clang::SourceLocation SL, clang::ASTContext &Context); + PersistentSourceLoc mkPSL(clang::SourceRange SR, clang::SourceLocation SL, clang::ASTContext &Context); std::string fileName; uint32_t lineNo; uint32_t colNo; diff --git a/tools/checked-c-convert/ProgramInfo.cpp b/tools/checked-c-convert/ProgramInfo.cpp index a3db2d4081f4..08225fbb3b7f 100644 --- a/tools/checked-c-convert/ProgramInfo.cpp +++ b/tools/checked-c-convert/ProgramInfo.cpp @@ -15,637 +15,91 @@ using namespace clang; -// Helper method to print a Type in a way that can be represented in the source. 
-static -std::string -tyToStr(const Type *T) { - QualType QT(T, 0); - - return QT.getAsString(); -} - -PointerVariableConstraint::PointerVariableConstraint(DeclaratorDecl *D, - uint32_t &K, Constraints &CS, const ASTContext &C) : - PointerVariableConstraint(D->getType(), K, D, D->getName(), CS, C) { } - -PointerVariableConstraint::PointerVariableConstraint(const QualType &QT, uint32_t &K, - DeclaratorDecl *D, std::string N, Constraints &CS, const ASTContext &C) : - ConstraintVariable(ConstraintVariable::PointerVariable, - tyToStr(QT.getTypePtr()),N),FV(nullptr) -{ - QualType QTy = QT; - const Type *Ty = QTy.getTypePtr(); - // If the type is a decayed type, then maybe this is the result of - // decaying an array to a pointer. If the original type is some - // kind of array type, we want to use that instead. - if (const DecayedType *DC = dyn_cast(Ty)) { - QualType QTytmp = DC->getOriginalType(); - if (QTytmp->isArrayType() || QTytmp->isIncompleteArrayType()) { - QTy = QTytmp; - Ty = QTy.getTypePtr(); - } - } - - bool isTypedef = false; - - if (Ty->getAs()) - isTypedef = true; - - arrPresent = false; +void ProgramInfo::print(raw_ostream &O) const { + CS.print(O); + O << "\n"; - if (InteropTypeExpr *ITE = D->getInteropTypeExpr()) { - SourceRange R = ITE->getSourceRange(); - if (R.isValid()) { - auto &SM = C.getSourceManager(); - auto LO = C.getLangOpts(); - llvm::StringRef txt = - Lexer::getSourceText(CharSourceRange::getTokenRange(R), SM, LO); - itypeStr = txt.str(); - assert(itypeStr.size() > 0); + O << "Constraint Variables\n"; + for( const auto &I : Variables ) { + PersistentSourceLoc L = I.first; + const std::set &S = I.second; + L.print(O); + O << "=>"; + for(const auto &J : S) { + O << "[ "; + J->print(O); + O << " ]"; } + O << "\n"; } - while (Ty->isPointerType() || Ty->isArrayType()) { - if (Ty->isArrayType() || Ty->isIncompleteArrayType()) { - arrPresent = true; - // If it's an array, then we need both a constraint variable - // for each level of the array, 
and a constraint variable for - // values stored in the array. - vars.insert(K); - assert(CS.getVar(K) == nullptr); - CS.getOrCreateVar(K); - - // See if there is a constant size to this array type at this position. - if (const ConstantArrayType *CAT = dyn_cast(Ty)) { - arrSizes[K] = std::pair( - O_SizedArray,CAT->getSize().getZExtValue()); - } else { - arrSizes[K] = std::pair( - O_UnSizedArray,0); - } - - K++; - - // Boil off the typedefs in the array case. - while(const TypedefType *tydTy = dyn_cast(Ty)) { - QTy = tydTy->desugar(); - Ty = QTy.getTypePtr(); - } - - // Iterate. - if(const ArrayType *arrTy = dyn_cast(Ty)) { - QTy = arrTy->getElementType(); - Ty = QTy.getTypePtr(); - } else { - llvm_unreachable("unknown array type"); - } - } else { - // Allocate a new constraint variable for this level of pointer. - vars.insert(K); - assert(CS.getVar(K) == nullptr); - VarAtom * V = CS.getOrCreateVar(K); - - if (Ty->isCheckedPointerType()) { - if (Ty->isCheckedPointerPtrType()) { - // Constrain V so that it can't be either wild or an array. - CS.addConstraint(CS.createNot(CS.createEq(V, CS.getArr()))); - CS.addConstraint(CS.createNot(CS.createEq(V, CS.getWild()))); - ConstrainedVars.insert(K); - } else if (Ty->isCheckedPointerArrayType()) { - CS.addConstraint(CS.createNot(CS.createEq(V, CS.getPtr()))); - CS.addConstraint(CS.createNot(CS.createEq(V, CS.getWild()))); - ConstrainedVars.insert(K); - } - } - - // Save here if QTy is qualified or not into a map that - // indexes K to the qualification of QTy, if any. - if (QTy.isConstQualified()) - QualMap.insert( - std::pair(K, ConstQualification)); - - arrSizes[K] = std::pair(O_Pointer,0); - - K++; - std::string TyName = tyToStr(Ty); - // TODO: Github issue #61: improve handling of types for - // // variable arguments. - if (TyName == "struct __va_list_tag *" || TyName == "va_list") - break; - - // Iterate. 
- QTy = QTy.getSingleStepDesugaredType(C); - QTy = QTy.getTypePtr()->getPointeeType(); - Ty = QTy.getTypePtr(); + O << "Dummy Declaration Constraint Variables\n"; + for(const auto &declCons: OnDemandFuncDeclConstraint) { + O << "Func Name:" << declCons.first << " => "; + const std::set &S = declCons.second; + for(const auto &J : S) { + O << "[ "; + J->print(O); + O << " ]"; } - } - - // If, after boiling off the pointer-ness from this type, we hit a - // function, then create a base-level FVConstraint that we carry - // around too. - if (Ty->isFunctionType()) - // C function-pointer type declarator syntax embeds the variable - // name within the function-like syntax. For example: - // void (*fname)(int, int) = ...; - // If a typedef'ed type name is used, the name can be omitted - // because it is not embedded like that. Instead, it has the form - // tn fname = ..., - // where tn is the typedef'ed type name. - // There is possibly something more elegant to do in the code here. - FV = new FVConstraint(Ty, K, D, (isTypedef ? "" : N), CS, C); - - BaseType = tyToStr(Ty); - - if (QTy.isConstQualified()) { - BaseType = "const " + BaseType; - } - - // TODO: Github issue #61: improve handling of types for - // variable arguments. - if (BaseType == "struct __va_list_tag *" || BaseType == "va_list" || - BaseType == "struct __va_list_tag") - for (const auto &V : vars) - CS.addConstraint(CS.createEq(CS.getOrCreateVar(V), CS.getWild())); -} - -bool PVConstraint::liftedOnCVars(const ConstraintVariable &O, - ProgramInfo &Info, - llvm::function_ref Op) const -{ - // If these aren't both PVConstraints, incomparable. - if (!isa(O)) - return false; - - const PVConstraint *P = cast(&O); - const CVars &OC = P->getCvars(); - - // If they don't have the same number of cvars, incomparable. 
- if (OC.size() != getCvars().size()) - return false; - - auto I = getCvars().begin(); - auto J = OC.begin(); - Constraints &CS = Info.getConstraints(); - auto env = CS.getVariables(); - - while(I != getCvars().end() && J != OC.end()) { - // Look up the valuation for I and J. - ConstAtom *CI = env[CS.getVar(*I)]; - ConstAtom *CJ = env[CS.getVar(*J)]; - - if (!Op(CI, CJ)) - return false; - - ++I; - ++J; - } - - return true; -} - -bool PVConstraint::isLt(const ConstraintVariable &Other, - ProgramInfo &Info) const -{ - if (isEmpty() || Other.isEmpty()) - return false; - - return liftedOnCVars(Other, Info, [](ConstAtom *A, ConstAtom *B) { - return *A < *B; - }); -} - -bool PVConstraint::isEq(const ConstraintVariable &Other, - ProgramInfo &Info) const -{ - if (isEmpty() && Other.isEmpty()) - return true; - - if (isEmpty() || Other.isEmpty()) - return false; - - return liftedOnCVars(Other, Info, [](ConstAtom *A, ConstAtom *B) { - return *A == *B; - }); -} - -void PointerVariableConstraint::print(raw_ostream &O) const { - O << "{ "; - for (const auto &I : vars) - O << "q_" << I << " "; - O << " }"; - - if (FV) { - O << "("; - FV->print(O); - O << ")"; + O << "\n"; } } -// Mesh resolved constraints with the PointerVariableConstraints set of -// variables and potentially nested function pointer declaration. Produces a -// string that can be replaced in the source code. 
-std::string -PointerVariableConstraint::mkString(Constraints::EnvironmentMap &E, bool emitName) { - std::ostringstream ss; - std::ostringstream pss; - unsigned caratsToAdd = 0; - bool emittedBase = false; - bool emittedName = false; - if (emitName == false && getItypePresent() == false) - emittedName = true; - for (const auto &V : vars) { - VarAtom VA(V); - ConstAtom *C = E[&VA]; - assert(C != nullptr); - - std::map::iterator q; - Atom::AtomKind K = C->getKind(); - - if (BaseType == "void") - K = Atom::A_Wild; - - switch (K) { - case Atom::A_Ptr: - q = QualMap.find(V); - if (q != QualMap.end()) - if (q->second == ConstQualification) - ss << "const "; - - // We need to check and see if this level of variable - // is constrained by a bounds safe interface. If it is, - // then we shouldn't re-write it. - if (getItypePresent() == false) { - emittedBase = false; - ss << "_Ptr<"; - caratsToAdd++; - break; - } - case Atom::A_Arr: - // If it's an Arr, then the character we substitute should - // be [] instead of *, IF, the original type was an array. - // And, if the original type was a sized array of size K, - // we should substitute [K]. - if (arrPresent) { - auto i = arrSizes.find(V); - assert(i != arrSizes.end()); - OriginalArrType oat = i->second.first; - uint64_t oas = i->second.second; - - if (emittedName == false) { - emittedName = true; - pss << getName(); - } - - switch(oat) { - case O_Pointer: - pss << "*"; - break; - case O_SizedArray: - pss << "[" << oas << "]"; - break; - case O_UnSizedArray: - pss << "[]"; - break; - } - - break; - } - // If there is no array in the original program, then we fall through to - // the case where we write a pointer value. 
- case Atom::A_Wild: - if (emittedBase) { - ss << "*"; - } else { - assert(BaseType.size() > 0); - emittedBase = true; - if (FV) { - ss << FV->mkString(E); - } else { - ss << BaseType << "*"; - } - } - - q = QualMap.find(V); - if (q != QualMap.end()) - if (q->second == ConstQualification) - ss << "const "; - break; - case Atom::A_Const: - case Atom::A_Var: - llvm_unreachable("impossible"); - break; - } - } - - if(emittedBase == false) { - // If we have a FV pointer, then our "base" type is a function pointer - // type. - if (FV) { - ss << FV->mkString(E); - } else { - ss << BaseType; +void ProgramInfo::dump_json(llvm::raw_ostream &O) const { + O << "{\"Setup\":"; + CS.dump_json(O); + // dump the constraint variables. + O << ", \"ConstraintVariables\":["; + bool addComma = false; + for( const auto &I : Variables ) { + if(addComma) { + O << ",\n"; } - } - - // Push carats onto the end of the string - for (unsigned i = 0; i < caratsToAdd; i++) { - ss << ">"; - } - - ss << " "; - - std::string finalDec; - if (emittedName == false) { - ss << getName(); - finalDec = ss.str(); - } else { - finalDec = ss.str() + pss.str(); - } - - return finalDec; -} - -// This describes a function, either a function pointer or a function -// declaration itself. Either require constraint variables for any pointer -// types that are either return values or paraemeters for the function. -FunctionVariableConstraint::FunctionVariableConstraint(DeclaratorDecl *D, - uint32_t &K, Constraints &CS, const ASTContext &C) : - FunctionVariableConstraint(D->getType().getTypePtr(), K, D, - (D->getDeclName().isIdentifier() ? 
D->getName() : ""), CS, C) - { } - -FunctionVariableConstraint::FunctionVariableConstraint(const Type *Ty, - uint32_t &K, DeclaratorDecl *D, std::string N, Constraints &CS, const ASTContext &Ctx) : - ConstraintVariable(ConstraintVariable::FunctionVariable, tyToStr(Ty), N),name(N) -{ - QualType returnType; - hasproto = false; - hasbody = false; - - if (FunctionDecl *FD = dyn_cast(D)) { - // FunctionDecl::hasBody will return true if *any* declaration in the - // declaration chain has a body, which is not what we want to record. - // We want to record if *this* declaration has a body. To do that, - // we'll check if the declaration that has the body is different - // from the current declaration. - const FunctionDecl *oFD = nullptr; - if (FD->hasBody(oFD) && oFD == FD) - hasbody = true; - } - - if (Ty->isFunctionPointerType()) { - // Is this a function pointer definition? - llvm_unreachable("should not hit this case"); - } else if (Ty->isFunctionProtoType()) { - // Is this a function? - const FunctionProtoType *FT = Ty->getAs(); - FunctionDecl *FD = dyn_cast(D); - assert(FT != nullptr); - returnType = FT->getReturnType(); - - // Extract the types for the parameters to this function. If the parameter - // has a bounds expression associated with it, substitute the type of that - // bounds expression for the other type. - for (unsigned i = 0; i < FT->getNumParams(); i++) { - QualType QT = FT->getParamType(i); - - if (InteropTypeExpr *BA = FT->getParamAnnots(i).getInteropTypeExpr()) { - QualType InteropType= Ctx.getInteropTypeAndAdjust(BA, true); - // TODO: handle array_ptr types. 
- if (InteropType->isCheckedPointerPtrType()) - QT = InteropType; - } + PersistentSourceLoc L = I.first; + const std::set &S = I.second; - std::string paramName = ""; - DeclaratorDecl *tmpD = D; - if (FD && i < FD->getNumParams()) { - ParmVarDecl *PVD = FD->getParamDecl(i); - if (PVD) { - tmpD = PVD; - paramName = PVD->getName(); - } + O << "{\"line\":\""; + L.print(O); + O << "\","; + O << "\"Variables\":["; + bool addComma1 = false; + for(const auto &J : S) { + if(addComma1) { + O << ","; } - - std::set C; - C.insert(new PVConstraint(QT, K, tmpD, paramName, CS, Ctx)); - paramVars.push_back(C); + J->dump_json(O); + addComma1 = true; } - - if (InteropTypeExpr *BA = FT->getReturnAnnots().getInteropTypeExpr()) { - QualType InteropType = Ctx.getInteropTypeAndAdjust(BA, false); - // TODO: handle array_ptr types. - if (InteropType->isCheckedPointerPtrType()) - returnType = InteropType; - } - hasproto = true; - } else if (Ty->isFunctionNoProtoType()) { - const FunctionNoProtoType *FT = Ty->getAs(); - assert(FT != nullptr); - returnType = FT->getReturnType(); - } else { - llvm_unreachable("don't know what to do"); + O << "]"; + O << "}"; + addComma = true; } - // This has to be a mapping for all parameter/return types, even those that - // aren't pointer types. 
If we need to re-emit the function signature - // as a type, then we will need the types for all the parameters and the - // return values - - returnVars.insert(new PVConstraint(returnType, K, D, "", CS, Ctx)); - for ( const auto &V : returnVars) { - if (PVConstraint *PVC = dyn_cast(V)) { - if (PVC->getFV()) - PVC->constrainTo(CS, CS.getWild()); - } else if (FVConstraint *FVC = dyn_cast(V)) { - FVC->constrainTo(CS, CS.getWild()); + O << "]"; + // dump on demand constraints + O << ", \"DummyFunctionConstraints\":["; + addComma = false; + for(const auto &declCons: OnDemandFuncDeclConstraint) { + if(addComma) { + O << ","; } - } -} - -bool FVConstraint::liftedOnCVars(const ConstraintVariable &Other, - ProgramInfo &Info, - llvm::function_ref Op) const - { - if (!isa(Other)) - return false; - - const FVConstraint *F = cast(&Other); - - if (paramVars.size() != F->paramVars.size()) { - if (paramVars.size() < F->paramVars.size()) { - return true; - } else { - return false; - } - } - - // Consider the return variables. - ConstraintVariable *U = getHighest(returnVars, Info); - ConstraintVariable *V = getHighest(F->returnVars, Info); - - if (!U->liftedOnCVars(*V, Info, Op)) - return false; - - // Consider the parameters. 
- auto I = paramVars.begin(); - auto J = F->paramVars.begin(); - - while ((I != paramVars.end()) && (J != F->paramVars.end())) { - U = getHighest(*I, Info); - V = getHighest(*J, Info); - - if (!U->liftedOnCVars(*V, Info, Op)) - return false; - - ++I; - ++J; - } - - return true; -} - -bool FVConstraint::isLt(const ConstraintVariable &Other, - ProgramInfo &Info) const -{ - if (isEmpty() || Other.isEmpty()) - return false; - - return liftedOnCVars(Other, Info, [](ConstAtom *A, ConstAtom *B) { - return *A < *B; - }); -} - -bool FVConstraint::isEq(const ConstraintVariable &Other, - ProgramInfo &Info) const -{ - if (isEmpty() && Other.isEmpty()) - return true; - - if (isEmpty() || Other.isEmpty()) - return false; - - return liftedOnCVars(Other, Info, [](ConstAtom *A, ConstAtom *B) { - return *A == *B; - }); -} - -void FunctionVariableConstraint::constrainTo(Constraints &CS, ConstAtom *A, bool checkSkip) { - for (const auto &V : returnVars) - V->constrainTo(CS, A, checkSkip); - - for (const auto &V : paramVars) - for (const auto &U : V) - U->constrainTo(CS, A, checkSkip); -} - -bool FunctionVariableConstraint::anyChanges(Constraints::EnvironmentMap &E) { - bool f = false; - - for (const auto &C : returnVars) - f |= C->anyChanges(E); - - return f; -} - -void PointerVariableConstraint::constrainTo(Constraints &CS, ConstAtom *A, bool checkSkip) { - for (const auto &V : vars) { - // Check and see if we've already constrained this variable. This is currently - // only done when the bounds-safe interface has refined a type for an external - // function, and we don't want the linking phase to un-refine it by introducing - // a conflicting constraint. 
- bool doAdd = true; - if (checkSkip) - if (ConstrainedVars.find(V) != ConstrainedVars.end()) - doAdd = false; - - if (doAdd) - CS.addConstraint(CS.createEq(CS.getOrCreateVar(V), A)); - } - - if (FV) - FV->constrainTo(CS, A, checkSkip); -} - -bool PointerVariableConstraint::anyChanges(Constraints::EnvironmentMap &E) { - bool f = false; - - for (const auto &C : vars) { - VarAtom V(C); - ConstAtom *CS = E[&V]; - assert(CS != nullptr); - f |= isa(CS); - } - - if (FV) - f |= FV->anyChanges(E); - - return f; -} - -void FunctionVariableConstraint::print(raw_ostream &O) const { - O << "( "; - for (const auto &I : returnVars) - I->print(O); - O << " )"; - O << " " << name << " "; - for (const auto &I : paramVars) { - O << "( "; - for (const auto &J : I) - J->print(O); - O << " )"; - } -} - -std::string -FunctionVariableConstraint::mkString(Constraints::EnvironmentMap &E, bool emitName) { - std::string s = ""; - // TODO punting on what to do here. The right thing to do is to figure out - // the LUB of all of the V in returnVars. - assert(returnVars.size() > 0); - ConstraintVariable *V = *returnVars.begin(); - assert(V != nullptr); - s = V->mkString(E); - s = s + "("; - std::vector parmStrs; - for (const auto &I : this->paramVars) { - // TODO likewise punting here. 
- assert(I.size() > 0); - ConstraintVariable *U = *(I.begin()); - assert(U != nullptr); - parmStrs.push_back(U->mkString(E)); - } - - if (parmStrs.size() > 0) { - std::ostringstream ss; - - std::copy(parmStrs.begin(), parmStrs.end() - 1, - std::ostream_iterator(ss, ", ")); - ss << parmStrs.back(); - - s = s + ss.str() + ")"; - } else { - s = s + ")"; - } - - return s; -} - -void ProgramInfo::print(raw_ostream &O) const { - CS.print(O); - O << "\n"; - - O << "Constraint Variables\n"; - for( const auto &I : Variables ) { - PersistentSourceLoc L = I.first; - const std::set &S = I.second; - L.print(O); - O << "=>"; + O << "{\"functionName\":\"" << declCons.first << "\""; + O << ", \"Constraints\":["; + const std::set &S = declCons.second; + bool addComma1 = false; for(const auto &J : S) { - O << "[ "; - J->print(O); - O << " ]"; + if(addComma1) { + O << ","; + } + J->dump_json(O); + addComma1 = true; } + O << "]}"; + addComma = true; O << "\n"; } + O << "]"; + O << "}"; } // Given a ConstraintVariable V, retrieve all of the unique @@ -680,7 +134,10 @@ CVars getVarsFromConstraint(ConstraintVariable *V, CVars T) { // Print out statistics of constraint variables on a per-file basis. void ProgramInfo::print_stats(std::set &F, raw_ostream &O) { - std::map > filesToVars; + O << "Enable itype propagation:" << enablePropThruIType << "\n"; + O << "Merge multiple function declaration:" << mergeMultipleFuncDecls << "\n"; + O << "Sound handling of var args functions:" << handleVARARGS << "\n"; + std::map > filesToVars; Constraints::EnvironmentMap env = CS.getVariables(); // First, build the map and perform the aggregation. 
@@ -689,12 +146,13 @@ void ProgramInfo::print_stats(std::set &F, raw_ostream &O) { if (F.count(fileName)) { int varC = 0; int pC = 0; + int ntAC = 0; int aC = 0; int wC = 0; auto J = filesToVars.find(fileName); if (J != filesToVars.end()) - std::tie(varC, pC, aC, wC) = J->second; + std::tie(varC, pC, ntAC, aC, wC) = J->second; CVars foundVars; for (auto &C : I.second) { @@ -711,32 +169,35 @@ void ProgramInfo::print_stats(std::set &F, raw_ostream &O) { ConstAtom *CA = K->second; switch (CA->getKind()) { - case Atom::A_Arr: - aC += 1; - break; - case Atom::A_Ptr: - pC += 1; - break; - case Atom::A_Wild: - wC += 1; - break; - case Atom::A_Var: - case Atom::A_Const: - llvm_unreachable("bad constant in environment map"); + case Atom::A_Arr: + aC += 1; + break; + case Atom::A_NTArr: + ntAC += 1; + break; + case Atom::A_Ptr: + pC += 1; + break; + case Atom::A_Wild: + wC += 1; + break; + case Atom::A_Var: + case Atom::A_Const: + llvm_unreachable("bad constant in environment map"); } } - filesToVars[fileName] = std::tuple(varC, pC, aC, wC); + filesToVars[fileName] = std::tuple(varC, pC, ntAC, aC, wC); } } // Then, dump the map to output. - O << "file|#constraints|#ptr|#arr|#wild\n"; + O << "file|#constraints|#ptr|#ntarr|#arr|#wild\n"; for (const auto &I : filesToVars) { - int v, p, a, w; - std::tie(v, p, a, w) = I.second; - O << I.first << "|" << v << "|" << p << "|" << a << "|" << w; + int v, p, nt, a, w; + std::tie(v, p, nt, a, w) = I.second; + O << I.first << "|" << v << "|" << p << "|" << nt << "|" << a << "|" << w; O << "\n"; } } @@ -812,8 +273,7 @@ bool ProgramInfo::link() { FVConstraint *P2 = *J; // Constrain the return values to be equal - // TODO: make this behavior optional? 
- if (!P1->hasBody() && !P2->hasBody()) { + if (!P1->hasBody() && !P2->hasBody() && mergeMultipleFuncDecls) { constrainEq(P1->getReturnVars(), P2->getReturnVars(), *this); // Constrain the parameters to be equal, if the parameter arity is @@ -957,11 +417,8 @@ void ProgramInfo::enterCompilationUnit(ASTContext &Context) { TranslationUnitDecl *TUD = Context.getTranslationUnitDecl(); for (const auto &D : TUD->decls()) V.TraverseDecl(D); - std::pair, - VariableDecltoStmtMap> - res = V.getResults(); - std::map - PSLtoDecl = res.first; + MappingResultsType res = V.getResults(); + SourceToDeclMapType PSLtoDecl = res.first; // Re-populate VarDeclToStatement. VarDeclToStatement = res.second; @@ -976,10 +433,23 @@ void ProgramInfo::enterCompilationUnit(ASTContext &Context) { void ProgramInfo::exitCompilationUnit() { assert(persisted == false); VarDeclToStatement.clear(); + // remove all the references. + IdentifiedArrayDecls.clear(); + AllocationBasedSizeExprs.clear(); persisted = true; return; } +template +bool ProgramInfo::hasConstraintType(std::set &S) { + for (const auto &I : S) { + if (isa(I)) { + return true; + } + } + return false; +} + // For each pointer type in the declaration of D, add a variable to the // constraint system for that pointer type. 
bool ProgramInfo::addVariable(DeclaratorDecl *D, DeclStmt *St, ASTContext *C) { @@ -1023,24 +493,22 @@ bool ProgramInfo::addVariable(DeclaratorDecl *D, DeclStmt *St, ASTContext *C) { F = new FVConstraint(D, freeKey, CS, *C); std::set &S = Variables[PLoc]; - bool found = false; - for (const auto &I : S) - if (isa(I)) - found = true; - - if (found == false && F != nullptr) - Variables[PLoc].insert(F); - found = false; + bool newFunction = false; - for (const auto &I : S) - if (isa(I)) - found = true; + if(F != nullptr && !hasConstraintType(S)) { + // insert the function constraint only if it doesn't exist + newFunction = true; + S.insert(F); + } - if (found == false && P != nullptr) - Variables[PLoc].insert(P); + if(P != nullptr && !hasConstraintType(S)) { + // if there is no pointer constraint in this location + // insert it. + S.insert(P); + } - // Did we create a function? - if (F) { + // Did we create a function and it is a newly added function + if (F && newFunction) { // If we did, then we need to add some additional stuff to Variables. // * A mapping from the parameters PLoc to the constraint variables for // the parameters. @@ -1173,6 +641,9 @@ ProgramInfo::getVariableHelper( Expr *E, } else if (CHKCBindTemporaryExpr *CBE = dyn_cast(E)) { return getVariableHelper(CBE->getSubExpr(), V, C, ifc); } else if (CallExpr *CE = dyn_cast(E)) { + // call expression should always get out-of context + // constraint variable. + ifc = false; // Here, we need to look up the target of the call and return the // constraints for the return value of that function. Decl *D = CE->getCalleeDecl(); @@ -1248,72 +719,136 @@ ProgramInfo::getVariableHelper( Expr *E, } } +std::set& +ProgramInfo::getOnDemandFuncDeclarationConstraint(FunctionDecl *targetFunc, ASTContext *C) { + // get function name. 
+ std::string funcName = targetFunc->getNameAsString(); + if(OnDemandFuncDeclConstraint.find(funcName) == OnDemandFuncDeclConstraint.end()) { + const Type *Ty = targetFunc->getTypeSourceInfo()->getTypeLoc().getTypePtr(); + assert (!(Ty->isPointerType() || Ty->isArrayType()) && ""); + assert(Ty->isFunctionType() && ""); + FVConstraint *F = new FVConstraint(targetFunc, freeKey, CS, *C); + OnDemandFuncDeclConstraint[funcName].insert(F); + } + return OnDemandFuncDeclConstraint[funcName]; +} +std::set +ProgramInfo::getVariable(clang::Decl *D, clang::ASTContext *C, FunctionDecl *FD, int parameterIndex) { + // if this is a parameter. + if(parameterIndex >= 0) { + // get the parameter index of the + // requested function declaration + D = FD->getParamDecl(parameterIndex); + } else { + // this is the return value of the function + D = FD; + } + VariableMap::iterator I = Variables.find(PersistentSourceLoc::mkPSL(D, *C)); + assert(I != Variables.end()); + return I->second; + +} + +std::set +ProgramInfo::getVariable(clang::Decl *D, clang::ASTContext *C, bool inFunctionContext) { + // here, we auto-correct the inFunctionContext flag. + // if someone is asking for in context variable of a function + // always give the declaration context. + + // if this a function declaration + // set in context to false. + if(dyn_cast(D)) { + inFunctionContext = false; + } + return getVariableOnDemand(D, C, inFunctionContext); +} + // Given a decl, return the variables for the constraints of the Decl. std::set -ProgramInfo::getVariable(Decl *D, ASTContext *C, bool inFunctionContext) { +ProgramInfo::getVariableOnDemand(Decl *D, ASTContext *C, bool inFunctionContext) { assert(persisted == false); VariableMap::iterator I = Variables.find(PersistentSourceLoc::mkPSL(D, *C)); if (I != Variables.end()) { // If we are looking up a variable, and that variable is a parameter variable, + // or return value // then we should see if we're looking this up in the context of a function or - // not. 
If we are not, then we should find a declaration - if (ParmVarDecl *PD = dyn_cast(D)) { - if (!inFunctionContext) { - // We need to do 2 things: - // - Look up a forward declaration of the function for this parameter. - // - Map 'D', which is the ith parameter of Parent, to the ith parameter - // of any forward declaration. - // - // If such a forward declaration doesn't exist, then we can back off. - - const DeclContext *DC = PD->getParentFunctionOrMethod(); - assert(DC != nullptr); - if(const FunctionDecl *Parent = dyn_cast(DC)) { - // Check that the current function declaration doesn't have a body. - bool hasbody = false; - const FunctionDecl *oFD = nullptr; - if (Parent->hasBody(oFD) && oFD == Parent) - hasbody = true; - - // This ParmVarDecl belongs to a method declaration that has a body, - // and, our caller asked for a non-method declaration variable. Let's - // see if we can find one by looking through the re-declarations of - // Parent. - if (hasbody) { - // Let's look through all the re-declarations of Parent. - const FunctionDecl *fwdDecl = nullptr; - for (const auto &RD : Parent->redecls()) { - if (RD != Parent) { - fwdDecl = RD; - break; - } - } + // not. If we are not, then we should find a declaration + ParmVarDecl *PD = nullptr; + FunctionDecl *funcDefinition = nullptr; + FunctionDecl *funcDeclaration = nullptr; + // get the function declaration and definition + if(D != nullptr && dyn_cast(D)) { + funcDeclaration = getDeclaration(dyn_cast(D)); + funcDefinition = getDefinition(dyn_cast(D)); + } + int parameterIndex = -1; + if(PD = dyn_cast(D)) { + // okay, we got a request for a parameter + DeclContext *DC = PD->getParentFunctionOrMethod(); + assert(DC != nullptr); + FunctionDecl *FD = dyn_cast(DC); + // get the parameter index with in the function. 
+ for (unsigned i = 0; i < FD->getNumParams(); i++) { + const ParmVarDecl *tmp = FD->getParamDecl(i); + if (tmp == D) { + parameterIndex = i; + break; + } + } + + // get declaration and definition + funcDeclaration = getDeclaration(FD); + funcDefinition = getDefinition(FD); - if (fwdDecl) { - // We found one! Let's figure out the index that D has in Parent, - // then get that decl from fwdDecl and look it up in Variables - // by PSL, then return it. - int idx = -1; - - for (unsigned i = 0; i < Parent->getNumParams(); i++) { - const ParmVarDecl *tmp = Parent->getParamDecl(i); - - if (tmp == D) { - idx = i; - break; - } - } - - assert(idx >= 0); - - const ParmVarDecl *otherDecl = fwdDecl->getParamDecl(idx); - I = Variables.find(PersistentSourceLoc::mkPSL(otherDecl, *C)); - assert(I != Variables.end()); + assert(parameterIndex >= 0 && "Got request for invalid parameter"); + } + if(funcDeclaration || funcDefinition || parameterIndex != -1) { + // if we are asking for the constraint variable of a function + // and that function is an external function. + // then use declaration. + if(dyn_cast(D) && funcDefinition == nullptr) { + funcDefinition = funcDeclaration; + } + // this means either we got a + // request for function return value or parameter + if(inFunctionContext) { + assert(funcDefinition != nullptr && "Requesting for in-context constraints, " + "but there is no definition for this function"); + // return the constraint variable + // that belongs to the function definition. + return getVariable(D, C, funcDefinition, parameterIndex); + } else { + if(funcDeclaration == nullptr) { + // we need constraint variable + // with in the function declaration, + // but there is no declaration + // get on demand declaration. + std::set &fvConstraints = getOnDemandFuncDeclarationConstraint(funcDefinition, C); + if(parameterIndex != -1) { + // this is a parameter. 
+ std::set parameterConstraints; + parameterConstraints.clear(); + assert(fvConstraints.size() && "Unable to find on demand fv constraints."); + // get all parameters from all the FVConstraints. + for(auto fv: fvConstraints) { + auto currParamConstraint = (dyn_cast(fv))->getParamVar(parameterIndex); + parameterConstraints.insert(currParamConstraint.begin(), currParamConstraint.end()); } + return parameterConstraints; } + return fvConstraints; + } else { + // return the variable with in + // the function declaration + return getVariable(D, C, funcDeclaration, parameterIndex); } } + // we got a request for function return or parameter + // but we failed to handle the request. + assert(false && "Invalid state reached."); } + // neither parameter or return value. + // just return the original constraint. return I->second; } else { return std::set(); @@ -1334,3 +869,31 @@ ProgramInfo::getVariable(Expr *E, ASTContext *C, bool inFunctionContext) { else return T; } + +bool ProgramInfo::insertPotentialArrayVar(Decl *var) { + return IdentifiedArrayDecls.insert(var).second; +} + +bool ProgramInfo::isIdentifiedArrayVar(Decl *toCheckVar) { + return IdentifiedArrayDecls.find(toCheckVar) != IdentifiedArrayDecls.end(); +} + +bool ProgramInfo::addAllocationBasedSizeExpr(Decl *targetVar, Expr *sizeExpr) { + assert(isIdentifiedArrayVar(targetVar) && "The provided variable is not an array variable"); + return AllocationBasedSizeExprs[targetVar].insert(sizeExpr).second; +} + +void ProgramInfo::printArrayVarsAndSizes(llvm::raw_ostream &O) { + if(!AllocationBasedSizeExprs.empty()) { + O << "\n\nArray Variables and Sizes\n"; + for (const auto &currEl: AllocationBasedSizeExprs) { + O << "Variable:"; + currEl.first->dump(O); + O << ", Possible Sizes:\n"; + for (auto sizeExpr: currEl.second) { + sizeExpr->dump(O); + O << "\n"; + } + } + } +} \ No newline at end of file diff --git a/tools/checked-c-convert/ProgramInfo.h b/tools/checked-c-convert/ProgramInfo.h index 2c5087b4ee39..c27828419326 
100644 --- a/tools/checked-c-convert/ProgramInfo.h +++ b/tools/checked-c-convert/ProgramInfo.h @@ -4,22 +4,8 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// This class is used to collect information for the program being analyzed. -// The class allocates constraint variables and maps program locations -// (specified by PersistentSourceLocs) to constraint variables. -// -// The allocation of constraint variables is a little nuanced. For a given -// variable, there might be multiple constraint variables. For example, some -// declaration of the form: -// -// int **p = ... ; -// -// would be given two constraint variables, visualized like this: -// -// int * q_(i+1) * q_i p = ... ; -// -// The constraint variable at the "highest" or outer-most level of the type -// is the lowest numbered constraint variable for a given declaration. +// This class represents all the information about a source file +// collected by the converter. //===----------------------------------------------------------------------===// #ifndef _PROGRAM_INFO_H #define _PROGRAM_INFO_H @@ -29,252 +15,23 @@ #include "clang/Frontend/FrontendAction.h" #include "clang/Tooling/Tooling.h" -#include "Constraints.h" -#include "utils.h" +#include "ConstraintVariables.h" +#include "Utils.h" #include "PersistentSourceLoc.h" class ProgramInfo; -// Holds integers representing constraint variables, with semantics as -// defined in the comment at the top of the file. -typedef std::set CVars; - -// Base class for ConstraintVariables. A ConstraintVariable can either be a -// PointerVariableConstraint or a FunctionVariableConstraint. The difference -// is that FunctionVariableConstraints have constraints on the return value -// and on each parameter. 
-class ConstraintVariable { -public: - enum ConstraintVariableKind { - PointerVariable, - FunctionVariable - }; - - ConstraintVariableKind getKind() const { return Kind; } - -private: - ConstraintVariableKind Kind; -protected: - std::string BaseType; - // Underlying name of the C variable this ConstraintVariable represents. - std::string Name; - // Set of constraint variables that have been constrained due to a - // bounds-safe interface. They are remembered as being constrained - // so that later on we do not introduce a spurious constraint - // making those variables WILD. - std::set ConstrainedVars; - -public: - ConstraintVariable(ConstraintVariableKind K, std::string T, std::string N) : - Kind(K),BaseType(T),Name(N) {} - - // Create a "for-rewriting" representation of this ConstraintVariable. - // The 'emitName' parameter is true when the generated string should include - // the name of the variable, false for just the type. - virtual std::string mkString(Constraints::EnvironmentMap &E, bool emitName=true) = 0; - - // Debug printing of the constraint variable. - virtual void print(llvm::raw_ostream &O) const = 0; - virtual void dump() const = 0; - - // Constrain everything 'within' this ConstraintVariable to be equal to C. - // Set checkSkip to true if you would like constrainTo to consider the - // ConstrainedVars when applying constraints. This should be set when - // applying constraints due to external symbols, during linking. - virtual void constrainTo(Constraints &CS, ConstAtom *C, bool checkSkip=false) = 0; - - // Returns true if any of the constraint variables 'within' this instance - // have a binding in E other than top. E should be the EnvironmentMap that - // results from running unification on the set of constraints and the - // environment. 
- virtual bool anyChanges(Constraints::EnvironmentMap &E) = 0; - - std::string getTy() { return BaseType; } - std::string getName() { return Name; } - - virtual ~ConstraintVariable() {}; - - // Constraint atoms may be either constants or variables. The constants are - // trivial to compare, but the variables can only really be compared under - // a specific valuation. That valuation is stored in the ProgramInfo data - // structure, so these functions (isLt, isEq) compare two ConstraintVariables - // with a specific assignment to the variables in mind. - virtual bool isLt(const ConstraintVariable &other, ProgramInfo &I) const = 0; - virtual bool isEq(const ConstraintVariable &other, ProgramInfo &I) const = 0; - // Sometimes, constraint variables can be produced that are empty. This - // tests for the existence of those constraint variables. - virtual bool isEmpty(void) const = 0; - - // A helper function for isLt and isEq where the last parameter is a lambda - // for the specific comparison operation to perform. - virtual bool liftedOnCVars(const ConstraintVariable &O, - ProgramInfo &Info, - llvm::function_ref) const = 0; - -}; - -class PointerVariableConstraint; -class FunctionVariableConstraint; - -// Represents an individual constraint on a pointer variable. -// This could contain a reference to a FunctionVariableConstraint -// in the case of a function pointer declaration. -class PointerVariableConstraint : public ConstraintVariable { -public: - enum Qualification { - ConstQualification, - StaticQualification - }; -private: - CVars vars; - FunctionVariableConstraint *FV; - std::map QualMap; - enum OriginalArrType { - O_Pointer, - O_SizedArray, - O_UnSizedArray - }; - // Map from constraint variable to original type and size. - // If the original variable U was: - // * A pointer, then U -> (a,b) , a = O_Pointer, b has no meaning. - // * A sized array, then U -> (a,b) , a = O_SizedArray, b is static size. 
- // * An unsized array, then U -(a,b) , a = O_UnSizedArray, b has no meaning. - std::map> arrSizes; - // If for all U in arrSizes, any U -> (a,b) where a = O_SizedArray or - // O_UnSizedArray, arrPresent is true. - bool arrPresent; - // Is there an itype associated with this constraint? If there is, how was it - // originally stored in the program? - std::string itypeStr; -public: - // Constructor for when we know a CVars and a type string. - PointerVariableConstraint(CVars V, std::string T, std::string Name, - FunctionVariableConstraint *F, bool isArr, bool isItype, std::string is) : - ConstraintVariable(PointerVariable, T, Name) - ,vars(V),FV(F),arrPresent(isArr), itypeStr(is) {} - - bool getArrPresent() { return arrPresent; } - - // Is an itype present for this constraint? If yes, what is the text of that itype? - bool getItypePresent() { return itypeStr.size() > 0; } - std::string getItype() { return itypeStr; } - - // Constructor for when we have a Decl. K is the current free - // constraint variable index. We don't need to explicitly pass - // the name because it's available in 'D'. - PointerVariableConstraint(clang::DeclaratorDecl *D, uint32_t &K, - Constraints &CS, const clang::ASTContext &C); - - // Constructor for when we only have a Type. Needs a string name - // N for the name of the variable that this represents. 
- PointerVariableConstraint(const clang::QualType &QT, uint32_t &K, - clang::DeclaratorDecl *D, std::string N, Constraints &CS, const clang::ASTContext &C); - - const CVars &getCvars() const { return vars; } - - static bool classof(const ConstraintVariable *S) { - return S->getKind() == PointerVariable; - } - - std::string mkString(Constraints::EnvironmentMap &E, bool emitName=true); - - FunctionVariableConstraint *getFV() { return FV; } - - void print(llvm::raw_ostream &O) const ; - void dump() const { print(llvm::errs()); } - void constrainTo(Constraints &CS, ConstAtom *C, bool checkSkip=false); - bool anyChanges(Constraints::EnvironmentMap &E); - - bool isLt(const ConstraintVariable &other, ProgramInfo &P) const; - bool isEq(const ConstraintVariable &other, ProgramInfo &P) const; - bool isEmpty(void) const { return vars.size() == 0; } - - bool liftedOnCVars(const ConstraintVariable &O, - ProgramInfo &Info, - llvm::function_ref) const; - - virtual ~PointerVariableConstraint() {}; -}; - -typedef PointerVariableConstraint PVConstraint; - -// Constraints on a function type. Also contains a 'name' parameter for -// when a re-write of a function pointer is needed. -class FunctionVariableConstraint : public ConstraintVariable { -private: - // N constraints on the return value of the function. - std::set returnVars; - // A vector of K sets of N constraints on the parameter values, for - // K parameters accepted by the function. - std::vector> paramVars; - // Name of the function or function variable. Used by mkString. 
- std::string name; - bool hasproto; - bool hasbody; -public: - FunctionVariableConstraint() : - ConstraintVariable(FunctionVariable, "", ""),name(""),hasproto(false),hasbody(false) { } - - FunctionVariableConstraint(clang::DeclaratorDecl *D, uint32_t &K, - Constraints &CS, const clang::ASTContext &C); - FunctionVariableConstraint(const clang::Type *Ty, uint32_t &K, - clang::DeclaratorDecl *D, std::string N, Constraints &CS, const clang::ASTContext &C); - - std::set & - getReturnVars() { return returnVars; } - - size_t numParams() { return paramVars.size(); } - std::string getName() { return name; } - - bool hasProtoType() { return hasproto; } - bool hasBody() { return hasbody; } - - static bool classof(const ConstraintVariable *S) { - return S->getKind() == FunctionVariable; - } - - std::set & - getParamVar(unsigned i) { - assert(i < paramVars.size()); - return paramVars.at(i); - } - - std::string mkString(Constraints::EnvironmentMap &E, bool emitName=true); - void print(llvm::raw_ostream &O) const; - void dump() const { print(llvm::errs()); } - void constrainTo(Constraints &CS, ConstAtom *C, bool checkSkip=false); - bool anyChanges(Constraints::EnvironmentMap &E); - - bool isLt(const ConstraintVariable &other, ProgramInfo &P) const; - bool isEq(const ConstraintVariable &other, ProgramInfo &P) const; - // An FVConstraint is empty if every constraint associated is empty. 
- bool isEmpty(void) const { - - if (returnVars.size() > 0) - return false; - - for (const auto &u : paramVars) - for (const auto &v : u) - if (!v->isEmpty()) - return false; - - return true; - } - - bool liftedOnCVars(const ConstraintVariable &O, - ProgramInfo &Info, - llvm::function_ref) const; - - virtual ~FunctionVariableConstraint() {}; -}; - -typedef FunctionVariableConstraint FVConstraint; class ProgramInfo { public: - ProgramInfo() : freeKey(0), persisted(true) {} + ProgramInfo() : + freeKey(0), persisted(true) { + IdentifiedArrayDecls.clear(); + OnDemandFuncDeclConstraint.clear(); + } void print(llvm::raw_ostream &O) const; void dump() const { print(llvm::errs()); } + void dump_json(llvm::raw_ostream &O) const; void dump_stats(std::set &F) { print_stats(F, llvm::errs()); } void print_stats(std::set &F, llvm::raw_ostream &O); @@ -344,16 +101,58 @@ class ProgramInfo { // non-Declaration Definition. std::set getVariable(clang::Expr *E, clang::ASTContext *C, bool inFunctionContext = false); + std::set + getVariableOnDemand(clang::Decl *D, clang::ASTContext *C, bool inFunctionContext = false); std::set getVariable(clang::Decl *D, clang::ASTContext *C, bool inFunctionContext = false); + // get constraint variable for the provided function or its parameter + std::set + getVariable(clang::Decl *D, clang::ASTContext *C, FunctionDecl *FD, int parameterIndex=-1); VariableMap &getVarMap() { return Variables; } + std::set &getIdentifiedArrayVars() { +#ifdef ARRDEBUG + for(auto currD: IdentifiedArrayDecls) { + currD->dump(); + } +#endif + return IdentifiedArrayDecls; + } + + // add the size expression used in allocation routine + // through which the variable was initialized. + bool addAllocationBasedSizeExpr(Decl *targetVar, Expr *sizeExpr); + + bool isIdentifiedArrayVar(Decl *toCheckVar); + + bool insertPotentialArrayVar(Decl *var); + + void printArrayVarsAndSizes(llvm::raw_ostream &O); + + // get on demand function declaration constraint. 
This is needed for functions + // that do not have corresponding declaration. + // for all functions that do not have corresponding declaration, + // we create an on demand FunctionVariableConstraint. + std::set& + getOnDemandFuncDeclarationConstraint(FunctionDecl *targetFunc, ASTContext *C); + private: + // check if the given set has the corresponding constraint variable type + template + bool hasConstraintType(std::set &S); // Function to check if an external symbol is okay to leave // constrained. bool isExternOkay(std::string ext); + // Map that contains function name and corresponding + // set of function variable constraints. + // We only create on demand variables for non-declared functions. + // we store the constraints based on function name + // as the information needs to be stored across multiple + // instances of the program AST + std::map> OnDemandFuncDeclConstraint; + std::list Records; // Next available integer to assign to a variable. uint32_t freeKey; @@ -379,6 +178,12 @@ class ProgramInfo { // seen before. std::map ExternFunctions; std::map> GlobalSymbols; + + // these are the array declarations identified by the converter. + std::set IdentifiedArrayDecls; + // this is the map of variables that are potential arrays + // and their tentative size expression. + std::map> AllocationBasedSizeExprs; }; #endif diff --git a/tools/checked-c-convert/RewriteUtils.cpp b/tools/checked-c-convert/RewriteUtils.cpp new file mode 100644 index 000000000000..60964fb19be2 --- /dev/null +++ b/tools/checked-c-convert/RewriteUtils.cpp @@ -0,0 +1,848 @@ +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// This class contains implementation of the functions and +// classes of RewriteUtils.h +//===----------------------------------------------------------------------===// +#include "llvm/Support/raw_ostream.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include +#include +#include + +#include "RewriteUtils.h" +#include "MappingVisitor.h" +#include "Utils.h" +#include "ArrayBoundsInferenceConsumer.h" + +using namespace llvm; +using namespace clang; + +SourceRange DComp::getWholeSR(SourceRange orig, DAndReplace dr) const { + SourceRange newSourceRange(orig); + + if (FunctionDecl *FD = dyn_cast(dr.Declaration)) { + newSourceRange.setEnd(getFunctionDeclarationEnd(FD, SM)); + if (dr.fullDecl == false) + newSourceRange = FD->getReturnTypeSourceRange(); + } + + return newSourceRange; +} + +bool DComp::operator()(const DAndReplace lhs, const DAndReplace rhs) const { + // Does the source location of the Decl in lhs overlap at all with + // the source location of rhs? + SourceRange srLHS = lhs.Declaration->getSourceRange(); + SourceRange srRHS = rhs.Declaration->getSourceRange(); + + // Take into account whether or not a FunctionDeclaration specifies + // the "whole" declaration or not. If it does not, it just specifies + // the return position. + srLHS = getWholeSR(srLHS, lhs); + srRHS = getWholeSR(srRHS, rhs); + + // Also take into account whether or not there is a multi-statement + // decl, because the generated ranges will overlap. + DeclStmt *lhStmt = dyn_cast_or_null(lhs.Statement); + + if (lhStmt && !lhStmt->isSingleDecl()) { + SourceLocation newBegin = (*lhStmt->decls().begin())->getSourceRange().getBegin(); + bool found; + for (const auto &DT : lhStmt->decls()) { + if (DT == lhs.Declaration) { + found = true; + break; + } + newBegin = DT->getSourceRange().getEnd(); + } + assert (found); + srLHS.setBegin(newBegin); + // This is needed to make the subsequent test inclusive. 
+ srLHS.setEnd(srLHS.getEnd().getLocWithOffset(-1)); + } + + DeclStmt *rhStmt = dyn_cast_or_null(rhs.Statement); + if (rhStmt && !rhStmt->isSingleDecl()) { + SourceLocation newBegin = (*rhStmt->decls().begin())->getSourceRange().getBegin(); + bool found; + for (const auto &DT : rhStmt->decls()) { + if (DT == rhs.Declaration) { + found = true; + break; + } + newBegin = DT->getSourceRange().getEnd(); + } + assert (found); + srRHS.setBegin(newBegin); + // This is needed to make the subsequent test inclusive. + srRHS.setEnd(srRHS.getEnd().getLocWithOffset(-1)); + } + + SourceLocation x1 = srLHS.getBegin(); + SourceLocation x2 = srLHS.getEnd(); + SourceLocation y1 = srRHS.getBegin(); + SourceLocation y2 = srRHS.getEnd(); + + bool contained = SM.isBeforeInTranslationUnit(x1, y2) && + SM.isBeforeInTranslationUnit(y1, x2); + + if (contained) + return false; + else + return SM.isBeforeInTranslationUnit(x2, y1); +} + +// Test to see if we can rewrite a given SourceRange. +// Note that R.getRangeSize will return -1 if SR is within +// a macro as well. This means that we can't re-write any +// text that occurs within a macro. +static bool canRewrite(Rewriter &R, SourceRange &SR) { + return SR.isValid() && (R.getRangeSize(SR) != -1); +} + +void rewrite(ParmVarDecl *PV, Rewriter &R, std::string sRewrite) { + // First, find all the declarations of the containing function. + DeclContext *DF = PV->getParentFunctionOrMethod(); + assert(DF != nullptr && "no parent function or method for decl"); + FunctionDecl *FD = cast(DF); + + // For each function, determine which parameter in the declaration + // matches PV, then, get the type location of that parameter + // declaration and re-write. + + // This is kind of hacky, maybe we should record the index of the + // parameter when we find it, instead of re-discovering it here. 
+ int parmIndex = -1; + int c = 0; + for (const auto &I : FD->parameters()) { + if (I == PV) { + parmIndex = c; + break; + } + c++; + } + assert(parmIndex >= 0); + + for (FunctionDecl *toRewrite = FD; toRewrite != NULL; + toRewrite = toRewrite->getPreviousDecl()) { + int U = toRewrite->getNumParams(); + if (parmIndex < U) { + // TODO these declarations could get us into deeper + // header files. + ParmVarDecl *Rewrite = toRewrite->getParamDecl(parmIndex); + assert(Rewrite != NULL); + SourceRange TR = Rewrite->getSourceRange(); + + if (canRewrite(R, TR)) + R.ReplaceText(TR, sRewrite); + } + } +} + +void rewrite( VarDecl *VD, + Rewriter &R, + std::string sRewrite, + Stmt *WhereStmt, + RSet &skip, + const DAndReplace &N, + RSet &toRewrite, + ASTContext &A) +{ + DeclStmt *Where = dyn_cast_or_null(WhereStmt); + + if (Where != NULL) { + if (Verbose) { + errs() << "VarDecl at:\n"; + Where->dump(); + } + SourceRange TR = VD->getSourceRange(); + + // Is there an initializer? If there is, change TR so that it points + // to the START of the SourceRange of the initializer text, and drop + // an '=' token into sRewrite. + if (VD->hasInit()) { + SourceLocation eqLoc = VD->getInitializerStartLoc(); + TR.setEnd(eqLoc); + sRewrite = sRewrite + " = "; + } + + // Is it a variable type? This is the easy case, we can re-write it + // locally, at the site of the declaration. + if (Where->isSingleDecl()) { + if (canRewrite(R, TR)) { + R.ReplaceText(TR, sRewrite); + } else { + // This can happen if SR is within a macro. If that is the case, + // maybe there is still something we can do because Decl refers + // to a non-macro line. 
+ + SourceRange possible(R.getSourceMgr().getExpansionLoc(TR.getBegin()), + VD->getLocation()); + + if (canRewrite(R, possible)) { + R.ReplaceText(possible, sRewrite); + std::string newStr = " " + VD->getName().str(); + R.InsertTextAfter(VD->getLocation(), newStr); + } else { + if (Verbose) { + errs() << "Still don't know how to re-write VarDecl\n"; + VD->dump(); + errs() << "at\n"; + Where->dump(); + errs() << "with " << sRewrite << "\n"; + } + } + } + } else if (!(Where->isSingleDecl()) && skip.find(N) == skip.end()) { + // Hack time! + // Sometimes, like in the case of a decl on a single line, we'll need to + // do multiple NewTyps at once. In that case, in the inner loop, we'll + // re-scan and find all of the NewTyps related to that line and do + // everything at once. That means sometimes we'll get NewTyps that + // we don't want to process twice. We'll skip them here. + + // Step 1: get the re-written types. + RSet rewritesForThisDecl(DComp(R.getSourceMgr())); + auto I = toRewrite.find(N); + while (I != toRewrite.end()) { + DAndReplace tmp = *I; + if (tmp.Statement == WhereStmt) + rewritesForThisDecl.insert(tmp); + ++I; + } + + // Step 2: remove the original line from the program. + SourceRange DR = Where->getSourceRange(); + R.RemoveText(DR); + + // Step 3: for each decl in the original, build up a new string + // and if the original decl was re-written, write that + // out instead (WITH the initializer). 
+ std::string newMultiLineDeclS = ""; + raw_string_ostream newMLDecl(newMultiLineDeclS); + for (const auto &DL : Where->decls()) { + DAndReplace N; + bool found = false; + VarDecl *VDL = dyn_cast(DL); + assert(VDL != NULL); + + for (const auto &NLT : rewritesForThisDecl) + if (NLT.Declaration == DL) { + N = NLT; + found = true; + break; + } + + if (found) { + newMLDecl << N.Replacement; + if (Expr *E = VDL->getInit()) { + newMLDecl << " = "; + E->printPretty(newMLDecl, nullptr, A.getPrintingPolicy()); + } + newMLDecl << ";\n"; + } + else { + DL->print(newMLDecl); + newMLDecl << ";\n"; + } + } + + // Step 4: Write out the string built up in step 3. + R.InsertTextAfter(DR.getEnd(), newMLDecl.str()); + + // Step 5: Be sure and skip all of the NewTyps that we dealt with + // during this time of hacking, by adding them to the + // skip set. + + for (const auto &TN : rewritesForThisDecl) + skip.insert(TN); + } else { + if (Verbose) { + errs() << "Don't know how to re-write VarDecl\n"; + VD->dump(); + errs() << "at\n"; + Where->dump(); + errs() << "with " << N.Replacement << "\n"; + } + } + } else { + if (Verbose) { + errs() << "Don't know where to rewrite a VarDecl! "; + VD->dump(); + errs() << "\n"; + } + } +} + +void rewrite( Rewriter &R, + RSet &toRewrite, + RSet &skip, + SourceManager &S, + ASTContext &A, + std::set &Files) +{ + for (const auto &N : toRewrite) { + Decl *D = N.Declaration; + DeclStmt *Where = dyn_cast_or_null(N.Statement); + assert(D != nullptr); + + if (Verbose) { + errs() << "Replacing type of decl:\n"; + D->dump(); + errs() << "with " << N.Replacement << "\n"; + } + + // Get a FullSourceLoc for the start location and add it to the + // list of file ID's we've touched. + SourceRange tTR = D->getSourceRange(); + FullSourceLoc tFSL(tTR.getBegin(), S); + Files.insert(tFSL.getFileID()); + + // Is it a parameter type? 
+ if (ParmVarDecl *PV = dyn_cast(D)) { + assert(Where == NULL); + rewrite(PV, R, N.Replacement); + } else if (VarDecl *VD = dyn_cast(D)) { + rewrite(VD, R, N.Replacement, Where, skip, N, toRewrite, A); + } else if (FunctionDecl *UD = dyn_cast(D)) { + // TODO: If the return type is a fully-specified function pointer, + // then clang will give back an invalid source range for the + // return type source range. For now, check that the source + // range is valid. + // Additionally, a source range can be (mis) identified as + // spanning multiple files. We don't know how to re-write that, + // so don't. + + if (N.fullDecl) { + SourceRange SR = UD->getSourceRange(); + SR.setEnd(getFunctionDeclarationEnd(UD, S)); + + if (canRewrite(R, SR)) + R.ReplaceText(SR, N.Replacement); + } else { + SourceRange SR = UD->getReturnTypeSourceRange(); + if (canRewrite(R, SR)) + R.ReplaceText(SR, N.Replacement); + } + } else if (FieldDecl *FD = dyn_cast(D)) { + SourceRange SR = FD->getSourceRange(); + std::string sRewrite = N.Replacement; + + if (canRewrite(R, SR)) + R.ReplaceText(SR, sRewrite); + } + } +} + + +// For a given function name, what are the argument positions for that function +// that we would want to treat specially and insert a cast into? +std::set CastPlacementVisitor::getParamsForExtern(std::string E) { + return StringSwitch>(E) + .Case("free", {0}) + .Default(std::set()); +} + +// Checks the bindings in the environment for all of the constraints +// associated with C and returns true if any of those constraints +// are WILD. 
+bool CastPlacementVisitor::anyTop(std::set C) { + bool anyTopFound = false; + Constraints &CS = Info.getConstraints(); + Constraints::EnvironmentMap &env = CS.getVariables(); + for (ConstraintVariable *c : C) { + if (PointerVariableConstraint *pvc = dyn_cast(c)) { + for (uint32_t v : pvc->getCvars()) { + ConstAtom *CK = env[CS.getVar(v)]; + if (CK->getKind() == Atom::A_Wild) { + anyTopFound = true; + } + } + } + } + return anyTopFound; +} + +std::string CastPlacementVisitor::getExistingIType(ConstraintVariable *decl, + ConstraintVariable *defn, + FunctionDecl *funcDecl) { + std::string ret = ""; + ConstraintVariable *target = decl; + if(funcDecl == nullptr) { + target = defn; + } + if (PVConstraint *PVC = dyn_cast(target)) { + if (PVC->getItypePresent()) { + ret = " : " + PVC->getItype(); + } + } + return ret; +} + +// This function checks how to re-write a function declaration. +bool CastPlacementVisitor::VisitFunctionDecl(FunctionDecl *FD) { + + // Get all of the constraint variables for the function. + // Check and see if we have a definition in scope. If we do, then: + // For the return value and each of the parameters, do the following: + // 1. Get a constraint variable representing the definition (def) and the + // declaration (dec). + // 2. Check if def < dec, dec < def, or dec = def. + // 3. Only if def < dec, we insert a bounds-safe interface. + // If we don't have a definition in scope, we can assert that all of + // the constraint variables are equal. + // Finally, we need to note that we've visited this particular function, and + // that we shouldn't make one of these visits again. + + auto funcName = FD->getNameAsString(); + + // Make sure we haven't visited this function name before, and that we + // only visit it once. + if (VisitedSet.find(funcName) != VisitedSet.end()) + return true; + else + VisitedSet.insert(funcName); + + // Do we have a definition for this declaration? 
+ FunctionDecl *Definition = getDefinition(FD); + FunctionDecl *Declaration = getDeclaration(FD); + + if(Definition == nullptr) + return true; + + FVConstraint *cDefn = dyn_cast( + getHighest(Info.getVariableOnDemand(Definition, Context, true), Info)); + + FVConstraint *cDecl = nullptr; + // Get constraint variables for the declaration and the definition. + // Those constraints should be function constraints. + if(Declaration == nullptr) { + // if there is no declaration? + // get the on demand function variable constraint. + cDecl = dyn_cast( + getHighest(Info.getOnDemandFuncDeclarationConstraint(Definition, Context), Info)); + } else { + cDecl = dyn_cast( + getHighest(Info.getVariableOnDemand(Declaration, Context, false), Info)); + } + + assert(cDecl != nullptr); + assert(cDefn != nullptr); + + if (cDecl->numParams() == cDefn->numParams()) { + // Track whether we did any work and need to make a substitution or not. + bool didAny = cDecl->numParams() > 0; + std::string s = ""; + std::vector parmStrs; + // Compare parameters. + for (unsigned i = 0; i < cDecl->numParams(); ++i) { + auto Decl = getHighest(cDecl->getParamVar(i), Info); + auto Defn = getHighest(cDefn->getParamVar(i), Info); + assert(Decl); + assert(Defn); + + // If this holds, then we want to insert a bounds safe interface. + bool anyConstrained = Defn->anyChanges(Info.getConstraints().getVariables()); + // definition is more precise than declaration. 
+ // Section 5.3: + // https://www.microsoft.com/en-us/research/uploads/prod/2019/05/checkedc-post2019.pdf + if(anyConstrained && Defn->isLt(*Decl, Info)) { + std::string scratch = ""; + raw_string_ostream declText(scratch); + Definition->getParamDecl(i)->print(declText); + // if definition is more precise + // than declaration emit an itype + std::string ctype = Defn->mkString(Info.getConstraints().getVariables(), false, true); + std::string bi = declText.str() + " : itype("+ctype+") "; + parmStrs.push_back(bi); + } else if (anyConstrained) { + // both the declaration and definition are same + // and they are safer than what was originally declared. + // here we should emit a checked type! + std::string v = Decl->mkString(Info.getConstraints().getVariables()); + + // if there is no declaration? + // check the itype in definition + v = v + getExistingIType(Decl, Defn, Declaration); + parmStrs.push_back(v); + } else { + std::string scratch = ""; + raw_string_ostream declText(scratch); + Definition->getParamDecl(i)->print(declText); + parmStrs.push_back(declText.str()); + } + } + + // Compare returns. + auto Decl = getHighest(cDecl->getReturnVars(), Info); + auto Defn = getHighest(cDefn->getReturnVars(), Info); + + // Insert a bounds safe interface for the return. + std::string returnVar = ""; + std::string endStuff = ""; + bool returnHandled = false; + bool anyConstrained = Defn->anyChanges(Info.getConstraints().getVariables()); + if(anyConstrained) { + returnHandled = true; + std::string ctype = ""; + // definition is more precise than declaration. + // Section 5.3: + // https://www.microsoft.com/en-us/research/uploads/prod/2019/05/checkedc-post2019.pdf + if(Defn->isLt(*Decl, Info)) { + ctype = Defn->mkString(Info.getConstraints().getVariables(), true, true); + returnVar = Defn->getOriginalTy(); + endStuff = " : itype("+ctype+") "; + didAny = true; + } else { + // this means we were able to infer that return type + // is a checked type. 
+ // however, the function returns a less precise type, whereas + // all the uses of the function converts the return value + // into a more precise type. + // do not change the type + returnVar = Decl->mkString(Info.getConstraints().getVariables()); + endStuff = getExistingIType(Decl, Defn, Declaration); + if(!endStuff.empty()) { + didAny = true; + } + } + } + + if(!returnHandled) { + // If we used to implement a bounds-safe interface, continue to do that. + returnVar = Decl->mkString(Info.getConstraints().getVariables()); + + endStuff = getExistingIType(Decl, Defn, Declaration); + if(!endStuff.empty()) { + didAny = true; + } + } + + s = getStorageQualifierString(Definition) + returnVar + cDecl->getName() + "("; + if (parmStrs.size() > 0) { + std::ostringstream ss; + + std::copy(parmStrs.begin(), parmStrs.end() - 1, + std::ostream_iterator(ss, ", ")); + ss << parmStrs.back(); + + s = s + ss.str() + ")"; + } else { + s = s + "void)"; + } + + if (endStuff.size() > 0) + s = s + endStuff; + + if (didAny) + // Do all of the declarations. + for (const auto &RD : Definition->redecls()) + rewriteThese.insert(DAndReplace(RD, s, true)); + } + + return true; +} + +bool CastPlacementVisitor::VisitCallExpr(CallExpr *E) { + return true; +} + +// check if the function is handled by this visitor +bool CastPlacementVisitor::isFunctionVisited(std::string funcName) { + return VisitedSet.find(funcName) != VisitedSet.end(); +} + +static bool +canWrite(std::string filePath, std::set &iof, std::string b) { + // Was this file explicitly provided on the command line? + if (iof.count(filePath) > 0) + return true; + // Is this file contained within the base directory? 
+ + sys::path::const_iterator baseIt = sys::path::begin(b); + sys::path::const_iterator pathIt = sys::path::begin(filePath); + sys::path::const_iterator baseEnd = sys::path::end(b); + sys::path::const_iterator pathEnd = sys::path::end(filePath); + + // An empty base or file path has no first component. Advancing a begin + // iterator that already equals end is invalid, and an empty path cannot + // be contained in the base directory, so refuse to write in that case. + if (baseIt == baseEnd || pathIt == pathEnd) + return false; + + std::string baseSoFar = (*baseIt).str() + sys::path::get_separator().str(); + std::string pathSoFar = (*pathIt).str() + sys::path::get_separator().str(); + ++baseIt; + ++pathIt; + + // Walk both paths component by component for as long as the prefixes + // built so far refer to equivalent file system objects. + while ((baseIt != baseEnd) && (pathIt != pathEnd)) { + sys::fs::file_status baseStatus; + sys::fs::file_status pathStatus; + std::string s1 = (*baseIt).str(); + std::string s2 = (*pathIt).str(); + + if (std::error_code ec = sys::fs::status(baseSoFar, baseStatus)) + return false; + + if (std::error_code ec = sys::fs::status(pathSoFar, pathStatus)) + return false; + + if (!sys::fs::equivalent(baseStatus, pathStatus)) + break; + + if (s1 != sys::path::get_separator().str()) + baseSoFar += (s1 + sys::path::get_separator().str()); + if (s2 != sys::path::get_separator().str()) + pathSoFar += (s2 + sys::path::get_separator().str()); + + ++baseIt; + ++pathIt; + } + + // The file is writable only if every component of the base directory was + // consumed and the accumulated prefixes are textually identical. + if (baseIt == baseEnd && baseSoFar == pathSoFar) + return true; + else + return false; +} + +static void emit(Rewriter &R, ASTContext &C, std::set &Files, + std::set &InOutFiles, std::string &BaseDir, + std::string &OutputPostfix) { + + // Check if we are outputting to stdout or not, if we are, just output the + // main file ID to stdout. 
+ if (Verbose) + errs() << "Writing files out\n"; + + SmallString<254> baseAbs(BaseDir); + std::string baseDirFP; + if(getAbsoluteFilePath(BaseDir, baseDirFP)) { + baseAbs = baseDirFP; + } + sys::path::remove_filename(baseAbs); + std::string base = baseAbs.str(); + + SourceManager &SM = C.getSourceManager(); + if (OutputPostfix == "-") { + if (const RewriteBuffer *B = R.getRewriteBufferFor(SM.getMainFileID())) + B->write(outs()); + } else + for (const auto &F : Files) + if (const RewriteBuffer *B = R.getRewriteBufferFor(F)) + if (const FileEntry *FE = SM.getFileEntryForID(F)) { + assert(FE->isValid()); + + // Produce a path/file name for the rewritten source file. + // That path should be the same as the old one, with a + // suffix added between the file name and the extension. + // For example \foo\bar\a.c should become \foo\bar\a.checked.c + // if the OutputPostfix parameter is "checked" . + + std::string pfName = sys::path::filename(FE->getName()).str(); + std::string dirName = sys::path::parent_path(FE->getName()).str(); + std::string fileName = sys::path::remove_leading_dotslash(pfName).str(); + std::string ext = sys::path::extension(fileName).str(); + std::string stem = sys::path::stem(fileName).str(); + std::string nFileName = stem + "." + OutputPostfix + ext; + std::string nFile = nFileName; + if (dirName.size() > 0) + nFile = dirName + sys::path::get_separator().str() + nFileName; + + // Write this file out if it was specified as a file on the command + // line. + std::string feAbsS = ""; + if(getAbsoluteFilePath(FE->getName(), feAbsS)) { + feAbsS = sys::path::remove_leading_dotslash(feAbsS); + } + + if(canWrite(feAbsS, InOutFiles, base)) { + std::error_code EC; + raw_fd_ostream out(nFile, EC, sys::fs::F_None); + + if (!EC) { + if (Verbose) + outs() << "writing out " << nFile << "\n"; + B->write(out); + } + else + errs() << "could not open file " << nFile << "\n"; + // This is awkward. What to do? 
Since we're iterating, + // we could have created other files successfully. Do we go back + // and erase them? Is that surprising? For now, let's just keep + // going. + } + } +} + +// This is a visitor that tries to find all the variables +// inferred as arrayed by the checked-c-convert +class DeclArrayVisitor : public clang::RecursiveASTVisitor +{ +public: + explicit DeclArrayVisitor(ASTContext *_C, Rewriter& _R, ProgramInfo& _I) + : Context(_C), Writer(_R), Info(_I) + { + } + + bool VisitDecl(Decl* D) + { + // check if this is a variable declaration. + VarDecl* VD = dyn_cast_or_null(D); + if (!VD) + return true; + + // ProgramInfo.getVariable() can find variables in a function + // context or not. I'm not clear of the difference yet, so we + // just run our analysis on both. + + std::set a; + // check if the function body exists before + // fetching inbody variable. + if(hasFunctionBody(D)) { + a = Info.getVariable(D, Context, true); + } + + std::set b = Info.getVariable(D, Context, false); + std::set CV; + std::set_union(a.begin(), a.end(), + b.begin(), b.end(), + std::inserter(CV, CV.begin())); + + bool foundArr = false; + for (const auto& C: CV) { + foundArr |= C->hasArr(Info.getConstraints().getVariables()); + } + + if (foundArr) { + // Add the identified array declarations here. + Info.insertPotentialArrayVar(D); + // Find the end of the line that contains this statement. + FullSourceLoc sl(D->getEndLoc(), Context->getSourceManager()); + const char* buf = sl.getCharacterData(); + const char* ptr = strchr(buf, '\n'); + + // Deal with Windows/DOS "\r\n" line endings. 
+ if (ptr && ptr > buf && ptr[-1] == '\r') + --ptr; + + if (ptr) { + SourceLocation eol = D->getEndLoc().getLocWithOffset(ptr-buf); + sl = FullSourceLoc(eol, Context->getSourceManager()); + Writer.InsertTextBefore(eol, "*/"); + Writer.InsertTextBefore(eol, VD->getName()); + Writer.InsertTextBefore(eol, "/*ARR:"); + } + } + return true; + } + +private: + ASTContext* Context; + Rewriter& Writer; + ProgramInfo& Info; +}; + +void RewriteConsumer::HandleTranslationUnit(ASTContext &Context) { + Info.enterCompilationUnit(Context); + + Rewriter R(Context.getSourceManager(), Context.getLangOpts()); + std::set Files; + + std::set v; + RSet rewriteThese(DComp(Context.getSourceManager())); + // Unification is done, so visit and see if we need to place any casts + // in the program. + CastPlacementVisitor CPV = CastPlacementVisitor(&Context, Info, R, rewriteThese, Files, v); + for (const auto &D : Context.getTranslationUnitDecl()->decls()) + CPV.TraverseDecl(D); + + // Build a map of all of the PersistentSourceLoc's back to some kind of + // Stmt, Decl, or Type. + VariableMap &VarMap = Info.getVarMap(); + std::set keys; + + for (const auto &I : VarMap) + keys.insert(I.first); + SourceToDeclMapType PSLMap; + VariableDecltoStmtMap VDLToStmtMap; + + RSet skip(DComp(Context.getSourceManager())); + MappingVisitor V(keys, Context); + TranslationUnitDecl *TUD = Context.getTranslationUnitDecl(); + for (const auto &D : TUD->decls()) + V.TraverseDecl(D); + + std::tie(PSLMap, VDLToStmtMap) = V.getResults(); + + for (const auto &V : Info.getVarMap()) { + PersistentSourceLoc PLoc = V.first; + std::set Vars = V.second; + // I don't think it's important that Vars have any especial size, but + // at one point I did so I'm keeping this comment here. It's possible + // that what we really need to do is to ensure that when we work with + // either PV or FV below, that they are the LUB of what is in Vars. 
+ // assert(Vars.size() > 0 && Vars.size() <= 2); + + // PLoc specifies the location of the variable whose type it is to + // re-write, but not where the actual type storage is. To get that, we + // need to turn PLoc into a Decl and then get the SourceRange for the + // type of the Decl. Note that what we need to get is the ExpansionLoc + // of the type specifier, since we want where the text is printed before + // the variable name, not the typedef or #define that creates the + // name of the type. + + Stmt *S = nullptr; + Decl *D = nullptr; + DeclStmt *DS = nullptr; + clang::Type *T = nullptr; + + std::tie(S, D, T) = PSLMap[PLoc]; + + if (D) { + // We might have one Decl for multiple Vars, however, one will be a + // PointerVar so we'll use that. + VariableDecltoStmtMap::iterator K = VDLToStmtMap.find(D); + if (K != VDLToStmtMap.end()) + DS = K->second; + + PVConstraint *PV = nullptr; + FVConstraint *FV = nullptr; + for (const auto &V : Vars) { + if (PVConstraint *T = dyn_cast(V)) + PV = T; + else if (FVConstraint *T = dyn_cast(V)) + FV = T; + } + + if (PV && PV->anyChanges(Info.getConstraints().getVariables())) { + // Rewrite a declaration. + std::string newTy = getStorageQualifierString(D) + PV->mkString(Info.getConstraints().getVariables()); + rewriteThese.insert(DAndReplace(D, DS, newTy)); + } else if (FV && FV->anyChanges(Info.getConstraints().getVariables()) && + !CPV.isFunctionVisited(FV->getName())) { + // Rewrite a function variables return value. + // only if this function is NOT handled by the + // cast placement visitor + std::set V = FV->getReturnVars(); + if (V.size() > 0) { + std::string newTy = + (*V.begin())->mkString(Info.getConstraints().getVariables()); + rewriteThese.insert(DAndReplace(D, DS, newTy)); + } + } + } + } + + rewrite(R, rewriteThese, skip, Context.getSourceManager(), Context, Files); + + // Add ARR marker to array pointer declarations. + // XXX - Must happen after the rewrite to add Checked C types (for now). 
+ DeclArrayVisitor declVisitor(&Context, R, Info); + declVisitor.TraverseAST(Context); + + // Output files. + emit(R, Context, Files, InOutFiles, BaseDir, OutputPostfix); + + HandleArrayVariablesBoundsDetection(&Context, Info); + Info.printArrayVarsAndSizes(errs()); + + Info.exitCompilationUnit(); + return; +} \ No newline at end of file diff --git a/tools/checked-c-convert/RewriteUtils.h b/tools/checked-c-convert/RewriteUtils.h new file mode 100644 index 000000000000..b8cf94bc3bbb --- /dev/null +++ b/tools/checked-c-convert/RewriteUtils.h @@ -0,0 +1,157 @@ +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This class contains functions and classes that deal with +// rewriting the source file after converting to CheckedC format. +//===----------------------------------------------------------------------===// +#ifndef _REWRITEUTILS_H +#define _REWRITEUTILS_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/ASTContext.h" +#include "clang/Rewrite/Core/Rewriter.h" + +#include "ProgramInfo.h" + +using namespace clang; + +// A Declaration, optional DeclStmt, and a replacement string +// for that Declaration. +struct DAndReplace +{ + Decl *Declaration; // The declaration to replace. + Stmt *Statement; // The Stmt, if it exists. + std::string Replacement; // The string to replace the declaration with. + bool fullDecl; // If the declaration is a function, true if + // replace the entire declaration or just the + // return declaration. 
+ DAndReplace() : Declaration(nullptr), + Statement(nullptr), + Replacement(""), + fullDecl(false) { } + + DAndReplace(Decl *D, std::string R) : Declaration(D), + Statement(nullptr), + Replacement(R), + fullDecl(false) {} + + DAndReplace(Decl *D, std::string R, bool F) : Declaration(D), + Statement(nullptr), + Replacement(R), + fullDecl(F) {} + + + DAndReplace(Decl *D, Stmt *S, std::string R) : Declaration(D), + Statement(S), + Replacement(R), + fullDecl(false) { } +}; + +// Compare two DAndReplace values. The algorithm for comparing them relates +// their source positions. If two DAndReplace values refer to overlapping +// source positions, then they are the same. Otherwise, they are ordered +// by their placement in the input file. +// +// There are two special cases: Function declarations, and DeclStmts. In turn: +// +// - Function declarations might either be a DAndReplace describing the entire +// declaration, i.e. replacing "int *foo(void)" +// with "int *foo(void) : itype(_Ptr<int>)". Or, it might describe just +// replacing only the return type, i.e. "_Ptr<int> foo(void)". These two +// cases are distinguished by the 'fullDecl' field of the DAndReplace type +// and the comparison function first checks if the operands are +// FunctionDecls and if the 'fullDecl' field is set. +// - A DeclStmt of multiple Decls, i.e. 'int *a = 0, *b = 0'. In this case, +// we want the DAndReplace to refer only to the specific sub-region that +// would be replaced, i.e. '*a = 0' and '*b = 0'. To do that, we traverse +// the Decls contained in a DeclStmt and figure out what the appropriate +// source locations are to describe the positions of the independent +// declarations. 
+struct DComp +{ + SourceManager &SM; + DComp(SourceManager &S) : SM(S) { } + + SourceRange getWholeSR(SourceRange orig, DAndReplace dr) const; + + bool operator()(const DAndReplace lhs, const DAndReplace rhs) const; +}; + +typedef std::set RSet; + +void rewrite(ParmVarDecl *PV, Rewriter &R, std::string sRewrite); + +void rewrite( VarDecl *VD, + Rewriter &R, + std::string sRewrite, + Stmt *WhereStmt, + RSet &skip, + const DAndReplace &N, + RSet &toRewrite, + ASTContext &A); + +// Visit each Decl in toRewrite and apply the appropriate pointer type +// to that Decl. The state of the rewrite is contained within R, which +// is both input and output. R is initialized to point to the 'main' +// source file for this transformation. toRewrite contains the set of +// declarations to rewrite. S is passed for source-level information +// about the current compilation unit. skip indicates some rewrites that +// we should skip because we already applied them, for example, as part +// of turning a single line declaration into a multi-line declaration. +void rewrite( Rewriter &R, + RSet &toRewrite, + RSet &skip, + SourceManager &S, + ASTContext &A, + std::set &Files); + + +// Class for visiting declarations during re-writing to find locations to +// insert casts. Right now, it looks specifically for 'free'. +class CastPlacementVisitor : public RecursiveASTVisitor { +public: + explicit CastPlacementVisitor(ASTContext *C, ProgramInfo &I, Rewriter &R, + RSet &DR, std::set &Files, std::set &V) + : Context(C), R(R), Info(I), rewriteThese(DR), Files(Files), VisitedSet(V) {} + + bool VisitCallExpr(CallExpr *); + bool VisitFunctionDecl(FunctionDecl *); + bool isFunctionVisited(std::string funcName); +private: + std::set getParamsForExtern(std::string); + // Get the existing itype string from the constraint variables. + // It takes the string from the declaration's constraint variable; + // however, if there is no declaration of the function (funcDecl is null), + // it takes it from the definition's constraint variable instead. 
+ std::string getExistingIType(ConstraintVariable *decl, ConstraintVariable *defn, + FunctionDecl *funcDecl); + bool anyTop(std::set); + ASTContext *Context; + Rewriter &R; + ProgramInfo &Info; + RSet &rewriteThese; + std::set &Files; + std::set &VisitedSet; +}; + + +class RewriteConsumer : public ASTConsumer { +public: + explicit RewriteConsumer(ProgramInfo &I, + std::set &F, ASTContext *Context, std::string &OPostfix, std::string &bDir) : + Info(I), InOutFiles(F), OutputPostfix(OPostfix), BaseDir(bDir) {} + + virtual void HandleTranslationUnit(ASTContext &Context); + +private: + ProgramInfo &Info; + std::set &InOutFiles; + std::string &OutputPostfix; + std::string &BaseDir; +}; + +#endif //_REWRITEUTILS_H diff --git a/tools/checked-c-convert/Utils.cpp b/tools/checked-c-convert/Utils.cpp new file mode 100644 index 000000000000..e78f39b0026c --- /dev/null +++ b/tools/checked-c-convert/Utils.cpp @@ -0,0 +1,174 @@ +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Implementation of Utils methods. +//===----------------------------------------------------------------------===// +#include "llvm/Support/Path.h" + +#include "Utils.h" +#include "ConstraintVariables.h" + +using namespace clang; + +const clang::Type *getNextTy(const clang::Type *Ty) { + if(Ty->isPointerType()) { + // TODO: how to keep the qualifiers around, and what qualifiers do + // we want to keep? 
+ QualType qtmp = Ty->getLocallyUnqualifiedSingleStepDesugaredType(); + return qtmp.getTypePtr()->getPointeeType().getTypePtr(); + } + else + return Ty; +} + +ConstraintVariable *getHighest(std::set Vs, ProgramInfo &Info) { + if (Vs.size() == 0) + return nullptr; + + ConstraintVariable *V = nullptr; + + for (auto &P : Vs) { + if (V) { + if (V->isLt(*P, Info)) + V = P; + } else { + V = P; + } + } + + return V; +} + +// Walk the list of declarations and find a declaration that is NOT +// a definition and does NOT have a body. +FunctionDecl *getDeclaration(FunctionDecl *FD) { + // optimization + if(!FD->isThisDeclarationADefinition()) { + return FD; + } + for (const auto &D : FD->redecls()) + if (FunctionDecl *tFD = dyn_cast(D)) + if (!tFD->isThisDeclarationADefinition()) + return tFD; + + return nullptr; +} + +// Walk the list of declarations and find a declaration accompanied by +// a definition and a function body. +FunctionDecl *getDefinition(FunctionDecl *FD) { + // optimization + if(FD->isThisDeclarationADefinition() && FD->hasBody()) { + return FD; + } + for (const auto &D : FD->redecls()) + if (FunctionDecl *tFD = dyn_cast(D)) + if (tFD->isThisDeclarationADefinition() && tFD->hasBody()) + return tFD; + + return nullptr; +} + +SourceLocation +getFunctionDeclarationEnd(FunctionDecl *FD, SourceManager &S) +{ + const FunctionDecl *oFD = nullptr; + + if (FD->hasBody(oFD) && oFD == FD) { + // Replace everything up to the beginning of the body. 
+ const Stmt *Body = FD->getBody(oFD); + + int Offset = 0; + const char *Buf = S.getCharacterData(Body->getSourceRange().getBegin()); + + // Scan backwards from the start of the body to the closing parenthesis + // of the parameter list. + // NOTE(review): the scan has no lower bound; it assumes a ')' precedes + // the body within the same buffer -- confirm for macro-expanded bodies. + while (*Buf != ')') { + Buf--; + Offset--; + } + + return Body->getSourceRange().getBegin().getLocWithOffset(Offset); + } else { + return FD->getSourceRange().getEnd(); + } +} + +clang::CheckedPointerKind getCheckedPointerKind(InteropTypeExpr *itypeExpr) { + TypeSourceInfo * interopTypeInfo = itypeExpr->getTypeInfoAsWritten(); + const clang::Type *innerType = interopTypeInfo->getType().getTypePtr(); + if(innerType->isCheckedPointerNtArrayType()) { + return CheckedPointerKind ::NtArray; + } + if(innerType->isCheckedPointerArrayType()) { + return CheckedPointerKind ::Array; + } + if(innerType->isCheckedPointerType()) { + return CheckedPointerKind ::Ptr; + } + return CheckedPointerKind::Unchecked; +} + +// Check whether a function body exists for the provided declaration. +// For a parameter this is true only when getDefinition finds a definition +// of the parent function; every other declaration is treated as living +// inside a body, so true is returned for it. +bool hasFunctionBody(clang::Decl *param) { + // is this a parameter? + if(ParmVarDecl *PD = dyn_cast(param)) { + if(DeclContext *DC = PD->getParentFunctionOrMethod()) { + FunctionDecl *FD = dyn_cast(DC); + // The parent context is not necessarily a FunctionDecl, in which + // case the cast yields null. getDefinition dereferences its + // argument, so it must not be called with a null pointer. + if (FD != nullptr && getDefinition(FD) != nullptr) { + return true; + } + } + return false; + } + // else this should be within body and + // the function body should exist. + return true; +} + +static std::string storageClassToString(StorageClass SC) { + switch(SC) { + case StorageClass::SC_Static: return "static "; + case StorageClass::SC_Extern: return "extern "; + case StorageClass::SC_Register: return "register "; + // no default class, we do not care. + } + return ""; +} + +// this method gets the storage qualifier for the +// provided declaration i.e., static, extern, etc. 
+std::string getStorageQualifierString(Decl *D) { + if(FunctionDecl *FD = dyn_cast(D)) { + return storageClassToString(FD->getStorageClass()); + } + if(VarDecl *VD = dyn_cast(D)) { + return storageClassToString(VD->getStorageClass()); + } + return ""; +} + +bool isNULLExpression(clang::Expr *expr, ASTContext &Ctx) { + // this checks if the expression is NULL. Specifically, (void*)0 + if(CStyleCastExpr *CS = dyn_cast(expr)) { + Expr *subExpr = CS->getSubExpr(); + + return subExpr->isIntegerConstantExpr(Ctx) && + subExpr->isNullPointerConstant(Ctx, Expr::NPC_ValueDependentIsNotNull); + } + return false; +} + +bool getAbsoluteFilePath(std::string fileName, std::string &absoluteFP) { + // get absolute path of the provided file + // returns true if successful else false + SmallString<255> abs_path(fileName); + std::error_code ec = llvm::sys::fs::make_absolute(abs_path); + if(!ec) { + absoluteFP = abs_path.str(); + return true; + } + return false; +} \ No newline at end of file diff --git a/tools/checked-c-convert/utils.h b/tools/checked-c-convert/Utils.h similarity index 64% rename from tools/checked-c-convert/utils.h rename to tools/checked-c-convert/Utils.h index c9b93841167c..2e27962bc341 100644 --- a/tools/checked-c-convert/utils.h +++ b/tools/checked-c-convert/Utils.h @@ -10,6 +10,7 @@ #define _UTILS_H #include #include "llvm/Support/CommandLine.h" + #include "PersistentSourceLoc.h" class ConstraintVariable; @@ -24,10 +25,27 @@ typedef std::map VariableDecltoStmtMap; extern llvm::cl::opt Verbose; extern llvm::cl::opt DumpIntermediate; +extern llvm::cl::opt handleVARARGS; +extern llvm::cl::opt mergeMultipleFuncDecls; +extern llvm::cl::opt enablePropThruIType; const clang::Type *getNextTy(const clang::Type *Ty); ConstraintVariable *getHighest(std::set Vs, ProgramInfo &Info); clang::FunctionDecl *getDeclaration(clang::FunctionDecl *FD); + +clang::FunctionDecl *getDefinition(clang::FunctionDecl *FD); + +clang::CheckedPointerKind 
getCheckedPointerKind(clang::InteropTypeExpr *itypeExpr); + +bool hasFunctionBody(clang::Decl *param); + +std::string getStorageQualifierString(clang::Decl *D); + +bool getAbsoluteFilePath(std::string fileName, std::string &absoluteFP); + +bool isNULLExpression(clang::Expr *expr, clang::ASTContext &Ctx); + +clang::SourceLocation getFunctionDeclarationEnd(clang::FunctionDecl *FD, clang::SourceManager &S); #endif diff --git a/tools/checked-c-convert/functests/README.md b/tools/checked-c-convert/functests/README.md new file mode 100644 index 000000000000..2fb06a6d528a --- /dev/null +++ b/tools/checked-c-convert/functests/README.md @@ -0,0 +1,44 @@ +Functionality Tester +--- +This folder contains scripts to test the functionality of `checked-c-convert` tool. +## Usage: +``` +python run_tests.py --help +usage: FuntionalityTester [-h] -p PROG_NAME + +Script that checks functionality of checked-c-convert tool + +optional arguments: + -h, --help show this help message and exit + -p PROG_NAME, --prog_name PROG_NAME + Program name to run. i.e., path to checked-c-convert +``` +### Example: +``` +python run_tests.py -p /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/cmake-build-debug/bin/checked-c-convert +[*] Got:11 tests. +[*] Running Tests. +[*] Testing:/Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/arr/basic_inter.c +[+] Test /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/arr/basic_inter.c Passed. +[*] Testing:/Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/arr/basic_inter_field.c +[+] Test /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/arr/basic_inter_field.c Passed. 
+[*] Testing:/Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/arr/basic_local.c +[+] Test /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/arr/basic_local.c Passed. +[*] Testing:/Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ntarr/basic_inter.c +[+] Test /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ntarr/basic_inter.c Passed. +[*] Testing:/Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ntarr/basic_field_local.c +[+] Test /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ntarr/basic_field_local.c Passed. +[*] Testing:/Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ntarr/basic_inter_field.c +[+] Test /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ntarr/basic_inter_field.c Passed. +[*] Testing:/Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ntarr/basic_local.c +[+] Test /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ntarr/basic_local.c Passed. +[*] Testing:/Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ptr/basic_inter.c +[+] Test /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ptr/basic_inter.c Passed. +[*] Testing:/Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ptr/basic_field_local.c +[+] Test /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ptr/basic_field_local.c Passed. 
+[*] Testing:/Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ptr/basic_inter_field.c +[+] Test /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ptr/basic_inter_field.c Passed. +[*] Testing:/Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ptr/basic_local.c +[+] Test /Users/machiry/Projects/checkedc/llvm-stuff/checkedc-llvm/tools/clang/tools/checked-c-convert/functests/ptr/basic_local.c Passed. +[+] ALL TESTS PASSED. +``` diff --git a/tools/checked-c-convert/functests/arr/basic_field_local.c b/tools/checked-c-convert/functests/arr/basic_field_local.c new file mode 100644 index 000000000000..a4d9ee9a6449 --- /dev/null +++ b/tools/checked-c-convert/functests/arr/basic_field_local.c @@ -0,0 +1,39 @@ +// This tests the propagation of constraints +// within the fields of structure. +// we will use b as an ARR +typedef struct { + int a; + int *b; +} foo; +// here we use b in a safe way as an array, hence +// it will be a array ptr +typedef struct { + float *b; +} foo2; +// here we use p in unsafe way +// and hence will not be a safe ptr +typedef struct { + float c; + int *p; + char d; +} foo3; +int main() { + foo obj; + // this will be ARR + int *bp; + float b; + foo2 obj2; + int hel; + + // this will make field b of foo an ARR + obj.b = bp; + bp = &hel; + bp[0] = 1; + + // this will make obj2.b an array. + obj2.b = &b; + obj2.b++; + + foo3 obj3; + obj3.p = 0xcafebabe; +} diff --git a/tools/checked-c-convert/functests/arr/basic_inter.c b/tools/checked-c-convert/functests/arr/basic_inter.c new file mode 100644 index 000000000000..f55533b89930 --- /dev/null +++ b/tools/checked-c-convert/functests/arr/basic_inter.c @@ -0,0 +1,53 @@ +// here we test the propagation of constraints +// between functions. + +// we test propagation with and without function +// declaration. 
+int funcdecl(int *ptr, int *iptr, int *arrwild); +int funcdecl(int *ptr, int *iptr, int *arrwild) { + if(ptr != 0) { + *ptr = 0; + } + arrwild++; +} + +// ptr is an arr ptr +// iptr will be itype +// wild will be a wild ptr. +int func(int *ptr, int *iptr, int *wild) { + if(ptr != 0) { + ptr[0] = 1; + } + wild = 0xdeadbeef; +} +int main() { + int a, b, c; + // this will be Ptr + int *ap; + // this will be WILD + int *bp; + // this will be _Ptr + int *cp; + // this will be _Ptr + int *ap1; + // this will be WILD + int *bp1; + // this will be _Ptr + int *cp1; + + + ap1 = &a; + ap = &a; + // we will make this pointer wild. + bp1 = bp = 0xcafeba; + cp = &c; + cp1 = &c; + // although, we are passing cp + // to a paramter that will be + // treated as WILD in func, cp + // is Ptr within main + func(ap, bp, cp); + funcdecl(ap1, bp1, cp1); + + +} diff --git a/tools/checked-c-convert/functests/arr/basic_inter.expected.c b/tools/checked-c-convert/functests/arr/basic_inter.expected.c new file mode 100644 index 000000000000..aa857f3e5741 --- /dev/null +++ b/tools/checked-c-convert/functests/arr/basic_inter.expected.c @@ -0,0 +1,53 @@ +// here we test the propagation of constraints +// between functions. + +// we test propagation with and without function +// declaration. +int funcdecl(_Ptr ptr, int *iptr : itype(_Ptr ) , int *arrwild);/*ARR:arrwild*/ +int funcdecl(_Ptr ptr, int *iptr : itype(_Ptr ) , int *arrwild) {/*ARR:arrwild*/ + if(ptr != 0) { + *ptr = 0; + } + arrwild++; +} + +// ptr is an arr ptr +// iptr will be itype +// wild will be a wild ptr. +int func(int *ptr, int *iptr : itype(_Ptr ) , int *wild) {/*ARR:ptr*/ + if(ptr != 0) { + ptr[0] = 1; + } + wild = 0xdeadbeef; +} +int main() { + int a, b, c; + // this will be Ptr + _Ptr ap; + // this will be WILD + int *bp; + // this will be _Ptr + _Ptr cp; + // this will be _Ptr + _Ptr ap1; + // this will be WILD + int *bp1; + // this will be _Ptr + _Ptr cp1; + + + ap1 = &a; + ap = &a; + // we will make this pointer wild. 
+ bp1 = bp = 0xcafeba; + cp = &c; + cp1 = &c; + // although, we are passing cp + // to a paramter that will be + // treated as WILD in func, cp + // is Ptr within main + func(ap, bp, cp); + funcdecl(ap1, bp1, cp1); + + +} diff --git a/tools/checked-c-convert/functests/arr/basic_inter_field.c b/tools/checked-c-convert/functests/arr/basic_inter_field.c new file mode 100644 index 000000000000..a9dfed89321f --- /dev/null +++ b/tools/checked-c-convert/functests/arr/basic_inter_field.c @@ -0,0 +1,25 @@ +// This tests the propagation of constraints +// within the fields of structure. +typedef struct { + // regular ptr + int *ptr; + // this will be array ptr + char *arrptr; +} foo; + +foo obj1; + +int* func(int *ptr, char *arrptr) { + obj1.ptr = ptr; + arrptr++; + obj1.arrptr = arrptr; + return ptr; +} + +int main() { + int a; + int *b; + char *wil; + wil = 0xdeadbeef; + b = func(&a, wil); +} diff --git a/tools/checked-c-convert/functests/arr/basic_inter_field.expected.c b/tools/checked-c-convert/functests/arr/basic_inter_field.expected.c new file mode 100644 index 000000000000..f79bc153b1b0 --- /dev/null +++ b/tools/checked-c-convert/functests/arr/basic_inter_field.expected.c @@ -0,0 +1,25 @@ +// This tests the propagation of constraints +// within the fields of structure. 
+typedef struct { + // regular ptr + _Ptr ptr; + // this will be array ptr + char *arrptr; +} foo; + +foo obj1; + +_Ptr func(_Ptr ptr, char *arrptr) {/*ARR:arrptr*/ + obj1.ptr = ptr; + arrptr++; + obj1.arrptr = arrptr; + return ptr; +} + +int main() { + int a; + _Ptr b; + char *wil; + wil = 0xdeadbeef; + b = func(&a, wil); +} diff --git a/tools/checked-c-convert/functests/arr/basic_local.c b/tools/checked-c-convert/functests/arr/basic_local.c new file mode 100644 index 000000000000..d3c4632fbd5b --- /dev/null +++ b/tools/checked-c-convert/functests/arr/basic_local.c @@ -0,0 +1,19 @@ +// basic test +// just create a regular arr pointer +// and access it +int main() { + // a has to identified + // as ARR + int *a; + // c also should be identified as + // _Ptr as we do not use it. + int *c; + // we will make this wild. + int *d; + int b; + a = &b; + // this will make a as ARR + a++; + // this should mark d as WILD. + d = 0xdeadbeef; +} diff --git a/tools/checked-c-convert/functests/arr/basic_local.expected.c b/tools/checked-c-convert/functests/arr/basic_local.expected.c new file mode 100644 index 000000000000..64a7fe6b406c --- /dev/null +++ b/tools/checked-c-convert/functests/arr/basic_local.expected.c @@ -0,0 +1,19 @@ +// basic test +// just create a regular arr pointer +// and access it +int main() { + // a has to identified + // as ARR + int *a;/*ARR:a*/ + // c also should be identified as + // _Ptr as we do not use it. + _Ptr c; + // we will make this wild. + int *d; + int b; + a = &b; + // this will make a as ARR + a++; + // this should mark d as WILD. + d = 0xdeadbeef; +} diff --git a/tools/checked-c-convert/functests/ntarr/basic_field_local.c b/tools/checked-c-convert/functests/ntarr/basic_field_local.c new file mode 100644 index 000000000000..37c2073abc86 --- /dev/null +++ b/tools/checked-c-convert/functests/ntarr/basic_field_local.c @@ -0,0 +1,40 @@ +#include +// This tests the propagation of constraints +// within the fields of structure. 
+// we will use b as an NTArr +typedef struct { + int a; + char *b; +} foo; +// here we use b in a safe way as an array, hence +// it will be a array ptr +typedef struct { + float *b; +} foo2; +// here we use p in unsafe way +// and hence will not be a safe ptr +typedef struct { + float c; + int *p; + char d; +} foo3; + +int main() { + foo obj; + char *bp; + float b; + foo2 obj2; + int hel; + + // this will make both bp and + // b of foo NtARR + obj.b = bp; + hel = strstr(bp, "hello"); + + // this will make obj2.b an array. + obj2.b = &b; + obj2.b++; + // this will make obj3.p a WILD + foo3 obj3; + obj3.p = 0xcafebabe; +} diff --git a/tools/checked-c-convert/functests/ntarr/basic_field_local.expected.c b/tools/checked-c-convert/functests/ntarr/basic_field_local.expected.c new file mode 100644 index 000000000000..86a3f871d784 --- /dev/null +++ b/tools/checked-c-convert/functests/ntarr/basic_field_local.expected.c @@ -0,0 +1,40 @@ +#include +// This tests the propagation of constraints +// within the fields of structure. +// we will use b as an NTArr +typedef struct { + int a; + _Nt_array_ptrchar> b; +} foo; +// here we use b in a safe way as an array, hence +// it will be a array ptr +typedef struct { + float *b; +} foo2; +// here we use p in unsafe way +// and hence will not be a safe ptr +typedef struct { + float c; + int *p; + char d; +} foo3; + +int main() { + foo obj; + _Nt_array_ptrchar> bp; + float b; + foo2 obj2; + int hel; + + // this will make both bp and + // b of foo NtARR + obj.b = bp; + hel = strstr(bp, "hello"); + + // this will make obj2.b an array. 
+ obj2.b = &b; + obj2.b++; + // this will make obj3.p a WILD + foo3 obj3; + obj3.p = 0xcafebabe; +} diff --git a/tools/checked-c-convert/functests/ntarr/basic_inter.c b/tools/checked-c-convert/functests/ntarr/basic_inter.c new file mode 100644 index 000000000000..ea9bd6dbd3fb --- /dev/null +++ b/tools/checked-c-convert/functests/ntarr/basic_inter.c @@ -0,0 +1,59 @@ +#include +// here we test the propagation of constraints +// between functions. + +// we test propagation with and without function +// declaration. +// here, ntiptr will be an itype(Nt_ptr) +int funcdecl(char *ntiptr, int *iptr, int *wild); +int funcdecl(char *ntiptr, int *iptr, int *wild) { + if(ntiptr != 0) { + ntiptr = strstr("Hello", "world"); + } + wild = 0xdeadbeef; +} + +// ptr is a ARR ptr +// iptr will be itype +// wild will be a wild ptr. +int func(int *ptr, int *iptr, int *wild) { + if(ptr != 0) { + ptr[0] = 1; + } + wild = 0xdeadbeef; +} +int main() { + int a, b, c; + // this will be ARR + int *ap; + // this will be WILD + int *bp; + // this will be _Ptr + int *cp; + // this will be wild + char *ap1; + // this will be WILD + int *bp1; + // this will be _Ptr + int *cp1; + + + //ap1 = &a; + ap = &a; + // we will make this pointer wild. + bp1 = bp = 0xcafeba; + cp = &c; + cp1 = &c; + // we will make this wild in + // main. + ap1 = 0xdeadbe; + // although, we are passing cp + // to a paramter that will be + // treated as WILD in func + func(ap, bp, cp); + // ap1 will be WILD in main + // bp1 will be WILD in main + // cp1 will _Ptr + funcdecl(ap1, bp1, cp1); + +} diff --git a/tools/checked-c-convert/functests/ntarr/basic_inter.expected.c b/tools/checked-c-convert/functests/ntarr/basic_inter.expected.c new file mode 100644 index 000000000000..b15aa500a520 --- /dev/null +++ b/tools/checked-c-convert/functests/ntarr/basic_inter.expected.c @@ -0,0 +1,59 @@ +#include +// here we test the propagation of constraints +// between functions. 
+ +// we test propagation with and without function +// declaration. +// here, ntiptr will be an itype(Nt_ptr) +int funcdecl(char *ntiptr : itype(_Nt_array_ptrchar> ) , int *iptr : itype(_Ptr ) , int *wild); +int funcdecl(char *ntiptr : itype(_Nt_array_ptrchar> ) , int *iptr : itype(_Ptr ) , int *wild) { + if(ntiptr != 0) { + ntiptr = strstr("Hello", "world"); + } + wild = 0xdeadbeef; +} + +// ptr is a ARR ptr +// iptr will be itype +// wild will be a wild ptr. +int func(int *ptr, int *iptr : itype(_Ptr ) , int *wild) {/*ARR:ptr*/ + if(ptr != 0) { + ptr[0] = 1; + } + wild = 0xdeadbeef; +} +int main() { + int a, b, c; + // this will be ARR + _Ptr ap; + // this will be WILD + int *bp; + // this will be _Ptr + _Ptr cp; + // this will be wild + char *ap1; + // this will be WILD + int *bp1; + // this will be _Ptr + _Ptr cp1; + + + //ap1 = &a; + ap = &a; + // we will make this pointer wild. + bp1 = bp = 0xcafeba; + cp = &c; + cp1 = &c; + // we will make this wild in + // main. + ap1 = 0xdeadbe; + // although, we are passing cp + // to a paramter that will be + // treated as WILD in func + func(ap, bp, cp); + // ap1 will be WILD in main + // bp1 will be WILD in main + // cp1 will _Ptr + funcdecl(ap1, bp1, cp1); + +} diff --git a/tools/checked-c-convert/functests/ntarr/basic_inter_field.c b/tools/checked-c-convert/functests/ntarr/basic_inter_field.c new file mode 100644 index 000000000000..ae5db2df1d57 --- /dev/null +++ b/tools/checked-c-convert/functests/ntarr/basic_inter_field.c @@ -0,0 +1,23 @@ +#include +// This tests the propagation of constraints +// within the fields of structure. 
+typedef struct { + int *ptr; + char *ntptr; +} foo; + +foo obj1; + +int* func(int *ptr, char *ntptr) { + obj1.ptr = ptr; + obj1.ntptr = strstr(ntptr, "world"); + return ptr; +} + +int main() { + int a; + int *b; + char *wil; + a = strlen(wil); + b = func(&a, wil); +} diff --git a/tools/checked-c-convert/functests/ntarr/basic_inter_field.expected.c b/tools/checked-c-convert/functests/ntarr/basic_inter_field.expected.c new file mode 100644 index 000000000000..48241002f6ba --- /dev/null +++ b/tools/checked-c-convert/functests/ntarr/basic_inter_field.expected.c @@ -0,0 +1,23 @@ +#include +// This tests the propagation of constraints +// within the fields of structure. +typedef struct { + _Ptr ptr; + _Nt_array_ptrchar> ntptr; +} foo; + +foo obj1; + +_Ptr func(_Ptr ptr, _Nt_array_ptrchar> ntptr) { + obj1.ptr = ptr; + obj1.ntptr = strstr(ntptr, "world"); + return ptr; +} + +int main() { + int a; + _Ptr b; + _Nt_array_ptrchar> wil; + a = strlen(wil); + b = func(&a, wil); +} diff --git a/tools/checked-c-convert/functests/ntarr/basic_local.c b/tools/checked-c-convert/functests/ntarr/basic_local.c new file mode 100644 index 000000000000..460e891c95af --- /dev/null +++ b/tools/checked-c-convert/functests/ntarr/basic_local.c @@ -0,0 +1,23 @@ +#include +// basic test +// just create a NT pointer +int main() { + // a has to identified + // as NTArr + // we use this as an argument + // to string function. + char *a; + // c should be identified as + // ARR as we assign it the return value of + // string function use it. + char *c; + // we will make this wild. + int *d; + int b; + // this will make a as NTARR + b = strlen(a); + // this will make C as NTArr + c = strstr("Hello", "World"); + // this should mark d as WILD. 
+ d = 0xdeadbeef; +} diff --git a/tools/checked-c-convert/functests/ntarr/basic_local.expected.c b/tools/checked-c-convert/functests/ntarr/basic_local.expected.c new file mode 100644 index 000000000000..cc5f7d94f202 --- /dev/null +++ b/tools/checked-c-convert/functests/ntarr/basic_local.expected.c @@ -0,0 +1,23 @@ +#include +// basic test +// just create a NT pointer +int main() { + // a has to identified + // as NTArr + // we use this as an argument + // to string function. + _Nt_array_ptrchar> a; + // c should be identified as + // ARR as we assign it the return value of + // string function use it. + _Nt_array_ptrchar> c; + // we will make this wild. + int *d; + int b; + // this will make a as NTARR + b = strlen(a); + // this will make C as NTArr + c = strstr("Hello", "World"); + // this should mark d as WILD. + d = 0xdeadbeef; +} diff --git a/tools/checked-c-convert/functests/ptr/basic_field_local.c b/tools/checked-c-convert/functests/ptr/basic_field_local.c new file mode 100644 index 000000000000..2ce53cd1cee0 --- /dev/null +++ b/tools/checked-c-convert/functests/ptr/basic_field_local.c @@ -0,0 +1,27 @@ +// This tests the propagation of constraints +// within the fields of structure. 
+// we do not use b so it will be a _Ptr +typedef struct { + int a; + int *b; +} foo; +// here we use b in a safe way, hence +// it will be a _Ptr +typedef struct { + float *b; +} foo2; +// here we use p in unsafe way +// and hence will not be a safe ptr +typedef struct { + float c; + int *p; + char d; +} foo3; +int main() { + foo obj; + float b; + foo2 obj2; + obj2.b = &b; + foo3 obj3; + obj3.p = 0xcafebabe; +} diff --git a/tools/checked-c-convert/functests/ptr/basic_field_local.expected.c b/tools/checked-c-convert/functests/ptr/basic_field_local.expected.c new file mode 100644 index 000000000000..081111d915de --- /dev/null +++ b/tools/checked-c-convert/functests/ptr/basic_field_local.expected.c @@ -0,0 +1,27 @@ +// This tests the propagation of constraints +// within the fields of structure. +// we do not use b so it will be a _Ptr +typedef struct { + int a; + _Ptr b; +} foo; +// here we use b in a safe way, hence +// it will be a _Ptr +typedef struct { + _Ptr b; +} foo2; +// here we use p in unsafe way +// and hence will not be a safe ptr +typedef struct { + float c; + int *p; + char d; +} foo3; +int main() { + foo obj; + float b; + foo2 obj2; + obj2.b = &b; + foo3 obj3; + obj3.p = 0xcafebabe; +} diff --git a/tools/checked-c-convert/functests/ptr/basic_inter.c b/tools/checked-c-convert/functests/ptr/basic_inter.c new file mode 100644 index 000000000000..ef9679ab5128 --- /dev/null +++ b/tools/checked-c-convert/functests/ptr/basic_inter.c @@ -0,0 +1,50 @@ +// here we test the propagation of constraints +// between functions. + +// we test propagation with and without function +// declaration. +int funcdecl(int *ptr, int *iptr, int *wild); +int funcdecl(int *ptr, int *iptr, int *wild) { + if(ptr != 0) { + *ptr = 0; + } + wild = 0xdeadbeef; +} + +// ptr is a regular _Ptr +// iptr will be itype +// wild will be a wild ptr. 
+int func(int *ptr, int *iptr, int *wild) { + if(ptr != 0) { + *ptr = 0; + } + wild = 0xdeadbeef; +} +int main() { + int a, b, c; + // this will be _Ptr + int *ap; + // this will be WILD + int *bp; + // this will be _Ptr + int *cp; + // this will be _Ptr + int *ap1; + // this will be WILD + int *bp1; + // this will be _Ptr + int *cp1; + + + ap1 = ap = &a; + // we will make this pointer wild. + bp1 = bp = 0xcafeba; + cp = &c; + cp1 = &c; + // although, we are passing cp + // to a paramter that will be + // treated as WILD in func, cp + // is Ptr within main + func(ap, bp, cp); + funcdecl(ap1, bp1, cp1); +} diff --git a/tools/checked-c-convert/functests/ptr/basic_inter.expected.c b/tools/checked-c-convert/functests/ptr/basic_inter.expected.c new file mode 100644 index 000000000000..204db4f0176f --- /dev/null +++ b/tools/checked-c-convert/functests/ptr/basic_inter.expected.c @@ -0,0 +1,50 @@ +// here we test the propagation of constraints +// between functions. + +// we test propagation with and without function +// declaration. +int funcdecl(_Ptr ptr, int *iptr : itype(_Ptr ) , int *wild); +int funcdecl(_Ptr ptr, int *iptr : itype(_Ptr ) , int *wild) { + if(ptr != 0) { + *ptr = 0; + } + wild = 0xdeadbeef; +} + +// ptr is a regular _Ptr +// iptr will be itype +// wild will be a wild ptr. +int func(_Ptr ptr, int *iptr : itype(_Ptr ) , int *wild) { + if(ptr != 0) { + *ptr = 0; + } + wild = 0xdeadbeef; +} +int main() { + int a, b, c; + // this will be _Ptr + _Ptr ap; + // this will be WILD + int *bp; + // this will be _Ptr + _Ptr cp; + // this will be _Ptr + _Ptr ap1; + // this will be WILD + int *bp1; + // this will be _Ptr + _Ptr cp1; + + + ap1 = ap = &a; + // we will make this pointer wild. 
+ bp1 = bp = 0xcafeba; + cp = &c; + cp1 = &c; + // although, we are passing cp + // to a paramter that will be + // treated as WILD in func, cp + // is Ptr within main + func(ap, bp, cp); + funcdecl(ap1, bp1, cp1); +} diff --git a/tools/checked-c-convert/functests/ptr/basic_inter_field.c b/tools/checked-c-convert/functests/ptr/basic_inter_field.c new file mode 100644 index 000000000000..1ea72a96fb80 --- /dev/null +++ b/tools/checked-c-convert/functests/ptr/basic_inter_field.c @@ -0,0 +1,24 @@ +// This tests the propagation of constraints +// within the fields of structure. +typedef struct { + int *ptr; + char *ptr2; +} foo; + +foo obj1; + +int* func(int *ptr, char *iwild) { + // both the arguments are pointers + // within function body + obj1.ptr = ptr; + obj1.ptr2 = iwild; + return ptr; +} + +int main() { + int a; + int *b; + char *wil; + wil = 0xdeadbeef; + b = func(&a, wil); +} diff --git a/tools/checked-c-convert/functests/ptr/basic_inter_field.expected.c b/tools/checked-c-convert/functests/ptr/basic_inter_field.expected.c new file mode 100644 index 000000000000..ceadaae12800 --- /dev/null +++ b/tools/checked-c-convert/functests/ptr/basic_inter_field.expected.c @@ -0,0 +1,24 @@ +// This tests the propagation of constraints +// within the fields of structure. 
+typedef struct { + _Ptr ptr; + _Ptr ptr2; +} foo; + +foo obj1; + +_Ptr func(_Ptr ptr, char *iwild : itype(_Ptr ) ) { + // both the arguments are pointers + // within function body + obj1.ptr = ptr; + obj1.ptr2 = iwild; + return ptr; +} + +int main() { + int a; + _Ptr b; + char *wil; + wil = 0xdeadbeef; + b = func(&a, wil); +} diff --git a/tools/checked-c-convert/functests/ptr/basic_local.c b/tools/checked-c-convert/functests/ptr/basic_local.c new file mode 100644 index 000000000000..75517b2a60c0 --- /dev/null +++ b/tools/checked-c-convert/functests/ptr/basic_local.c @@ -0,0 +1,18 @@ +// basic test +// just create a regular pointer +// and access it +int main() { + // a has to identified + // as _Ptr + int *a; + // c also + // should be identified as + // _Ptr + int *c; + int *d; + int b; + a = &b; + *a = 4; + // this should mark d as WILD. + d = 0xdeadbeef; +} diff --git a/tools/checked-c-convert/functests/ptr/basic_local.expected.c b/tools/checked-c-convert/functests/ptr/basic_local.expected.c new file mode 100644 index 000000000000..a4e6a91ce27a --- /dev/null +++ b/tools/checked-c-convert/functests/ptr/basic_local.expected.c @@ -0,0 +1,18 @@ +// basic test +// just create a regular pointer +// and access it +int main() { + // a has to identified + // as _Ptr + _Ptr a; + // c also + // should be identified as + // _Ptr + _Ptr c; + int *d; + int b; + a = &b; + *a = 4; + // this should mark d as WILD. + d = 0xdeadbeef; +} diff --git a/tools/checked-c-convert/functests/ptr/basic_return_itype.c b/tools/checked-c-convert/functests/ptr/basic_return_itype.c new file mode 100644 index 000000000000..e95c800b183f --- /dev/null +++ b/tools/checked-c-convert/functests/ptr/basic_return_itype.c @@ -0,0 +1,53 @@ +// here we test the propagation of constraints +// between functions. +static int funcvar; +static int funcdecvar; +// we test propagation with and without function +// declaration. 
+static int* funcdecl(int *ptr, int *iptr, int *wild); +static int* funcdecl(int *ptr, int *iptr, int *wild) { + if(ptr != 0) { + *ptr = 0; + } + wild = 0xdeadbeef; + return &funcdecvar; +} + +// ptr is a regular _Ptr +// iptr will be itype +// wild will be a wild ptr. +static int* func(int *ptr, int *iptr, int *wild) { + if(ptr != 0) { + *ptr = 0; + } + wild = 0xdeadbeef; + return &funcvar; +} +int main() { + int a, b, c; + // this will be _Ptr + int *ap; + // this will be WILD + int *bp; + // this will be _Ptr + int *cp; + // this will be _Ptr + int *ap1; + // this will be WILD + int *bp1; + // this will be _Ptr + int *cp1; + + + ap1 = ap = &a; + // we will make this pointer wild. + bp1 = bp = 0xcafeba; + cp = &c; + cp1 = &c; + // although, we are passing cp + // to a paramter that will be + // treated as WILD in func, cp + // is Ptr within main + bp = func(ap, bp, cp); + bp1 = funcdecl(ap1, bp1, cp1); +} diff --git a/tools/checked-c-convert/functests/ptr/basic_return_itype.expected.c b/tools/checked-c-convert/functests/ptr/basic_return_itype.expected.c new file mode 100644 index 000000000000..e601cc390d4d --- /dev/null +++ b/tools/checked-c-convert/functests/ptr/basic_return_itype.expected.c @@ -0,0 +1,53 @@ +// here we test the propagation of constraints +// between functions. +static int funcvar; +static int funcdecvar; +// we test propagation with and without function +// declaration. +static int *funcdecl(_Ptr ptr, int *iptr : itype(_Ptr ) , int *wild) : itype(_Ptr ) ; +static int *funcdecl(_Ptr ptr, int *iptr : itype(_Ptr ) , int *wild) : itype(_Ptr ) { + if(ptr != 0) { + *ptr = 0; + } + wild = 0xdeadbeef; + return &funcdecvar; +} + +// ptr is a regular _Ptr +// iptr will be itype +// wild will be a wild ptr. 
+static int *func(_Ptr ptr, int *iptr : itype(_Ptr ) , int *wild) : itype(_Ptr ) { + if(ptr != 0) { + *ptr = 0; + } + wild = 0xdeadbeef; + return &funcvar; +} +int main() { + int a, b, c; + // this will be _Ptr + _Ptr ap; + // this will be WILD + int *bp; + // this will be _Ptr + _Ptr cp; + // this will be _Ptr + _Ptr ap1; + // this will be WILD + int *bp1; + // this will be _Ptr + _Ptr cp1; + + + ap1 = ap = &a; + // we will make this pointer wild. + bp1 = bp = 0xcafeba; + cp = &c; + cp1 = &c; + // although, we are passing cp + // to a paramter that will be + // treated as WILD in func, cp + // is Ptr within main + bp = func(ap, bp, cp); + bp1 = funcdecl(ap1, bp1, cp1); +} diff --git a/tools/checked-c-convert/functests/run_tests.py b/tools/checked-c-convert/functests/run_tests.py new file mode 100644 index 000000000000..4206cfdd32f0 --- /dev/null +++ b/tools/checked-c-convert/functests/run_tests.py @@ -0,0 +1,71 @@ +import os +import sys +import argparse +import filecmp + + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + + +def get_all_tests(dir_folder): + tests = [] + for curr_f in os.listdir(dir_folder): + curr_f = os.path.join(dir_folder, curr_f) + if os.path.isdir(curr_f): + tests.extend(get_all_tests(curr_f)) + else: + if not curr_f.endswith("expected.c"): + expected_file = curr_f[:-1] + "expected.c" + if os.path.exists(expected_file): + tests.append((curr_f, expected_file)) + return tests + + +def test_diff(actual_out_file, expected_out_file): + return filecmp.cmp(actual_out_file, expected_out_file) + +def run_tests(prog, tests): + allgood = True + for act_file, expected_file in tests: + cmd_line = prog + " -output-postfix=actual " + act_file + " >/dev/null 2>/dev/null" + print(bcolors.OKBLUE + "[*] Testing:" + act_file + bcolors.ENDC) + os.system(cmd_line) + actual_out = act_file[:-1] + "actual.c" + if not 
test_diff(actual_out, expected_file): + print(bcolors.FAIL + "[-] Expected file:" + expected_file + " and Actual file:" + + actual_out + " doesn't match." + bcolors.ENDC) + allgood = False + else: + print(bcolors.OKGREEN + "[+] Test " + act_file + " Passed." + bcolors.ENDC) + return allgood + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser("FuntionalityTester", description="Script that checks functionality of " + "checked-c-convert tool") + + parser.add_argument("-p","--prog_name", dest='prog_name',type=str, required=True, + help='Program name to run. i.e., path to checked-c-convert') + args = parser.parse_args() + if not args.prog_name or not os.path.isfile(args.prog_name): + print("Error: Path to the program to run is invalid.") + print("Provided argument: {} is not a file.".format(args.prog_name)) + sys.exit() + + testfolder = os.path.dirname(os.path.abspath(__file__)) + all_tests = get_all_tests(testfolder) + print(bcolors.HEADER + "[*] Got:" + str(len(all_tests)) + " tests." + bcolors.ENDC) + print(bcolors.HEADER + "[*] Running Tests." + bcolors.ENDC) + if run_tests(args.prog_name, all_tests): + print(bcolors.OKGREEN + "[+] ALL TESTS PASSED." + bcolors.ENDC) + else: + print(bcolors.FAIL + "[-] TESTS FAILED." 
+ bcolors.ENDC) \ No newline at end of file diff --git a/tools/checked-c-convert/unittests/ConstraintTests.cpp b/tools/checked-c-convert/unittests/ConstraintTests.cpp index 3f5b22395d8a..79dffe428b73 100644 --- a/tools/checked-c-convert/unittests/ConstraintTests.cpp +++ b/tools/checked-c-convert/unittests/ConstraintTests.cpp @@ -133,5 +133,83 @@ TEST(Conflicts, test1) { EXPECT_TRUE(CS.addConstraint(CS.createEq(CS.getOrCreateVar(1), CS.getWild()))); EXPECT_TRUE(CS.addConstraint(CS.createEq(CS.getOrCreateVar(0), CS.getOrCreateVar(1)))); - EXPECT_FALSE(CS.solve().second); + EXPECT_TRUE(CS.solve().second); + Constraints::EnvironmentMap env = CS.getVariables(); + EXPECT_TRUE(*env[CS.getVar(0)] == *CS.getWild()); +} + +TEST(BasicNTArrayTest, NTArrayTests) { + Constraints CS; + + // q_0 = NTArr + + // should derive + // q_0 = NTArr + + EXPECT_TRUE(CS.addConstraint(CS.createEq(CS.getOrCreateVar(0), CS.getNTArr()))); + + EXPECT_TRUE(CS.solve().second); + Constraints::EnvironmentMap env = CS.getVariables(); + + EXPECT_TRUE(*env[CS.getVar(0)] == *CS.getNTArr()); +} + +TEST(NTArrayAndArrayTest, NTArrayTests) { + // tries to test the following case: + /* + * // this will derive first set: set 1 + * char *str = strstr(..,..); + * .. + * // this will derive the second constraint: set 2 + * str[j] = 'a'; + */ + Constraints CS; + // set 1 + // q_0 == NTARR + // set 2 + // q_0 = ARR + + // should derive + // q_0 = ARR + + EXPECT_TRUE(CS.addConstraint(CS.createEq(CS.getOrCreateVar(0), CS.getNTArr()))); + EXPECT_TRUE(CS.addConstraint(CS.createEq(CS.getOrCreateVar(0), CS.getArr()))); + + EXPECT_TRUE(CS.solve().second); + Constraints::EnvironmentMap env = CS.getVariables(); + + EXPECT_TRUE(*env[CS.getVar(0)] == *CS.getArr()); +} + +TEST(NTArrayAndArrayConflictTest, NTArrayTests) { + // tries to test the following case: + /* + * // this will derive first set: set 1 + * char *data; + * data = str.. + * ... 
+ * // this will add second set of constraints + * data += 1; + */ + Constraints CS; + // set 1 + // q_0 == NTArr + // set 2 + // q_0 != NTArr + // q_0 != PTR + + // should derive + // q_0 = Arr + + // set 1 + EXPECT_TRUE(CS.addConstraint(CS.createEq(CS.getOrCreateVar(0), CS.getNTArr()))); + // set 2 + EXPECT_TRUE(CS.addConstraint(CS.createNot(CS.createEq(CS.getOrCreateVar(0), CS.getNTArr())))); + EXPECT_TRUE(CS.addConstraint(CS.createNot(CS.createEq(CS.getOrCreateVar(0), CS.getPtr())))); + + + EXPECT_TRUE(CS.solve().second); + Constraints::EnvironmentMap env = CS.getVariables(); + + EXPECT_TRUE(*env[CS.getVar(0)] == *CS.getArr()); } diff --git a/tools/checked-c-convert/utils/README.md b/tools/checked-c-convert/utils/README.md index b8885fd23fa2..4109723f401f 100644 --- a/tools/checked-c-convert/utils/README.md +++ b/tools/checked-c-convert/utils/README.md @@ -3,11 +3,55 @@ This directory contains a set of utilities to help with converting a codebase. Python 2.7 required. ## convert-commands.py -This script takes two arguments `compileCommands` (the path to the `compile_commands.json` for the configuration you plan to convert) and `progName` (which is checked-c-convert). It reads the `compile_commands.json` (which must match the fields produced by CMake's versions of such files) and produces an output file which contains a command-line invocation of `progName` with some flags and all `.c` files which are compiled by this configuration (and thus should be converted by `checked-c-convert`). This file is currently saved as `convert.sh` and can be run directly as a shell script. +This script takes two named arguments `compileCommands` (`-cc`) +(the path to the `compile_commands.json` for the configuration you plan to convert) +and `progName` (`-p`), which is the path to the checked-c-convert binary. 
+It reads the `compile_commands.json` +(which must match the fields produced by CMake's versions of such files) and +produces an output file which contains a command-line invocation of `progName` with +some flags and all `.c` files which are compiled by this configuration +(and thus should be converted by `checked-c-convert`). +This file is currently saved as `convert_all.sh` and can be run directly as a shell script. +The `convert-commands.py` also creates `convert_individual.sh` file that +contains the commands to run the `checked-c-convert` tool on individual source files. + +### Example: +``` +python convert-commands.py --cc <path to compile_commands.json> -p <path to checked-c-convert binary> +``` + +### Generating `compile_commands.json` +#### Using `cmake` +Use the CMAKE_EXPORT_COMPILE_COMMANDS flag. You can run +``` +cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ... +``` +or add the following line to your CMakeLists.txt script: +``` +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +``` +The `compile_commands.json` file will be put into the build directory. +#### Using `Bear` (Recommended) +For `make` and `cmake` based build systems, you can use `Bear`. + +Install Bear from: https://github.com/rizsotto/Bear + +Prepend `bear` to your make command i.e., if you were running `make -j4` +then run `bear make -j4`. +The `compile_commands.json` file will be put into the current directory. + ## update-includes.py -Given a file that contains a list of `.c` or `.h` filenames (such as the one produced by `convert-commands.py` or generated with the POSIX `find > outfile` command), this script will look for `#include` statements in each file in the list that `#include` headers for which there are CheckedC replacements (e.g. stdlib, math) and substitutes in those replacements. This rewrites the files in place!
+Given a file that contains a list of `.c` or `.h` filenames +(such as the one produced by `convert-commands.py` or generated with the POSIX `find > outfile` command), +this script will look for `#include` statements in each file in the list +that `#include` headers for which there are CheckedC replacements (e.g. stdlib, math) +and substitutes in those replacements. **This rewrites the files in place!** + +**Hint: To get all the `.c` and `.h` files in a directory recursively use the following command: `find -regex '.*/.*\.\(c\|h\)$'`** + + Details: - The default directory to look for the CheckedC replacement headers is documented at the top of the file, but the optional argument `--includeDir` allows specification of a different directory. - This script will strip `\` characters and whitespace off the end of each line but expects the filename to be at the beginning of the line. diff --git a/tools/checked-c-convert/utils/constraint_debugger.py b/tools/checked-c-convert/utils/constraint_debugger.py new file mode 100644 index 000000000000..7a735f17d91d --- /dev/null +++ b/tools/checked-c-convert/utils/constraint_debugger.py @@ -0,0 +1,150 @@ +""" +Simple Constraint Debugging script. + +This is a simple help script based on IPython + to debug the constraint system of the +converter tool. + +How to use it: + lets say, if you want to know how the constraint variable "q_11858" + got assigned to WILD. + + $ python constraint_debugger.py constraint_output.json + ... + In [1]: how_wild_recurse("q_11858") + Out[1]: + ['q_11858->', + u'q_317953->', + u'q_11815<-', + u'q_305961<-', + u'q_11811->', + u'q_317927<-', + u'q_909->', + 'WILD'] + '->' and '<-' shows whether the corresponding variable is in the LHS + or RHS of the equality constraint. + + // if you want to know how "q_11858" got assigned to "ARR", then: + In [2]: how_wild_recurse("q_11858", "ARR") + Out[2]: [] + + Which means that there is no path i.e., "q_11858" never got assigned to "ARR".
+ + You can get the description of a constraint variable (q_305961) as follows: + In [3]: desc_cons("q_305961") + Out[3]: + {'desc': u'tmp_passwdfile', + 'line': u'/home/machiry/checkedc/benchmarks/icecast-2.4.4/src/auth_htpasswd.c:317:11'} + + +""" +import json +import sys + + +def parse_pointer_var(ptr_json): + all_vars = {} + var_name = ptr_json['name'] + for cvar in ptr_json['Vars']: + all_vars[cvar] = var_name + return all_vars + + +def parse_function_ptr_var(ptr_json): + all_vars = {} + for cptr in ptr_json['Parameters']: + for cval in cptr: + childv = parse_ptr_var(cval) + all_vars.update(childv) + + for rvar in ptr_json['ReturnVar']: + childv = parse_ptr_var(rvar) + all_vars.update(childv) + return all_vars + + +def parse_ptr_var(json_obj): + ret_val = {} + if 'FunctionVar' in json_obj: + ret_val = parse_function_ptr_var(json_obj['FunctionVar']) + if 'PointerVar' in json_obj: + ret_val = parse_pointer_var(json_obj['PointerVar']) + return ret_val + + +fwd_map = dict() +rev_map = dict() + +fp = open(sys.argv[1], "r") +jobj = json.loads(fp.read()) +fp.close() + +# load the provided json in fwd and rev_map +for curr_d in jobj["Setup"]["Constraints"]: + if "Eq" in curr_d.keys(): + curr_d = curr_d["Eq"] + atom1 = curr_d["Atom1"] + atom2 = curr_d["Atom2"] + if atom1 not in fwd_map: + fwd_map[atom1] = set() + fwd_map[atom1].add(atom2) + + if atom2 != "WILD" and atom2 != "ARR" and atom2 != "PTR" and atom2 != "NTARR": + if atom2 not in rev_map: + rev_map[atom2] = set() + rev_map[atom2].add(atom1) + +total_cons_vars = {} +for curr_c_var in jobj["ConstraintVariables"]: + li = curr_c_var["line"] + for pvar in curr_c_var["Variables"]: + all_vals = parse_ptr_var(pvar) + for curr_k in all_vals: + total_cons_vars[curr_k] = {} + total_cons_vars[curr_k]['line'] = li + total_cons_vars[curr_k]['desc'] = all_vals[curr_k] + + +how_wild_cache = {} + + +def desc_cons(curr_e): + if curr_e in total_cons_vars: + return total_cons_vars[curr_e] + else: + return "None" + +def 
how_wild_recurse(curr_e, target_tag="WILD", visited=None): + if visited is None: + visited = set() + if curr_e in visited: + return [] + visited.add(curr_e) + cache_key = (curr_e, target_tag) + if cache_key in how_wild_cache: + return how_wild_cache[cache_key] + map_dirs = (("->", fwd_map), ("<-", rev_map)) + for dir_char, map in map_dirs: + if curr_e in map: + if target_tag in map[curr_e]: + visited.remove(curr_e) + res = [curr_e + dir_char, target_tag] + how_wild_cache[cache_key] = res + return res + for cc in map[curr_e]: + ccr = how_wild_recurse(cc, target_tag, visited) + if len(ccr) > 0: + visited.remove(curr_e) + res = [curr_e + dir_char] + ccr + how_wild_cache[cache_key] = res + return res + visited.remove(curr_e) + return [] + +import IPython +IPython.embed() + + + + + diff --git a/tools/checked-c-convert/utils/convert-commands.py b/tools/checked-c-convert/utils/convert-commands.py index d2b019ef196b..bf94ef8794f1 100755 --- a/tools/checked-c-convert/utils/convert-commands.py +++ b/tools/checked-c-convert/utils/convert-commands.py @@ -1,5 +1,6 @@ import re import os +import sys import json import argparse import traceback @@ -10,12 +11,39 @@ It contains some work-arounds for cmake+nmake generated compile_commands.json files, where the files are malformed. """ -SLASH = "/" +SLASH = os.sep +# file in which the individual commands will be stored +INDIVIDUAL_COMMANDS_FILE = os.path.realpath("convert_individual.sh") +# file in which the total commands will be stored. +TOTAL_COMMANDS_FILE = os.path.realpath("convert_all.sh") +# to separate multiple commands in a line +CMD_SEP = " &" DEFAULT_ARGS = ["-dump-stats", "-output-postfix=checked"] if os.name == "nt": DEFAULT_ARGS.append("-extra-arg-before=--driver-mode=cl") - SLASH = "\\" + CMD_SEP = " ;" + + +def getCheckedCArgs(argument_list, work_dir): + """ + Convert the compilation arguments (include folder and #defines) + to checked C format. + :param argument_list: list of compiler argument. 
+ :param work_dir: Path to the working directory from which + the compilation command was run. + :return: checked c args + """ + clang_x_args = [] + for curr_arg in argument_list: + if curr_arg.startswith("-D") or curr_arg.startswith("-I"): + if curr_arg.startswith("-I"): + # if this is relative path, + # convert into absolute path + if not os.path.isabs(curr_arg[2:]): + curr_arg = "-I" + os.path.abspath(os.path.join(work_dir, curr_arg[2:])) + clang_x_args.append('-extra-arg-before=' + curr_arg) + return clang_x_args def tryFixUp(s): """ @@ -29,24 +57,30 @@ def tryFixUp(s): f.close() return -def runMain(args): +def runMain(cmd_args): + global INDIVIDUAL_COMMANDS_FILE + global TOTAL_COMMANDS_FILE runs = 0 cmds = None while runs < 2: runs = runs + 1 try: - cmds = json.load(open(args.compile_commands, 'r')) + cmds = json.load(open(cmd_args.compile_commands, 'r')) except: traceback.print_exc() - tryFixUp(args.compile_commands) + tryFixUp(cmd_args.compile_commands) if cmds == None: - print "failed" + print("failed") return s = set() + total_x_args = [] + all_files = [] for i in cmds: file_to_add = i['file'] + compiler_x_args = [] + target_directory = "" if file_to_add.endswith(".cpp"): continue # Checked C extension doesn't support cpp files yet @@ -56,26 +90,83 @@ def runMain(args): if 'arguments' in i and not 'command' in i: # BEAR. Need to add directory. file_to_add = i['directory'] + SLASH + file_to_add + # get the checked-c-convert and compiler arguments + compiler_x_args = getCheckedCArgs(i["arguments"], i['directory']) + total_x_args.extend(compiler_x_args) + # get the directory used during compilation. 
+ target_directory = i['directory'] file_to_add = os.path.realpath(file_to_add) - s.add(file_to_add) + all_files.append(file_to_add) + s.add((frozenset(compiler_x_args), target_directory, file_to_add)) - print s + # get the common path of the files as the base directory + compilation_base_dir = os.path.commonprefix(all_files) + prog_name = cmd_args.prog_name + f = open(INDIVIDUAL_COMMANDS_FILE, 'w') + for compiler_args, target_directory, src_file in s: + args = [] + # get the command to change the working directory + change_dir_cmd = "" + if len(target_directory) > 0: + change_dir_cmd = "cd " + target_directory + CMD_SEP + else: + # default working directory + target_directory = os.getcwd() + args.append(prog_name) + if len(compiler_args) > 0: + args.extend(list(compiler_args)) + args.append("-base-dir=\"" + compilation_base_dir + "\"") + args.extend(DEFAULT_ARGS) + args.append(src_file) + # run individual commands. + if cmd_args.individual: + print("Running:" + ' '.join(args)) + subprocess.check_call(args, cwd=target_directory) + # prepend the command to change the working directory. 
+ if len(change_dir_cmd) > 0: + args = [change_dir_cmd] + args + f.write(" \\\n".join(args)) + f.write("\n") + f.close() + print("[+] Saved all the individual commands into the file:" + INDIVIDUAL_COMMANDS_FILE) - prog_name = args.prog_name args = [] args.append(prog_name) args.extend(DEFAULT_ARGS) - args.extend(list(s)) - f = open('convert.sh', 'w') + args.extend(list(set(total_x_args))) + args.append("-base-dir=\"" + compilation_base_dir + "\"") + args.extend(list(set(all_files))) + f = open(TOTAL_COMMANDS_FILE, 'w') f.write(" \\\n".join(args)) f.close() - subprocess.check_call(args) - + # run whole command + if not cmd_args.individual: + print("Running:" + str(' '.join(args))) + subprocess.check_call(args) + print("[+] Saved the total command into the file:" + TOTAL_COMMANDS_FILE) return + if __name__ == '__main__': - parser = argparse.ArgumentParser("runner") - parser.add_argument("compile_commands", type=str) - parser.add_argument("prog_name", type=str) + checked_c_convert_bin = "" + if 'LLVM_OBJ' in os.environ: + checked_c_convert_bin = os.path.join(os.environ['LLVM_OBJ'], "bin/checked-c-convert") + + parser = argparse.ArgumentParser(__file__, description="Tool that converts the compilation commands into" + " the commands for checked-c-convert tool and also " + " runs the tool.") + parser.add_argument("-cc", "--compile_commands", dest='compile_commands', + type=str, required=True, + help='Path to the compile_commands.json file, ' + 'i.e., the file generated by Bear or cmake') + parser.add_argument("-i", '--individual', dest='individual', action='store_true', default=False, + help='Option to enable running on individual files (default: false), ' + 'instead of all the files at once, which is the default behavior.') + parser.add_argument("-p","--prog_name", dest='prog_name',type=str, default=checked_c_convert_bin, + help='Program name to run. 
i.e., path to checked-c-convert') args = parser.parse_args() + if not args.prog_name or not os.path.isfile(args.prog_name): + print("Error: Path to the program to run is invalid.") + print("Provided argument: {} is not a file.".format(args.prog_name)) + sys.exit() runMain(args) diff --git a/tools/checked-c-convert/utils/update-includes.py b/tools/checked-c-convert/utils/update-includes.py index 12034504f841..f0412a3fb6cd 100644 --- a/tools/checked-c-convert/utils/update-includes.py +++ b/tools/checked-c-convert/utils/update-includes.py @@ -17,21 +17,29 @@ # This default value will be overwritten if an alternate path is provided # in an argument. +CHECKEDC_INCLUDE_REL_PATH = "projects/checkedc-wrapper/checkedc/include" checkedcHeaderDir = os.path.abspath( - os.path.join("../../../../.." , - "projects/checkedc-wrapper/checkedc/include")) + os.path.join("../../../../..", + CHECKEDC_INCLUDE_REL_PATH)) + # If the arg is a valid filename, returns the absolute path to it def parseTheArg(): global checkedcHeaderDir - parser = argparse.ArgumentParser( - description='Convert includes of standard headers to their checked versions for a list of c files.') - parser.add_argument( - 'filename', default="", - help='Filename containing list of C files to have includes converted') - parser.add_argument( - '--includeDir', default=checkedcHeaderDir, required=False, - help='Path to the checkedC headers, run from a checkedCclang repo') + global CHECKEDC_INCLUDE_REL_PATH + # get the directory based on `LLVM_SRC` environment variable. 
+ pathBasedDir = "" + if 'LLVM_SRC' in os.environ: + pathBasedDir = os.path.join(os.environ['LLVM_SRC'], CHECKEDC_INCLUDE_REL_PATH) + + parser = argparse.ArgumentParser(description='Convert includes of standard headers to their ' + 'checked versions for a list of c files.') + parser.add_argument('filename', default="", + help='Filename containing list of C files to have includes converted') + parser.add_argument('--includeDir', + default=checkedcHeaderDir if os.path.exists(checkedcHeaderDir) else pathBasedDir, + required=False, + help='Path to the checkedC headers, run from a checkedCclang repo') args = parser.parse_args() if not args.filename or not os.path.isfile(args.filename): @@ -48,6 +56,7 @@ def parseTheArg(): return os.path.abspath(args.filename) + # Initializes the find replace function so it can be run on multiple files def makeFindReplace(): hFiles = ["<"+f+">" for f in os.listdir(checkedcHeaderDir)