diff --git a/clang/include/clang/3C/3CGlobalOptions.h b/clang/include/clang/3C/3CGlobalOptions.h index ab2748ea6e12..e092c126683b 100644 --- a/clang/include/clang/3C/3CGlobalOptions.h +++ b/clang/include/clang/3C/3CGlobalOptions.h @@ -67,6 +67,8 @@ struct _3COptions { bool AllowRewriteFailures; bool ItypesForExtern; + + bool InferTypesForUndefs; }; // NOLINTNEXTLINE(readability-identifier-naming) diff --git a/clang/include/clang/3C/ProgramInfo.h b/clang/include/clang/3C/ProgramInfo.h index 23dfe71c0712..167b1f8244a6 100644 --- a/clang/include/clang/3C/ProgramInfo.h +++ b/clang/include/clang/3C/ProgramInfo.h @@ -269,6 +269,8 @@ class ProgramInfo : public ProgramVariableAdder { // constraint system for that pointer type. void addVariable(clang::DeclaratorDecl *D, clang::ASTContext *AstContext) override; + + void linkFunction(FunctionVariableConstraint *FV); }; #endif diff --git a/clang/include/clang/3C/Utils.h b/clang/include/clang/3C/Utils.h index 441833b9bd4c..8bcccc7596f5 100644 --- a/clang/include/clang/3C/Utils.h +++ b/clang/include/clang/3C/Utils.h @@ -181,6 +181,8 @@ bool isInSysHeader(clang::Decl *D); std::string getSourceText(const clang::SourceRange &SR, const clang::ASTContext &C); +std::string getSourceText(const clang::CharSourceRange &SR, + const clang::ASTContext &C); // Find the longest common subsequence. unsigned longestCommonSubsequence(const char *Str1, const char *Str2, diff --git a/clang/lib/3C/ConstraintVariables.cpp b/clang/lib/3C/ConstraintVariables.cpp index a90c8d5103cf..47e408ee4603 100644 --- a/clang/lib/3C/ConstraintVariables.cpp +++ b/clang/lib/3C/ConstraintVariables.cpp @@ -556,12 +556,21 @@ std::string PointerVariableConstraint::tryExtractBaseType(DeclaratorDecl *D, QualType QT, const Type *Ty, const ASTContext &C) { - bool FoundBaseTypeInSrc = false; - if (D && !TSI) + // Implicit parameters declarations from typedef function declarations will + // still have valid and non-empty source ranges, but implicit declarations + // aren't written in the source, so extracting the base type from this range + // gives incorrect type strings. For example, the base type for the implicit + // parameter for `foo_decl` in `typedef void foo(int*); foo foo_decl;` would + // be extracted as "foo_decl", when it should be "int". + if (!D || D->isImplicit()) + return ""; + + if (!TSI) TSI = D->getTypeSourceInfo(); - if (!QT->isOrContainsCheckedType() && !Ty->getAs() && D && TSI) { + if (!QT->isOrContainsCheckedType() && !Ty->getAs() && TSI) { // Try to extract the type from original source to preserve defines TypeLoc TL = TSI->getTypeLoc(); + bool FoundBaseTypeInSrc = false; if (isa(D)) { FoundBaseTypeInSrc = D->getAsFunction()->getReturnType() == QT; TL = getBaseTypeLoc(TL).getAs(); @@ -2220,8 +2229,30 @@ FVComponentVariable::FVComponentVariable(const QualType &QT, // parameters similarly to how we do it for pointers in regular function // declarations. if (D && D->getType() == QT) { - SourceRange SR = D->getSourceRange(); - SourceDeclaration = SR.isValid() ? getSourceText(SR, C) : ""; + SourceRange DRange = D->getSourceRange(); + if (DRange.isValid()) { + const SourceManager &SM = C.getSourceManager(); + SourceLocation DLoc = D->getLocation(); + CharSourceRange CSR; + if (SM.isBeforeInTranslationUnit(DRange.getEnd(), DLoc)) { + // It's not clear to me why, but the end of the SourceRange for the + // declaration can come before the SourceLocation for the declaration. + // This can result in SourceDeclaration failing to capture the whole + // declaration string, causing rewriting errors. In this case it is also + // necessary to use CharSourceRange::getCharRange to work around some + // cases where CharSourceRange::getTokenRange (the default behavior of + // getSourceText when passed a SourceRange) would not get the full + // declaration. For example: a parameter declared without a name such as + // `_Ptr` was captured as `_Ptr`. + CSR = CharSourceRange::getCharRange(DRange.getBegin(), DLoc); + } else { + CSR = CharSourceRange::getTokenRange(DRange); + } + + SourceDeclaration = getSourceText(CSR , C); + } else { + SourceDeclaration = ""; + } } } diff --git a/clang/lib/3C/DeclRewriter.cpp b/clang/lib/3C/DeclRewriter.cpp index bd37b53c044f..6645b50d5f80 100644 --- a/clang/lib/3C/DeclRewriter.cpp +++ b/clang/lib/3C/DeclRewriter.cpp @@ -567,7 +567,14 @@ bool FunctionDeclBuilder::VisitFunctionDecl(FunctionDecl *FD) { // If this is an external function, there is no need to rewrite the // declaration. We cannot change the signature of external functions. - if (!FDConstraint->hasBody()) + // Under the flag -infer-types-for-undef, however, undefined functions do need + // to be rewritten. If the rest of the 3c inference and rewriting code is + // correct, short-circuiting here shouldn't be necessary; the rest of the + // logic in this function should successfully not rewrite undefined functions + // when -infer-types-for-undef is not passed. This assumption could be + // transformed into an assertion if we're confident it won't fail in too many + // places. + if (!_3COpts.InferTypesForUndefs && !FDConstraint->hasBody()) return true; // RewriteParams and RewriteReturn track if we will need to rewrite the diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index 836586287090..6cf40e3d9384 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -417,44 +417,8 @@ bool ProgramInfo::link() { // For every global function that is an unresolved external, constrain // its parameter types to be wild. Unless it has a bounds-safe annotation. - for (const auto &U : ExternalFunctionFVCons) { - std::string FuncName = U.first; - FVConstraint *G = U.second; - - // If there was a checked type on a variable in the input program, it - // should stay that way. Otherwise, we shouldn't be adding a checked type - // to an extern function. - std::string Rsn = (G->hasBody() ? "" - : "Unchecked pointer in parameter or " - "return of external function " + - FuncName); - - // Handle the cases where itype parameters should not be treated as their - // unchecked type. - // TODO: Ditto re getting a PSL (in the case in which Rsn is non-empty and - // it is actually used). - G->equateWithItype(*this, Rsn, nullptr); - - // If we've seen this symbol, but never seen a body for it, constrain - // everything about it. - // Some global symbols we don't need to constrain to wild, like - // malloc and free. Check those here and skip if we find them. - if (!G->hasBody()) { - const FVComponentVariable *Ret = G->getCombineReturn(); - Ret->getInternal()->constrainToWild(CS, Rsn); - if (!Ret->getExternal()->srcHasItype() && - !Ret->getExternal()->isGeneric()) - Ret->getExternal()->constrainToWild(CS, Rsn); - - for (unsigned I = 0; I < G->numParams(); I++) { - const FVComponentVariable *Param = G->getCombineParam(I); - Param->getInternal()->constrainToWild(CS, Rsn); - if (!Param->getExternal()->srcHasItype() && - !Param->getExternal()->isGeneric()) - Param->getExternal()->constrainToWild(CS, Rsn); - } - } - } + for (const auto &U : ExternalFunctionFVCons) + linkFunction(U.second); // Repeat for static functions. // @@ -462,33 +426,45 @@ bool ProgramInfo::link() { // error during compilation. They may still be useful as code is developed, // so we treat them as if they are external, and constrain parameters // to wild as appropriate. - for (const auto &U : StaticFunctionFVCons) { - for (const auto &V : U.second) { + for (const auto &U : StaticFunctionFVCons) + for (const auto &V : U.second) + linkFunction(V.second); - std::string FileName = U.first; - std::string FuncName = V.first; - FVConstraint *G = V.second; - - std::string Rsn = (G->hasBody() ? "" - : "Unchecked pointer in parameter or " - "return of static function " + - FuncName + " in " + FileName); - - // TODO: Ditto re getting a PSL - G->equateWithItype(*this, Rsn, nullptr); + return true; +} - if (!G->hasBody()) { +void ProgramInfo::linkFunction(FunctionVariableConstraint *FV) { + // If there was a checked type on a variable in the input program, it + // should stay that way. Otherwise, we shouldn't be adding a checked type + // to an undefined function. + std::string Rsn = (FV->hasBody() ? "" : "Unchecked pointer in parameter or " + "return of undefined function " + + FV->getName()); + + // Handle the cases where itype parameters should not be treated as their + // unchecked type. + // TODO: Ditto re getting a PSL (in the case in which Rsn is non-empty and + // it is actually used). + FV->equateWithItype(*this, Rsn, nullptr); + + // Used to apply constraints to parameters and returns for function without a + // body. In the default configuration, the function is fully constrained so + // that parameters and returns are considered unchecked. When 3C is run with + // --infer-types-for-undefs, only internal variables are constrained, allowing + // external variables to solve to checked types meaning the parameter will be + // rewritten to an itype. + auto LinkComponent = [this, Rsn](const FVComponentVariable *FVC) { + FVC->getInternal()->constrainToWild(CS, Rsn); + if (!_3COpts.InferTypesForUndefs && + !FVC->getExternal()->srcHasItype() && !FVC->getExternal()->isGeneric()) + FVC->getExternal()->constrainToWild(CS, Rsn); + }; - if (!G->getExternalReturn()->isGeneric()) - G->getExternalReturn()->constrainToWild(CS, Rsn); - for (unsigned I = 0; I < G->numParams(); I++) - if (!G->getExternalParam(I)->isGeneric()) - G->getExternalParam(I)->constrainToWild(CS, Rsn); - } - } + if (!FV->hasBody()) { + LinkComponent(FV->getCombineReturn()); + for (unsigned I = 0; I < FV->numParams(); I++) + LinkComponent(FV->getCombineParam(I)); } - - return true; } // Populate Variables, VarDeclToStatement, RVariables, and DepthMap with diff --git a/clang/lib/3C/Utils.cpp b/clang/lib/3C/Utils.cpp index 69cb1cfdb28b..09208228ad9a 100644 --- a/clang/lib/3C/Utils.cpp +++ b/clang/lib/3C/Utils.cpp @@ -387,11 +387,15 @@ bool isInSysHeader(clang::Decl *D) { std::string getSourceText(const clang::SourceRange &SR, const clang::ASTContext &C) { + return getSourceText(CharSourceRange::getTokenRange(SR), C); +} + +std::string getSourceText(const clang::CharSourceRange &SR, + const clang::ASTContext &C) { assert(SR.isValid() && "Invalid Source Range requested."); auto &SM = C.getSourceManager(); auto LO = C.getLangOpts(); - llvm::StringRef Srctxt = - Lexer::getSourceText(CharSourceRange::getTokenRange(SR), SM, LO); + llvm::StringRef Srctxt = Lexer::getSourceText(SR, SM, LO); return Srctxt.str(); } diff --git a/clang/test/3C/implicit_casts_root_cause.c b/clang/test/3C/implicit_casts_root_cause.c index c3f1451c96d4..e9ebadc1a47a 100644 --- a/clang/test/3C/implicit_casts_root_cause.c +++ b/clang/test/3C/implicit_casts_root_cause.c @@ -11,7 +11,7 @@ void test_no_cause() { has_void(c); } -void has_float(float* v); // expected-warning {{Unchecked pointer in parameter or return of external function has_float}} +void has_float(float* v); // expected-warning {{Unchecked pointer in parameter or return of undefined function has_float}} void test_float_cause() { int *b, *c; has_float(b); // expected-warning {{1 unchecked pointer: Cast from int * to float *}} diff --git a/clang/test/3C/itype_undef.c b/clang/test/3C/itype_undef.c new file mode 100644 index 000000000000..e32006226f91 --- /dev/null +++ b/clang/test/3C/itype_undef.c @@ -0,0 +1,162 @@ +// RUN: rm -rf %t* +// RUN: 3c -base-dir=%S --infer-types-for-undefs -alltypes -addcr %s -- | FileCheck -match-full-lines -check-prefixes="CHECK_ALL","CHECK" %s +// RUN: 3c -base-dir=%S --infer-types-for-undefs -addcr %s -- | FileCheck -match-full-lines -check-prefixes="CHECK_NOALL","CHECK" %s +// RUN: 3c -base-dir=%S --infer-types-for-undefs -alltypes -addcr %s -- | %clang -c -fcheckedc-extension -x c -o /dev/null - +// RUN: 3c -base-dir=%S --infer-types-for-undefs -alltypes -output-dir=%t.checked %s -- +// RUN: 3c -base-dir=%t.checked --infer-types-for-undefs -alltypes %t.checked/itype_undef.c -- | diff %t.checked/itype_undef.c - + +// Basic checks for adding itypes on undefined functions. +void test0(int *a); +//CHECK: void test0(int *a : itype(_Ptr)); + +int *test1(); +//CHECK: int *test1(void) : itype(_Ptr); + +void caller() { +//CHECK: void caller() _Checked { + int *a = 0; + //CHECK: _Ptr a = 0; + test0(a); + + // This block is left unchecked because it calls a function without either a + // defintion or a prototype even though a `void` prototype is added durring + // conversion. Checked region insertion could be improved to recognize this + // as safe. + { + // CHECK: _Unchecked { + int *b = test1(); + //CHECK: _Ptr b = test1(); + } +} + +// Check for undef functions with existing checked types/itypes. +void test2(int* a : itype(_Ptr)); +int *test3(void) : itype(_Ptr); +void test4(_Ptr a); +_Ptr test5(void); +int *test6(void) : count(10); +void test7(int * : count(10)); +//CHECK: void test2(int* a : itype(_Ptr)); +//CHECK: int *test3(void) : itype(_Ptr); +//CHECK: void test4(_Ptr a); +//CHECK: _Ptr test5(void); +//CHECK: int *test6(void) : count(10); +//CHECK: void test7(int * : count(10)); + +void checked_caller() { +//CHECK_NOALL: void checked_caller() { +//CHECK_ALL: void checked_caller() _Checked { + int *a; + //CHECK: _Ptr a = ((void *)0); + test2(a); + + int *b = test3(); + //CHECK: _Ptr b = test3(); + + int *c; + //CHECK: _Ptr c = ((void *)0); + test4(c); + + int *d = test5(); + //CHECK: _Ptr d = test5(); + + int *e = test6(); + //CHECK_NOALL: int *e = test6(); + //CHECK_ALL: _Array_ptr e : count(10) = test6(); + + int *f; + //CHECK_NOALL: int *f; + //CHECK_ALL: _Array_ptr f : count(10) = ((void *)0); + test7(f); + + // Get 3C to infer the correct length for f and e. + for(int i = 0; i < 10; i++) { + f[i]; + e[i]; + } +} + +// Void pointers should still be fully unchecked unless there is an existing +// checked type/itype. +void test_void0(void *); +void *test_void1(); +//CHECK: void test_void0(void *); +//CHECK: void *test_void1(); +void void_caller() { + void * a = 0; + test_void0(a); + void *b = test_void1(); +} + +void test_void_itype(void * : itype(_Array_ptr)); +void test_void_count(void * : byte_count(0)); +void test_void_checked(_Array_ptr); +//CHECK: void test_void_itype(void * : itype(_Array_ptr)); +//CHECK: void test_void_count(void * : byte_count(0)); +//CHECK: void test_void_checked(_Array_ptr); + +// Pointer type and bounds inference should still work. +int *test_arr0(int n); +//CHECK_ALL: int *test_arr0(int n) : itype(_Array_ptr) count(n); +//CHECK_NOALL: int *test_arr0(int n) : itype(_Ptr); + +void test_arr1(int *a, int n); +//CHECK: void test_arr1(int *a : itype(_Ptr), int n); + +void arr_caller(int s) { +//CHECK_ALL: void arr_caller(int s) _Checked { + // Returns variables use the least solution, so this will cause test_arr0 to + // solve to _Array_ptr. + int *b = test_arr0(s); + //CHECK_ALL: _Array_ptr b : count(s) = test_arr0(s); + //CHECK_NOALL: int *b = test_arr0(s); + for (int i = 0; i < s; i++) + b[i]; + + // Parameter variables use the greatest solution, so test_arr1 will still + // solve to _Ptr. It might be a good idea to change the solution used for + // undefined functions so that we can infer _Array_ptr here and infer it a + // bound. + int *c = 0; + //CHECK_ALL: _Array_ptr c : count(s) = 0; + //CHECK_NOALL: int *c = 0; + test_arr1(c, s); + for (int i = 0; i < s; i++) + c[i]; +} + +// If all uses of the typedef are checked, the unchecked internal constraint +// variable of the undefined function should not make the typedef unchecked. + +typedef int *td0; +void typedef0(td0 a); +void typedef_caller0() { +//CHECK: typedef _Ptr td0; +//CHECK: void typedef0(int *a : itype(td0)); +//CHECK: void typedef_caller0() _Checked { + td0 b = 0; + typedef0(b); +} + +// If the typedef is otherwise unchecked, we should still give a useful +// checked itype to the typedef'ed parameter. + +typedef int *td1; +void typedef1(td1 a); +void typedef_caller1() { +//CHECK: typedef int *td1; +//CHECK: void typedef1(td1 a : itype(_Ptr)); +//CHECK: void typedef_caller1() { + td1 b = 1; + typedef1(b); +} + +// As expected, function typedefs aren't handled well, but they should generate +// valid code. +typedef void fn_typedef0(int *); +fn_typedef0 fntd_decl0; +//CHECK: void fntd_decl0(int * : itype(_Ptr)); + +typedef int *fn_typedef1(); +fn_typedef1 fntd_decl1; +//CHECK: int *fntd_decl1(void) : itype(_Ptr); diff --git a/clang/test/3C/root_cause.c b/clang/test/3C/root_cause.c index cbe244955108..318c1f94bbdc 100644 --- a/clang/test/3C/root_cause.c +++ b/clang/test/3C/root_cause.c @@ -6,7 +6,7 @@ // as including stdlib will create numerous root-cause warnings we don't want to deal with // unwritable-expected-warning@+2 {{0 unchecked pointers: Source code in non-writable file}} -// expected-warning@+1 {{Unchecked pointer in parameter or return of external function my_malloc}} +// expected-warning@+1 {{Unchecked pointer in parameter or return of undefined function my_malloc}} _Itype_for_any(T) void *my_malloc(unsigned long size) : itype(_Array_ptr) byte_count(size); @@ -64,7 +64,7 @@ void test1() { extern int *glob; // unwritable-expected-warning@+2 {{0 unchecked pointers: Source code in non-writable file}} -// expected-warning@+1 {{1 unchecked pointer: Unchecked pointer in parameter or return of external function glob_f}} +// expected-warning@+1 {{1 unchecked pointer: Unchecked pointer in parameter or return of undefined function glob_f}} int *glob_f(void); // unwritable-expected-warning@+1 {{0 unchecked pointers: Source code in non-writable file}} diff --git a/clang/tools/3c/3CStandalone.cpp b/clang/tools/3c/3CStandalone.cpp index 164a18749114..08a63c4d4979 100644 --- a/clang/tools/3c/3CStandalone.cpp +++ b/clang/tools/3c/3CStandalone.cpp @@ -236,6 +236,24 @@ static cl::opt OptItypesForExtern( "unsafe."), cl::init(false), cl::cat(_3CCategory)); +static cl::opt OptInferTypesForUndef( + "infer-types-for-undefs", + cl::desc("Enable type inference for undefined functions. Under this flag, " + "types for undefined functions are inferred according to the same " + "rules as defined functions with the caveat that an undefined " + "function will only solve to an itype and not a fully checked type. " + "Because 3c is not able to examine the body of the function, the " + "inferred pointer types (and array bounds) may not be consistent " + "with the actual implementation. By default, the Checked C compiler " + "trusts the declared itypes and will not detect a spatial memory " + "safety violation if the function is used in a way that is " + "consistent with the itypes but not the assumptions actually made " + "by the implementation. Thus, if you want to guarantee spatial " + "memory safety, you must manually check the inferred types against " + "your understanding of what the function actually does (or any " + "available documentation)."), + cl::init(false), cl::cat(_3CCategory)); + #ifdef FIVE_C static cl::opt OptRemoveItypes( "remove-itypes", @@ -302,6 +320,7 @@ int main(int argc, const char **argv) { CcOptions.AllowUnwritableChanges = OptAllowUnwritableChanges; CcOptions.AllowRewriteFailures = OptAllowRewriteFailures; CcOptions.ItypesForExtern = OptItypesForExtern; + CcOptions.InferTypesForUndefs = OptInferTypesForUndef; #ifdef FIVE_C CcOptions.RemoveItypes = OptRemoveItypes;