Skip to content

Commit 86b63d0

Browse files
author
Mandeep Singh Grang
authored
Bounds widening for _Nt_array_ptr's (#728)
* Implement bounds widening for _Nt_array_ptr's. _Nt_array_ptr's are terminated by a null terminator. If we determine that the dereference of a _Nt_array_ptr is non-null, then we can widen the bounds of the pointer by 1, because there will always be at least the null terminator in the string. Formally, we can state the problem as: ∀ p | p ∈ _Nt_array_ptr & bounds(p) = [l, u): *p != 0 => bounds(p) = [l, u + 1)
1 parent a0f94ec commit 86b63d0

File tree

8 files changed

+860
-0
lines changed

8 files changed

+860
-0
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ BENIGN_LANGOPT(DumpRecordLayoutsSimple , 1, 0, "dumping the layout of IRgen'd re
230230
BENIGN_LANGOPT(DumpVTableLayouts , 1, 0, "dumping the layouts of emitted vtables")
231231
BENIGN_LANGOPT(DumpInferredBounds, 1, 0, "dump inferred Checked C bounds for assignments and declarations")
232232
BENIGN_LANGOPT(DumpExtractedComparisonFacts, 1, 0, "dump extracted comparison facts")
233+
BENIGN_LANGOPT(DumpWidenedBounds, 1, 0, "dump widened bounds")
233234
LANGOPT(InjectVerifierCalls, 1, 0, "Injects calls to VERIFIER_assume and VERIFIER_error in the bitcode")
234235
LANGOPT(UncheckedPointersDynamicCheck, 1, 0, "Adds dynamic checks for unchecked pointers")
235236
LANGOPT(NoConstantCFStrings , 1, 0, "no constant CoreFoundation strings")

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,8 @@ def fno_checkedc_extension : Flag<["-"], "fno-checkedc-extension">, Group<f_Grou
789789
HelpText<"Do ont accept Checked C extension">;
790790
def fdump_extracted_comparison_facts : Flag<["-"], "fdump-extracted-comparison-facts">, Group<f_Group>, Flags<[CC1Option]>,
791791
HelpText<"Dump extracted comparison facts">;
792+
def fdump_widened_bounds : Flag<["-"], "fdump-widened-bounds">, Group<f_Group>, Flags<[CC1Option]>,
793+
HelpText<"Dump widened bounds">;
792794
def fdump_inferred_bounds : Flag<["-"], "fdump-inferred-bounds">, Group<f_Group>, Flags<[CC1Option]>,
793795
HelpText<"Dump inferred Checked C bounds for assignments and declarations">;
794796
def finject_verifier_calls : Flag<["-"], "finject-verifier-calls">, Group<f_Group>, Flags<[CC1Option]>,
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
//===---------- BoundsAnalysis.h - Dataflow for bounds widening-----------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===---------------------------------------------------------------------===//
9+
//
10+
// This file defines the interface for a dataflow analysis for bounds
11+
// widening.
12+
//
13+
//===---------------------------------------------------------------------===//
14+
15+
#ifndef LLVM_CLANG_BOUNDS_ANALYSIS_H
16+
#define LLVM_CLANG_BOUNDS_ANALYSIS_H
17+
18+
#include "clang/AST/CanonBounds.h"
19+
#include "clang/Analysis/Analyses/PostOrderCFGView.h"
20+
#include "clang/Sema/Sema.h"
21+
#include <queue>
22+
23+
namespace clang {
24+
// QueueSet is a queue backed by a set. The queue is useful for processing
25+
// the items in a Topological sort order which means that if item1 is a
26+
// predecessor of item2 then item1 is processed before item2. The set is
27+
// useful for maintaining uniqueness of items added to the queue.
28+
29+
template <class T>
30+
class QueueSet {
31+
private:
32+
std::queue<T *> _queue;
33+
llvm::DenseSet<T *> _set;
34+
35+
public:
36+
T *next() const {
37+
return _queue.front();
38+
}
39+
40+
void remove(T *B) {
41+
if (_queue.empty())
42+
return;
43+
_queue.pop();
44+
_set.erase(B);
45+
}
46+
47+
void append(T *B) {
48+
if (!_set.count(B)) {
49+
_queue.push(B);
50+
_set.insert(B);
51+
}
52+
}
53+
54+
bool empty() const {
55+
return _queue.empty();
56+
}
57+
};
58+
59+
} // end namespace clang
60+
61+
namespace clang {
62+
// Note: We use the shorthand "ntptr" to denote _Nt_array_ptr. We extract the
63+
// declaration of an ntptr as a VarDecl from a DeclRefExpr.
64+
65+
// BoundsMapTy denotes the widened bounds of an ntptr. Given VarDecl V with
65+
// declared bounds (low, high) and mapped unsigned value n, the bounds of V
66+
// have been widened to (low, high + n).
67+
68+
using BoundsMapTy = llvm::MapVector<const VarDecl *, unsigned>;
69+
70+
// For each edge B1->B2, EdgeBoundsTy denotes the Gen and Out sets.
71+
using EdgeBoundsTy = llvm::DenseMap<const CFGBlock *, BoundsMapTy>;
72+
73+
// For each block B, DeclSetTy denotes the Kill set. A VarDecl V is killed if:
74+
// 1. V is assigned to in the block, or
75+
// 2. any variable used in the bounds expr of V is assigned to in the block.
76+
using DeclSetTy = llvm::DenseSet<const VarDecl *>;
77+
78+
// A mapping of VarDecl V to all the variables occurring in its bounds
79+
// expression. This is used to compute Kill sets. An assignment to any
80+
// variable occurring in the bounds expression of an ntptr kills any computed
81+
// bounds for that ntptr in that block.
82+
using BoundsVarTy = llvm::DenseMap<const VarDecl *, DeclSetTy>;
83+
84+
// OrderedBlocksTy denotes blocks ordered by block numbers. This is useful
85+
// for printing the blocks in a deterministic order.
86+
using OrderedBlocksTy = std::vector<const CFGBlock *>;
87+
88+
// BoundsAnalysis is the interface to the dataflow analysis that widens the
// bounds of ntptr's.
class BoundsAnalysis {
89+
private:
90+
Sema &S;
91+
CFG *Cfg;
92+
ASTContext &Ctx;
93+
// The final widened bounds will reside here. This is a map keyed by
94+
// CFGBlock.
95+
EdgeBoundsTy WidenedBounds;
96+
97+
// ElevatedCFGBlock pairs a CFGBlock with the dataflow sets (In, Gen, Out and
// Kill) used by the analysis for that block.
class ElevatedCFGBlock {
98+
public:
99+
const CFGBlock *Block;
100+
// The In set for the block.
101+
BoundsMapTy In;
102+
// The Gen and Out sets for the block, one per outgoing edge.
103+
EdgeBoundsTy Gen, Out;
104+
// The Kill set for the block.
105+
DeclSetTy Kill;
106+
// The set of all variables used in bounds expr for each ntptr in the
107+
// block.
108+
BoundsVarTy BoundsVars;
109+
110+
ElevatedCFGBlock(const CFGBlock *B) : Block(B) {}
111+
};
112+
113+
// BlockMapTy stores the mapping from CFGBlocks to ElevatedCFGBlocks.
114+
using BlockMapTy = llvm::DenseMap<const CFGBlock *, ElevatedCFGBlock *>;
115+
// A queue of unique ElevatedCFGBlocks to run the dataflow analysis on.
116+
using WorkListTy = QueueSet<ElevatedCFGBlock>;
117+
118+
public:
119+
BoundsAnalysis(Sema &S, CFG *Cfg) : S(S), Cfg(Cfg), Ctx(S.Context) {}
120+
121+
// Run the dataflow analysis to widen bounds for ntptr's.
122+
void WidenBounds();
123+
124+
// Get the widened bounds for block B.
125+
// @param[in] B is the block for which the widened bounds are needed.
126+
// @return Widened bounds for ntptrs in block B.
127+
BoundsMapTy GetWidenedBounds(const CFGBlock *B);
128+
129+
// Pretty print the bounds widening analysis.
130+
// @param[in] FD is used to extract the name of the current function for
131+
// printing.
132+
void DumpWidenedBounds(FunctionDecl *FD);
133+
134+
private:
135+
// Compute Gen set for each edge in the CFG. If there is an edge B1->B2 and
136+
// the edge condition is of the form "if (*(p + i))" then Gen[B1] = {B2,
137+
// p:i}. The actual computation of i is done in FillGenSet.
138+
// @param[in] BlockMap is the map from CFGBlock to ElevatedCFGBlock. Used
139+
// to lookup ElevatedCFGBlock from CFGBlock.
140+
void ComputeGenSets(BlockMapTy BlockMap);
141+
142+
// Compute Kill set for each block in BlockMap. For a block B, a variable V
143+
// is added to Kill[B] if V is assigned to in B.
144+
// @param[in] BlockMap is the map from CFGBlock to ElevatedCFGBlock. Used
145+
// to lookup ElevatedCFGBlock from CFGBlock.
146+
void ComputeKillSets(BlockMapTy BlockMap);
147+
148+
// Compute In set for block EB. In[B1] = ∩ Out[B*->B1] (intersection), where
149+
// B* are all preds of B1.
150+
// @param[in] EB is the block to compute the In set for.
151+
// @param[in] BlockMap is the map from CFGBlock to ElevatedCFGBlock. Used
152+
// to lookup ElevatedCFGBlock from CFGBlock.
153+
void ComputeInSets(ElevatedCFGBlock *EB, BlockMapTy BlockMap);
154+
155+
// Compute Out set for each outgoing edge of EB. If the Out set on any edge
156+
// of EB changes then the successor of EB on that edge is added to
157+
// Worklist.
158+
// @param[in] EB is the block to compute the Out set for.
159+
// @param[in] BlockMap is the map from CFGBlock to ElevatedCFGBlock. Used
160+
// to lookup ElevatedCFGBlock from CFGBlock.
161+
// @param[out] Worklist receives the successors of EB if the Out set of
162+
// EB changes.
163+
void ComputeOutSets(ElevatedCFGBlock *EB, BlockMapTy BlockMap,
164+
WorkListTy &Worklist);
165+
166+
// Perform checks, handle conditional expressions, extract the
167+
// ntptr offset and fill the Gen set for the edge.
168+
// @param[in] E is the expr possibly containing the deref of an ntptr. If E
169+
// contains a pointer deref, the Gen set for the edge EB->SuccEB is
170+
// updated.
171+
// @param[in] EB is the source block of the edge whose Gen set is updated.
172+
// @param[in] SuccEB is the dest block of the edge whose Gen set is updated.
173+
void FillGenSet(Expr *E, ElevatedCFGBlock *EB, ElevatedCFGBlock *SuccEB);
174+
175+
// Collect all variables used in bounds expr E.
176+
// @param[in] E represents the bounds expr for an ntptr.
177+
// @param[out] BoundsVars is a set of all variables used in the bounds expr
178+
// E.
179+
void CollectBoundsVars(const Expr *E, DeclSetTy &BoundsVars);
180+
181+
// Collect the variables assigned to in a block.
182+
// @param[in] S is an assignment statement.
183+
// @param[in] EB is used to access the BoundsVars for the block.
184+
// @param[out] DefinedVars is the set of all ntptrs whose widened bounds
185+
// are no longer valid as the ntptr has been assigned to, and hence it must
186+
// be added to the Kill set of the block.
187+
void CollectDefinedVars(const Stmt *S, ElevatedCFGBlock *EB,
188+
DeclSetTy &DefinedVars);
189+
190+
// Assign the widened bounds from the ElevatedBlock to the CFG Block.
191+
// @param[in] BlockMap is the map from CFGBlock to ElevatedCFGBlock. Used
192+
// to associate the widened bounds from the ElevatedCFGBlock to the CFGBlock.
193+
void CollectWidenedBounds(BlockMapTy BlockMap);
194+
195+
// Get the terminating condition for a block. This could be an if condition
196+
// of the form "if(*(p + i))".
197+
// @param[in] B is the block for which we need the terminating condition.
198+
// @return Expression for the terminating condition of block B.
199+
Expr *GetTerminatorCondition(const CFGBlock *B) const;
200+
201+
// Check if E is a pointer dereference.
202+
// @param[in] E is the expression for possibly a pointer deref.
203+
// @return Whether E is a pointer deref.
204+
bool IsPointerDerefLValue(Expr *E) const;
205+
206+
// Check if E contains a pointer dereference.
207+
// @param[in] E is the expression which possibly contains a pointer deref.
208+
// @return Whether E contains a pointer deref.
209+
bool ContainsPointerDeref(Expr *E) const;
210+
211+
// WidenedBounds is a DenseMap and hence is not suitable for iteration as
212+
// its iteration order is non-deterministic. So we first need to order the
213+
// blocks.
214+
// @return Blocks ordered by block numbers from higher to lower since block
215+
// numbers decrease from entry to exit.
216+
OrderedBlocksTy GetOrderedBlocks();
217+
218+
// Strip E of all casts.
219+
// @param[in] E is the expression which must be stripped of all casts.
220+
// @return Expr stripped of all casts.
221+
Expr *IgnoreCasts(Expr *E);
222+
223+
// Check if the declared bounds of V are zero, i.e. the upper bound of V is
224+
// equal to V.
225+
// @param[in] E is the bounds expression for V.
226+
// @param[in] V is the ntptr.
227+
// @return Whether the declared bounds of V are zero.
228+
bool AreDeclaredBoundsZero(const Expr *E, const Expr *V);
229+
230+
// We do not want to run dataflow analysis on null, entry or exit blocks.
231+
// So we skip them.
232+
// @param[in] B is the block which may need to be skipped from dataflow
233+
// analysis.
234+
// @return Whether B should be skipped.
235+
bool SkipBlock(const CFGBlock *B) const;
236+
237+
// Compute the intersection of sets A and B.
238+
// @param[in] A is a set.
239+
// @param[in] B is a set.
240+
// @return The intersection of sets A and B.
241+
template<class T> T Intersect(T &A, T &B) const;
242+
243+
// Compute the union of sets A and B.
244+
// @param[in] A is a set.
245+
// @param[in] B is a set.
246+
// @return The union of sets A and B.
247+
template<class T> T Union(T &A, T &B) const;
248+
249+
// Compute the set difference of sets A and B.
250+
// @param[in] A is a set.
251+
// @param[in] B is a set.
252+
// @return The set difference of sets A and B.
253+
template<class T, class U> T Difference(T &A, U &B) const;
254+
255+
// Check whether the sets A and B differ.
256+
// @param[in] A is a set.
257+
// @param[in] B is a set.
258+
// @return Whether sets A and B differ.
259+
template<class T> bool Differ(T &A, T &B) const;
260+
};
261+
}
262+
263+
#endif

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2579,6 +2579,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
25792579
if (Args.hasArg(OPT_fdump_extracted_comparison_facts))
25802580
Opts.DumpExtractedComparisonFacts = true;
25812581

2582+
if (Args.hasArg(OPT_fdump_widened_bounds))
2583+
Opts.DumpWidenedBounds = true;
2584+
25822585
Opts.WritableStrings = Args.hasArg(OPT_fwritable_strings);
25832586
Opts.ConstStrings = Args.hasFlag(OPT_fconst_strings, OPT_fno_const_strings,
25842587
Opts.ConstStrings);

0 commit comments

Comments
 (0)