Skip to content

Support bounds widening for conditionals within switch statements #805

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
May 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions clang/include/clang/Sema/BoundsAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,30 @@ namespace clang {
// block.
BoundsVarTy BoundsVars;

// To compute In[B] we compute the intersection of Out[B*->B], where B*
// are all preds of B. When there is a back edge from block B' to B (for
// example in loops), the Out set for block B' will be empty when we
// first enter B. As a result, the intersection operation would always
// result in an empty In set for B.

// So to handle this, we consider the In and Out sets for all blocks to
// have a default value of "Top" which indicates a set of all members of
// the Gen set. In this way we ensure that the intersection does not
// result in an empty set even if the Out set for a block is actually
// empty.

// But we also need to handle the case where there is an unconditional
// jump into a block (for example, as a result of a goto). In this case,
// we cannot widen the bounds because we would not have checked for the
// ptr dereference. So in this case we want the intersection to result in
// an empty set.

// So we mark the In and Out sets of the Entry block as "empty".
// IsInSetEmpty and IsOutSetEmpty indicate whether the In and Out sets
// for a block have been marked as "empty".
bool IsInSetEmpty;
llvm::DenseMap<const CFGBlock *, bool> IsOutSetEmpty;

ElevatedCFGBlock(const CFGBlock *B) : Block(B) {}
};

Expand Down Expand Up @@ -299,6 +323,10 @@ namespace clang {
// as Top.
void InitInOutSets();

// Check if the switch case label is null.
// @param[in] EB is the ElevatedCFGBlock for the current block.
bool CheckIsSwitchCaseNull(ElevatedCFGBlock *EB);

// Compute the intersection of sets A and B.
// @param[in] A is a set.
// @param[in] B is a set.
Expand Down
80 changes: 77 additions & 3 deletions clang/lib/Sema/BoundsAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ void BoundsAnalysis::WidenBounds(FunctionDecl *FD) {
// iterate WorkList.
WorkList.append(EB);
BlockMap[B] = EB;

// Mark the In set for the Entry block as "empty". The Out set for the
// Entry block would be marked as "empty" in ComputeOutSets.
EB->IsInSetEmpty = B == &Cfg->getEntry();
}

// At this time, BlockMap only contains reachable blocks. We iterate through
Expand Down Expand Up @@ -83,13 +87,48 @@ void BoundsAnalysis::InitInOutSets() {
}
}

bool BoundsAnalysis::CheckIsSwitchCaseNull(ElevatedCFGBlock *EB) {
if (const auto *CS = dyn_cast_or_null<CaseStmt>(EB->Block->getLabel())) {

// We mimic how clang (in SemaStmt.cpp) gets the value of a switch case. It
// invokes EvaluateKnownConstInt and we do the same here. SemaStmt has
// already extended/truncated the case value to fit the integer range and
// EvaluateKnownConstInt gives us that value.
llvm::APSInt LHSVal = CS->getLHS()->EvaluateKnownConstInt(Ctx);
llvm::APSInt LHSZero (LHSVal.getBitWidth(), LHSVal.isUnsigned());
if (llvm::APSInt::compareValues(LHSVal, LHSZero) == 0)
return true;

// Check if the case statement is of the form "case LHS ... RHS" (a GNU
// extension).
if (CS->caseStmtIsGNURange()) {
llvm::APSInt RHSVal = CS->getRHS()->EvaluateKnownConstInt(Ctx);
llvm::APSInt RHSZero (RHSVal.getBitWidth(), RHSVal.isUnsigned());
if (llvm::APSInt::compareValues(RHSVal, RHSZero) == 0)
return true;

// Check if 0 if contained within the range [LHS, RHS].
return (LHSVal <= LHSZero && RHSZero <= RHSVal) ||
(LHSVal >= LHSZero && RHSZero >= RHSVal);
}
return false;
}
return true;
}

void BoundsAnalysis::ComputeGenSets() {
// If there is an edge B1->B2 and the edge condition is of the form
// "if (*(p + i))" then Gen[B1] = {B2, p:i} .

// Here, EB is B2.
for (const auto item : BlockMap) {
ElevatedCFGBlock *EB = item.second;

// Check if this is a switch case and whether the case label is non-null.
// We can only widen the bounds for a non-null case label.
bool IsSwitchCaseNull = CheckIsSwitchCaseNull(EB);

// Iterate through all preds of EB.
for (const CFGBlock *pred : EB->Block->preds()) {
if (SkipBlock(pred))
continue;
Expand All @@ -103,10 +142,39 @@ void BoundsAnalysis::ComputeGenSets() {
// Here we have the edges (B1->B2) and (B1->B3). We can add "p:i" only
// on the true edge. Which means we will add the following entry to
// Gen[B1]: {B2, p:i}
if (!pred->succ_empty() && *pred->succs().begin() == EB->Block)
// Get the edge condition and fill the Gen set.
if (Expr *E = GetTerminatorCondition(pred))

// Check if EB is on a true edge of pred. The false edge (including the
// default case for a switch) is always the last edge in the list of
// edges. So we check that EB is not on the last edge for pred.

// TODO: Allow bounds widening for the default case of a switch-case.
// If we establish that another label in a switch statement tests for 0,
// then the default case will handle non-zero case, and the bounds can be
// widened there. The following github issue tracks this:
// https://github.com/microsoft/checkedc-clang/issues/818.

if (pred->succ_size() && EB->Block != *(pred->succs().end() - 1)) {
// Get the edge condition and fill the Gen set.
if (Expr *E = GetTerminatorCondition(pred)) {

// Check if the pred ends in a switch statement.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a TODO about handling the default case better and cite the GitHub issue?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

if (isa<SwitchStmt>(pred->getTerminatorStmt())) {
// We can widen the bounds only if the current block has a non-null
// case statement.
if (IsSwitchCaseNull)
continue;

// If the switch expression is integral, strip off the
// IntegralCast.
if (auto *CE = dyn_cast<CastExpr>(E)) {
if (CE->getCastKind() == CastKind::CK_IntegralCast)
E = CE->getSubExpr();
}
}

FillGenSet(E, BlockMap[pred], EB);
}
}
}
}
}
Expand Down Expand Up @@ -643,6 +711,10 @@ void BoundsAnalysis::ComputeOutSets(ElevatedCFGBlock *EB,

EB->Out[succ] = Union(Diff, EB->Gen[succ]);

// The Out set on an edge is marked "empty" if the In set is marked "empty"
// and the Gen set on that edge is empty.
EB->IsOutSetEmpty[succ] = EB->IsInSetEmpty && !EB->Gen[succ].size();

if (Differ(OldOut, EB->Out[succ]))
WorkList.append(BlockMap[succ]);
}
Expand All @@ -666,6 +738,8 @@ Expr *BoundsAnalysis::GetTerminatorCondition(const CFGBlock *B) const {
return const_cast<Expr *>(WhileS->getCond());
if (const auto *ForS = dyn_cast<ForStmt>(S))
return const_cast<Expr *>(ForS->getCond());
if (const auto *SwitchS = dyn_cast<SwitchStmt>(S))
return const_cast<Expr *>(SwitchS->getCond());
}
return nullptr;
}
Expand Down
Loading