Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
263 changes: 204 additions & 59 deletions lib/Sema/SemaBounds.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1707,62 +1707,150 @@ namespace {
static_cast<unsigned>(Test));
}

// Representation and operations on constant-sized ranges. A constant-sized
// range is a range that has the form (e1 + const1, e1 + const2), where e1
// is an expression. For now, we represent const1 and const2 as signed (APSInt)
// integers. They must have the same bitsize.
class ConstantSizedRange {
enum class RangeType {
ConstantSized,
VariableSized,
Unsupported
};

// Representation and operations on ranges.
// A range has the form (e1 + e2, e1 + e3) where e1 is an expression.
// A range can be either Constant- or Variable-sized.
//
// - If e2 and e3 are both constant integer expressions, the range is Constant-sized.
// For now, in this case, we represent e2 and e3 as signed (APSInt) integers.
// They must have the same bitsize.
// In this case, UpperOffsetExpr and LowerOffsetExpr should be both null.
// - If one or both of e2 and e3 are non-constant expressions, the range is Variable-sized.
// TODO: elaborate more.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please state the invariants for the representation.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dtarditi For
I will add the following lines to the comment:

  • for constant-sized range:
    More specifically: (UpperOffsetExpr == nullptr && LowerOffsetExpr == nullptr && BaseRangeKind == Kind::ConstantSized)
  • for variable-sized range:
    More specifically: ((UpperOffsetExpr != nullptr || LowerOffsetExpr != nullptr) && BaseRangeKind == Kind::VariableSized)

Is that enough?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that will be good.

class BaseRange {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think ranges are more complex than this comment indicates. A variable sized range has the form (e1 + e2, e1 + e3), where one or both of e2 and e3 are non-constant expressions.

I think it will be useful to state the invariant about the representation:

  • If a range is constant sized, the LowerOffsetExpr and UpperOffsetExpr should be null.
  • For a variable sized range, you'll need to think about to indicate whether to use the constant offset or the expression.

In theory, you could just check whether the expression is constant or not. We don't want to do this in practice because that's expensive to determine. So we want to compute this when handed the expression.

private:
Sema &S;
Expr *Base;
llvm::APSInt LowerOffset;
llvm::APSInt UpperOffset;
Expr *LowerOffsetExpr;
Expr *UpperOffsetExpr;
RangeType BaseRangeType;

public:
ConstantSizedRange(Sema &S) : S(S), Base(nullptr), LowerOffset(1, true),
UpperOffset(1, true) {
BaseRange(Sema &S) : S(S), Base(nullptr), LowerOffset(1, true),
UpperOffset(1, true), LowerOffsetExpr(nullptr), UpperOffsetExpr(nullptr),
BaseRangeType(RangeType::Unsupported) {
}

ConstantSizedRange(Sema &S, Expr *Base,
BaseRange(Sema &S, Expr *Base,
llvm::APSInt &LowerOffset,
llvm::APSInt &UpperOffset) :
S(S), Base(Base), LowerOffset(LowerOffset), UpperOffset(UpperOffset) {
S(S), Base(Base), LowerOffset(LowerOffset), UpperOffset(UpperOffset),
LowerOffsetExpr(nullptr), UpperOffsetExpr(nullptr),
BaseRangeType(RangeType::ConstantSized) {
}

// Is R in range of this range?
ProofResult InRange(ConstantSizedRange &R, ProofFailure &Cause, EquivExprSets *EquivExprs) {
BaseRange(Sema &S, Expr *Base,
Expr *LowerOffsetExpr,
Expr *UpperOffsetExpr) :
S(S), Base(Base), LowerOffset(1, true), UpperOffset(1, true),
LowerOffsetExpr(LowerOffsetExpr), UpperOffsetExpr(UpperOffsetExpr),
BaseRangeType(RangeType::VariableSized) {
}

// Is R a subrange of this range?
ProofResult InRange(BaseRange &R, ProofFailure &Cause, EquivExprSets *EquivExprs) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably change that comment at line 1754 for clarity: Is R a subrange of this range?

if (EqualValue(S.Context, Base, R.Base, EquivExprs)) {
ProofResult Result = ProofResult::True;
if (LowerOffset > R.LowerOffset) {
ProofResult LowerBoundsResult = CompareLowerOffsets(R, Cause, EquivExprs);
ProofResult UpperBoundsResult = CompareUpperOffsets(R, Cause, EquivExprs);

if (LowerBoundsResult == ProofResult::True &&
UpperBoundsResult == ProofResult::True)
return ProofResult::True;
if (LowerBoundsResult == ProofResult::False ||
UpperBoundsResult == ProofResult::False)
return ProofResult::False;
}
return ProofResult::Maybe;
}

ProofResult CompareLowerOffsets(BaseRange &R, ProofFailure &Cause, EquivExprSets *EquivExprs) {
ProofResult Result = ProofResult::Maybe;

// If both ranges are constant-sized, the constant integer values of the offsets are compared.
// If at least one of the ranges is variable-sized, then in some special cases
// we can prove the lower bound is correct.
if (IsConstantSizedRange() && R.IsConstantSizedRange()) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of checking that the ranges are constant sized, how about checking that both lower bounds are constant? Then you can eliminate the last case at lines 1794 - 1796.

After reading this code, I think having fields named LowerOffsetExpr and LowerOffset (as well UpperOffsetExpr and UpperOffset) are confusing. The Expr suffix doesn't mean much. LowerOffsetVariable and LowerOffsetConstant instead. It is also worth introducing predicate functions IsLowerOffsetConstant and IsLowerOffsetVariable, as that makes the code easier to read.

assert(!UpperOffsetExpr && !LowerOffsetExpr && !R.UpperOffsetExpr && !R.LowerOffsetExpr);
if (LowerOffset <= R.LowerOffset)
return ProofResult::True;
else {
Cause = CombineFailures(Cause, ProofFailure::LowerBound);
Result = ProofResult::False;
}
if (UpperOffset < R.UpperOffset) {
} else if (LowerOffsetExpr && R.LowerOffsetExpr &&
!EqualValue(S.Context, LowerOffsetExpr, R.LowerOffsetExpr, EquivExprs))

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about the case when the offset expressions are equal?

return ProofResult::Maybe;
else if (R.LowerOffsetExpr && R.LowerOffsetExpr->getType()->isUnsignedIntegerType() &&
LowerOffset.getExtValue() == 0)
return ProofResult::True;
else if (!R.LowerOffsetExpr && !LowerOffsetExpr &&
R.LowerOffset.getExtValue() == 0 && LowerOffset.getExtValue() == 0)
return ProofResult::True;
return Result;
}

ProofResult CompareUpperOffsets(BaseRange &R, ProofFailure &Cause, EquivExprSets *EquivExprs) {
ProofResult Result = ProofResult::Maybe;

// If both ranges are constant-sized, the constant integer values of the offsets are compared.
// If at least one of the ranges is variable-sized, then in some special cases
// we can prove the upper bound is correct.
if (IsConstantSizedRange() && R.IsConstantSizedRange()) {
assert(!UpperOffsetExpr && !LowerOffsetExpr && !R.UpperOffsetExpr && !R.LowerOffsetExpr);
if (R.UpperOffset <= UpperOffset)
return ProofResult::True;
else {
Cause = CombineFailures(Cause, ProofFailure::UpperBound);
Result = ProofResult::False;
}
return Result;
}
return ProofResult::Maybe;
} else if (UpperOffsetExpr && R.UpperOffsetExpr &&
!EqualValue(S.Context, UpperOffsetExpr, R.UpperOffsetExpr, EquivExprs))
return ProofResult::Maybe;
else if (UpperOffsetExpr && UpperOffsetExpr->getType()->isUnsignedIntegerType() &&
R.UpperOffset.getExtValue() == 0)
return ProofResult::True;
else if (!R.UpperOffsetExpr && !UpperOffsetExpr &&
R.UpperOffset.getExtValue() == 0 && UpperOffset.getExtValue() == 0)
return ProofResult::True;
return Result;
}

bool IsConstantSizedRange() {
return BaseRangeType == RangeType::ConstantSized;
}

bool IsVariableSizedRange() {
return BaseRangeType == RangeType::VariableSized;
}

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would recommend adding a matching method IsVariableSizedRange().

bool IsEmpty() {
return UpperOffset <= LowerOffset;
}

// Does R partially overlap this range?
ProofResult PartialOverlap(ConstantSizedRange &R) {
ProofResult PartialOverlap(BaseRange &R) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this could be generalized.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I addressed all the above comments (except this one) in my newest commit.
Can you please elaborate more on this comment?

if (Lexicographic(S.Context, nullptr).CompareExpr(Base, R.Base) ==
Lexicographic::Result::Equal) {
if (!IsEmpty() && !R.IsEmpty()) {
// R.LowerOffset is within this range, but R.UpperOffset is above the range
if (LowerOffset <= R.LowerOffset && R.LowerOffset < UpperOffset &&
UpperOffset < R.UpperOffset)
return ProofResult::True;
// Or R.UpperOffset is within this range, but R.LowerOffset is below the range.
if (LowerOffset < R.UpperOffset && R.UpperOffset <= UpperOffset &&
R.LowerOffset < LowerOffset)
return ProofResult::True;
// TODO: generalize to non-constant ranges
if (IsConstantSizedRange() && R.IsConstantSizedRange()) {
if (!IsEmpty() && !R.IsEmpty()) {
// R.LowerOffset is within this range, but R.UpperOffset is above the range
if (LowerOffset <= R.LowerOffset && R.LowerOffset < UpperOffset &&
UpperOffset < R.UpperOffset)
return ProofResult::True;
// Or R.UpperOffset is within this range, but R.LowerOffset is below the range.
if (LowerOffset < R.UpperOffset && R.UpperOffset <= UpperOffset &&
R.LowerOffset < LowerOffset)
return ProofResult::True;
}
}
return ProofResult::False;
}
Expand Down Expand Up @@ -1791,6 +1879,18 @@ namespace {
UpperOffset = Upper;
}

void SetLowerExpr(Expr *Lower) {
LowerOffsetExpr = Lower;
}

void SetUpperExpr(Expr *Upper) {
UpperOffsetExpr = Upper;
}

void SetRangeType(RangeType RT) {
BaseRangeType = RT;
}

void Dump(raw_ostream &OS) {
OS << "ConstantRange:\n";
OS << "Base: ";
Expand Down Expand Up @@ -1835,19 +1935,24 @@ namespace {
}
exit:
return I;
}
}

// Convert an expression E into a base and offset form. The offset is
// in characters.
// Convert an expression E into a base and offset form.
// - If E is pointer arithmetic involving addition or subtraction of a
// constant integer, return the base and offset.
// - If E is a pointer arithmetic involving addition or subtraction of

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment needs to be updated. OnlyConstant is gone now.

// a pointer with an unsigned integer expression, return the pointer as
// the base, and the rest as the offset. (If OnlyConstant is set to true,
// this case is not checked. Instead, the function will try to split the
// E into Base and Offset assuming the Offset can only be constant.)
// - If it is not or we run into issues such as pointer arithmetic overflow,
// return a default expression of (E, 0).
// TODO: we use signed integers to represent the result of the Offset.
// We can't represent unsigned offsets larger the the maximum signed
// integer that will fit pointer width.
void SplitIntoBaseAndOffset(Expr *E, Expr *&Base,
llvm::APSInt &Offset) {
llvm::APSInt &Offset, Expr *&OffsetExpr,

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of adding the extra parameter OnlyConstant, I suggest always letting this function always split expressions if possible and checking the results returned by the function. I think you will need to change the function to return a value indicating what it did.

bool OnlyConstant = false) {
if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(E->IgnoreParens())) {
if (BO->isAdditiveOp()) {
Expr *Other = nullptr;
Expand All @@ -1860,7 +1965,7 @@ namespace {
} else
goto exit;
assert(Other->getType()->isIntegerType());
if (Other->isIntegerConstantExpr(Offset, S.Context)) {
if (Other->isEvaluatable(S.Context) && Other->isIntegerConstantExpr(Offset, S.Context)) {
// Widen the integer to the number of bits in a pointer.
bool Overflow;
Offset = ConvertToSignedPointerWidth(Offset, Overflow);
Expand All @@ -1879,6 +1984,10 @@ namespace {
if (Overflow)
goto exit;
return;
} else if (!OnlyConstant) {
// if the offset is not a number (i.e., it is an expression)
OffsetExpr = Other;
return;
}
}
}
Expand All @@ -1894,10 +2003,14 @@ namespace {
return R == Lexicographic::Result::Equal;
}

// Convert a bounds expression to a constant-sized range. Returns true if the
// the bounds expression can be converted and false if it cannot be converted.
bool CreateConstantRange(const BoundsExpr *Bounds, ConstantSizedRange *R,
EquivExprSets *EquivExprs) {
// Convert a bounds expression to a constant- or variable-sized range. Returns
// true if the the bounds expression can be converted and false if it cannot
// be converted.
// If OnlyConstant is set to true, it tries to only create a constant range,
// and returns false if it fails to do so. It is especially needed when handling
// memory accesses.
bool CreateBaseRange(const BoundsExpr *Bounds, BaseRange *R,
EquivExprSets *EquivExprs, bool OnlyConstant = false) {
switch (Bounds->getKind()) {
case BoundsExpr::Kind::Invalid:
case BoundsExpr::Kind::Unknown:
Expand All @@ -1913,13 +2026,41 @@ namespace {
Expr *Upper = RB->getUpperExpr();
Expr *LowerBase, *UpperBase;
llvm::APSInt LowerOffset, UpperOffset;
SplitIntoBaseAndOffset(Lower, LowerBase, LowerOffset);
SplitIntoBaseAndOffset(Upper, UpperBase, UpperOffset);
if (EqualValue(S.Context, LowerBase, UpperBase, EquivExprs)) {
R->SetBase(LowerBase);
R->SetLower(LowerOffset);
R->SetUpper(UpperOffset);
return true;
Expr *LowerOffsetExpr = nullptr;
Expr *UpperOffsetExpr = nullptr;
SplitIntoBaseAndOffset(Lower, LowerBase, LowerOffset, LowerOffsetExpr, OnlyConstant);
SplitIntoBaseAndOffset(Upper, UpperBase, UpperOffset, UpperOffsetExpr, OnlyConstant);

if (OnlyConstant) {
if (EqualValue(S.Context, LowerBase, UpperBase, EquivExprs)) {
R->SetBase(LowerBase);
R->SetLower(LowerOffset);
R->SetUpper(UpperOffset);
R->SetRangeType(RangeType::ConstantSized);
return true;
}
return false;
}

// If at least one of the offsets is not constant (i.e., it is an
// expression), the range is considered variable-sized. If both of the
// offsets are constants, the range is considered constant-sized.
if (!LowerOffsetExpr && !UpperOffsetExpr) {
if (EqualValue(S.Context, LowerBase, UpperBase, EquivExprs)) {
R->SetBase(LowerBase);
R->SetLower(LowerOffset);
R->SetUpper(UpperOffset);
R->SetRangeType(RangeType::ConstantSized);
return true;
}
} else {
if (EqualValue(S.Context, LowerBase, UpperBase, EquivExprs)) {
R->SetBase(LowerBase);
R->SetLowerExpr(LowerOffsetExpr);
R->SetUpperExpr(UpperOffsetExpr);
R->SetRangeType(RangeType::VariableSized);
return true;
}
}
return false;
}
Expand Down Expand Up @@ -1958,10 +2099,10 @@ namespace {
if (S.Context.EquivalentBounds(DeclaredBounds, SrcBounds, EquivExprs))
return ProofResult::True;

ConstantSizedRange DeclaredRange(S);
ConstantSizedRange SrcRange(S);
if (CreateConstantRange(DeclaredBounds, &DeclaredRange, EquivExprs) &&
CreateConstantRange(SrcBounds, &SrcRange, EquivExprs)) {
BaseRange DeclaredRange(S);
BaseRange SrcRange(S);
if (CreateBaseRange(DeclaredBounds, &DeclaredRange, EquivExprs) &&
CreateBaseRange(SrcBounds, &SrcRange, EquivExprs)) {
#ifdef TRACE_RANGE
llvm::outs() << "Found constant ranges:\n";
llvm::outs() << "Declared bounds";
Expand All @@ -1979,13 +2120,15 @@ namespace {
if (R == ProofResult::False || R == ProofResult::Maybe) {
if (SrcRange.IsEmpty())
Cause = CombineFailures(Cause, ProofFailure::Empty);
if (DeclaredRange.GetWidth() > SrcRange.GetWidth()) {
Cause = CombineFailures(Cause, ProofFailure::Width);
R = ProofResult::False;
} else if (Kind == ProofStmtKind::StaticBoundsCast) {
// For checking static casts between Ptr types, we only need to
// prove that the declared width <= the source width.
return ProofResult::True;
if (DeclaredRange.IsConstantSizedRange() && SrcRange.IsConstantSizedRange()) {
if (DeclaredRange.GetWidth() > SrcRange.GetWidth()) {
Cause = CombineFailures(Cause, ProofFailure::Width);
R = ProofResult::False;
} else if (Kind == ProofStmtKind::StaticBoundsCast) {
// For checking static casts between Ptr types, we only need to
// prove that the declared width <= the source width.
return ProofResult::True;
}
}
}
return R;
Expand All @@ -2011,8 +2154,8 @@ namespace {
assert(BoundsUtil::IsStandardForm(Bounds) &&
"bounds not in standard form");
Cause = ProofFailure::None;
ConstantSizedRange ValidRange(S);
if (!CreateConstantRange(Bounds, &ValidRange, nullptr))
BaseRange ValidRange(S);
if (!CreateBaseRange(Bounds, &ValidRange, nullptr, true))
return ProofResult::Maybe;

bool Overflow;
Expand All @@ -2027,7 +2170,9 @@ namespace {

Expr *AccessBase;
llvm::APSInt AccessStartOffset;
SplitIntoBaseAndOffset(PtrBase, AccessBase, AccessStartOffset);
Expr *DummyOffset;
SplitIntoBaseAndOffset(PtrBase, AccessBase, AccessStartOffset, DummyOffset, true);

if (Offset) {
llvm::APSInt IntVal;
if (!Offset->isIntegerConstantExpr(IntVal, S.Context))
Expand All @@ -2042,7 +2187,7 @@ namespace {
if (Overflow)
return ProofResult::Maybe;
}
ConstantSizedRange MemoryAccessRange(S, AccessBase, AccessStartOffset,
BaseRange MemoryAccessRange(S, AccessBase, AccessStartOffset,
AccessStartOffset);
Overflow = MemoryAccessRange.AddToUpper(ElementSize);
if (Overflow)
Expand Down Expand Up @@ -3269,4 +3414,4 @@ void Sema::WarnDynamicCheckAlwaysFails(const Expr *Condition) {
<< Condition->getSourceRange();
}
}
}
}
Loading