1 //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines RangeConstraintManager, a class that tracks simple
10 // equality and inequality constraints on symbolic values of ProgramState.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "clang/Basic/JsonSupport.h"
15 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
19 #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
20 #include "llvm/ADT/FoldingSet.h"
21 #include "llvm/ADT/ImmutableSet.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/Support/Compiler.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <algorithm>
28 #include <iterator>
29 #include <optional>
30
31 using namespace clang;
32 using namespace ento;
33
34 // This class can be extended with other tables which will help to reason
35 // about ranges more precisely.
36 class OperatorRelationsTable {
37 static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE &&
38 BO_GE < BO_EQ && BO_EQ < BO_NE,
39 "This class relies on operators order. Rework it otherwise.");
40
41 public:
42 enum TriStateKind {
43 False = 0,
44 True,
45 Unknown,
46 };
47
48 private:
49 // CmpOpTable holds states which represent the corresponding range for
50 // branching an exploded graph. We can reason about the branch if there is
51 // a previously known fact of the existence of a comparison expression with
52 // operands used in the current expression.
53 // E.g. assuming (x < y) is true that means (x != y) is surely true.
54 // if (x previous_operation y) // < | != | >
55 // if (x operation y) // != | > | <
56 // tristate // True | Unknown | False
57 //
58 // CmpOpTable represents next:
59 // __|< |> |<=|>=|==|!=|UnknownX2|
60 // < |1 |0 |* |0 |0 |* |1 |
61 // > |0 |1 |0 |* |0 |* |1 |
62 // <=|1 |0 |1 |* |1 |* |0 |
63 // >=|0 |1 |* |1 |1 |* |0 |
64 // ==|0 |0 |* |* |1 |0 |1 |
65 // !=|1 |1 |* |* |0 |1 |0 |
66 //
67 // Columns stands for a previous operator.
68 // Rows stands for a current operator.
69 // Each row has exactly two `Unknown` cases.
70 // UnknownX2 means that both `Unknown` previous operators are met in code,
71 // and there is a special column for that, for example:
72 // if (x >= y)
73 // if (x != y)
74 // if (x <= y)
75 // False only
76 static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1;
77 const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = {
78 // < > <= >= == != UnknownX2
79 {True, False, Unknown, False, False, Unknown, True}, // <
80 {False, True, False, Unknown, False, Unknown, True}, // >
81 {True, False, True, Unknown, True, Unknown, False}, // <=
82 {False, True, Unknown, True, True, Unknown, False}, // >=
83 {False, False, Unknown, Unknown, True, False, True}, // ==
84 {True, True, Unknown, Unknown, False, True, False}, // !=
85 };
86
getIndexFromOp(BinaryOperatorKind OP)87 static size_t getIndexFromOp(BinaryOperatorKind OP) {
88 return static_cast<size_t>(OP - BO_LT);
89 }
90
91 public:
getCmpOpCount() const92 constexpr size_t getCmpOpCount() const { return CmpOpCount; }
93
getOpFromIndex(size_t Index)94 static BinaryOperatorKind getOpFromIndex(size_t Index) {
95 return static_cast<BinaryOperatorKind>(Index + BO_LT);
96 }
97
getCmpOpState(BinaryOperatorKind CurrentOP,BinaryOperatorKind QueriedOP) const98 TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP,
99 BinaryOperatorKind QueriedOP) const {
100 return CmpOpTable[getIndexFromOp(CurrentOP)][getIndexFromOp(QueriedOP)];
101 }
102
getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const103 TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const {
104 return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount];
105 }
106 };
107
108 //===----------------------------------------------------------------------===//
109 // RangeSet implementation
110 //===----------------------------------------------------------------------===//
111
// Out-of-class definition of the static RangeSet::Factory::EmptySet container,
// value-initialized (i.e. it holds no ranges).
// NOTE(review): presumably the shared storage behind getEmptySet() — confirm
// against the header.
RangeSet::ContainerType RangeSet::Factory::EmptySet{};
113
add(RangeSet LHS,RangeSet RHS)114 RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) {
115 ContainerType Result;
116 Result.reserve(LHS.size() + RHS.size());
117 std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
118 std::back_inserter(Result));
119 return makePersistent(std::move(Result));
120 }
121
add(RangeSet Original,Range Element)122 RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) {
123 ContainerType Result;
124 Result.reserve(Original.size() + 1);
125
126 const_iterator Lower = llvm::lower_bound(Original, Element);
127 Result.insert(Result.end(), Original.begin(), Lower);
128 Result.push_back(Element);
129 Result.insert(Result.end(), Lower, Original.end());
130
131 return makePersistent(std::move(Result));
132 }
133
add(RangeSet Original,const llvm::APSInt & Point)134 RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) {
135 return add(Original, Range(Point));
136 }
137
unite(RangeSet LHS,RangeSet RHS)138 RangeSet RangeSet::Factory::unite(RangeSet LHS, RangeSet RHS) {
139 ContainerType Result = unite(*LHS.Impl, *RHS.Impl);
140 return makePersistent(std::move(Result));
141 }
142
unite(RangeSet Original,Range R)143 RangeSet RangeSet::Factory::unite(RangeSet Original, Range R) {
144 ContainerType Result;
145 Result.push_back(R);
146 Result = unite(*Original.Impl, Result);
147 return makePersistent(std::move(Result));
148 }
149
unite(RangeSet Original,llvm::APSInt Point)150 RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt Point) {
151 return unite(Original, Range(ValueFactory.getValue(Point)));
152 }
153
unite(RangeSet Original,llvm::APSInt From,llvm::APSInt To)154 RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt From,
155 llvm::APSInt To) {
156 return unite(Original,
157 Range(ValueFactory.getValue(From), ValueFactory.getValue(To)));
158 }
159
/// Exchanges two iterator ranges: afterwards [First, FirstEnd) denotes what
/// [Second, SecondEnd) denoted before the call, and vice versa.
template <typename T>
void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd) {
  // Swap the begin iterators.
  T SavedBegin = std::move(First);
  First = std::move(Second);
  Second = std::move(SavedBegin);

  // Swap the matching end iterators.
  T SavedEnd = std::move(FirstEnd);
  FirstEnd = std::move(SecondEnd);
  SecondEnd = std::move(SavedEnd);
}
165
unite(const ContainerType & LHS,const ContainerType & RHS)166 RangeSet::ContainerType RangeSet::Factory::unite(const ContainerType &LHS,
167 const ContainerType &RHS) {
168 if (LHS.empty())
169 return RHS;
170 if (RHS.empty())
171 return LHS;
172
173 using llvm::APSInt;
174 using iterator = ContainerType::const_iterator;
175
176 iterator First = LHS.begin();
177 iterator FirstEnd = LHS.end();
178 iterator Second = RHS.begin();
179 iterator SecondEnd = RHS.end();
180 APSIntType Ty = APSIntType(First->From());
181 const APSInt Min = Ty.getMinValue();
182
183 // Handle a corner case first when both range sets start from MIN.
184 // This helps to avoid complicated conditions below. Specifically, this
185 // particular check for `MIN` is not needed in the loop below every time
186 // when we do `Second->From() - One` operation.
187 if (Min == First->From() && Min == Second->From()) {
188 if (First->To() > Second->To()) {
189 // [ First ]--->
190 // [ Second ]----->
191 // MIN^
192 // The Second range is entirely inside the First one.
193
194 // Check if Second is the last in its RangeSet.
195 if (++Second == SecondEnd)
196 // [ First ]--[ First + 1 ]--->
197 // [ Second ]--------------------->
198 // MIN^
199 // The Union is equal to First's RangeSet.
200 return LHS;
201 } else {
202 // case 1: [ First ]----->
203 // case 2: [ First ]--->
204 // [ Second ]--->
205 // MIN^
206 // The First range is entirely inside or equal to the Second one.
207
208 // Check if First is the last in its RangeSet.
209 if (++First == FirstEnd)
210 // [ First ]----------------------->
211 // [ Second ]--[ Second + 1 ]---->
212 // MIN^
213 // The Union is equal to Second's RangeSet.
214 return RHS;
215 }
216 }
217
218 const APSInt One = Ty.getValue(1);
219 ContainerType Result;
220
221 // This is called when there are no ranges left in one of the ranges.
222 // Append the rest of the ranges from another range set to the Result
223 // and return with that.
224 const auto AppendTheRest = [&Result](iterator I, iterator E) {
225 Result.append(I, E);
226 return Result;
227 };
228
229 while (true) {
230 // We want to keep the following invariant at all times:
231 // ---[ First ------>
232 // -----[ Second --->
233 if (First->From() > Second->From())
234 swapIterators(First, FirstEnd, Second, SecondEnd);
235
236 // The Union definitely starts with First->From().
237 // ----------[ First ------>
238 // ------------[ Second --->
239 // ----------[ Union ------>
240 // UnionStart^
241 const llvm::APSInt &UnionStart = First->From();
242
243 // Loop where the invariant holds.
244 while (true) {
245 // Skip all enclosed ranges.
246 // ---[ First ]--->
247 // -----[ Second ]--[ Second + 1 ]--[ Second + N ]----->
248 while (First->To() >= Second->To()) {
249 // Check if Second is the last in its RangeSet.
250 if (++Second == SecondEnd) {
251 // Append the Union.
252 // ---[ Union ]--->
253 // -----[ Second ]----->
254 // --------[ First ]--->
255 // UnionEnd^
256 Result.emplace_back(UnionStart, First->To());
257 // ---[ Union ]----------------->
258 // --------------[ First + 1]--->
259 // Append all remaining ranges from the First's RangeSet.
260 return AppendTheRest(++First, FirstEnd);
261 }
262 }
263
264 // Check if First and Second are disjoint. It means that we find
265 // the end of the Union. Exit the loop and append the Union.
266 // ---[ First ]=------------->
267 // ------------=[ Second ]--->
268 // ----MinusOne^
269 if (First->To() < Second->From() - One)
270 break;
271
272 // First is entirely inside the Union. Go next.
273 // ---[ Union ----------->
274 // ---- [ First ]-------->
275 // -------[ Second ]----->
276 // Check if First is the last in its RangeSet.
277 if (++First == FirstEnd) {
278 // Append the Union.
279 // ---[ Union ]--->
280 // -----[ First ]------->
281 // --------[ Second ]--->
282 // UnionEnd^
283 Result.emplace_back(UnionStart, Second->To());
284 // ---[ Union ]------------------>
285 // --------------[ Second + 1]--->
286 // Append all remaining ranges from the Second's RangeSet.
287 return AppendTheRest(++Second, SecondEnd);
288 }
289
290 // We know that we are at one of the two cases:
291 // case 1: --[ First ]--------->
292 // case 2: ----[ First ]------->
293 // --------[ Second ]---------->
294 // In both cases First starts after Second->From().
295 // Make sure that the loop invariant holds.
296 swapIterators(First, FirstEnd, Second, SecondEnd);
297 }
298
299 // Here First and Second are disjoint.
300 // Append the Union.
301 // ---[ Union ]--------------->
302 // -----------------[ Second ]--->
303 // ------[ First ]--------------->
304 // UnionEnd^
305 Result.emplace_back(UnionStart, First->To());
306
307 // Check if First is the last in its RangeSet.
308 if (++First == FirstEnd)
309 // ---[ Union ]--------------->
310 // --------------[ Second ]--->
311 // Append all remaining ranges from the Second's RangeSet.
312 return AppendTheRest(Second, SecondEnd);
313 }
314
315 llvm_unreachable("Normally, we should not reach here");
316 }
317
getRangeSet(Range From)318 RangeSet RangeSet::Factory::getRangeSet(Range From) {
319 ContainerType Result;
320 Result.push_back(From);
321 return makePersistent(std::move(Result));
322 }
323
makePersistent(ContainerType && From)324 RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) {
325 llvm::FoldingSetNodeID ID;
326 void *InsertPos;
327
328 From.Profile(ID);
329 ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos);
330
331 if (!Result) {
332 // It is cheaper to fully construct the resulting range on stack
333 // and move it to the freshly allocated buffer if we don't have
334 // a set like this already.
335 Result = construct(std::move(From));
336 Cache.InsertNode(Result, InsertPos);
337 }
338
339 return Result;
340 }
341
construct(ContainerType && From)342 RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) {
343 void *Buffer = Arena.Allocate();
344 return new (Buffer) ContainerType(std::move(From));
345 }
346
getMinValue() const347 const llvm::APSInt &RangeSet::getMinValue() const {
348 assert(!isEmpty());
349 return begin()->From();
350 }
351
getMaxValue() const352 const llvm::APSInt &RangeSet::getMaxValue() const {
353 assert(!isEmpty());
354 return std::prev(end())->To();
355 }
356
isUnsigned() const357 bool clang::ento::RangeSet::isUnsigned() const {
358 assert(!isEmpty());
359 return begin()->From().isUnsigned();
360 }
361
getBitWidth() const362 uint32_t clang::ento::RangeSet::getBitWidth() const {
363 assert(!isEmpty());
364 return begin()->From().getBitWidth();
365 }
366
getAPSIntType() const367 APSIntType clang::ento::RangeSet::getAPSIntType() const {
368 assert(!isEmpty());
369 return APSIntType(begin()->From());
370 }
371
containsImpl(llvm::APSInt & Point) const372 bool RangeSet::containsImpl(llvm::APSInt &Point) const {
373 if (isEmpty() || !pin(Point))
374 return false;
375
376 Range Dummy(Point);
377 const_iterator It = llvm::upper_bound(*this, Dummy);
378 if (It == begin())
379 return false;
380
381 return std::prev(It)->Includes(Point);
382 }
383
pin(llvm::APSInt & Point) const384 bool RangeSet::pin(llvm::APSInt &Point) const {
385 APSIntType Type(getMinValue());
386 if (Type.testInRange(Point, true) != APSIntType::RTR_Within)
387 return false;
388
389 Type.apply(Point);
390 return true;
391 }
392
pin(llvm::APSInt & Lower,llvm::APSInt & Upper) const393 bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
394 // This function has nine cases, the cartesian product of range-testing
395 // both the upper and lower bounds against the symbol's type.
396 // Each case requires a different pinning operation.
397 // The function returns false if the described range is entirely outside
398 // the range of values for the associated symbol.
399 APSIntType Type(getMinValue());
400 APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower, true);
401 APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper, true);
402
403 switch (LowerTest) {
404 case APSIntType::RTR_Below:
405 switch (UpperTest) {
406 case APSIntType::RTR_Below:
407 // The entire range is outside the symbol's set of possible values.
408 // If this is a conventionally-ordered range, the state is infeasible.
409 if (Lower <= Upper)
410 return false;
411
412 // However, if the range wraps around, it spans all possible values.
413 Lower = Type.getMinValue();
414 Upper = Type.getMaxValue();
415 break;
416 case APSIntType::RTR_Within:
417 // The range starts below what's possible but ends within it. Pin.
418 Lower = Type.getMinValue();
419 Type.apply(Upper);
420 break;
421 case APSIntType::RTR_Above:
422 // The range spans all possible values for the symbol. Pin.
423 Lower = Type.getMinValue();
424 Upper = Type.getMaxValue();
425 break;
426 }
427 break;
428 case APSIntType::RTR_Within:
429 switch (UpperTest) {
430 case APSIntType::RTR_Below:
431 // The range wraps around, but all lower values are not possible.
432 Type.apply(Lower);
433 Upper = Type.getMaxValue();
434 break;
435 case APSIntType::RTR_Within:
436 // The range may or may not wrap around, but both limits are valid.
437 Type.apply(Lower);
438 Type.apply(Upper);
439 break;
440 case APSIntType::RTR_Above:
441 // The range starts within what's possible but ends above it. Pin.
442 Type.apply(Lower);
443 Upper = Type.getMaxValue();
444 break;
445 }
446 break;
447 case APSIntType::RTR_Above:
448 switch (UpperTest) {
449 case APSIntType::RTR_Below:
450 // The range wraps but is outside the symbol's set of possible values.
451 return false;
452 case APSIntType::RTR_Within:
453 // The range starts above what's possible but ends within it (wrap).
454 Lower = Type.getMinValue();
455 Type.apply(Upper);
456 break;
457 case APSIntType::RTR_Above:
458 // The entire range is outside the symbol's set of possible values.
459 // If this is a conventionally-ordered range, the state is infeasible.
460 if (Lower <= Upper)
461 return false;
462
463 // However, if the range wraps around, it spans all possible values.
464 Lower = Type.getMinValue();
465 Upper = Type.getMaxValue();
466 break;
467 }
468 break;
469 }
470
471 return true;
472 }
473
intersect(RangeSet What,llvm::APSInt Lower,llvm::APSInt Upper)474 RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower,
475 llvm::APSInt Upper) {
476 if (What.isEmpty() || !What.pin(Lower, Upper))
477 return getEmptySet();
478
479 ContainerType DummyContainer;
480
481 if (Lower <= Upper) {
482 // [Lower, Upper] is a regular range.
483 //
484 // Shortcut: check that there is even a possibility of the intersection
485 // by checking the two following situations:
486 //
487 // <---[ What ]---[------]------>
488 // Lower Upper
489 // -or-
490 // <----[------]----[ What ]---->
491 // Lower Upper
492 if (What.getMaxValue() < Lower || Upper < What.getMinValue())
493 return getEmptySet();
494
495 DummyContainer.push_back(
496 Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper)));
497 } else {
498 // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX]
499 //
500 // Shortcut: check that there is even a possibility of the intersection
501 // by checking the following situation:
502 //
503 // <------]---[ What ]---[------>
504 // Upper Lower
505 if (What.getMaxValue() < Lower && Upper < What.getMinValue())
506 return getEmptySet();
507
508 DummyContainer.push_back(
509 Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper)));
510 DummyContainer.push_back(
511 Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower)));
512 }
513
514 return intersect(*What.Impl, DummyContainer);
515 }
516
/// Intersects two range containers with a single simultaneous pass over both.
///
/// \returns the empty set when nothing is shared, otherwise a persistent set
/// holding the common sub-ranges.
RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS,
                                      const RangeSet::ContainerType &RHS) {
  ContainerType Common;
  Common.reserve(std::max(LHS.size(), RHS.size()));

  const_iterator First = LHS.begin();
  const_iterator FirstEnd = LHS.end();
  const_iterator Second = RHS.begin();
  const_iterator SecondEnd = RHS.end();

  // Once either side is exhausted, whatever remains on the other side
  // cannot be part of the intersection.
  while (First != FirstEnd && Second != SecondEnd) {
    // Invariant for the inner loop: First starts no later than Second.
    //
    // ----[ First ---------------------->
    // --------[ Second ----------------->
    if (Second->From() < First->From())
      swapIterators(First, FirstEnd, Second, SecondEnd);

    while (true) {
      // ----[ First ]--------------------->
      // ---------------[ Second ]--------->
      //
      // Second starts after First ends, so First contributes nothing more.
      // Advance First and leave the inner loop, because the invariant might
      // not be true anymore.
      if (Second->From() > First->To()) {
        ++First;
        break;
      }

      // The ranges overlap, and the overlap begins at Second->From():
      //
      // ----[ First ]----------------->
      // -------[ Second ------------------>
      const llvm::APSInt &OverlapStart = Second->From();

      // Keep First as the range that ends later: it might still intersect
      // with the ranges following the "shorter" one.
      if (Second->To() > First->To())
        swapIterators(First, FirstEnd, Second, SecondEnd);

      // Now Second is the range that ends first, hence
      // [OverlapStart, Second->To] is a part of the intersection.
      //
      // ----                First ]-------------------->
      // ---- Second ]--[ Second+1 ---------->
      Common.push_back(Range(OverlapStart, Second->To()));
      ++Second;

      // The invariant holds for a valid Second+1 because
      // First->From <= Second->To < (Second+1)->From.
      if (Second == SecondEnd)
        break;
    }
  }

  return Common.empty() ? getEmptySet() : makePersistent(std::move(Common));
}
594
intersect(RangeSet LHS,RangeSet RHS)595 RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) {
596 // Shortcut: let's see if the intersection is even possible.
597 if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() ||
598 RHS.getMaxValue() < LHS.getMinValue())
599 return getEmptySet();
600
601 return intersect(*LHS.Impl, *RHS.Impl);
602 }
603
intersect(RangeSet LHS,llvm::APSInt Point)604 RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) {
605 if (LHS.containsImpl(Point))
606 return getRangeSet(ValueFactory.getValue(Point));
607
608 return getEmptySet();
609 }
610
negate(RangeSet What)611 RangeSet RangeSet::Factory::negate(RangeSet What) {
612 if (What.isEmpty())
613 return getEmptySet();
614
615 const llvm::APSInt SampleValue = What.getMinValue();
616 const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue);
617 const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue);
618
619 ContainerType Result;
620 Result.reserve(What.size() + (SampleValue == MIN));
621
622 // Handle a special case for MIN value.
623 const_iterator It = What.begin();
624 const_iterator End = What.end();
625
626 const llvm::APSInt &From = It->From();
627 const llvm::APSInt &To = It->To();
628
629 if (From == MIN) {
630 // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX].
631 if (To == MAX) {
632 return What;
633 }
634
635 const_iterator Last = std::prev(End);
636
637 // Try to find and unite the following ranges:
638 // [MIN, MIN] & [MIN + 1, N] => [MIN, N].
639 if (Last->To() == MAX) {
640 // It means that in the original range we have ranges
641 // [MIN, A], ... , [B, MAX]
642 // And the result should be [MIN, -B], ..., [-A, MAX]
643 Result.emplace_back(MIN, ValueFactory.getValue(-Last->From()));
644 // We already negated Last, so we can skip it.
645 End = Last;
646 } else {
647 // Add a separate range for the lowest value.
648 Result.emplace_back(MIN, MIN);
649 }
650
651 // Skip adding the second range in case when [From, To] are [MIN, MIN].
652 if (To != MIN) {
653 Result.emplace_back(ValueFactory.getValue(-To), MAX);
654 }
655
656 // Skip the first range in the loop.
657 ++It;
658 }
659
660 // Negate all other ranges.
661 for (; It != End; ++It) {
662 // Negate int values.
663 const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To());
664 const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From());
665
666 // Add a negated range.
667 Result.emplace_back(NewFrom, NewTo);
668 }
669
670 llvm::sort(Result);
671 return makePersistent(std::move(Result));
672 }
673
674 // Convert range set to the given integral type using truncation and promotion.
675 // This works similar to APSIntType::apply function but for the range set.
castTo(RangeSet What,APSIntType Ty)676 RangeSet RangeSet::Factory::castTo(RangeSet What, APSIntType Ty) {
677 // Set is empty or NOOP (aka cast to the same type).
678 if (What.isEmpty() || What.getAPSIntType() == Ty)
679 return What;
680
681 const bool IsConversion = What.isUnsigned() != Ty.isUnsigned();
682 const bool IsTruncation = What.getBitWidth() > Ty.getBitWidth();
683 const bool IsPromotion = What.getBitWidth() < Ty.getBitWidth();
684
685 if (IsTruncation)
686 return makePersistent(truncateTo(What, Ty));
687
688 // Here we handle 2 cases:
689 // - IsConversion && !IsPromotion.
690 // In this case we handle changing a sign with same bitwidth: char -> uchar,
691 // uint -> int. Here we convert negatives to positives and positives which
692 // is out of range to negatives. We use convertTo function for that.
693 // - IsConversion && IsPromotion && !What.isUnsigned().
694 // In this case we handle changing a sign from signeds to unsigneds with
695 // higher bitwidth: char -> uint, int-> uint64. The point is that we also
696 // need convert negatives to positives and use convertTo function as well.
697 // For example, we don't need such a convertion when converting unsigned to
698 // signed with higher bitwidth, because all the values of unsigned is valid
699 // for the such signed.
700 if (IsConversion && (!IsPromotion || !What.isUnsigned()))
701 return makePersistent(convertTo(What, Ty));
702
703 assert(IsPromotion && "Only promotion operation from unsigneds left.");
704 return makePersistent(promoteTo(What, Ty));
705 }
706
castTo(RangeSet What,QualType T)707 RangeSet RangeSet::Factory::castTo(RangeSet What, QualType T) {
708 assert(T->isIntegralOrEnumerationType() && "T shall be an integral type.");
709 return castTo(What, ValueFactory.getAPSIntType(T));
710 }
711
truncateTo(RangeSet What,APSIntType Ty)712 RangeSet::ContainerType RangeSet::Factory::truncateTo(RangeSet What,
713 APSIntType Ty) {
714 using llvm::APInt;
715 using llvm::APSInt;
716 ContainerType Result;
717 ContainerType Dummy;
718 // CastRangeSize is an amount of all possible values of cast type.
719 // Example: `char` has 256 values; `short` has 65536 values.
720 // But in fact we use `amount of values` - 1, because
721 // we can't keep `amount of values of UINT64` inside uint64_t.
722 // E.g. 256 is an amount of all possible values of `char` and we can't keep
723 // it inside `char`.
724 // And it's OK, it's enough to do correct calculations.
725 uint64_t CastRangeSize = APInt::getMaxValue(Ty.getBitWidth()).getZExtValue();
726 for (const Range &R : What) {
727 // Get bounds of the given range.
728 APSInt FromInt = R.From();
729 APSInt ToInt = R.To();
730 // CurrentRangeSize is an amount of all possible values of the current
731 // range minus one.
732 uint64_t CurrentRangeSize = (ToInt - FromInt).getZExtValue();
733 // This is an optimization for a specific case when this Range covers
734 // the whole range of the target type.
735 Dummy.clear();
736 if (CurrentRangeSize >= CastRangeSize) {
737 Dummy.emplace_back(ValueFactory.getMinValue(Ty),
738 ValueFactory.getMaxValue(Ty));
739 Result = std::move(Dummy);
740 break;
741 }
742 // Cast the bounds.
743 Ty.apply(FromInt);
744 Ty.apply(ToInt);
745 const APSInt &PersistentFrom = ValueFactory.getValue(FromInt);
746 const APSInt &PersistentTo = ValueFactory.getValue(ToInt);
747 if (FromInt > ToInt) {
748 Dummy.emplace_back(ValueFactory.getMinValue(Ty), PersistentTo);
749 Dummy.emplace_back(PersistentFrom, ValueFactory.getMaxValue(Ty));
750 } else
751 Dummy.emplace_back(PersistentFrom, PersistentTo);
752 // Every range retrieved after truncation potentialy has garbage values.
753 // So, we have to unite every next range with the previouses.
754 Result = unite(Result, Dummy);
755 }
756
757 return Result;
758 }
759
760 // Divide the convertion into two phases (presented as loops here).
761 // First phase(loop) works when casted values go in ascending order.
762 // E.g. char{1,3,5,127} -> uint{1,3,5,127}
763 // Interrupt the first phase and go to second one when casted values start
764 // go in descending order. That means that we crossed over the middle of
765 // the type value set (aka 0 for signeds and MAX/2+1 for unsigneds).
766 // For instance:
767 // 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1}
768 // Here we put {1,3,5} to one array and {-128, -1} to another
769 // 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,3}
770 // Here we put {128,129,255} to one array and {0,1,3} to another.
771 // After that we unite both arrays.
772 // NOTE: We don't just concatenate the arrays, because they may have
773 // adjacent ranges, e.g.:
774 // 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) ->
775 // unite -> uchar(0, 255)
776 // 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) ->
777 // unite -> uchar(-2, 1)
convertTo(RangeSet What,APSIntType Ty)778 RangeSet::ContainerType RangeSet::Factory::convertTo(RangeSet What,
779 APSIntType Ty) {
780 using llvm::APInt;
781 using llvm::APSInt;
782 using Bounds = std::pair<const APSInt &, const APSInt &>;
783 ContainerType AscendArray;
784 ContainerType DescendArray;
785 auto CastRange = [Ty, &VF = ValueFactory](const Range &R) -> Bounds {
786 // Get bounds of the given range.
787 APSInt FromInt = R.From();
788 APSInt ToInt = R.To();
789 // Cast the bounds.
790 Ty.apply(FromInt);
791 Ty.apply(ToInt);
792 return {VF.getValue(FromInt), VF.getValue(ToInt)};
793 };
794 // Phase 1. Fill the first array.
795 APSInt LastConvertedInt = Ty.getMinValue();
796 const auto *It = What.begin();
797 const auto *E = What.end();
798 while (It != E) {
799 Bounds NewBounds = CastRange(*(It++));
800 // If values stop going acsending order, go to the second phase(loop).
801 if (NewBounds.first < LastConvertedInt) {
802 DescendArray.emplace_back(NewBounds.first, NewBounds.second);
803 break;
804 }
805 // If the range contains a midpoint, then split the range.
806 // E.g. char(-5, 5) -> uchar(251, 5)
807 // Here we shall add a range (251, 255) to the first array and (0, 5) to the
808 // second one.
809 if (NewBounds.first > NewBounds.second) {
810 DescendArray.emplace_back(ValueFactory.getMinValue(Ty), NewBounds.second);
811 AscendArray.emplace_back(NewBounds.first, ValueFactory.getMaxValue(Ty));
812 } else
813 // Values are going acsending order.
814 AscendArray.emplace_back(NewBounds.first, NewBounds.second);
815 LastConvertedInt = NewBounds.first;
816 }
817 // Phase 2. Fill the second array.
818 while (It != E) {
819 Bounds NewBounds = CastRange(*(It++));
820 DescendArray.emplace_back(NewBounds.first, NewBounds.second);
821 }
822 // Unite both arrays.
823 return unite(AscendArray, DescendArray);
824 }
825
826 /// Promotion from unsigneds to signeds/unsigneds left.
promoteTo(RangeSet What,APSIntType Ty)827 RangeSet::ContainerType RangeSet::Factory::promoteTo(RangeSet What,
828 APSIntType Ty) {
829 ContainerType Result;
830 // We definitely know the size of the result set.
831 Result.reserve(What.size());
832
833 // Each unsigned value fits every larger type without any changes,
834 // whether the larger type is signed or unsigned. So just promote and push
835 // back each range one by one.
836 for (const Range &R : What) {
837 // Get bounds of the given range.
838 llvm::APSInt FromInt = R.From();
839 llvm::APSInt ToInt = R.To();
840 // Cast the bounds.
841 Ty.apply(FromInt);
842 Ty.apply(ToInt);
843 Result.emplace_back(ValueFactory.getValue(FromInt),
844 ValueFactory.getValue(ToInt));
845 }
846 return Result;
847 }
848
deletePoint(RangeSet From,const llvm::APSInt & Point)849 RangeSet RangeSet::Factory::deletePoint(RangeSet From,
850 const llvm::APSInt &Point) {
851 if (!From.contains(Point))
852 return From;
853
854 llvm::APSInt Upper = Point;
855 llvm::APSInt Lower = Point;
856
857 ++Upper;
858 --Lower;
859
860 // Notice that the lower bound is greater than the upper bound.
861 return intersect(From, Upper, Lower);
862 }
863
dump(raw_ostream & OS) const864 LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const {
865 OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
866 }
dump() const867 LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); }
868
dump(raw_ostream & OS) const869 LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const {
870 OS << "{ ";
871 llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); });
872 OS << " }";
873 }
dump() const874 LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); }
875
// Program-state traits used by the equivalence-class machinery below. In each
// registration the first argument names the trait and the remaining arguments
// give its element type (sets) or key and value types (maps).

// SymbolSet: a set of SymbolRef; used as the value type of ClassMembers.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef)

namespace {
// Forward declaration: the registrations below name EquivalenceClass before
// the class itself is defined.
class EquivalenceClass;
} // end anonymous namespace

// ClassMap: SymbolRef -> EquivalenceClass.
REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass)
// ClassMembers: EquivalenceClass -> SymbolSet (the members of each class).
REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers, EquivalenceClass, SymbolSet)
// ConstraintRange: EquivalenceClass -> RangeSet (the constraint of the class).
REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange, EquivalenceClass, RangeSet)

// ClassSet: a set of EquivalenceClass; used as the value type of
// DisequalityMap.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass)
// DisequalityMap: EquivalenceClass -> ClassSet.
// NOTE(review): presumably the classes known to be disequal to the key -
// confirm against markDisequal/getDisequalClasses.
REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet)
888
889 namespace {
/// This class encapsulates a set of symbols equal to each other.
///
/// The main idea of the approach requiring such classes is in narrowing
/// and sharing constraints between symbols within the class.  Also we can
/// conclude that there is no practical need in storing constraints for
/// every member of the class separately.
///
/// Main terminology:
///
///   * "Equivalence class" is an object of this class, which can be efficiently
///     compared to other classes.  It represents the whole class without
///     storing the actual members in it.  The members of the class however can
///     be retrieved from the state.
///
///   * "Class members" are the symbols corresponding to the class.  This means
///     that A == B for every member symbols A and B from the class.  Members of
///     each class are stored in the state.
///
///   * "Trivial class" is a class that has and ever had only one same symbol.
///
///   * "Merge operation" merges two classes into one.  It is the main operation
///     to produce non-trivial classes.
///     If, at some point, we can assume that two symbols from two distinct
///     classes are equal, we can merge these classes.
///
/// An object of this class is only an identifier (see ID below); it can be
/// copy- and move-constructed but not assigned, and it cannot be
/// default-constructed.
class EquivalenceClass : public llvm::FoldingSetNode {
public:
  /// Find equivalence class for the given symbol in the given state.
  [[nodiscard]] static inline EquivalenceClass find(ProgramStateRef State,
                                                    SymbolRef Sym);

  /// Merge classes for the given symbols and return a new state.
  [[nodiscard]] static inline ProgramStateRef merge(RangeSet::Factory &F,
                                                    ProgramStateRef State,
                                                    SymbolRef First,
                                                    SymbolRef Second);
  /// Merge this class with the given class and return a new state.
  [[nodiscard]] inline ProgramStateRef
  merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other);

  /// Return a set of class members for the given state.
  [[nodiscard]] inline SymbolSet getClassMembers(ProgramStateRef State) const;

  /// Return true if the current class is trivial in the given state.
  /// A class is trivial if and only if there is not any member relations stored
  /// to it in State/ClassMembers.
  /// An equivalence class with one member might seem as it does not hold any
  /// meaningful information, i.e. that is a tautology. However, during the
  /// removal of dead symbols we do not remove classes with one member for
  /// resource and performance reasons. Consequently, a class with one member is
  /// not necessarily trivial. It could happen that we have a class with two
  /// members and then during the removal of dead symbols we remove one of its
  /// members. In this case, the class is still non-trivial (it still has the
  /// mappings in ClassMembers), even though it has only one member.
  [[nodiscard]] inline bool isTrivial(ProgramStateRef State) const;

  /// Return true if the current class is trivial and its only member is dead.
  [[nodiscard]] inline bool isTriviallyDead(ProgramStateRef State,
                                            SymbolReaper &Reaper) const;

  // Disequality bookkeeping. The definitions live outside this class body.
  // NOTE(review): presumably each markDisequal overload returns a state in
  // which the two symbols/classes are recorded as disequal - confirm against
  // the definitions.
  [[nodiscard]] static inline ProgramStateRef
  markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First,
               SymbolRef Second);
  [[nodiscard]] static inline ProgramStateRef
  markDisequal(RangeSet::Factory &F, ProgramStateRef State,
               EquivalenceClass First, EquivalenceClass Second);
  [[nodiscard]] inline ProgramStateRef
  markDisequal(RangeSet::Factory &F, ProgramStateRef State,
               EquivalenceClass Other) const;
  // NOTE(review): presumably these return the set of classes recorded as
  // disequal to the class of Sym / to this class - confirm against the
  // definitions.
  [[nodiscard]] static inline ClassSet getDisequalClasses(ProgramStateRef State,
                                                          SymbolRef Sym);
  [[nodiscard]] inline ClassSet getDisequalClasses(ProgramStateRef State) const;
  [[nodiscard]] inline ClassSet
  getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const;

  // Three-valued equality query. Callers in this file treat an engaged result
  // as a definite answer (true: equal, false: disequal) and std::nullopt as
  // "nothing is known".
  [[nodiscard]] static inline std::optional<bool>
  areEqual(ProgramStateRef State, EquivalenceClass First,
           EquivalenceClass Second);
  [[nodiscard]] static inline std::optional<bool>
  areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);

  /// Remove one member from the class.
  [[nodiscard]] ProgramStateRef removeMember(ProgramStateRef State,
                                             const SymbolRef Old);

  /// Iterate over all symbols and try to simplify them.
  [[nodiscard]] static inline ProgramStateRef simplify(SValBuilder &SVB,
                                                       RangeSet::Factory &F,
                                                       ProgramStateRef State,
                                                       EquivalenceClass Class);

  /// Print the class, as seen in \p State, to \p os.
  void dumpToStream(ProgramStateRef State, raw_ostream &os) const;
  /// Print the class, as seen in \p State, to llvm::errs().
  LLVM_DUMP_METHOD void dump(ProgramStateRef State) const {
    dumpToStream(State, llvm::errs());
  }

  /// Check equivalence data for consistency.
  [[nodiscard]] LLVM_ATTRIBUTE_UNUSED static bool
  isClassDataConsistent(ProgramStateRef State);

  /// Return the type of the representative symbol of this class.
  [[nodiscard]] QualType getType() const {
    return getRepresentativeSymbol()->getType();
  }

  // A class always has an identity, so there is no default constructor.
  // Copy/move construction is allowed; assignment is not.
  EquivalenceClass() = delete;
  EquivalenceClass(const EquivalenceClass &) = default;
  EquivalenceClass &operator=(const EquivalenceClass &) = delete;
  EquivalenceClass(EquivalenceClass &&) = default;
  EquivalenceClass &operator=(EquivalenceClass &&) = delete;

  // Classes are compared and ordered purely by their ID.
  bool operator==(const EquivalenceClass &Other) const {
    return ID == Other.ID;
  }
  bool operator<(const EquivalenceClass &Other) const { return ID < Other.ID; }
  bool operator!=(const EquivalenceClass &Other) const {
    return !operator==(Other);
  }

  /// Add the class identifier \p CID to the folding-set profile \p ID.
  static void Profile(llvm::FoldingSetNodeID &ID, uintptr_t CID) {
    ID.AddInteger(CID);
  }

  /// Profile this class by its identifier.
  void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, this->ID); }

private:
  /// Create the class identified by \p Sym: the symbol's address becomes the
  /// class ID.
  /* implicit */ EquivalenceClass(SymbolRef Sym)
      : ID(reinterpret_cast<uintptr_t>(Sym)) {}

  /// This function is intended to be used ONLY within the class.
  /// The fact that ID is a pointer to a symbol is an implementation detail
  /// and should stay that way.
  /// In the current implementation, we use it to retrieve the only member
  /// of the trivial class.
  SymbolRef getRepresentativeSymbol() const {
    return reinterpret_cast<SymbolRef>(ID);
  }
  // NOTE(review): presumably returns the SymbolSet factory owned by the
  // state - confirm against the definition.
  static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State);

  // Implementation helper of merge(); receives the member sets of both
  // classes explicitly. Defined outside this class body.
  inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
                                   SymbolSet Members, EquivalenceClass Other,
                                   SymbolSet OtherMembers);

  // Implementation helper for the disequality bookkeeping; takes the
  // disequality map and the constraints by mutable reference.
  // NOTE(review): the meaning of the bool result is not visible here -
  // confirm against the definition.
  static inline bool
  addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
                       RangeSet::Factory &F, ProgramStateRef State,
                       EquivalenceClass First, EquivalenceClass Second);

  /// This is a unique identifier of the class.
  uintptr_t ID;
};
1039
1040 //===----------------------------------------------------------------------===//
1041 // Constraint functions
1042 //===----------------------------------------------------------------------===//
1043
1044 [[nodiscard]] LLVM_ATTRIBUTE_UNUSED bool
areFeasible(ConstraintRangeTy Constraints)1045 areFeasible(ConstraintRangeTy Constraints) {
1046 return llvm::none_of(
1047 Constraints,
1048 [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) {
1049 return ClassConstraint.second.isEmpty();
1050 });
1051 }
1052
getConstraint(ProgramStateRef State,EquivalenceClass Class)1053 [[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
1054 EquivalenceClass Class) {
1055 return State->get<ConstraintRange>(Class);
1056 }
1057
getConstraint(ProgramStateRef State,SymbolRef Sym)1058 [[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
1059 SymbolRef Sym) {
1060 return getConstraint(State, EquivalenceClass::find(State, Sym));
1061 }
1062
setConstraint(ProgramStateRef State,EquivalenceClass Class,RangeSet Constraint)1063 [[nodiscard]] ProgramStateRef setConstraint(ProgramStateRef State,
1064 EquivalenceClass Class,
1065 RangeSet Constraint) {
1066 return State->set<ConstraintRange>(Class, Constraint);
1067 }
1068
setConstraints(ProgramStateRef State,ConstraintRangeTy Constraints)1069 [[nodiscard]] ProgramStateRef setConstraints(ProgramStateRef State,
1070 ConstraintRangeTy Constraints) {
1071 return State->set<ConstraintRange>(Constraints);
1072 }
1073
1074 //===----------------------------------------------------------------------===//
//                       Equality/disequality abstraction
1076 //===----------------------------------------------------------------------===//
1077
1078 /// A small helper function for detecting symbolic (dis)equality.
1079 ///
1080 /// Equality check can have different forms (like a == b or a - b) and this
1081 /// class encapsulates those away if the only thing the user wants to check -
1082 /// whether it's equality/diseqiality or not.
1083 ///
1084 /// \returns true if assuming this Sym to be true means equality of operands
1085 /// false if it means disequality of operands
1086 /// std::nullopt otherwise
meansEquality(const SymSymExpr * Sym)1087 std::optional<bool> meansEquality(const SymSymExpr *Sym) {
1088 switch (Sym->getOpcode()) {
1089 case BO_Sub:
1090 // This case is: A - B != 0 -> disequality check.
1091 return false;
1092 case BO_EQ:
1093 // This case is: A == B != 0 -> equality check.
1094 return true;
1095 case BO_NE:
1096 // This case is: A != B != 0 -> diseqiality check.
1097 return false;
1098 default:
1099 return std::nullopt;
1100 }
1101 }
1102
1103 //===----------------------------------------------------------------------===//
1104 // Intersection functions
1105 //===----------------------------------------------------------------------===//
1106
// Forward declaration of the overload that takes a RangeSet head followed by
// a nullable (std::optional or raw pointer) second argument. Its definition
// comes after the <RangeSet, RangeSet, ...> overload, which may have to call
// it while recursing over the remaining arguments.
template <class SecondTy, class... RestTy>
[[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
                                        SecondTy Second, RestTy... Tail);
1110
/// Compile-time helper that computes the return type of the generic
/// intersect function from the list of its argument types.
///
/// The argument types are scanned from left to right: the result is RangeSet
/// as soon as one of them is exactly RangeSet, and std::optional<RangeSet>
/// if the list runs out without finding one.
template <class... RangeTy> struct IntersectionTraits;

template <class... TailTy> struct IntersectionTraits<RangeSet, TailTy...> {
  // Found RangeSet, no need to check any further
  using Type = RangeSet;
};

template <> struct IntersectionTraits<> {
  // We ran out of types, and we didn't find any RangeSet, so the result should
  // be optional.
  using Type = std::optional<RangeSet>;
};

template <class OptionalOrPointer, class... TailTy>
struct IntersectionTraits<OptionalOrPointer, TailTy...> {
  // If current type is Optional or a raw pointer, we should keep looking.
  // (Any head type other than RangeSet lands here; the RangeSet
  // specialization above is the more specialized match for RangeSet.)
  using Type = typename IntersectionTraits<TailTy...>::Type;
};
1129
1130 template <class EndTy>
intersect(RangeSet::Factory & F,EndTy End)1131 [[nodiscard]] inline EndTy intersect(RangeSet::Factory &F, EndTy End) {
1132 // If the list contains only RangeSet or std::optional<RangeSet>, simply
1133 // return that range set.
1134 return End;
1135 }
1136
1137 [[nodiscard]] LLVM_ATTRIBUTE_UNUSED inline std::optional<RangeSet>
intersect(RangeSet::Factory & F,const RangeSet * End)1138 intersect(RangeSet::Factory &F, const RangeSet *End) {
1139 // This is an extraneous conversion from a raw pointer into
1140 // std::optional<RangeSet>
1141 if (End) {
1142 return *End;
1143 }
1144 return std::nullopt;
1145 }
1146
1147 template <class... RestTy>
intersect(RangeSet::Factory & F,RangeSet Head,RangeSet Second,RestTy...Tail)1148 [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1149 RangeSet Second, RestTy... Tail) {
1150 // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version
1151 // of the function and can be sure that the result is RangeSet.
1152 return intersect(F, F.intersect(Head, Second), Tail...);
1153 }
1154
1155 template <class SecondTy, class... RestTy>
intersect(RangeSet::Factory & F,RangeSet Head,SecondTy Second,RestTy...Tail)1156 [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1157 SecondTy Second, RestTy... Tail) {
1158 if (Second) {
1159 // Here we call the <RangeSet,RangeSet,...> version of the function...
1160 return intersect(F, Head, *Second, Tail...);
1161 }
1162 // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which
1163 // means that the result is definitely RangeSet.
1164 return intersect(F, Head, Tail...);
1165 }
1166
1167 /// Main generic intersect function.
1168 /// It intersects all of the given range sets. If some of the given arguments
1169 /// don't hold a range set (nullptr or std::nullopt), the function will skip
1170 /// them.
1171 ///
1172 /// Available representations for the arguments are:
1173 /// * RangeSet
1174 /// * std::optional<RangeSet>
1175 /// * RangeSet *
1176 /// Pointer to a RangeSet is automatically assumed to be nullable and will get
1177 /// checked as well as the optional version. If this behaviour is undesired,
1178 /// please dereference the pointer in the call.
1179 ///
1180 /// Return type depends on the arguments' types. If we can be sure in compile
1181 /// time that there will be a range set as a result, the returning type is
1182 /// simply RangeSet, in other cases we have to back off to
1183 /// std::optional<RangeSet>.
1184 ///
1185 /// Please, prefer optional range sets to raw pointers. If the last argument is
1186 /// a raw pointer and all previous arguments are std::nullopt, it will cost one
1187 /// additional check to convert RangeSet * into std::optional<RangeSet>.
1188 template <class HeadTy, class SecondTy, class... RestTy>
1189 [[nodiscard]] inline
1190 typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type
intersect(RangeSet::Factory & F,HeadTy Head,SecondTy Second,RestTy...Tail)1191 intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second,
1192 RestTy... Tail) {
1193 if (Head) {
1194 return intersect(F, *Head, Second, Tail...);
1195 }
1196 return intersect(F, Second, Tail...);
1197 }
1198
1199 //===----------------------------------------------------------------------===//
1200 // Symbolic reasoning logic
1201 //===----------------------------------------------------------------------===//
1202
/// A little component aggregating all of the reasoning we have about
/// the ranges of symbolic expressions.
///
/// Even when we don't know the exact values of the operands, we still
/// can get a pretty good estimate of the result's range.
///
/// The only public entry point is inferRange(); the Visit* methods are the
/// per-expression-kind hooks of the SymExprVisitor base.
class SymbolicRangeInferrer
    : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> {
public:
  /// Infer the range of \p Origin in \p State.
  ///
  /// \p Origin is forwarded to one of the private infer() overloads, i.e. it
  /// can be a symbol, an equivalence class or a type.
  template <class SourceType>
  static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State,
                             SourceType Origin) {
    SymbolicRangeInferrer Inferrer(F, State);
    return Inferrer.infer(Origin);
  }

  /// Fallback for symbolic expressions without a dedicated visitor.
  RangeSet VisitSymExpr(SymbolRef Sym) {
    // Maybe there is a constraint stored for -Sym that we can negate.
    if (std::optional<RangeSet> RS = getRangeForNegatedSym(Sym))
      return *RS;
    // If we've reached this line, the actual type of the symbolic
    // expression is not supported for advanced inference.
    // In this case, we simply backoff to the default "let's simply
    // infer the range from the expression's type".
    return infer(Sym->getType());
  }

  /// Unary expressions: only -(-a) is reasoned about; everything else gets
  /// the full range of the expression's type.
  RangeSet VisitUnarySymExpr(const UnarySymExpr *USE) {
    if (std::optional<RangeSet> RS = getRangeForNegatedUnarySym(USE))
      return *RS;
    return infer(USE->getType());
  }

  /// Symbol-vs-constant expressions are handled by the generic binary
  /// operator logic.
  RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
    return VisitBinaryOperator(Sym);
  }

  /// Constant-vs-symbol expressions are handled by the generic binary
  /// operator logic.
  RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
    return VisitBinaryOperator(Sym);
  }

  /// Symbol-vs-symbol expressions: intersect everything we can learn from the
  /// sources listed below. Sources that have nothing to say yield
  /// std::nullopt and are skipped by intersect().
  RangeSet VisitSymSymExpr(const SymSymExpr *SSE) {
    return intersect(
        RangeFactory,
        // If Sym is a difference of symbols A - B, then maybe we have range
        // set stored for B - A.
        //
        // If we have range set stored for both A - B and B - A then
        // calculate the effective range set by intersecting the range set
        // for A - B and the negated range set of B - A.
        getRangeForNegatedSymSym(SSE),
        // If Sym is a comparison expression (except <=>),
        // find any other comparisons with the same operands.
        // See function description.
        getRangeForComparisonSymbol(SSE),
        // If Sym is (dis)equality, we might have some information
        // on that in our equality classes data structure.
        getRangeForEqualities(SSE),
        // And we should always check what we can get from the operands.
        VisitBinaryOperator(SSE));
  }

private:
  SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
      : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {}

  /// Infer range information from the given integer constant.
  ///
  /// It's not a real "inference", but is here for operating with
  /// sub-expressions in a more polymorphic manner.
  /// The type argument is unused.
  RangeSet inferAs(const llvm::APSInt &Val, QualType) {
    return {RangeFactory, Val};
  }

  /// Infer range information from symbol in the context of the given type.
  RangeSet inferAs(SymbolRef Sym, QualType DestType) {
    QualType ActualType = Sym->getType();
    // Check that we can reason about the symbol at all.
    if (ActualType->isIntegralOrEnumerationType() ||
        Loc::isLocType(ActualType)) {
      return infer(Sym);
    }
    // Otherwise, let's simply infer from the destination type.
    // We couldn't figure out nothing else about that expression.
    return infer(DestType);
  }

  /// Infer the range of \p Sym from its stored constraint (if any) intersected
  /// with what the structure of the expression tells us.
  RangeSet infer(SymbolRef Sym) {
    return intersect(RangeFactory,
                     // Of course, we should take the constraint directly
                     // associated with this symbol into consideration.
                     getConstraint(State, Sym),
                     // Apart from the Sym itself, we can infer quite a lot if
                     // we look into subexpressions of Sym.
                     Visit(Sym));
  }

  /// Infer the range of an equivalence class: its stored constraint if there
  /// is one, otherwise the full range of the class's type.
  RangeSet infer(EquivalenceClass Class) {
    if (const RangeSet *AssociatedConstraint = getConstraint(State, Class))
      return *AssociatedConstraint;

    return infer(Class.getType());
  }

  /// Infer range information solely from the type.
  RangeSet infer(QualType T) {
    // Lazily generate a new RangeSet representing all possible values for the
    // given symbol type.
    RangeSet Result(RangeFactory, ValueFactory.getMinValue(T),
                    ValueFactory.getMaxValue(T));

    // References are known to be non-zero.
    if (T->isReferenceType())
      return assumeNonZero(Result, T);

    return Result;
  }

  /// Infer the range of a binary expression from the ranges of its operands.
  ///
  /// Both operands are inferred in the context of the expression's own type
  /// and then combined by the opcode-based overload below.
  template <class BinarySymExprTy>
  RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) {
    // TODO #1: VisitBinaryOperator implementation might not make a good
    // use of the inferred ranges.  In this case, we might be calculating
    // everything for nothing.  This being said, we should introduce some
    // sort of laziness mechanism here.
    //
    // TODO #2: We didn't go into the nested expressions before, so it
    // might cause us spending much more time doing the inference.
    // This can be a problem for deeply nested expressions that are
    // involved in conditions and get tested continuously.  We definitely
    // need to address this issue and introduce some sort of caching
    // in here.
    QualType ResultType = Sym->getType();
    return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType),
                               Sym->getOpcode(),
                               inferAs(Sym->getRHS(), ResultType), ResultType);
  }

  // Run-time opcode overload; defined outside the class body.
  RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
                               RangeSet RHS, QualType T);

  //===----------------------------------------------------------------------===//
  //                         Ranges and operators
  //===----------------------------------------------------------------------===//

  /// Return a rough approximation of the given range set.
  ///
  /// For the range set:
  ///   { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
  /// it will return the range [x_0, y_N].
  ///
  /// \p Origin must not be empty.
  static Range fillGaps(RangeSet Origin) {
    assert(!Origin.isEmpty());
    return {Origin.getMinValue(), Origin.getMaxValue()};
  }

  /// Try to convert given range into the given type.
  ///
  /// It will return std::nullopt when the trivial conversion is NOT possible,
  /// i.e. when either bound of the range does not test as being within the
  /// target type. Otherwise both bounds are converted to \p To.
  std::optional<Range> convert(const Range &Origin, APSIntType To) {
    if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within ||
        To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) {
      return std::nullopt;
    }
    return Range(ValueFactory.Convert(To, Origin.From()),
                 ValueFactory.Convert(To, Origin.To()));
  }

  /// Compile-time opcode overload working on range sets.
  ///
  /// Both operands are coarsened to a single covering range, converted to the
  /// result type and passed on to the Range-based overload. If a conversion
  /// is not possible, the full range of \p T is returned.
  template <BinaryOperator::Opcode Op>
  RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) {
    assert(!LHS.isEmpty() && !RHS.isEmpty());

    Range CoarseLHS = fillGaps(LHS);
    Range CoarseRHS = fillGaps(RHS);

    APSIntType ResultType = ValueFactory.getAPSIntType(T);

    // We need to convert ranges to the resulting type, so we can compare values
    // and combine them in a meaningful (in terms of the given operation) way.
    auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
    auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType);

    // It is hard to reason about ranges when conversion changes
    // borders of the ranges.
    if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
      return infer(T);
    }

    return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T);
  }

  /// Compile-time opcode overload working on single ranges.
  ///
  /// This generic version knows nothing about the operator and returns the
  /// full range of \p T.
  template <BinaryOperator::Opcode Op>
  RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) {
    return infer(T);
  }

  /// Return a symmetrical range for the given range and type.
  ///
  /// If T is signed, return the smallest range [-x..x] that covers the original
  /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't
  /// exist due to original range covering min(T)).
  ///
  /// If T is unsigned, return the smallest range [0..x] that covers the
  /// original range.
  Range getSymmetricalRange(Range Origin, QualType T) {
    APSIntType RangeType = ValueFactory.getAPSIntType(T);

    if (RangeType.isUnsigned()) {
      return Range(ValueFactory.getMinValue(RangeType), Origin.To());
    }

    if (Origin.From().isMinSignedValue()) {
      // If the lower bound is the minimal signed value, absolute value of it
      // is greater than the maximal signed value.  In order to avoid these
      // complications, we simply return the whole range.
      return {ValueFactory.getMinValue(RangeType),
              ValueFactory.getMaxValue(RangeType)};
    }

    // At this point, we are sure that the type is signed and we can safely
    // use unary - operator.
    //
    // While calculating absolute maximum, we can use the following formula
    // because of these reasons:
    //   * If From >= 0 then To >= From and To >= -From.
    //     AbsMax == To == max(To, -From)
    //   * If To <= 0 then -From >= -To and -From >= From.
    //     AbsMax == -From == max(-From, To)
    //   * Otherwise, From <= 0, To >= 0, and
    //     AbsMax == max(abs(From), abs(To))
    llvm::APSInt AbsMax = std::max(-Origin.From(), Origin.To());

    // Intersection is guaranteed to be non-empty.
    return {ValueFactory.getValue(-AbsMax), ValueFactory.getValue(AbsMax)};
  }

  /// Return a range set subtracting zero from \p Domain.
  RangeSet assumeNonZero(RangeSet Domain, QualType T) {
    APSIntType IntType = ValueFactory.getAPSIntType(T);
    return RangeFactory.deletePoint(Domain, IntType.getZeroValue());
  }

  /// Common implementation of the "negated expression" lookups.
  ///
  /// \p F produces the symbol that is the negation of the expression in
  /// question (or a null symbol when there is none). If that symbol has a
  /// stored constraint, the negation of the constraint is returned.
  ///
  /// \returns std::nullopt if \p T is neither a signed nor an unsigned
  /// integer/enumeration type, if \p F yields no symbol, or if the produced
  /// symbol has no stored constraint.
  template <typename ProduceNegatedSymFunc>
  std::optional<RangeSet> getRangeForNegatedExpr(ProduceNegatedSymFunc F,
                                                 QualType T) {
    // Do not negate if the type cannot be meaningfully negated.
    if (!T->isUnsignedIntegerOrEnumerationType() &&
        !T->isSignedIntegerOrEnumerationType())
      return std::nullopt;

    if (SymbolRef NegatedSym = F())
      if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym))
        return RangeFactory.negate(*NegatedRange);

    return std::nullopt;
  }

  /// For -a, look for a constraint stored on the operand a and negate it.
  std::optional<RangeSet> getRangeForNegatedUnarySym(const UnarySymExpr *USE) {
    // Just get the operand when we negate a symbol that is already negated.
    // -(-a) == a
    return getRangeForNegatedExpr(
        [USE]() -> SymbolRef {
          if (USE->getOpcode() == UO_Minus)
            return USE->getOperand();
          return nullptr;
        },
        USE->getType());
  }

  /// For A - B, look for a constraint stored on B - A and negate it.
  /// Expressions with any other opcode yield std::nullopt.
  std::optional<RangeSet> getRangeForNegatedSymSym(const SymSymExpr *SSE) {
    return getRangeForNegatedExpr(
        [SSE, State = this->State]() -> SymbolRef {
          if (SSE->getOpcode() == BO_Sub)
            return State->getSymbolManager().getSymSymExpr(
                SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType());
          return nullptr;
        },
        SSE->getType());
  }

  /// For an arbitrary symbol a, look for a constraint stored on -a and negate
  /// it.
  std::optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) {
    return getRangeForNegatedExpr(
        [Sym, State = this->State]() {
          return State->getSymbolManager().getUnarySymExpr(Sym, UO_Minus,
                                                           Sym->getType());
        },
        Sym->getType());
  }

  // Returns ranges only for binary comparison operators (except <=>)
  // when left and right operands are symbolic values.
  // Finds any other comparisons with the same operands.
  // Then do logical calculations and refuse impossible branches.
  // E.g. (x < y) and (x > y) at the same time are impossible.
  // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only.
  // E.g. (x == y) and (y == x) are just reversed but the same.
  // It covers all possible combinations (see CmpOpTable description).
  // Note that `x` and `y` can also stand for subexpressions,
  // not only for actual symbols.
  std::optional<RangeSet> getRangeForComparisonSymbol(const SymSymExpr *SSE) {
    const BinaryOperatorKind CurrentOP = SSE->getOpcode();

    // We currently do not support <=> (C++20).
    if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp))
      return std::nullopt;

    static const OperatorRelationsTable CmpOpTable{};

    const SymExpr *LHS = SSE->getLHS();
    const SymExpr *RHS = SSE->getRHS();
    QualType T = SSE->getType();

    SymbolManager &SymMgr = State->getSymbolManager();

    // We use this variable to store the last queried operator (`QueriedOP`)
    // for which the `getCmpOpState` returned with `Unknown`. If there are two
    // different OPs that returned `Unknown` then we have to query the special
    // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)`
    // never returns `Unknown`, so `CurrentOP` is a good initial value.
    BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP;

    // Loop goes through all of the columns except the last one ('UnknownX2').
    // We treat `UnknownX2` column separately at the end of the loop body.
    for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) {

      // Let's find an expression e.g. (x < y).
      BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i);
      const SymSymExpr *SymSym = SymMgr.getSymSymExpr(LHS, QueriedOP, RHS, T);
      const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);

      // If ranges were not previously found,
      // try to find a reversed expression (y > x).
      if (!QueriedRangeSet) {
        const BinaryOperatorKind ROP =
            BinaryOperator::reverseComparisonOp(QueriedOP);
        SymSym = SymMgr.getSymSymExpr(RHS, ROP, LHS, T);
        QueriedRangeSet = getConstraint(State, SymSym);
      }

      // Nothing is known about this comparison; try the next operator.
      if (!QueriedRangeSet || QueriedRangeSet->isEmpty())
        continue;

      // The comparison is known to be false only if its range is exactly the
      // concrete value 0; any other range is treated as the true branch.
      const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue();
      const bool isInFalseBranch =
          ConcreteValue ? (*ConcreteValue == 0) : false;

      // If it is a false branch, we shall be guided by opposite operator,
      // because the table is made assuming we are in the true branch.
      // E.g. when (x <= y) is false, then (x > y) is true.
      if (isInFalseBranch)
        QueriedOP = BinaryOperator::negateComparisonOp(QueriedOP);

      OperatorRelationsTable::TriStateKind BranchState =
          CmpOpTable.getCmpOpState(CurrentOP, QueriedOP);

      if (BranchState == OperatorRelationsTable::Unknown) {
        if (LastQueriedOpToUnknown != CurrentOP &&
            LastQueriedOpToUnknown != QueriedOP) {
          // If we got the Unknown state for both different operators.
          // if (x <= y)    // assume true
          //   if (x != y)  // assume true
          //     if (x < y) // would be also true
          // Get a state from `UnknownX2` column.
          BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP);
        } else {
          // First Unknown so far: remember the operator and keep looking.
          LastQueriedOpToUnknown = QueriedOP;
          continue;
        }
      }

      return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T)
                                                           : getFalseRange(T);
    }

    return std::nullopt;
  }

  /// Use the equivalence-class data to evaluate a (dis)equality expression.
  ///
  /// \returns the true or false range of the expression's type when \p Sym is
  /// a (dis)equality and its operands are known to be equal or disequal, and
  /// std::nullopt otherwise.
  std::optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) {
    std::optional<bool> Equality = meansEquality(Sym);

    if (!Equality)
      return std::nullopt;

    if (std::optional<bool> AreEqual =
            EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) {
      // Here we cover two cases at once:
      //   * if Sym is equality and its operands are known to be equal -> true
      //   * if Sym is disequality and its operands are disequal -> true
      if (*AreEqual == *Equality) {
        return getTrueRange(Sym->getType());
      }
      // Opposite combinations result in false.
      return getFalseRange(Sym->getType());
    }

    return std::nullopt;
  }

  /// Range of a "true" value of type \p T: the range inferred from the type
  /// with zero removed.
  RangeSet getTrueRange(QualType T) {
    RangeSet TypeRange = infer(T);
    return assumeNonZero(TypeRange, T);
  }

  /// Range of a "false" value of type \p T: built from the single value zero.
  RangeSet getFalseRange(QualType T) {
    const llvm::APSInt &Zero = ValueFactory.getValue(0, T);
    return RangeSet(RangeFactory, Zero);
  }

  // Value factory obtained from the range factory passed to the constructor.
  BasicValueFactory &ValueFactory;
  // Factory used for every range-set operation of the inferrer.
  RangeSet::Factory &RangeFactory;
  // The state whose constraints and equivalence classes are consulted.
  ProgramStateRef State;
};
1611
1612 //===----------------------------------------------------------------------===//
1613 // Range-based reasoning about symbolic operations
1614 //===----------------------------------------------------------------------===//
1615
1616 template <>
VisitBinaryOperator(RangeSet LHS,RangeSet RHS,QualType T)1617 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_NE>(RangeSet LHS,
1618 RangeSet RHS,
1619 QualType T) {
1620 assert(!LHS.isEmpty() && !RHS.isEmpty());
1621
1622 if (LHS.getAPSIntType() == RHS.getAPSIntType()) {
1623 if (intersect(RangeFactory, LHS, RHS).isEmpty())
1624 return getTrueRange(T);
1625
1626 } else {
1627 // We can only lose information if we are casting smaller signed type to
1628 // bigger unsigned type. For e.g.,
1629 // LHS (unsigned short): [2, USHRT_MAX]
1630 // RHS (signed short): [SHRT_MIN, 0]
1631 //
1632 // Casting RHS to LHS type will leave us with overlapping values
1633 // CastedRHS : [0, 0] U [SHRT_MAX + 1, USHRT_MAX]
1634 //
1635 // We can avoid this by checking if signed type's maximum value is lesser
1636 // than unsigned type's minimum value.
1637
1638 // If both have different signs then only we can get more information.
1639 if (LHS.isUnsigned() != RHS.isUnsigned()) {
1640 if (LHS.isUnsigned() && (LHS.getBitWidth() >= RHS.getBitWidth())) {
1641 if (RHS.getMaxValue().isNegative() ||
1642 LHS.getAPSIntType().convert(RHS.getMaxValue()) < LHS.getMinValue())
1643 return getTrueRange(T);
1644
1645 } else if (RHS.isUnsigned() && (LHS.getBitWidth() <= RHS.getBitWidth())) {
1646 if (LHS.getMaxValue().isNegative() ||
1647 RHS.getAPSIntType().convert(LHS.getMaxValue()) < RHS.getMinValue())
1648 return getTrueRange(T);
1649 }
1650 }
1651
1652 // Both RangeSets should be casted to bigger unsigned type.
1653 APSIntType CastingType(std::max(LHS.getBitWidth(), RHS.getBitWidth()),
1654 LHS.isUnsigned() || RHS.isUnsigned());
1655
1656 RangeSet CastedLHS = RangeFactory.castTo(LHS, CastingType);
1657 RangeSet CastedRHS = RangeFactory.castTo(RHS, CastingType);
1658
1659 if (intersect(RangeFactory, CastedLHS, CastedRHS).isEmpty())
1660 return getTrueRange(T);
1661 }
1662
1663 // In all other cases, the resulting range cannot be deduced.
1664 return infer(T);
1665 }
1666
1667 template <>
VisitBinaryOperator(Range LHS,Range RHS,QualType T)1668 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS,
1669 QualType T) {
1670 APSIntType ResultType = ValueFactory.getAPSIntType(T);
1671 llvm::APSInt Zero = ResultType.getZeroValue();
1672
1673 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1674 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1675
1676 bool IsLHSNegative = LHS.To() < Zero;
1677 bool IsRHSNegative = RHS.To() < Zero;
1678
1679 // Check if both ranges have the same sign.
1680 if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1681 (IsLHSNegative && IsRHSNegative)) {
1682 // The result is definitely greater or equal than any of the operands.
1683 const llvm::APSInt &Min = std::max(LHS.From(), RHS.From());
1684
1685 // We estimate maximal value for positives as the maximal value for the
1686 // given type. For negatives, we estimate it with -1 (e.g. 0x11111111).
1687 //
1688 // TODO: We basically, limit the resulting range from below, but don't do
1689 // anything with the upper bound.
1690 //
1691 // For positive operands, it can be done as follows: for the upper
1692 // bound of LHS and RHS we calculate the most significant bit set.
1693 // Let's call it the N-th bit. Then we can estimate the maximal
1694 // number to be 2^(N+1)-1, i.e. the number with all the bits up to
1695 // the N-th bit set.
1696 const llvm::APSInt &Max = IsLHSNegative
1697 ? ValueFactory.getValue(--Zero)
1698 : ValueFactory.getMaxValue(ResultType);
1699
1700 return {RangeFactory, ValueFactory.getValue(Min), Max};
1701 }
1702
1703 // Otherwise, let's check if at least one of the operands is negative.
1704 if (IsLHSNegative || IsRHSNegative) {
1705 // This means that the result is definitely negative as well.
1706 return {RangeFactory, ValueFactory.getMinValue(ResultType),
1707 ValueFactory.getValue(--Zero)};
1708 }
1709
1710 RangeSet DefaultRange = infer(T);
1711
1712 // It is pretty hard to reason about operands with different signs
1713 // (and especially with possibly different signs). We simply check if it
1714 // can be zero. In order to conclude that the result could not be zero,
1715 // at least one of the operands should be definitely not zero itself.
1716 if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) {
1717 return assumeNonZero(DefaultRange, T);
1718 }
1719
1720 // Nothing much else to do here.
1721 return DefaultRange;
1722 }
1723
1724 template <>
VisitBinaryOperator(Range LHS,Range RHS,QualType T)1725 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS,
1726 Range RHS,
1727 QualType T) {
1728 APSIntType ResultType = ValueFactory.getAPSIntType(T);
1729 llvm::APSInt Zero = ResultType.getZeroValue();
1730
1731 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1732 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1733
1734 bool IsLHSNegative = LHS.To() < Zero;
1735 bool IsRHSNegative = RHS.To() < Zero;
1736
1737 // Check if both ranges have the same sign.
1738 if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1739 (IsLHSNegative && IsRHSNegative)) {
1740 // The result is definitely less or equal than any of the operands.
1741 const llvm::APSInt &Max = std::min(LHS.To(), RHS.To());
1742
1743 // We conservatively estimate lower bound to be the smallest positive
1744 // or negative value corresponding to the sign of the operands.
1745 const llvm::APSInt &Min = IsLHSNegative
1746 ? ValueFactory.getMinValue(ResultType)
1747 : ValueFactory.getValue(Zero);
1748
1749 return {RangeFactory, Min, Max};
1750 }
1751
1752 // Otherwise, let's check if at least one of the operands is positive.
1753 if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) {
1754 // This makes result definitely positive.
1755 //
1756 // We can also reason about a maximal value by finding the maximal
1757 // value of the positive operand.
1758 const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To();
1759
1760 // The minimal value on the other hand is much harder to reason about.
1761 // The only thing we know for sure is that the result is positive.
1762 return {RangeFactory, ValueFactory.getValue(Zero),
1763 ValueFactory.getValue(Max)};
1764 }
1765
1766 // Nothing much else to do here.
1767 return infer(T);
1768 }
1769
1770 template <>
VisitBinaryOperator(Range LHS,Range RHS,QualType T)1771 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
1772 Range RHS,
1773 QualType T) {
1774 llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();
1775
1776 Range ConservativeRange = getSymmetricalRange(RHS, T);
1777
1778 llvm::APSInt Max = ConservativeRange.To();
1779 llvm::APSInt Min = ConservativeRange.From();
1780
1781 if (Max == Zero) {
1782 // It's an undefined behaviour to divide by 0 and it seems like we know
1783 // for sure that RHS is 0. Let's say that the resulting range is
1784 // simply infeasible for that matter.
1785 return RangeFactory.getEmptySet();
1786 }
1787
1788 // At this point, our conservative range is closed. The result, however,
1789 // couldn't be greater than the RHS' maximal absolute value. Because of
1790 // this reason, we turn the range into open (or half-open in case of
1791 // unsigned integers).
1792 //
1793 // While we operate on integer values, an open interval (a, b) can be easily
1794 // represented by the closed interval [a + 1, b - 1]. And this is exactly
1795 // what we do next.
1796 //
1797 // If we are dealing with unsigned case, we shouldn't move the lower bound.
1798 if (Min.isSigned()) {
1799 ++Min;
1800 }
1801 --Max;
1802
1803 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1804 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1805
1806 // Remainder operator results with negative operands is implementation
1807 // defined. Positive cases are much easier to reason about though.
1808 if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) {
1809 // If maximal value of LHS is less than maximal value of RHS,
1810 // the result won't get greater than LHS.To().
1811 Max = std::min(LHS.To(), Max);
1812 // We want to check if it is a situation similar to the following:
1813 //
1814 // <------------|---[ LHS ]--------[ RHS ]----->
1815 // -INF 0 +INF
1816 //
1817 // In this situation, we can conclude that (LHS / RHS) == 0 and
1818 // (LHS % RHS) == LHS.
1819 Min = LHS.To() < RHS.From() ? LHS.From() : Zero;
1820 }
1821
1822 // Nevertheless, the symmetrical range for RHS is a conservative estimate
1823 // for any sign of either LHS, or RHS.
1824 return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
1825 }
1826
VisitBinaryOperator(RangeSet LHS,BinaryOperator::Opcode Op,RangeSet RHS,QualType T)1827 RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS,
1828 BinaryOperator::Opcode Op,
1829 RangeSet RHS, QualType T) {
1830 // We should propagate information about unfeasbility of one of the
1831 // operands to the resulting range.
1832 if (LHS.isEmpty() || RHS.isEmpty()) {
1833 return RangeFactory.getEmptySet();
1834 }
1835
1836 switch (Op) {
1837 case BO_NE:
1838 return VisitBinaryOperator<BO_NE>(LHS, RHS, T);
1839 case BO_Or:
1840 return VisitBinaryOperator<BO_Or>(LHS, RHS, T);
1841 case BO_And:
1842 return VisitBinaryOperator<BO_And>(LHS, RHS, T);
1843 case BO_Rem:
1844 return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
1845 default:
1846 return infer(T);
1847 }
1848 }
1849
1850 //===----------------------------------------------------------------------===//
1851 // Constraint manager implementation details
1852 //===----------------------------------------------------------------------===//
1853
1854 class RangeConstraintManager : public RangedConstraintManager {
1855 public:
RangeConstraintManager(ExprEngine * EE,SValBuilder & SVB)1856 RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
1857 : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
1858
1859 //===------------------------------------------------------------------===//
1860 // Implementation for interface from ConstraintManager.
1861 //===------------------------------------------------------------------===//
1862
haveEqualConstraints(ProgramStateRef S1,ProgramStateRef S2) const1863 bool haveEqualConstraints(ProgramStateRef S1,
1864 ProgramStateRef S2) const override {
1865 // NOTE: ClassMembers are as simple as back pointers for ClassMap,
1866 // so comparing constraint ranges and class maps should be
1867 // sufficient.
1868 return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
1869 S1->get<ClassMap>() == S2->get<ClassMap>();
1870 }
1871
1872 bool canReasonAbout(SVal X) const override;
1873
1874 ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
1875
1876 const llvm::APSInt *getSymVal(ProgramStateRef State,
1877 SymbolRef Sym) const override;
1878
1879 const llvm::APSInt *getSymMinVal(ProgramStateRef State,
1880 SymbolRef Sym) const override;
1881
1882 const llvm::APSInt *getSymMaxVal(ProgramStateRef State,
1883 SymbolRef Sym) const override;
1884
1885 ProgramStateRef removeDeadBindings(ProgramStateRef State,
1886 SymbolReaper &SymReaper) override;
1887
1888 void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
1889 unsigned int Space = 0, bool IsDot = false) const override;
1890 void printValue(raw_ostream &Out, ProgramStateRef State,
1891 SymbolRef Sym) override;
1892 void printConstraints(raw_ostream &Out, ProgramStateRef State,
1893 const char *NL = "\n", unsigned int Space = 0,
1894 bool IsDot = false) const;
1895 void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
1896 const char *NL = "\n", unsigned int Space = 0,
1897 bool IsDot = false) const;
1898 void printDisequalities(raw_ostream &Out, ProgramStateRef State,
1899 const char *NL = "\n", unsigned int Space = 0,
1900 bool IsDot = false) const;
1901
1902 //===------------------------------------------------------------------===//
1903 // Implementation for interface from RangedConstraintManager.
1904 //===------------------------------------------------------------------===//
1905
1906 ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
1907 const llvm::APSInt &V,
1908 const llvm::APSInt &Adjustment) override;
1909
1910 ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
1911 const llvm::APSInt &V,
1912 const llvm::APSInt &Adjustment) override;
1913
1914 ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
1915 const llvm::APSInt &V,
1916 const llvm::APSInt &Adjustment) override;
1917
1918 ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
1919 const llvm::APSInt &V,
1920 const llvm::APSInt &Adjustment) override;
1921
1922 ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
1923 const llvm::APSInt &V,
1924 const llvm::APSInt &Adjustment) override;
1925
1926 ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
1927 const llvm::APSInt &V,
1928 const llvm::APSInt &Adjustment) override;
1929
1930 ProgramStateRef assumeSymWithinInclusiveRange(
1931 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1932 const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1933
1934 ProgramStateRef assumeSymOutsideInclusiveRange(
1935 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1936 const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1937
1938 private:
1939 RangeSet::Factory F;
1940
1941 RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
1942 RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
1943 ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
1944 RangeSet Range);
1945 ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
1946 RangeSet Range);
1947
1948 RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
1949 const llvm::APSInt &Int,
1950 const llvm::APSInt &Adjustment);
1951 RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
1952 const llvm::APSInt &Int,
1953 const llvm::APSInt &Adjustment);
1954 RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
1955 const llvm::APSInt &Int,
1956 const llvm::APSInt &Adjustment);
1957 RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
1958 const llvm::APSInt &Int,
1959 const llvm::APSInt &Adjustment);
1960 RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
1961 const llvm::APSInt &Int,
1962 const llvm::APSInt &Adjustment);
1963 };
1964
1965 //===----------------------------------------------------------------------===//
1966 // Constraint assignment logic
1967 //===----------------------------------------------------------------------===//
1968
1969 /// ConstraintAssignorBase is a small utility class that unifies visitor
1970 /// for ranges with a visitor for constraints (rangeset/range/constant).
1971 ///
1972 /// It is designed to have one derived class, but generally it can have more.
1973 /// Derived class can control which types we handle by defining methods of the
1974 /// following form:
1975 ///
1976 /// bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
1977 /// CONSTRAINT Constraint);
1978 ///
1979 /// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
1980 /// CONSTRAINT is the type of constraint (RangeSet/Range/Const)
1981 /// return value signifies whether we should try other handle methods
1982 /// (i.e. false would mean to stop right after calling this method)
1983 template <class Derived> class ConstraintAssignorBase {
1984 public:
1985 using Const = const llvm::APSInt &;
1986
1987 #define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint)
1988
1989 #define ASSIGN(CLASS, TO, SYM, CONSTRAINT) \
1990 if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT)) \
1991 return false
1992
assign(SymbolRef Sym,RangeSet Constraint)1993 void assign(SymbolRef Sym, RangeSet Constraint) {
1994 assignImpl(Sym, Constraint);
1995 }
1996
assignImpl(SymbolRef Sym,RangeSet Constraint)1997 bool assignImpl(SymbolRef Sym, RangeSet Constraint) {
1998 switch (Sym->getKind()) {
1999 #define SYMBOL(Id, Parent) \
2000 case SymExpr::Id##Kind: \
2001 DISPATCH(Id);
2002 #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
2003 }
2004 llvm_unreachable("Unknown SymExpr kind!");
2005 }
2006
2007 #define DEFAULT_ASSIGN(Id) \
2008 bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) { \
2009 return true; \
2010 } \
2011 bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \
2012 bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; }
2013
2014 // When we dispatch for constraint types, we first try to check
2015 // if the new constraint is the constant and try the corresponding
2016 // assignor methods. If it didn't interrupt, we can proceed to the
2017 // range, and finally to the range set.
2018 #define CONSTRAINT_DISPATCH(Id) \
2019 if (const llvm::APSInt *Const = Constraint.getConcreteValue()) { \
2020 ASSIGN(Id, Const, Sym, *Const); \
2021 } \
2022 if (Constraint.size() == 1) { \
2023 ASSIGN(Id, Range, Sym, *Constraint.begin()); \
2024 } \
2025 ASSIGN(Id, RangeSet, Sym, Constraint)
2026
2027 // Our internal assign method first tries to call assignor methods for all
2028 // constraint types that apply. And if not interrupted, continues with its
2029 // parent class.
2030 #define SYMBOL(Id, Parent) \
2031 bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) { \
2032 CONSTRAINT_DISPATCH(Id); \
2033 DISPATCH(Parent); \
2034 } \
2035 DEFAULT_ASSIGN(Id)
2036 #define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent)
2037 #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
2038
2039 // Default implementations for the top class that doesn't have parents.
assignSymExprImpl(const SymExpr * Sym,RangeSet Constraint)2040 bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) {
2041 CONSTRAINT_DISPATCH(SymExpr);
2042 return true;
2043 }
2044 DEFAULT_ASSIGN(SymExpr);
2045
2046 #undef DISPATCH
2047 #undef CONSTRAINT_DISPATCH
2048 #undef DEFAULT_ASSIGN
2049 #undef ASSIGN
2050 };
2051
2052 /// A little component aggregating all of the reasoning we have about
2053 /// assigning new constraints to symbols.
2054 ///
2055 /// The main purpose of this class is to associate constraints to symbols,
2056 /// and impose additional constraints on other symbols, when we can imply
2057 /// them.
2058 ///
2059 /// It has a nice symmetry with SymbolicRangeInferrer. When the latter
2060 /// can provide more precise ranges by looking into the operands of the
2061 /// expression in question, ConstraintAssignor looks into the operands
2062 /// to see if we can imply more from the new constraint.
2063 class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
2064 public:
2065 template <class ClassOrSymbol>
2066 [[nodiscard]] static ProgramStateRef
assign(ProgramStateRef State,SValBuilder & Builder,RangeSet::Factory & F,ClassOrSymbol CoS,RangeSet NewConstraint)2067 assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F,
2068 ClassOrSymbol CoS, RangeSet NewConstraint) {
2069 if (!State || NewConstraint.isEmpty())
2070 return nullptr;
2071
2072 ConstraintAssignor Assignor{State, Builder, F};
2073 return Assignor.assign(CoS, NewConstraint);
2074 }
2075
2076 /// Handle expressions like: a % b != 0.
2077 template <typename SymT>
handleRemainderOp(const SymT * Sym,RangeSet Constraint)2078 bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) {
2079 if (Sym->getOpcode() != BO_Rem)
2080 return true;
2081 // a % b != 0 implies that a != 0.
2082 if (!Constraint.containsZero()) {
2083 SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
2084 if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
2085 State = State->assume(*NonLocSymSVal, true);
2086 if (!State)
2087 return false;
2088 }
2089 }
2090 return true;
2091 }
2092
2093 inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
assignSymIntExprToRangeSet(const SymIntExpr * Sym,RangeSet Constraint)2094 inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
2095 RangeSet Constraint) {
2096 return handleRemainderOp(Sym, Constraint);
2097 }
2098 inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2099 RangeSet Constraint);
2100
2101 private:
ConstraintAssignor(ProgramStateRef State,SValBuilder & Builder,RangeSet::Factory & F)2102 ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder,
2103 RangeSet::Factory &F)
2104 : State(State), Builder(Builder), RangeFactory(F) {}
2105 using Base = ConstraintAssignorBase<ConstraintAssignor>;
2106
2107 /// Base method for handling new constraints for symbols.
assign(SymbolRef Sym,RangeSet NewConstraint)2108 [[nodiscard]] ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) {
2109 // All constraints are actually associated with equivalence classes, and
2110 // that's what we are going to do first.
2111 State = assign(EquivalenceClass::find(State, Sym), NewConstraint);
2112 if (!State)
2113 return nullptr;
2114
2115 // And after that we can check what other things we can get from this
2116 // constraint.
2117 Base::assign(Sym, NewConstraint);
2118 return State;
2119 }
2120
2121 /// Base method for handling new constraints for classes.
assign(EquivalenceClass Class,RangeSet NewConstraint)2122 [[nodiscard]] ProgramStateRef assign(EquivalenceClass Class,
2123 RangeSet NewConstraint) {
2124 // There is a chance that we might need to update constraints for the
2125 // classes that are known to be disequal to Class.
2126 //
2127 // In order for this to be even possible, the new constraint should
2128 // be simply a constant because we can't reason about range disequalities.
2129 if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) {
2130
2131 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2132 ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>();
2133
2134 // Add new constraint.
2135 Constraints = CF.add(Constraints, Class, NewConstraint);
2136
2137 for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) {
2138 RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange(
2139 RangeFactory, State, DisequalClass);
2140
2141 UpdatedConstraint = RangeFactory.deletePoint(UpdatedConstraint, *Point);
2142
2143 // If we end up with at least one of the disequal classes to be
2144 // constrained with an empty range-set, the state is infeasible.
2145 if (UpdatedConstraint.isEmpty())
2146 return nullptr;
2147
2148 Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint);
2149 }
2150 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2151 "a state with infeasible constraints");
2152
2153 return setConstraints(State, Constraints);
2154 }
2155
2156 return setConstraint(State, Class, NewConstraint);
2157 }
2158
trackDisequality(ProgramStateRef State,SymbolRef LHS,SymbolRef RHS)2159 ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS,
2160 SymbolRef RHS) {
2161 return EquivalenceClass::markDisequal(RangeFactory, State, LHS, RHS);
2162 }
2163
trackEquality(ProgramStateRef State,SymbolRef LHS,SymbolRef RHS)2164 ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS,
2165 SymbolRef RHS) {
2166 return EquivalenceClass::merge(RangeFactory, State, LHS, RHS);
2167 }
2168
interpreteAsBool(RangeSet Constraint)2169 [[nodiscard]] std::optional<bool> interpreteAsBool(RangeSet Constraint) {
2170 assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
2171
2172 if (Constraint.getConcreteValue())
2173 return !Constraint.getConcreteValue()->isZero();
2174
2175 if (!Constraint.containsZero())
2176 return true;
2177
2178 return std::nullopt;
2179 }
2180
2181 ProgramStateRef State;
2182 SValBuilder &Builder;
2183 RangeSet::Factory &RangeFactory;
2184 };
2185
assignSymExprToConst(const SymExpr * Sym,const llvm::APSInt & Constraint)2186 bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
2187 const llvm::APSInt &Constraint) {
2188 llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
2189 // Iterate over all equivalence classes and try to simplify them.
2190 ClassMembersTy Members = State->get<ClassMembers>();
2191 for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
2192 EquivalenceClass Class = ClassToSymbolSet.first;
2193 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2194 if (!State)
2195 return false;
2196 SimplifiedClasses.insert(Class);
2197 }
2198
2199 // Trivial equivalence classes (those that have only one symbol member) are
2200 // not stored in the State. Thus, we must skim through the constraints as
2201 // well. And we try to simplify symbols in the constraints.
2202 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2203 for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2204 EquivalenceClass Class = ClassConstraint.first;
2205 if (SimplifiedClasses.count(Class)) // Already simplified.
2206 continue;
2207 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2208 if (!State)
2209 return false;
2210 }
2211
2212 // We may have trivial equivalence classes in the disequality info as
2213 // well, and we need to simplify them.
2214 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2215 for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
2216 DisequalityInfo) {
2217 EquivalenceClass Class = DisequalityEntry.first;
2218 ClassSet DisequalClasses = DisequalityEntry.second;
2219 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2220 if (!State)
2221 return false;
2222 }
2223
2224 return true;
2225 }
2226
assignSymSymExprToRangeSet(const SymSymExpr * Sym,RangeSet Constraint)2227 bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2228 RangeSet Constraint) {
2229 if (!handleRemainderOp(Sym, Constraint))
2230 return false;
2231
2232 std::optional<bool> ConstraintAsBool = interpreteAsBool(Constraint);
2233
2234 if (!ConstraintAsBool)
2235 return true;
2236
2237 if (std::optional<bool> Equality = meansEquality(Sym)) {
2238 // Here we cover two cases:
2239 // * if Sym is equality and the new constraint is true -> Sym's operands
2240 // should be marked as equal
2241 // * if Sym is disequality and the new constraint is false -> Sym's
2242 // operands should be also marked as equal
2243 if (*Equality == *ConstraintAsBool) {
2244 State = trackEquality(State, Sym->getLHS(), Sym->getRHS());
2245 } else {
2246 // Other combinations leave as with disequal operands.
2247 State = trackDisequality(State, Sym->getLHS(), Sym->getRHS());
2248 }
2249
2250 if (!State)
2251 return false;
2252 }
2253
2254 return true;
2255 }
2256
2257 } // end anonymous namespace
2258
2259 std::unique_ptr<ConstraintManager>
CreateRangeConstraintManager(ProgramStateManager & StMgr,ExprEngine * Eng)2260 ento::CreateRangeConstraintManager(ProgramStateManager &StMgr,
2261 ExprEngine *Eng) {
2262 return std::make_unique<RangeConstraintManager>(Eng, StMgr.getSValBuilder());
2263 }
2264
getConstraintMap(ProgramStateRef State)2265 ConstraintMap ento::getConstraintMap(ProgramStateRef State) {
2266 ConstraintMap::Factory &F = State->get_context<ConstraintMap>();
2267 ConstraintMap Result = F.getEmptyMap();
2268
2269 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2270 for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2271 EquivalenceClass Class = ClassConstraint.first;
2272 SymbolSet ClassMembers = Class.getClassMembers(State);
2273 assert(!ClassMembers.isEmpty() &&
2274 "Class must always have at least one member!");
2275
2276 SymbolRef Representative = *ClassMembers.begin();
2277 Result = F.add(Result, Representative, ClassConstraint.second);
2278 }
2279
2280 return Result;
2281 }
2282
2283 //===----------------------------------------------------------------------===//
//                  EquivalenceClass implementation details
2285 //===----------------------------------------------------------------------===//
2286
dumpToStream(ProgramStateRef State,raw_ostream & os) const2287 LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State,
2288 raw_ostream &os) const {
2289 SymbolSet ClassMembers = getClassMembers(State);
2290 for (const SymbolRef &MemberSym : ClassMembers) {
2291 MemberSym->dump();
2292 os << "\n";
2293 }
2294 }
2295
find(ProgramStateRef State,SymbolRef Sym)2296 inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State,
2297 SymbolRef Sym) {
2298 assert(State && "State should not be null");
2299 assert(Sym && "Symbol should not be null");
2300 // We store far from all Symbol -> Class mappings
2301 if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym))
2302 return *NontrivialClass;
2303
2304 // This is a trivial class of Sym.
2305 return Sym;
2306 }
2307
merge(RangeSet::Factory & F,ProgramStateRef State,SymbolRef First,SymbolRef Second)2308 inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2309 ProgramStateRef State,
2310 SymbolRef First,
2311 SymbolRef Second) {
2312 EquivalenceClass FirstClass = find(State, First);
2313 EquivalenceClass SecondClass = find(State, Second);
2314
2315 return FirstClass.merge(F, State, SecondClass);
2316 }
2317
merge(RangeSet::Factory & F,ProgramStateRef State,EquivalenceClass Other)2318 inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2319 ProgramStateRef State,
2320 EquivalenceClass Other) {
2321 // It is already the same class.
2322 if (*this == Other)
2323 return State;
2324
2325 // FIXME: As of now, we support only equivalence classes of the same type.
2326 // This limitation is connected to the lack of explicit casts in
2327 // our symbolic expression model.
2328 //
2329 // That means that for `int x` and `char y` we don't distinguish
2330 // between these two very different cases:
2331 // * `x == y`
2332 // * `(char)x == y`
2333 //
2334 // The moment we introduce symbolic casts, this restriction can be
2335 // lifted.
2336 if (getType()->getCanonicalTypeUnqualified() !=
2337 Other.getType()->getCanonicalTypeUnqualified())
2338 return State;
2339
2340 SymbolSet Members = getClassMembers(State);
2341 SymbolSet OtherMembers = Other.getClassMembers(State);
2342
2343 // We estimate the size of the class by the height of tree containing
2344 // its members. Merging is not a trivial operation, so it's easier to
2345 // merge the smaller class into the bigger one.
2346 if (Members.getHeight() >= OtherMembers.getHeight()) {
2347 return mergeImpl(F, State, Members, Other, OtherMembers);
2348 } else {
2349 return Other.mergeImpl(F, State, OtherMembers, *this, Members);
2350 }
2351 }
2352
2353 inline ProgramStateRef
mergeImpl(RangeSet::Factory & RangeFactory,ProgramStateRef State,SymbolSet MyMembers,EquivalenceClass Other,SymbolSet OtherMembers)2354 EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory,
2355 ProgramStateRef State, SymbolSet MyMembers,
2356 EquivalenceClass Other, SymbolSet OtherMembers) {
2357 // Essentially what we try to recreate here is some kind of union-find
2358 // data structure. It does have certain limitations due to persistence
2359 // and the need to remove elements from classes.
2360 //
2361 // In this setting, EquialityClass object is the representative of the class
2362 // or the parent element. ClassMap is a mapping of class members to their
2363 // parent. Unlike the union-find structure, they all point directly to the
2364 // class representative because we don't have an opportunity to actually do
2365 // path compression when dealing with immutability. This means that we
2366 // compress paths every time we do merges. It also means that we lose
2367 // the main amortized complexity benefit from the original data structure.
2368 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2369 ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
2370
2371 // 1. If the merged classes have any constraints associated with them, we
2372 // need to transfer them to the class we have left.
2373 //
2374 // Intersection here makes perfect sense because both of these constraints
2375 // must hold for the whole new class.
2376 if (std::optional<RangeSet> NewClassConstraint =
2377 intersect(RangeFactory, getConstraint(State, *this),
2378 getConstraint(State, Other))) {
2379 // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because
2380 // range inferrer shouldn't generate ranges incompatible with
2381 // equivalence classes. However, at the moment, due to imperfections
2382 // in the solver, it is possible and the merge function can also
2383 // return infeasible states aka null states.
2384 if (NewClassConstraint->isEmpty())
2385 // Infeasible state
2386 return nullptr;
2387
2388 // No need in tracking constraints of a now-dissolved class.
2389 Constraints = CRF.remove(Constraints, Other);
2390 // Assign new constraints for this class.
2391 Constraints = CRF.add(Constraints, *this, *NewClassConstraint);
2392
2393 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2394 "a state with infeasible constraints");
2395
2396 State = State->set<ConstraintRange>(Constraints);
2397 }
2398
2399 // 2. Get ALL equivalence-related maps
2400 ClassMapTy Classes = State->get<ClassMap>();
2401 ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
2402
2403 ClassMembersTy Members = State->get<ClassMembers>();
2404 ClassMembersTy::Factory &MF = State->get_context<ClassMembers>();
2405
2406 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2407 DisequalityMapTy::Factory &DF = State->get_context<DisequalityMap>();
2408
2409 ClassSet::Factory &CF = State->get_context<ClassSet>();
2410 SymbolSet::Factory &F = getMembersFactory(State);
2411
2412 // 2. Merge members of the Other class into the current class.
2413 SymbolSet NewClassMembers = MyMembers;
2414 for (SymbolRef Sym : OtherMembers) {
2415 NewClassMembers = F.add(NewClassMembers, Sym);
2416 // *this is now the class for all these new symbols.
2417 Classes = CMF.add(Classes, Sym, *this);
2418 }
2419
2420 // 3. Adjust member mapping.
2421 //
2422 // No need in tracking members of a now-dissolved class.
2423 Members = MF.remove(Members, Other);
2424 // Now only the current class is mapped to all the symbols.
2425 Members = MF.add(Members, *this, NewClassMembers);
2426
2427 // 4. Update disequality relations
2428 ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF);
2429 // We are about to merge two classes but they are already known to be
2430 // non-equal. This is a contradiction.
2431 if (DisequalToOther.contains(*this))
2432 return nullptr;
2433
2434 if (!DisequalToOther.isEmpty()) {
2435 ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF);
2436 DisequalityInfo = DF.remove(DisequalityInfo, Other);
2437
2438 for (EquivalenceClass DisequalClass : DisequalToOther) {
2439 DisequalToThis = CF.add(DisequalToThis, DisequalClass);
2440
2441 // Disequality is a symmetric relation meaning that if
2442 // DisequalToOther not null then the set for DisequalClass is not
2443 // empty and has at least Other.
2444 ClassSet OriginalSetLinkedToOther =
2445 *DisequalityInfo.lookup(DisequalClass);
2446
2447 // Other will be eliminated and we should replace it with the bigger
2448 // united class.
2449 ClassSet NewSet = CF.remove(OriginalSetLinkedToOther, Other);
2450 NewSet = CF.add(NewSet, *this);
2451
2452 DisequalityInfo = DF.add(DisequalityInfo, DisequalClass, NewSet);
2453 }
2454
2455 DisequalityInfo = DF.add(DisequalityInfo, *this, DisequalToThis);
2456 State = State->set<DisequalityMap>(DisequalityInfo);
2457 }
2458
2459 // 5. Update the state
2460 State = State->set<ClassMap>(Classes);
2461 State = State->set<ClassMembers>(Members);
2462
2463 return State;
2464 }
2465
2466 inline SymbolSet::Factory &
getMembersFactory(ProgramStateRef State)2467 EquivalenceClass::getMembersFactory(ProgramStateRef State) {
2468 return State->get_context<SymbolSet>();
2469 }
2470
getClassMembers(ProgramStateRef State) const2471 SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const {
2472 if (const SymbolSet *Members = State->get<ClassMembers>(*this))
2473 return *Members;
2474
2475 // This class is trivial, so we need to construct a set
2476 // with just that one symbol from the class.
2477 SymbolSet::Factory &F = getMembersFactory(State);
2478 return F.add(F.getEmptySet(), getRepresentativeSymbol());
2479 }
2480
isTrivial(ProgramStateRef State) const2481 bool EquivalenceClass::isTrivial(ProgramStateRef State) const {
2482 return State->get<ClassMembers>(*this) == nullptr;
2483 }
2484
isTriviallyDead(ProgramStateRef State,SymbolReaper & Reaper) const2485 bool EquivalenceClass::isTriviallyDead(ProgramStateRef State,
2486 SymbolReaper &Reaper) const {
2487 return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol());
2488 }
2489
markDisequal(RangeSet::Factory & RF,ProgramStateRef State,SymbolRef First,SymbolRef Second)2490 inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2491 ProgramStateRef State,
2492 SymbolRef First,
2493 SymbolRef Second) {
2494 return markDisequal(RF, State, find(State, First), find(State, Second));
2495 }
2496
markDisequal(RangeSet::Factory & RF,ProgramStateRef State,EquivalenceClass First,EquivalenceClass Second)2497 inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2498 ProgramStateRef State,
2499 EquivalenceClass First,
2500 EquivalenceClass Second) {
2501 return First.markDisequal(RF, State, Second);
2502 }
2503
2504 inline ProgramStateRef
markDisequal(RangeSet::Factory & RF,ProgramStateRef State,EquivalenceClass Other) const2505 EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State,
2506 EquivalenceClass Other) const {
2507 // If we know that two classes are equal, we can only produce an infeasible
2508 // state.
2509 if (*this == Other) {
2510 return nullptr;
2511 }
2512
2513 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2514 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2515
2516 // Disequality is a symmetric relation, so if we mark A as disequal to B,
2517 // we should also mark B as disequalt to A.
2518 if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this,
2519 Other) ||
2520 !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other,
2521 *this))
2522 return nullptr;
2523
2524 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2525 "a state with infeasible constraints");
2526
2527 State = State->set<DisequalityMap>(DisequalityInfo);
2528 State = State->set<ConstraintRange>(Constraints);
2529
2530 return State;
2531 }
2532
addToDisequalityInfo(DisequalityMapTy & Info,ConstraintRangeTy & Constraints,RangeSet::Factory & RF,ProgramStateRef State,EquivalenceClass First,EquivalenceClass Second)2533 inline bool EquivalenceClass::addToDisequalityInfo(
2534 DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
2535 RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First,
2536 EquivalenceClass Second) {
2537
2538 // 1. Get all of the required factories.
2539 DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>();
2540 ClassSet::Factory &CF = State->get_context<ClassSet>();
2541 ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
2542
2543 // 2. Add Second to the set of classes disequal to First.
2544 const ClassSet *CurrentSet = Info.lookup(First);
2545 ClassSet NewSet = CurrentSet ? *CurrentSet : CF.getEmptySet();
2546 NewSet = CF.add(NewSet, Second);
2547
2548 Info = F.add(Info, First, NewSet);
2549
2550 // 3. If Second is known to be a constant, we can delete this point
2551 // from the constraint asociated with First.
2552 //
2553 // So, if Second == 10, it means that First != 10.
2554 // At the same time, the same logic does not apply to ranges.
2555 if (const RangeSet *SecondConstraint = Constraints.lookup(Second))
2556 if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) {
2557
2558 RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange(
2559 RF, State, First.getRepresentativeSymbol());
2560
2561 FirstConstraint = RF.deletePoint(FirstConstraint, *Point);
2562
2563 // If the First class is about to be constrained with an empty
2564 // range-set, the state is infeasible.
2565 if (FirstConstraint.isEmpty())
2566 return false;
2567
2568 Constraints = CRF.add(Constraints, First, FirstConstraint);
2569 }
2570
2571 return true;
2572 }
2573
areEqual(ProgramStateRef State,SymbolRef FirstSym,SymbolRef SecondSym)2574 inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2575 SymbolRef FirstSym,
2576 SymbolRef SecondSym) {
2577 return EquivalenceClass::areEqual(State, find(State, FirstSym),
2578 find(State, SecondSym));
2579 }
2580
areEqual(ProgramStateRef State,EquivalenceClass First,EquivalenceClass Second)2581 inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2582 EquivalenceClass First,
2583 EquivalenceClass Second) {
2584 // The same equivalence class => symbols are equal.
2585 if (First == Second)
2586 return true;
2587
2588 // Let's check if we know anything about these two classes being not equal to
2589 // each other.
2590 ClassSet DisequalToFirst = First.getDisequalClasses(State);
2591 if (DisequalToFirst.contains(Second))
2592 return false;
2593
2594 // It is not clear.
2595 return std::nullopt;
2596 }
2597
2598 [[nodiscard]] ProgramStateRef
removeMember(ProgramStateRef State,const SymbolRef Old)2599 EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
2600
2601 SymbolSet ClsMembers = getClassMembers(State);
2602 assert(ClsMembers.contains(Old));
2603
2604 // Remove `Old`'s Class->Sym relation.
2605 SymbolSet::Factory &F = getMembersFactory(State);
2606 ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2607 ClsMembers = F.remove(ClsMembers, Old);
2608 // Ensure another precondition of the removeMember function (we can check
2609 // this only with isEmpty, thus we have to do the remove first).
2610 assert(!ClsMembers.isEmpty() &&
2611 "Class should have had at least two members before member removal");
2612 // Overwrite the existing members assigned to this class.
2613 ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2614 ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
2615 State = State->set<ClassMembers>(ClassMembersMap);
2616
2617 // Remove `Old`'s Sym->Class relation.
2618 ClassMapTy Classes = State->get<ClassMap>();
2619 ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
2620 Classes = CMF.remove(Classes, Old);
2621 State = State->set<ClassMap>(Classes);
2622
2623 return State;
2624 }
2625
2626 // Re-evaluate an SVal with top-level `State->assume` logic.
2627 [[nodiscard]] ProgramStateRef
reAssume(ProgramStateRef State,const RangeSet * Constraint,SVal TheValue)2628 reAssume(ProgramStateRef State, const RangeSet *Constraint, SVal TheValue) {
2629 if (!Constraint)
2630 return State;
2631
2632 const auto DefinedVal = TheValue.castAs<DefinedSVal>();
2633
2634 // If the SVal is 0, we can simply interpret that as `false`.
2635 if (Constraint->encodesFalseRange())
2636 return State->assume(DefinedVal, false);
2637
2638 // If the constraint does not encode 0 then we can interpret that as `true`
2639 // AND as a Range(Set).
2640 if (Constraint->encodesTrueRange()) {
2641 State = State->assume(DefinedVal, true);
2642 if (!State)
2643 return nullptr;
2644 // Fall through, re-assume based on the range values as well.
2645 }
2646 // Overestimate the individual Ranges with the RangeSet' lowest and
2647 // highest values.
2648 return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
2649 Constraint->getMaxValue(), true);
2650 }
2651
2652 // Iterate over all symbols and try to simplify them. Once a symbol is
2653 // simplified then we check if we can merge the simplified symbol's equivalence
2654 // class to this class. This way, we simplify not just the symbols but the
2655 // classes as well: we strive to keep the number of the classes to be the
2656 // absolute minimum.
2657 [[nodiscard]] ProgramStateRef
simplify(SValBuilder & SVB,RangeSet::Factory & F,ProgramStateRef State,EquivalenceClass Class)2658 EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
2659 ProgramStateRef State, EquivalenceClass Class) {
2660 SymbolSet ClassMembers = Class.getClassMembers(State);
2661 for (const SymbolRef &MemberSym : ClassMembers) {
2662
2663 const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
2664 const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
2665
2666 // The symbol is collapsed to a constant, check if the current State is
2667 // still feasible.
2668 if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) {
2669 const llvm::APSInt &SV = CI->getValue();
2670 const RangeSet *ClassConstraint = getConstraint(State, Class);
2671 // We have found a contradiction.
2672 if (ClassConstraint && !ClassConstraint->contains(SV))
2673 return nullptr;
2674 }
2675
2676 if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
2677 // The simplified symbol should be the member of the original Class,
2678 // however, it might be in another existing class at the moment. We
2679 // have to merge these classes.
2680 ProgramStateRef OldState = State;
2681 State = merge(F, State, MemberSym, SimplifiedMemberSym);
2682 if (!State)
2683 return nullptr;
2684 // No state change, no merge happened actually.
2685 if (OldState == State)
2686 continue;
2687
2688 // Be aware that `SimplifiedMemberSym` might refer to an already dead
2689 // symbol. In that case, the eqclass of that might not be the same as the
2690 // eqclass of `MemberSym`. This is because the dead symbols are not
2691 // preserved in the `ClassMap`, hence
2692 // `find(State, SimplifiedMemberSym)` will result in a trivial eqclass
2693 // compared to the eqclass of `MemberSym`.
2694 // These eqclasses should be the same if `SimplifiedMemberSym` is alive.
2695 // --> assert(find(State, MemberSym) == find(State, SimplifiedMemberSym))
2696 //
2697 // Note that `MemberSym` must be alive here since that is from the
2698 // `ClassMembers` where all the symbols are alive.
2699
2700 // Remove the old and more complex symbol.
2701 State = find(State, MemberSym).removeMember(State, MemberSym);
2702
2703 // Query the class constraint again b/c that may have changed during the
2704 // merge above.
2705 const RangeSet *ClassConstraint = getConstraint(State, Class);
2706
2707 // Re-evaluate an SVal with top-level `State->assume`, this ignites
2708 // a RECURSIVE algorithm that will reach a FIXPOINT.
2709 //
2710 // About performance and complexity: Let us assume that in a State we
2711 // have N non-trivial equivalence classes and that all constraints and
2712 // disequality info is related to non-trivial classes. In the worst case,
2713 // we can simplify only one symbol of one class in each iteration. The
2714 // number of symbols in one class cannot grow b/c we replace the old
2715 // symbol with the simplified one. Also, the number of the equivalence
2716 // classes can decrease only, b/c the algorithm does a merge operation
2717 // optionally. We need N iterations in this case to reach the fixpoint.
2718 // Thus, the steps needed to be done in the worst case is proportional to
2719 // N*N.
2720 //
2721 // This worst case scenario can be extended to that case when we have
2722 // trivial classes in the constraints and in the disequality map. This
2723 // case can be reduced to the case with a State where there are only
2724 // non-trivial classes. This is because a merge operation on two trivial
2725 // classes results in one non-trivial class.
2726 State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
2727 if (!State)
2728 return nullptr;
2729 }
2730 }
2731 return State;
2732 }
2733
getDisequalClasses(ProgramStateRef State,SymbolRef Sym)2734 inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State,
2735 SymbolRef Sym) {
2736 return find(State, Sym).getDisequalClasses(State);
2737 }
2738
2739 inline ClassSet
getDisequalClasses(ProgramStateRef State) const2740 EquivalenceClass::getDisequalClasses(ProgramStateRef State) const {
2741 return getDisequalClasses(State->get<DisequalityMap>(),
2742 State->get_context<ClassSet>());
2743 }
2744
2745 inline ClassSet
getDisequalClasses(DisequalityMapTy Map,ClassSet::Factory & Factory) const2746 EquivalenceClass::getDisequalClasses(DisequalityMapTy Map,
2747 ClassSet::Factory &Factory) const {
2748 if (const ClassSet *DisequalClasses = Map.lookup(*this))
2749 return *DisequalClasses;
2750
2751 return Factory.getEmptySet();
2752 }
2753
isClassDataConsistent(ProgramStateRef State)2754 bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State) {
2755 ClassMembersTy Members = State->get<ClassMembers>();
2756
2757 for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : Members) {
2758 for (SymbolRef Member : ClassMembersPair.second) {
2759 // Every member of the class should have a mapping back to the class.
2760 if (find(State, Member) == ClassMembersPair.first) {
2761 continue;
2762 }
2763
2764 return false;
2765 }
2766 }
2767
2768 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2769 for (std::pair<EquivalenceClass, ClassSet> DisequalityInfo : Disequalities) {
2770 EquivalenceClass Class = DisequalityInfo.first;
2771 ClassSet DisequalClasses = DisequalityInfo.second;
2772
2773 // There is no use in keeping empty sets in the map.
2774 if (DisequalClasses.isEmpty())
2775 return false;
2776
2777 // Disequality is symmetrical, i.e. for every Class A and B that A != B,
2778 // B != A should also be true.
2779 for (EquivalenceClass DisequalClass : DisequalClasses) {
2780 const ClassSet *DisequalToDisequalClasses =
2781 Disequalities.lookup(DisequalClass);
2782
2783 // It should be a set of at least one element: Class
2784 if (!DisequalToDisequalClasses ||
2785 !DisequalToDisequalClasses->contains(Class))
2786 return false;
2787 }
2788 }
2789
2790 return true;
2791 }
2792
2793 //===----------------------------------------------------------------------===//
2794 // RangeConstraintManager implementation
2795 //===----------------------------------------------------------------------===//
2796
canReasonAbout(SVal X) const2797 bool RangeConstraintManager::canReasonAbout(SVal X) const {
2798 std::optional<nonloc::SymbolVal> SymVal = X.getAs<nonloc::SymbolVal>();
2799 if (SymVal && SymVal->isExpression()) {
2800 const SymExpr *SE = SymVal->getSymbol();
2801
2802 if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SE)) {
2803 switch (SIE->getOpcode()) {
2804 // We don't reason yet about bitwise-constraints on symbolic values.
2805 case BO_And:
2806 case BO_Or:
2807 case BO_Xor:
2808 return false;
2809 // We don't reason yet about these arithmetic constraints on
2810 // symbolic values.
2811 case BO_Mul:
2812 case BO_Div:
2813 case BO_Rem:
2814 case BO_Shl:
2815 case BO_Shr:
2816 return false;
2817 // All other cases.
2818 default:
2819 return true;
2820 }
2821 }
2822
2823 if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(SE)) {
2824 // FIXME: Handle <=> here.
2825 if (BinaryOperator::isEqualityOp(SSE->getOpcode()) ||
2826 BinaryOperator::isRelationalOp(SSE->getOpcode())) {
2827 // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
2828 // We've recently started producing Loc <> NonLoc comparisons (that
2829 // result from casts of one of the operands between eg. intptr_t and
2830 // void *), but we can't reason about them yet.
2831 if (Loc::isLocType(SSE->getLHS()->getType())) {
2832 return Loc::isLocType(SSE->getRHS()->getType());
2833 }
2834 }
2835 }
2836
2837 return false;
2838 }
2839
2840 return true;
2841 }
2842
checkNull(ProgramStateRef State,SymbolRef Sym)2843 ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
2844 SymbolRef Sym) {
2845 const RangeSet *Ranges = getConstraint(State, Sym);
2846
2847 // If we don't have any information about this symbol, it's underconstrained.
2848 if (!Ranges)
2849 return ConditionTruthVal();
2850
2851 // If we have a concrete value, see if it's zero.
2852 if (const llvm::APSInt *Value = Ranges->getConcreteValue())
2853 return *Value == 0;
2854
2855 BasicValueFactory &BV = getBasicVals();
2856 APSIntType IntType = BV.getAPSIntType(Sym->getType());
2857 llvm::APSInt Zero = IntType.getZeroValue();
2858
2859 // Check if zero is in the set of possible values.
2860 if (!Ranges->contains(Zero))
2861 return false;
2862
2863 // Zero is a possible value, but it is not the /only/ possible value.
2864 return ConditionTruthVal();
2865 }
2866
getSymVal(ProgramStateRef St,SymbolRef Sym) const2867 const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St,
2868 SymbolRef Sym) const {
2869 const RangeSet *T = getConstraint(St, Sym);
2870 return T ? T->getConcreteValue() : nullptr;
2871 }
2872
getSymMinVal(ProgramStateRef St,SymbolRef Sym) const2873 const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St,
2874 SymbolRef Sym) const {
2875 const RangeSet *T = getConstraint(St, Sym);
2876 if (!T || T->isEmpty())
2877 return nullptr;
2878 return &T->getMinValue();
2879 }
2880
getSymMaxVal(ProgramStateRef St,SymbolRef Sym) const2881 const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St,
2882 SymbolRef Sym) const {
2883 const RangeSet *T = getConstraint(St, Sym);
2884 if (!T || T->isEmpty())
2885 return nullptr;
2886 return &T->getMaxValue();
2887 }
2888
2889 //===----------------------------------------------------------------------===//
2890 // Remove dead symbols from existing constraints
2891 //===----------------------------------------------------------------------===//
2892
2893 /// Scan all symbols referenced by the constraints. If the symbol is not alive
2894 /// as marked in LSymbols, mark it as dead in DSymbols.
2895 ProgramStateRef
removeDeadBindings(ProgramStateRef State,SymbolReaper & SymReaper)2896 RangeConstraintManager::removeDeadBindings(ProgramStateRef State,
2897 SymbolReaper &SymReaper) {
2898 ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2899 ClassMembersTy NewClassMembersMap = ClassMembersMap;
2900 ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2901 SymbolSet::Factory &SetFactory = State->get_context<SymbolSet>();
2902
2903 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2904 ConstraintRangeTy NewConstraints = Constraints;
2905 ConstraintRangeTy::Factory &ConstraintFactory =
2906 State->get_context<ConstraintRange>();
2907
2908 ClassMapTy Map = State->get<ClassMap>();
2909 ClassMapTy NewMap = Map;
2910 ClassMapTy::Factory &ClassFactory = State->get_context<ClassMap>();
2911
2912 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2913 DisequalityMapTy::Factory &DisequalityFactory =
2914 State->get_context<DisequalityMap>();
2915 ClassSet::Factory &ClassSetFactory = State->get_context<ClassSet>();
2916
2917 bool ClassMapChanged = false;
2918 bool MembersMapChanged = false;
2919 bool ConstraintMapChanged = false;
2920 bool DisequalitiesChanged = false;
2921
2922 auto removeDeadClass = [&](EquivalenceClass Class) {
2923 // Remove associated constraint ranges.
2924 Constraints = ConstraintFactory.remove(Constraints, Class);
2925 ConstraintMapChanged = true;
2926
2927 // Update disequality information to not hold any information on the
2928 // removed class.
2929 ClassSet DisequalClasses =
2930 Class.getDisequalClasses(Disequalities, ClassSetFactory);
2931 if (!DisequalClasses.isEmpty()) {
2932 for (EquivalenceClass DisequalClass : DisequalClasses) {
2933 ClassSet DisequalToDisequalSet =
2934 DisequalClass.getDisequalClasses(Disequalities, ClassSetFactory);
2935 // DisequalToDisequalSet is guaranteed to be non-empty for consistent
2936 // disequality info.
2937 assert(!DisequalToDisequalSet.isEmpty());
2938 ClassSet NewSet = ClassSetFactory.remove(DisequalToDisequalSet, Class);
2939
2940 // No need in keeping an empty set.
2941 if (NewSet.isEmpty()) {
2942 Disequalities =
2943 DisequalityFactory.remove(Disequalities, DisequalClass);
2944 } else {
2945 Disequalities =
2946 DisequalityFactory.add(Disequalities, DisequalClass, NewSet);
2947 }
2948 }
2949 // Remove the data for the class
2950 Disequalities = DisequalityFactory.remove(Disequalities, Class);
2951 DisequalitiesChanged = true;
2952 }
2953 };
2954
2955 // 1. Let's see if dead symbols are trivial and have associated constraints.
2956 for (std::pair<EquivalenceClass, RangeSet> ClassConstraintPair :
2957 Constraints) {
2958 EquivalenceClass Class = ClassConstraintPair.first;
2959 if (Class.isTriviallyDead(State, SymReaper)) {
2960 // If this class is trivial, we can remove its constraints right away.
2961 removeDeadClass(Class);
2962 }
2963 }
2964
2965 // 2. We don't need to track classes for dead symbols.
2966 for (std::pair<SymbolRef, EquivalenceClass> SymbolClassPair : Map) {
2967 SymbolRef Sym = SymbolClassPair.first;
2968
2969 if (SymReaper.isDead(Sym)) {
2970 ClassMapChanged = true;
2971 NewMap = ClassFactory.remove(NewMap, Sym);
2972 }
2973 }
2974
2975 // 3. Remove dead members from classes and remove dead non-trivial classes
2976 // and their constraints.
2977 for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair :
2978 ClassMembersMap) {
2979 EquivalenceClass Class = ClassMembersPair.first;
2980 SymbolSet LiveMembers = ClassMembersPair.second;
2981 bool MembersChanged = false;
2982
2983 for (SymbolRef Member : ClassMembersPair.second) {
2984 if (SymReaper.isDead(Member)) {
2985 MembersChanged = true;
2986 LiveMembers = SetFactory.remove(LiveMembers, Member);
2987 }
2988 }
2989
2990 // Check if the class changed.
2991 if (!MembersChanged)
2992 continue;
2993
2994 MembersMapChanged = true;
2995
2996 if (LiveMembers.isEmpty()) {
2997 // The class is dead now, we need to wipe it out of the members map...
2998 NewClassMembersMap = EMFactory.remove(NewClassMembersMap, Class);
2999
3000 // ...and remove all of its constraints.
3001 removeDeadClass(Class);
3002 } else {
3003 // We need to change the members associated with the class.
3004 NewClassMembersMap =
3005 EMFactory.add(NewClassMembersMap, Class, LiveMembers);
3006 }
3007 }
3008
3009 // 4. Update the state with new maps.
3010 //
3011 // Here we try to be humble and update a map only if it really changed.
3012 if (ClassMapChanged)
3013 State = State->set<ClassMap>(NewMap);
3014
3015 if (MembersMapChanged)
3016 State = State->set<ClassMembers>(NewClassMembersMap);
3017
3018 if (ConstraintMapChanged)
3019 State = State->set<ConstraintRange>(Constraints);
3020
3021 if (DisequalitiesChanged)
3022 State = State->set<DisequalityMap>(Disequalities);
3023
3024 assert(EquivalenceClass::isClassDataConsistent(State));
3025
3026 return State;
3027 }
3028
getRange(ProgramStateRef State,SymbolRef Sym)3029 RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
3030 SymbolRef Sym) {
3031 return SymbolicRangeInferrer::inferRange(F, State, Sym);
3032 }
3033
setRange(ProgramStateRef State,SymbolRef Sym,RangeSet Range)3034 ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State,
3035 SymbolRef Sym,
3036 RangeSet Range) {
3037 return ConstraintAssignor::assign(State, getSValBuilder(), F, Sym, Range);
3038 }
3039
3040 //===------------------------------------------------------------------------===
3041 // assumeSymX methods: protected interface for RangeConstraintManager.
3042 //===------------------------------------------------------------------------===
3043
3044 // The syntax for ranges below is mathematical, using [x, y] for closed ranges
3045 // and (x, y) for open ranges. These ranges are modular, corresponding with
3046 // a common treatment of C integer overflow. This means that these methods
3047 // do not have to worry about overflow; RangeSet::Intersect can handle such a
3048 // "wraparound" range.
3049 // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
3050 // UINT_MAX, 0, 1, and 2.
3051
3052 ProgramStateRef
assumeSymNE(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3053 RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
3054 const llvm::APSInt &Int,
3055 const llvm::APSInt &Adjustment) {
3056 // Before we do any real work, see if the value can even show up.
3057 APSIntType AdjustmentType(Adjustment);
3058 if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
3059 return St;
3060
3061 llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment;
3062 RangeSet New = getRange(St, Sym);
3063 New = F.deletePoint(New, Point);
3064
3065 return setRange(St, Sym, New);
3066 }
3067
3068 ProgramStateRef
assumeSymEQ(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3069 RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
3070 const llvm::APSInt &Int,
3071 const llvm::APSInt &Adjustment) {
3072 // Before we do any real work, see if the value can even show up.
3073 APSIntType AdjustmentType(Adjustment);
3074 if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
3075 return nullptr;
3076
3077 // [Int-Adjustment, Int-Adjustment]
3078 llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
3079 RangeSet New = getRange(St, Sym);
3080 New = F.intersect(New, AdjInt);
3081
3082 return setRange(St, Sym, New);
3083 }
3084
getSymLTRange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3085 RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St,
3086 SymbolRef Sym,
3087 const llvm::APSInt &Int,
3088 const llvm::APSInt &Adjustment) {
3089 // Before we do any real work, see if the value can even show up.
3090 APSIntType AdjustmentType(Adjustment);
3091 switch (AdjustmentType.testInRange(Int, true)) {
3092 case APSIntType::RTR_Below:
3093 return F.getEmptySet();
3094 case APSIntType::RTR_Within:
3095 break;
3096 case APSIntType::RTR_Above:
3097 return getRange(St, Sym);
3098 }
3099
3100 // Special case for Int == Min. This is always false.
3101 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3102 llvm::APSInt Min = AdjustmentType.getMinValue();
3103 if (ComparisonVal == Min)
3104 return F.getEmptySet();
3105
3106 llvm::APSInt Lower = Min - Adjustment;
3107 llvm::APSInt Upper = ComparisonVal - Adjustment;
3108 --Upper;
3109
3110 RangeSet Result = getRange(St, Sym);
3111 return F.intersect(Result, Lower, Upper);
3112 }
3113
3114 ProgramStateRef
assumeSymLT(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3115 RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
3116 const llvm::APSInt &Int,
3117 const llvm::APSInt &Adjustment) {
3118 RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
3119 return setRange(St, Sym, New);
3120 }
3121
getSymGTRange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3122 RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St,
3123 SymbolRef Sym,
3124 const llvm::APSInt &Int,
3125 const llvm::APSInt &Adjustment) {
3126 // Before we do any real work, see if the value can even show up.
3127 APSIntType AdjustmentType(Adjustment);
3128 switch (AdjustmentType.testInRange(Int, true)) {
3129 case APSIntType::RTR_Below:
3130 return getRange(St, Sym);
3131 case APSIntType::RTR_Within:
3132 break;
3133 case APSIntType::RTR_Above:
3134 return F.getEmptySet();
3135 }
3136
3137 // Special case for Int == Max. This is always false.
3138 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3139 llvm::APSInt Max = AdjustmentType.getMaxValue();
3140 if (ComparisonVal == Max)
3141 return F.getEmptySet();
3142
3143 llvm::APSInt Lower = ComparisonVal - Adjustment;
3144 llvm::APSInt Upper = Max - Adjustment;
3145 ++Lower;
3146
3147 RangeSet SymRange = getRange(St, Sym);
3148 return F.intersect(SymRange, Lower, Upper);
3149 }
3150
3151 ProgramStateRef
assumeSymGT(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3152 RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
3153 const llvm::APSInt &Int,
3154 const llvm::APSInt &Adjustment) {
3155 RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
3156 return setRange(St, Sym, New);
3157 }
3158
getSymGERange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3159 RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St,
3160 SymbolRef Sym,
3161 const llvm::APSInt &Int,
3162 const llvm::APSInt &Adjustment) {
3163 // Before we do any real work, see if the value can even show up.
3164 APSIntType AdjustmentType(Adjustment);
3165 switch (AdjustmentType.testInRange(Int, true)) {
3166 case APSIntType::RTR_Below:
3167 return getRange(St, Sym);
3168 case APSIntType::RTR_Within:
3169 break;
3170 case APSIntType::RTR_Above:
3171 return F.getEmptySet();
3172 }
3173
3174 // Special case for Int == Min. This is always feasible.
3175 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3176 llvm::APSInt Min = AdjustmentType.getMinValue();
3177 if (ComparisonVal == Min)
3178 return getRange(St, Sym);
3179
3180 llvm::APSInt Max = AdjustmentType.getMaxValue();
3181 llvm::APSInt Lower = ComparisonVal - Adjustment;
3182 llvm::APSInt Upper = Max - Adjustment;
3183
3184 RangeSet SymRange = getRange(St, Sym);
3185 return F.intersect(SymRange, Lower, Upper);
3186 }
3187
3188 ProgramStateRef
assumeSymGE(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3189 RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
3190 const llvm::APSInt &Int,
3191 const llvm::APSInt &Adjustment) {
3192 RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
3193 return setRange(St, Sym, New);
3194 }
3195
3196 RangeSet
getSymLERange(llvm::function_ref<RangeSet ()> RS,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3197 RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS,
3198 const llvm::APSInt &Int,
3199 const llvm::APSInt &Adjustment) {
3200 // Before we do any real work, see if the value can even show up.
3201 APSIntType AdjustmentType(Adjustment);
3202 switch (AdjustmentType.testInRange(Int, true)) {
3203 case APSIntType::RTR_Below:
3204 return F.getEmptySet();
3205 case APSIntType::RTR_Within:
3206 break;
3207 case APSIntType::RTR_Above:
3208 return RS();
3209 }
3210
3211 // Special case for Int == Max. This is always feasible.
3212 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3213 llvm::APSInt Max = AdjustmentType.getMaxValue();
3214 if (ComparisonVal == Max)
3215 return RS();
3216
3217 llvm::APSInt Min = AdjustmentType.getMinValue();
3218 llvm::APSInt Lower = Min - Adjustment;
3219 llvm::APSInt Upper = ComparisonVal - Adjustment;
3220
3221 RangeSet Default = RS();
3222 return F.intersect(Default, Lower, Upper);
3223 }
3224
getSymLERange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3225 RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St,
3226 SymbolRef Sym,
3227 const llvm::APSInt &Int,
3228 const llvm::APSInt &Adjustment) {
3229 return getSymLERange([&] { return getRange(St, Sym); }, Int, Adjustment);
3230 }
3231
3232 ProgramStateRef
assumeSymLE(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3233 RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
3234 const llvm::APSInt &Int,
3235 const llvm::APSInt &Adjustment) {
3236 RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
3237 return setRange(St, Sym, New);
3238 }
3239
assumeSymWithinInclusiveRange(ProgramStateRef State,SymbolRef Sym,const llvm::APSInt & From,const llvm::APSInt & To,const llvm::APSInt & Adjustment)3240 ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
3241 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3242 const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3243 RangeSet New = getSymGERange(State, Sym, From, Adjustment);
3244 if (New.isEmpty())
3245 return nullptr;
3246 RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment);
3247 return setRange(State, Sym, Out);
3248 }
3249
assumeSymOutsideInclusiveRange(ProgramStateRef State,SymbolRef Sym,const llvm::APSInt & From,const llvm::APSInt & To,const llvm::APSInt & Adjustment)3250 ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
3251 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3252 const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3253 RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment);
3254 RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment);
3255 RangeSet New(F.add(RangeLT, RangeGT));
3256 return setRange(State, Sym, New);
3257 }
3258
3259 //===----------------------------------------------------------------------===//
3260 // Pretty-printing.
3261 //===----------------------------------------------------------------------===//
3262
printJson(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3263 void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State,
3264 const char *NL, unsigned int Space,
3265 bool IsDot) const {
3266 printConstraints(Out, State, NL, Space, IsDot);
3267 printEquivalenceClasses(Out, State, NL, Space, IsDot);
3268 printDisequalities(Out, State, NL, Space, IsDot);
3269 }
3270
printValue(raw_ostream & Out,ProgramStateRef State,SymbolRef Sym)3271 void RangeConstraintManager::printValue(raw_ostream &Out, ProgramStateRef State,
3272 SymbolRef Sym) {
3273 const RangeSet RS = getRange(State, Sym);
3274 if (RS.isEmpty()) {
3275 Out << "<empty rangeset>";
3276 return;
3277 }
3278 Out << RS.getBitWidth() << (RS.isUnsigned() ? "u:" : "s:");
3279 RS.dump(Out);
3280 }
3281
toString(const SymbolRef & Sym)3282 static std::string toString(const SymbolRef &Sym) {
3283 std::string S;
3284 llvm::raw_string_ostream O(S);
3285 Sym->dumpToStream(O);
3286 return S;
3287 }
3288
printConstraints(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3289 void RangeConstraintManager::printConstraints(raw_ostream &Out,
3290 ProgramStateRef State,
3291 const char *NL,
3292 unsigned int Space,
3293 bool IsDot) const {
3294 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
3295
3296 Indent(Out, Space, IsDot) << "\"constraints\": ";
3297 if (Constraints.isEmpty()) {
3298 Out << "null," << NL;
3299 return;
3300 }
3301
3302 std::map<std::string, RangeSet> OrderedConstraints;
3303 for (std::pair<EquivalenceClass, RangeSet> P : Constraints) {
3304 SymbolSet ClassMembers = P.first.getClassMembers(State);
3305 for (const SymbolRef &ClassMember : ClassMembers) {
3306 bool insertion_took_place;
3307 std::tie(std::ignore, insertion_took_place) =
3308 OrderedConstraints.insert({toString(ClassMember), P.second});
3309 assert(insertion_took_place &&
3310 "two symbols should not have the same dump");
3311 }
3312 }
3313
3314 ++Space;
3315 Out << '[' << NL;
3316 bool First = true;
3317 for (std::pair<std::string, RangeSet> P : OrderedConstraints) {
3318 if (First) {
3319 First = false;
3320 } else {
3321 Out << ',';
3322 Out << NL;
3323 }
3324 Indent(Out, Space, IsDot)
3325 << "{ \"symbol\": \"" << P.first << "\", \"range\": \"";
3326 P.second.dump(Out);
3327 Out << "\" }";
3328 }
3329 Out << NL;
3330
3331 --Space;
3332 Indent(Out, Space, IsDot) << "]," << NL;
3333 }
3334
toString(ProgramStateRef State,EquivalenceClass Class)3335 static std::string toString(ProgramStateRef State, EquivalenceClass Class) {
3336 SymbolSet ClassMembers = Class.getClassMembers(State);
3337 llvm::SmallVector<SymbolRef, 8> ClassMembersSorted(ClassMembers.begin(),
3338 ClassMembers.end());
3339 llvm::sort(ClassMembersSorted,
3340 [](const SymbolRef &LHS, const SymbolRef &RHS) {
3341 return toString(LHS) < toString(RHS);
3342 });
3343
3344 bool FirstMember = true;
3345
3346 std::string Str;
3347 llvm::raw_string_ostream Out(Str);
3348 Out << "[ ";
3349 for (SymbolRef ClassMember : ClassMembersSorted) {
3350 if (FirstMember)
3351 FirstMember = false;
3352 else
3353 Out << ", ";
3354 Out << "\"" << ClassMember << "\"";
3355 }
3356 Out << " ]";
3357 return Str;
3358 }
3359
printEquivalenceClasses(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3360 void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out,
3361 ProgramStateRef State,
3362 const char *NL,
3363 unsigned int Space,
3364 bool IsDot) const {
3365 ClassMembersTy Members = State->get<ClassMembers>();
3366
3367 Indent(Out, Space, IsDot) << "\"equivalence_classes\": ";
3368 if (Members.isEmpty()) {
3369 Out << "null," << NL;
3370 return;
3371 }
3372
3373 std::set<std::string> MembersStr;
3374 for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members)
3375 MembersStr.insert(toString(State, ClassToSymbolSet.first));
3376
3377 ++Space;
3378 Out << '[' << NL;
3379 bool FirstClass = true;
3380 for (const std::string &Str : MembersStr) {
3381 if (FirstClass) {
3382 FirstClass = false;
3383 } else {
3384 Out << ',';
3385 Out << NL;
3386 }
3387 Indent(Out, Space, IsDot);
3388 Out << Str;
3389 }
3390 Out << NL;
3391
3392 --Space;
3393 Indent(Out, Space, IsDot) << "]," << NL;
3394 }
3395
printDisequalities(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3396 void RangeConstraintManager::printDisequalities(raw_ostream &Out,
3397 ProgramStateRef State,
3398 const char *NL,
3399 unsigned int Space,
3400 bool IsDot) const {
3401 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
3402
3403 Indent(Out, Space, IsDot) << "\"disequality_info\": ";
3404 if (Disequalities.isEmpty()) {
3405 Out << "null," << NL;
3406 return;
3407 }
3408
3409 // Transform the disequality info to an ordered map of
3410 // [string -> (ordered set of strings)]
3411 using EqClassesStrTy = std::set<std::string>;
3412 using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>;
3413 DisequalityInfoStrTy DisequalityInfoStr;
3414 for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) {
3415 EquivalenceClass Class = ClassToDisEqSet.first;
3416 ClassSet DisequalClasses = ClassToDisEqSet.second;
3417 EqClassesStrTy MembersStr;
3418 for (EquivalenceClass DisEqClass : DisequalClasses)
3419 MembersStr.insert(toString(State, DisEqClass));
3420 DisequalityInfoStr.insert({toString(State, Class), MembersStr});
3421 }
3422
3423 ++Space;
3424 Out << '[' << NL;
3425 bool FirstClass = true;
3426 for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet :
3427 DisequalityInfoStr) {
3428 const std::string &Class = ClassToDisEqSet.first;
3429 if (FirstClass) {
3430 FirstClass = false;
3431 } else {
3432 Out << ',';
3433 Out << NL;
3434 }
3435 Indent(Out, Space, IsDot) << "{" << NL;
3436 unsigned int DisEqSpace = Space + 1;
3437 Indent(Out, DisEqSpace, IsDot) << "\"class\": ";
3438 Out << Class;
3439 const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second;
3440 if (!DisequalClasses.empty()) {
3441 Out << "," << NL;
3442 Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL;
3443 unsigned int DisEqClassSpace = DisEqSpace + 1;
3444 Indent(Out, DisEqClassSpace, IsDot);
3445 bool FirstDisEqClass = true;
3446 for (const std::string &DisEqClass : DisequalClasses) {
3447 if (FirstDisEqClass) {
3448 FirstDisEqClass = false;
3449 } else {
3450 Out << ',' << NL;
3451 Indent(Out, DisEqClassSpace, IsDot);
3452 }
3453 Out << DisEqClass;
3454 }
3455 Out << "]" << NL;
3456 }
3457 Indent(Out, Space, IsDot) << "}";
3458 }
3459 Out << NL;
3460
3461 --Space;
3462 Indent(Out, Space, IsDot) << "]," << NL;
3463 }
3464