// xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file defines RangeConstraintManager, a class that tracks simple
//  equality and inequality constraints on symbolic values of ProgramState.
//
//===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/JsonSupport.h"
15 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
19 #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
20 #include "llvm/ADT/FoldingSet.h"
21 #include "llvm/ADT/ImmutableSet.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/Support/Compiler.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <algorithm>
28 #include <iterator>
29 #include <optional>
30 
31 using namespace clang;
32 using namespace ento;
33 
34 // This class can be extended with other tables which will help to reason
35 // about ranges more precisely.
36 class OperatorRelationsTable {
37   static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE &&
38                     BO_GE < BO_EQ && BO_EQ < BO_NE,
39                 "This class relies on operators order. Rework it otherwise.");
40 
41 public:
42   enum TriStateKind {
43     False = 0,
44     True,
45     Unknown,
46   };
47 
48 private:
49   // CmpOpTable holds states which represent the corresponding range for
50   // branching an exploded graph. We can reason about the branch if there is
51   // a previously known fact of the existence of a comparison expression with
52   // operands used in the current expression.
53   // E.g. assuming (x < y) is true that means (x != y) is surely true.
54   // if (x previous_operation y)  // <    | !=      | >
55   //   if (x operation y)         // !=   | >       | <
56   //     tristate                 // True | Unknown | False
57   //
58   // CmpOpTable represents next:
59   // __|< |> |<=|>=|==|!=|UnknownX2|
60   // < |1 |0 |* |0 |0 |* |1        |
61   // > |0 |1 |0 |* |0 |* |1        |
62   // <=|1 |0 |1 |* |1 |* |0        |
63   // >=|0 |1 |* |1 |1 |* |0        |
64   // ==|0 |0 |* |* |1 |0 |1        |
65   // !=|1 |1 |* |* |0 |1 |0        |
66   //
67   // Columns stands for a previous operator.
68   // Rows stands for a current operator.
69   // Each row has exactly two `Unknown` cases.
70   // UnknownX2 means that both `Unknown` previous operators are met in code,
71   // and there is a special column for that, for example:
72   // if (x >= y)
73   //   if (x != y)
74   //     if (x <= y)
75   //       False only
76   static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1;
77   const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = {
78       // <      >      <=     >=     ==     !=    UnknownX2
79       {True, False, Unknown, False, False, Unknown, True}, // <
80       {False, True, False, Unknown, False, Unknown, True}, // >
81       {True, False, True, Unknown, True, Unknown, False},  // <=
82       {False, True, Unknown, True, True, Unknown, False},  // >=
83       {False, False, Unknown, Unknown, True, False, True}, // ==
84       {True, True, Unknown, Unknown, False, True, False},  // !=
85   };
86 
getIndexFromOp(BinaryOperatorKind OP)87   static size_t getIndexFromOp(BinaryOperatorKind OP) {
88     return static_cast<size_t>(OP - BO_LT);
89   }
90 
91 public:
getCmpOpCount() const92   constexpr size_t getCmpOpCount() const { return CmpOpCount; }
93 
getOpFromIndex(size_t Index)94   static BinaryOperatorKind getOpFromIndex(size_t Index) {
95     return static_cast<BinaryOperatorKind>(Index + BO_LT);
96   }
97 
getCmpOpState(BinaryOperatorKind CurrentOP,BinaryOperatorKind QueriedOP) const98   TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP,
99                              BinaryOperatorKind QueriedOP) const {
100     return CmpOpTable[getIndexFromOp(CurrentOP)][getIndexFromOp(QueriedOP)];
101   }
102 
getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const103   TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const {
104     return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount];
105   }
106 };
107 
108 //===----------------------------------------------------------------------===//
109 //                           RangeSet implementation
110 //===----------------------------------------------------------------------===//
111 
112 RangeSet::ContainerType RangeSet::Factory::EmptySet{};
113 
add(RangeSet LHS,RangeSet RHS)114 RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) {
115   ContainerType Result;
116   Result.reserve(LHS.size() + RHS.size());
117   std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
118              std::back_inserter(Result));
119   return makePersistent(std::move(Result));
120 }
121 
add(RangeSet Original,Range Element)122 RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) {
123   ContainerType Result;
124   Result.reserve(Original.size() + 1);
125 
126   const_iterator Lower = llvm::lower_bound(Original, Element);
127   Result.insert(Result.end(), Original.begin(), Lower);
128   Result.push_back(Element);
129   Result.insert(Result.end(), Lower, Original.end());
130 
131   return makePersistent(std::move(Result));
132 }
133 
add(RangeSet Original,const llvm::APSInt & Point)134 RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) {
135   return add(Original, Range(Point));
136 }
137 
unite(RangeSet LHS,RangeSet RHS)138 RangeSet RangeSet::Factory::unite(RangeSet LHS, RangeSet RHS) {
139   ContainerType Result = unite(*LHS.Impl, *RHS.Impl);
140   return makePersistent(std::move(Result));
141 }
142 
unite(RangeSet Original,Range R)143 RangeSet RangeSet::Factory::unite(RangeSet Original, Range R) {
144   ContainerType Result;
145   Result.push_back(R);
146   Result = unite(*Original.Impl, Result);
147   return makePersistent(std::move(Result));
148 }
149 
unite(RangeSet Original,llvm::APSInt Point)150 RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt Point) {
151   return unite(Original, Range(ValueFactory.getValue(Point)));
152 }
153 
unite(RangeSet Original,llvm::APSInt From,llvm::APSInt To)154 RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt From,
155                                   llvm::APSInt To) {
156   return unite(Original,
157                Range(ValueFactory.getValue(From), ValueFactory.getValue(To)));
158 }
159 
/// Exchanges two (begin, end) pairs: after the call \p First / \p FirstEnd
/// hold what \p Second / \p SecondEnd held before, and vice versa.
template <typename T>
void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd) {
  // The two exchanges are independent of each other.
  std::swap(FirstEnd, SecondEnd);
  std::swap(First, Second);
}
165 
unite(const ContainerType & LHS,const ContainerType & RHS)166 RangeSet::ContainerType RangeSet::Factory::unite(const ContainerType &LHS,
167                                                  const ContainerType &RHS) {
168   if (LHS.empty())
169     return RHS;
170   if (RHS.empty())
171     return LHS;
172 
173   using llvm::APSInt;
174   using iterator = ContainerType::const_iterator;
175 
176   iterator First = LHS.begin();
177   iterator FirstEnd = LHS.end();
178   iterator Second = RHS.begin();
179   iterator SecondEnd = RHS.end();
180   APSIntType Ty = APSIntType(First->From());
181   const APSInt Min = Ty.getMinValue();
182 
183   // Handle a corner case first when both range sets start from MIN.
184   // This helps to avoid complicated conditions below. Specifically, this
185   // particular check for `MIN` is not needed in the loop below every time
186   // when we do `Second->From() - One` operation.
187   if (Min == First->From() && Min == Second->From()) {
188     if (First->To() > Second->To()) {
189       //    [ First    ]--->
190       //    [ Second ]----->
191       // MIN^
192       // The Second range is entirely inside the First one.
193 
194       // Check if Second is the last in its RangeSet.
195       if (++Second == SecondEnd)
196         //    [ First     ]--[ First + 1 ]--->
197         //    [ Second ]--------------------->
198         // MIN^
199         // The Union is equal to First's RangeSet.
200         return LHS;
201     } else {
202       // case 1: [ First ]----->
203       // case 2: [ First   ]--->
204       //         [ Second  ]--->
205       //      MIN^
206       // The First range is entirely inside or equal to the Second one.
207 
208       // Check if First is the last in its RangeSet.
209       if (++First == FirstEnd)
210         //    [ First ]----------------------->
211         //    [ Second  ]--[ Second + 1 ]---->
212         // MIN^
213         // The Union is equal to Second's RangeSet.
214         return RHS;
215     }
216   }
217 
218   const APSInt One = Ty.getValue(1);
219   ContainerType Result;
220 
221   // This is called when there are no ranges left in one of the ranges.
222   // Append the rest of the ranges from another range set to the Result
223   // and return with that.
224   const auto AppendTheRest = [&Result](iterator I, iterator E) {
225     Result.append(I, E);
226     return Result;
227   };
228 
229   while (true) {
230     // We want to keep the following invariant at all times:
231     // ---[ First ------>
232     // -----[ Second --->
233     if (First->From() > Second->From())
234       swapIterators(First, FirstEnd, Second, SecondEnd);
235 
236     // The Union definitely starts with First->From().
237     // ----------[ First ------>
238     // ------------[ Second --->
239     // ----------[ Union ------>
240     // UnionStart^
241     const llvm::APSInt &UnionStart = First->From();
242 
243     // Loop where the invariant holds.
244     while (true) {
245       // Skip all enclosed ranges.
246       // ---[                  First                     ]--->
247       // -----[ Second ]--[ Second + 1 ]--[ Second + N ]----->
248       while (First->To() >= Second->To()) {
249         // Check if Second is the last in its RangeSet.
250         if (++Second == SecondEnd) {
251           // Append the Union.
252           // ---[ Union      ]--->
253           // -----[ Second ]----->
254           // --------[ First ]--->
255           //         UnionEnd^
256           Result.emplace_back(UnionStart, First->To());
257           // ---[ Union ]----------------->
258           // --------------[ First + 1]--->
259           // Append all remaining ranges from the First's RangeSet.
260           return AppendTheRest(++First, FirstEnd);
261         }
262       }
263 
264       // Check if First and Second are disjoint. It means that we find
265       // the end of the Union. Exit the loop and append the Union.
266       // ---[ First ]=------------->
267       // ------------=[ Second ]--->
268       // ----MinusOne^
269       if (First->To() < Second->From() - One)
270         break;
271 
272       // First is entirely inside the Union. Go next.
273       // ---[ Union ----------->
274       // ---- [ First ]-------->
275       // -------[ Second ]----->
276       // Check if First is the last in its RangeSet.
277       if (++First == FirstEnd) {
278         // Append the Union.
279         // ---[ Union       ]--->
280         // -----[ First ]------->
281         // --------[ Second ]--->
282         //          UnionEnd^
283         Result.emplace_back(UnionStart, Second->To());
284         // ---[ Union ]------------------>
285         // --------------[ Second + 1]--->
286         // Append all remaining ranges from the Second's RangeSet.
287         return AppendTheRest(++Second, SecondEnd);
288       }
289 
290       // We know that we are at one of the two cases:
291       // case 1: --[ First ]--------->
292       // case 2: ----[ First ]------->
293       // --------[ Second ]---------->
294       // In both cases First starts after Second->From().
295       // Make sure that the loop invariant holds.
296       swapIterators(First, FirstEnd, Second, SecondEnd);
297     }
298 
299     // Here First and Second are disjoint.
300     // Append the Union.
301     // ---[ Union    ]--------------->
302     // -----------------[ Second ]--->
303     // ------[ First ]--------------->
304     //       UnionEnd^
305     Result.emplace_back(UnionStart, First->To());
306 
307     // Check if First is the last in its RangeSet.
308     if (++First == FirstEnd)
309       // ---[ Union ]--------------->
310       // --------------[ Second ]--->
311       // Append all remaining ranges from the Second's RangeSet.
312       return AppendTheRest(Second, SecondEnd);
313   }
314 
315   llvm_unreachable("Normally, we should not reach here");
316 }
317 
getRangeSet(Range From)318 RangeSet RangeSet::Factory::getRangeSet(Range From) {
319   ContainerType Result;
320   Result.push_back(From);
321   return makePersistent(std::move(Result));
322 }
323 
makePersistent(ContainerType && From)324 RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) {
325   llvm::FoldingSetNodeID ID;
326   void *InsertPos;
327 
328   From.Profile(ID);
329   ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos);
330 
331   if (!Result) {
332     // It is cheaper to fully construct the resulting range on stack
333     // and move it to the freshly allocated buffer if we don't have
334     // a set like this already.
335     Result = construct(std::move(From));
336     Cache.InsertNode(Result, InsertPos);
337   }
338 
339   return Result;
340 }
341 
construct(ContainerType && From)342 RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) {
343   void *Buffer = Arena.Allocate();
344   return new (Buffer) ContainerType(std::move(From));
345 }
346 
getMinValue() const347 const llvm::APSInt &RangeSet::getMinValue() const {
348   assert(!isEmpty());
349   return begin()->From();
350 }
351 
getMaxValue() const352 const llvm::APSInt &RangeSet::getMaxValue() const {
353   assert(!isEmpty());
354   return std::prev(end())->To();
355 }
356 
isUnsigned() const357 bool clang::ento::RangeSet::isUnsigned() const {
358   assert(!isEmpty());
359   return begin()->From().isUnsigned();
360 }
361 
getBitWidth() const362 uint32_t clang::ento::RangeSet::getBitWidth() const {
363   assert(!isEmpty());
364   return begin()->From().getBitWidth();
365 }
366 
getAPSIntType() const367 APSIntType clang::ento::RangeSet::getAPSIntType() const {
368   assert(!isEmpty());
369   return APSIntType(begin()->From());
370 }
371 
containsImpl(llvm::APSInt & Point) const372 bool RangeSet::containsImpl(llvm::APSInt &Point) const {
373   if (isEmpty() || !pin(Point))
374     return false;
375 
376   Range Dummy(Point);
377   const_iterator It = llvm::upper_bound(*this, Dummy);
378   if (It == begin())
379     return false;
380 
381   return std::prev(It)->Includes(Point);
382 }
383 
pin(llvm::APSInt & Point) const384 bool RangeSet::pin(llvm::APSInt &Point) const {
385   APSIntType Type(getMinValue());
386   if (Type.testInRange(Point, true) != APSIntType::RTR_Within)
387     return false;
388 
389   Type.apply(Point);
390   return true;
391 }
392 
pin(llvm::APSInt & Lower,llvm::APSInt & Upper) const393 bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
394   // This function has nine cases, the cartesian product of range-testing
395   // both the upper and lower bounds against the symbol's type.
396   // Each case requires a different pinning operation.
397   // The function returns false if the described range is entirely outside
398   // the range of values for the associated symbol.
399   APSIntType Type(getMinValue());
400   APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower, true);
401   APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper, true);
402 
403   switch (LowerTest) {
404   case APSIntType::RTR_Below:
405     switch (UpperTest) {
406     case APSIntType::RTR_Below:
407       // The entire range is outside the symbol's set of possible values.
408       // If this is a conventionally-ordered range, the state is infeasible.
409       if (Lower <= Upper)
410         return false;
411 
412       // However, if the range wraps around, it spans all possible values.
413       Lower = Type.getMinValue();
414       Upper = Type.getMaxValue();
415       break;
416     case APSIntType::RTR_Within:
417       // The range starts below what's possible but ends within it. Pin.
418       Lower = Type.getMinValue();
419       Type.apply(Upper);
420       break;
421     case APSIntType::RTR_Above:
422       // The range spans all possible values for the symbol. Pin.
423       Lower = Type.getMinValue();
424       Upper = Type.getMaxValue();
425       break;
426     }
427     break;
428   case APSIntType::RTR_Within:
429     switch (UpperTest) {
430     case APSIntType::RTR_Below:
431       // The range wraps around, but all lower values are not possible.
432       Type.apply(Lower);
433       Upper = Type.getMaxValue();
434       break;
435     case APSIntType::RTR_Within:
436       // The range may or may not wrap around, but both limits are valid.
437       Type.apply(Lower);
438       Type.apply(Upper);
439       break;
440     case APSIntType::RTR_Above:
441       // The range starts within what's possible but ends above it. Pin.
442       Type.apply(Lower);
443       Upper = Type.getMaxValue();
444       break;
445     }
446     break;
447   case APSIntType::RTR_Above:
448     switch (UpperTest) {
449     case APSIntType::RTR_Below:
450       // The range wraps but is outside the symbol's set of possible values.
451       return false;
452     case APSIntType::RTR_Within:
453       // The range starts above what's possible but ends within it (wrap).
454       Lower = Type.getMinValue();
455       Type.apply(Upper);
456       break;
457     case APSIntType::RTR_Above:
458       // The entire range is outside the symbol's set of possible values.
459       // If this is a conventionally-ordered range, the state is infeasible.
460       if (Lower <= Upper)
461         return false;
462 
463       // However, if the range wraps around, it spans all possible values.
464       Lower = Type.getMinValue();
465       Upper = Type.getMaxValue();
466       break;
467     }
468     break;
469   }
470 
471   return true;
472 }
473 
intersect(RangeSet What,llvm::APSInt Lower,llvm::APSInt Upper)474 RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower,
475                                       llvm::APSInt Upper) {
476   if (What.isEmpty() || !What.pin(Lower, Upper))
477     return getEmptySet();
478 
479   ContainerType DummyContainer;
480 
481   if (Lower <= Upper) {
482     // [Lower, Upper] is a regular range.
483     //
484     // Shortcut: check that there is even a possibility of the intersection
485     //           by checking the two following situations:
486     //
487     //               <---[  What  ]---[------]------>
488     //                              Lower  Upper
489     //                            -or-
490     //               <----[------]----[  What  ]---->
491     //                  Lower  Upper
492     if (What.getMaxValue() < Lower || Upper < What.getMinValue())
493       return getEmptySet();
494 
495     DummyContainer.push_back(
496         Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper)));
497   } else {
498     // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX]
499     //
500     // Shortcut: check that there is even a possibility of the intersection
501     //           by checking the following situation:
502     //
503     //               <------]---[  What  ]---[------>
504     //                    Upper             Lower
505     if (What.getMaxValue() < Lower && Upper < What.getMinValue())
506       return getEmptySet();
507 
508     DummyContainer.push_back(
509         Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper)));
510     DummyContainer.push_back(
511         Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower)));
512   }
513 
514   return intersect(*What.Impl, DummyContainer);
515 }
516 
intersect(const RangeSet::ContainerType & LHS,const RangeSet::ContainerType & RHS)517 RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS,
518                                       const RangeSet::ContainerType &RHS) {
519   ContainerType Result;
520   Result.reserve(std::max(LHS.size(), RHS.size()));
521 
522   const_iterator First = LHS.begin(), Second = RHS.begin(),
523                  FirstEnd = LHS.end(), SecondEnd = RHS.end();
524 
525   // If we ran out of ranges in one set, but not in the other,
526   // it means that those elements are definitely not in the
527   // intersection.
528   while (First != FirstEnd && Second != SecondEnd) {
529     // We want to keep the following invariant at all times:
530     //
531     //    ----[ First ---------------------->
532     //    --------[ Second ----------------->
533     if (Second->From() < First->From())
534       swapIterators(First, FirstEnd, Second, SecondEnd);
535 
536     // Loop where the invariant holds:
537     do {
538       // Check for the following situation:
539       //
540       //    ----[ First ]--------------------->
541       //    ---------------[ Second ]--------->
542       //
543       // which means that...
544       if (Second->From() > First->To()) {
545         // ...First is not in the intersection.
546         //
547         // We should move on to the next range after First and break out of the
548         // loop because the invariant might not be true.
549         ++First;
550         break;
551       }
552 
553       // We have a guaranteed intersection at this point!
554       // And this is the current situation:
555       //
556       //    ----[   First   ]----------------->
557       //    -------[ Second ------------------>
558       //
559       // Additionally, it definitely starts with Second->From().
560       const llvm::APSInt &IntersectionStart = Second->From();
561 
562       // It is important to know which of the two ranges' ends
563       // is greater.  That "longer" range might have some other
564       // intersections, while the "shorter" range might not.
565       if (Second->To() > First->To()) {
566         // Here we make a decision to keep First as the "longer"
567         // range.
568         swapIterators(First, FirstEnd, Second, SecondEnd);
569       }
570 
571       // At this point, we have the following situation:
572       //
573       //    ---- First      ]-------------------->
574       //    ---- Second ]--[  Second+1 ---------->
575       //
576       // We don't know the relationship between First->From and
577       // Second->From and we don't know whether Second+1 intersects
578       // with First.
579       //
580       // However, we know that [IntersectionStart, Second->To] is
581       // a part of the intersection...
582       Result.push_back(Range(IntersectionStart, Second->To()));
583       ++Second;
584       // ...and that the invariant will hold for a valid Second+1
585       // because First->From <= Second->To < (Second+1)->From.
586     } while (Second != SecondEnd);
587   }
588 
589   if (Result.empty())
590     return getEmptySet();
591 
592   return makePersistent(std::move(Result));
593 }
594 
intersect(RangeSet LHS,RangeSet RHS)595 RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) {
596   // Shortcut: let's see if the intersection is even possible.
597   if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() ||
598       RHS.getMaxValue() < LHS.getMinValue())
599     return getEmptySet();
600 
601   return intersect(*LHS.Impl, *RHS.Impl);
602 }
603 
intersect(RangeSet LHS,llvm::APSInt Point)604 RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) {
605   if (LHS.containsImpl(Point))
606     return getRangeSet(ValueFactory.getValue(Point));
607 
608   return getEmptySet();
609 }
610 
negate(RangeSet What)611 RangeSet RangeSet::Factory::negate(RangeSet What) {
612   if (What.isEmpty())
613     return getEmptySet();
614 
615   const llvm::APSInt SampleValue = What.getMinValue();
616   const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue);
617   const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue);
618 
619   ContainerType Result;
620   Result.reserve(What.size() + (SampleValue == MIN));
621 
622   // Handle a special case for MIN value.
623   const_iterator It = What.begin();
624   const_iterator End = What.end();
625 
626   const llvm::APSInt &From = It->From();
627   const llvm::APSInt &To = It->To();
628 
629   if (From == MIN) {
630     // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX].
631     if (To == MAX) {
632       return What;
633     }
634 
635     const_iterator Last = std::prev(End);
636 
637     // Try to find and unite the following ranges:
638     // [MIN, MIN] & [MIN + 1, N] => [MIN, N].
639     if (Last->To() == MAX) {
640       // It means that in the original range we have ranges
641       //   [MIN, A], ... , [B, MAX]
642       // And the result should be [MIN, -B], ..., [-A, MAX]
643       Result.emplace_back(MIN, ValueFactory.getValue(-Last->From()));
644       // We already negated Last, so we can skip it.
645       End = Last;
646     } else {
647       // Add a separate range for the lowest value.
648       Result.emplace_back(MIN, MIN);
649     }
650 
651     // Skip adding the second range in case when [From, To] are [MIN, MIN].
652     if (To != MIN) {
653       Result.emplace_back(ValueFactory.getValue(-To), MAX);
654     }
655 
656     // Skip the first range in the loop.
657     ++It;
658   }
659 
660   // Negate all other ranges.
661   for (; It != End; ++It) {
662     // Negate int values.
663     const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To());
664     const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From());
665 
666     // Add a negated range.
667     Result.emplace_back(NewFrom, NewTo);
668   }
669 
670   llvm::sort(Result);
671   return makePersistent(std::move(Result));
672 }
673 
674 // Convert range set to the given integral type using truncation and promotion.
675 // This works similar to APSIntType::apply function but for the range set.
castTo(RangeSet What,APSIntType Ty)676 RangeSet RangeSet::Factory::castTo(RangeSet What, APSIntType Ty) {
677   // Set is empty or NOOP (aka cast to the same type).
678   if (What.isEmpty() || What.getAPSIntType() == Ty)
679     return What;
680 
681   const bool IsConversion = What.isUnsigned() != Ty.isUnsigned();
682   const bool IsTruncation = What.getBitWidth() > Ty.getBitWidth();
683   const bool IsPromotion = What.getBitWidth() < Ty.getBitWidth();
684 
685   if (IsTruncation)
686     return makePersistent(truncateTo(What, Ty));
687 
688   // Here we handle 2 cases:
689   // - IsConversion && !IsPromotion.
690   //   In this case we handle changing a sign with same bitwidth: char -> uchar,
691   //   uint -> int. Here we convert negatives to positives and positives which
692   //   is out of range to negatives. We use convertTo function for that.
693   // - IsConversion && IsPromotion && !What.isUnsigned().
694   //   In this case we handle changing a sign from signeds to unsigneds with
695   //   higher bitwidth: char -> uint, int-> uint64. The point is that we also
696   //   need convert negatives to positives and use convertTo function as well.
697   //   For example, we don't need such a convertion when converting unsigned to
698   //   signed with higher bitwidth, because all the values of unsigned is valid
699   //   for the such signed.
700   if (IsConversion && (!IsPromotion || !What.isUnsigned()))
701     return makePersistent(convertTo(What, Ty));
702 
703   assert(IsPromotion && "Only promotion operation from unsigneds left.");
704   return makePersistent(promoteTo(What, Ty));
705 }
706 
castTo(RangeSet What,QualType T)707 RangeSet RangeSet::Factory::castTo(RangeSet What, QualType T) {
708   assert(T->isIntegralOrEnumerationType() && "T shall be an integral type.");
709   return castTo(What, ValueFactory.getAPSIntType(T));
710 }
711 
truncateTo(RangeSet What,APSIntType Ty)712 RangeSet::ContainerType RangeSet::Factory::truncateTo(RangeSet What,
713                                                       APSIntType Ty) {
714   using llvm::APInt;
715   using llvm::APSInt;
716   ContainerType Result;
717   ContainerType Dummy;
718   // CastRangeSize is an amount of all possible values of cast type.
719   // Example: `char` has 256 values; `short` has 65536 values.
720   // But in fact we use `amount of values` - 1, because
721   // we can't keep `amount of values of UINT64` inside uint64_t.
722   // E.g. 256 is an amount of all possible values of `char` and we can't keep
723   // it inside `char`.
724   // And it's OK, it's enough to do correct calculations.
725   uint64_t CastRangeSize = APInt::getMaxValue(Ty.getBitWidth()).getZExtValue();
726   for (const Range &R : What) {
727     // Get bounds of the given range.
728     APSInt FromInt = R.From();
729     APSInt ToInt = R.To();
730     // CurrentRangeSize is an amount of all possible values of the current
731     // range minus one.
732     uint64_t CurrentRangeSize = (ToInt - FromInt).getZExtValue();
733     // This is an optimization for a specific case when this Range covers
734     // the whole range of the target type.
735     Dummy.clear();
736     if (CurrentRangeSize >= CastRangeSize) {
737       Dummy.emplace_back(ValueFactory.getMinValue(Ty),
738                          ValueFactory.getMaxValue(Ty));
739       Result = std::move(Dummy);
740       break;
741     }
742     // Cast the bounds.
743     Ty.apply(FromInt);
744     Ty.apply(ToInt);
745     const APSInt &PersistentFrom = ValueFactory.getValue(FromInt);
746     const APSInt &PersistentTo = ValueFactory.getValue(ToInt);
747     if (FromInt > ToInt) {
748       Dummy.emplace_back(ValueFactory.getMinValue(Ty), PersistentTo);
749       Dummy.emplace_back(PersistentFrom, ValueFactory.getMaxValue(Ty));
750     } else
751       Dummy.emplace_back(PersistentFrom, PersistentTo);
752     // Every range retrieved after truncation potentialy has garbage values.
753     // So, we have to unite every next range with the previouses.
754     Result = unite(Result, Dummy);
755   }
756 
757   return Result;
758 }
759 
// Divide the conversion into two phases (presented as loops here).
// The first phase (loop) works while the cast values go in ascending order.
// E.g. char{1,3,5,127} -> uint{1,3,5,127}
// Interrupt the first phase and go to the second one when the cast values
// start going in descending order. That means that we crossed over the middle
// of the type value set (aka 0 for signeds and MAX/2+1 for unsigneds).
// For instance:
// 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1}
//    Here we put {1,3,5} to one array and {-128, -1} to another.
// 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,2}
//    Here we put {128,129,255} to one array and {0,1,2} to another.
// After that we unite both arrays.
// NOTE: We don't just concatenate the arrays, because they may have
// adjacent ranges, e.g.:
// 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) ->
//    unite -> uchar(0, 255)
// 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) ->
//    unite -> char(-2, 1)
convertTo(RangeSet What,APSIntType Ty)778 RangeSet::ContainerType RangeSet::Factory::convertTo(RangeSet What,
779                                                      APSIntType Ty) {
780   using llvm::APInt;
781   using llvm::APSInt;
782   using Bounds = std::pair<const APSInt &, const APSInt &>;
783   ContainerType AscendArray;
784   ContainerType DescendArray;
785   auto CastRange = [Ty, &VF = ValueFactory](const Range &R) -> Bounds {
786     // Get bounds of the given range.
787     APSInt FromInt = R.From();
788     APSInt ToInt = R.To();
789     // Cast the bounds.
790     Ty.apply(FromInt);
791     Ty.apply(ToInt);
792     return {VF.getValue(FromInt), VF.getValue(ToInt)};
793   };
794   // Phase 1. Fill the first array.
795   APSInt LastConvertedInt = Ty.getMinValue();
796   const auto *It = What.begin();
797   const auto *E = What.end();
798   while (It != E) {
799     Bounds NewBounds = CastRange(*(It++));
800     // If values stop going acsending order, go to the second phase(loop).
801     if (NewBounds.first < LastConvertedInt) {
802       DescendArray.emplace_back(NewBounds.first, NewBounds.second);
803       break;
804     }
805     // If the range contains a midpoint, then split the range.
806     // E.g. char(-5, 5) -> uchar(251, 5)
807     // Here we shall add a range (251, 255) to the first array and (0, 5) to the
808     // second one.
809     if (NewBounds.first > NewBounds.second) {
810       DescendArray.emplace_back(ValueFactory.getMinValue(Ty), NewBounds.second);
811       AscendArray.emplace_back(NewBounds.first, ValueFactory.getMaxValue(Ty));
812     } else
813       // Values are going acsending order.
814       AscendArray.emplace_back(NewBounds.first, NewBounds.second);
815     LastConvertedInt = NewBounds.first;
816   }
817   // Phase 2. Fill the second array.
818   while (It != E) {
819     Bounds NewBounds = CastRange(*(It++));
820     DescendArray.emplace_back(NewBounds.first, NewBounds.second);
821   }
822   // Unite both arrays.
823   return unite(AscendArray, DescendArray);
824 }
825 
826 /// Promotion from unsigneds to signeds/unsigneds left.
promoteTo(RangeSet What,APSIntType Ty)827 RangeSet::ContainerType RangeSet::Factory::promoteTo(RangeSet What,
828                                                      APSIntType Ty) {
829   ContainerType Result;
830   // We definitely know the size of the result set.
831   Result.reserve(What.size());
832 
833   // Each unsigned value fits every larger type without any changes,
834   // whether the larger type is signed or unsigned. So just promote and push
835   // back each range one by one.
836   for (const Range &R : What) {
837     // Get bounds of the given range.
838     llvm::APSInt FromInt = R.From();
839     llvm::APSInt ToInt = R.To();
840     // Cast the bounds.
841     Ty.apply(FromInt);
842     Ty.apply(ToInt);
843     Result.emplace_back(ValueFactory.getValue(FromInt),
844                         ValueFactory.getValue(ToInt));
845   }
846   return Result;
847 }
848 
deletePoint(RangeSet From,const llvm::APSInt & Point)849 RangeSet RangeSet::Factory::deletePoint(RangeSet From,
850                                         const llvm::APSInt &Point) {
851   if (!From.contains(Point))
852     return From;
853 
854   llvm::APSInt Upper = Point;
855   llvm::APSInt Lower = Point;
856 
857   ++Upper;
858   --Lower;
859 
860   // Notice that the lower bound is greater than the upper bound.
861   return intersect(From, Upper, Lower);
862 }
863 
dump(raw_ostream & OS) const864 LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const {
865   OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
866 }
dump() const867 LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); }
868 
dump(raw_ostream & OS) const869 LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const {
870   OS << "{ ";
871   llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); });
872   OS << " }";
873 }
dump() const874 LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); }
875 
// Program-state traits used by the equivalence-class bookkeeping below.

// SymbolSet: a set of symbols (SymbolRef) together with its factory.
876 REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef)
877 
878 namespace {
// Forward declaration only: the traits below name EquivalenceClass before the
// class itself is defined.
879 class EquivalenceClass;
880 } // end anonymous namespace
881 
// ClassMap:        symbol            -> equivalence class.
// ClassMembers:    equivalence class -> set of symbols.
// ConstraintRange: equivalence class -> range set (read and written through
//                  getConstraint/setConstraint).
882 REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass)
883 REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers, EquivalenceClass, SymbolSet)
884 REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange, EquivalenceClass, RangeSet)
885 
// ClassSet: a set of equivalence classes together with its factory.
// DisequalityMap: equivalence class -> set of equivalence classes.
886 REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass)
887 REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet)
888 
889 namespace {
890 /// This class encapsulates a set of symbols equal to each other.
891 ///
892 /// The main idea of the approach requiring such classes is in narrowing
893 /// and sharing constraints between symbols within the class.  Also we can
894 /// conclude that there is no practical need in storing constraints for
895 /// every member of the class separately.
896 ///
897 /// Main terminology:
898 ///
899 ///   * "Equivalence class" is an object of this class, which can be efficiently
900 ///     compared to other classes.  It represents the whole class without
901 ///     storing the actual in it.  The members of the class however can be
902 ///     retrieved from the state.
903 ///
904 ///   * "Class members" are the symbols corresponding to the class.  This means
905 ///     that A == B for every member symbols A and B from the class.  Members of
906 ///     each class are stored in the state.
907 ///
908 ///   * "Trivial class" is a class that has and ever had only one same symbol.
909 ///
910 ///   * "Merge operation" merges two classes into one.  It is the main operation
911 ///     to produce non-trivial classes.
912 ///     If, at some point, we can assume that two symbols from two distinct
913 ///     classes are equal, we can merge these classes.
914 class EquivalenceClass : public llvm::FoldingSetNode {
915 public:
916   /// Find equivalence class for the given symbol in the given state.
917   [[nodiscard]] static inline EquivalenceClass find(ProgramStateRef State,
918                                                     SymbolRef Sym);
919 
920   /// Merge classes for the given symbols and return a new state.
921   [[nodiscard]] static inline ProgramStateRef merge(RangeSet::Factory &F,
922                                                     ProgramStateRef State,
923                                                     SymbolRef First,
924                                                     SymbolRef Second);
925   // Merge this class with the given class and return a new state.
926   [[nodiscard]] inline ProgramStateRef
927   merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other);
928 
929   /// Return a set of class members for the given state.
930   [[nodiscard]] inline SymbolSet getClassMembers(ProgramStateRef State) const;
931 
932   /// Return true if the current class is trivial in the given state.
933   /// A class is trivial if and only if there is not any member relations stored
934   /// to it in State/ClassMembers.
935   /// An equivalence class with one member might seem as it does not hold any
936   /// meaningful information, i.e. that is a tautology. However, during the
937   /// removal of dead symbols we do not remove classes with one member for
938   /// resource and performance reasons. Consequently, a class with one member is
939   /// not necessarily trivial. It could happen that we have a class with two
940   /// members and then during the removal of dead symbols we remove one of its
941   /// members. In this case, the class is still non-trivial (it still has the
942   /// mappings in ClassMembers), even though it has only one member.
943   [[nodiscard]] inline bool isTrivial(ProgramStateRef State) const;
944 
945   /// Return true if the current class is trivial and its only member is dead.
946   [[nodiscard]] inline bool isTriviallyDead(ProgramStateRef State,
947                                             SymbolReaper &Reaper) const;
948 
949   [[nodiscard]] static inline ProgramStateRef
950   markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First,
951                SymbolRef Second);
952   [[nodiscard]] static inline ProgramStateRef
953   markDisequal(RangeSet::Factory &F, ProgramStateRef State,
954                EquivalenceClass First, EquivalenceClass Second);
955   [[nodiscard]] inline ProgramStateRef
956   markDisequal(RangeSet::Factory &F, ProgramStateRef State,
957                EquivalenceClass Other) const;
958   [[nodiscard]] static inline ClassSet getDisequalClasses(ProgramStateRef State,
959                                                           SymbolRef Sym);
960   [[nodiscard]] inline ClassSet getDisequalClasses(ProgramStateRef State) const;
961   [[nodiscard]] inline ClassSet
962   getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const;
963 
964   [[nodiscard]] static inline std::optional<bool>
965   areEqual(ProgramStateRef State, EquivalenceClass First,
966            EquivalenceClass Second);
967   [[nodiscard]] static inline std::optional<bool>
968   areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
969 
970   /// Remove one member from the class.
971   [[nodiscard]] ProgramStateRef removeMember(ProgramStateRef State,
972                                              const SymbolRef Old);
973 
974   /// Iterate over all symbols and try to simplify them.
975   [[nodiscard]] static inline ProgramStateRef simplify(SValBuilder &SVB,
976                                                        RangeSet::Factory &F,
977                                                        ProgramStateRef State,
978                                                        EquivalenceClass Class);
979 
980   void dumpToStream(ProgramStateRef State, raw_ostream &os) const;
dump(ProgramStateRef State) const981   LLVM_DUMP_METHOD void dump(ProgramStateRef State) const {
982     dumpToStream(State, llvm::errs());
983   }
984 
985   /// Check equivalence data for consistency.
986   [[nodiscard]] LLVM_ATTRIBUTE_UNUSED static bool
987   isClassDataConsistent(ProgramStateRef State);
988 
getType() const989   [[nodiscard]] QualType getType() const {
990     return getRepresentativeSymbol()->getType();
991   }
992 
993   EquivalenceClass() = delete;
994   EquivalenceClass(const EquivalenceClass &) = default;
995   EquivalenceClass &operator=(const EquivalenceClass &) = delete;
996   EquivalenceClass(EquivalenceClass &&) = default;
997   EquivalenceClass &operator=(EquivalenceClass &&) = delete;
998 
operator ==(const EquivalenceClass & Other) const999   bool operator==(const EquivalenceClass &Other) const {
1000     return ID == Other.ID;
1001   }
operator <(const EquivalenceClass & Other) const1002   bool operator<(const EquivalenceClass &Other) const { return ID < Other.ID; }
operator !=(const EquivalenceClass & Other) const1003   bool operator!=(const EquivalenceClass &Other) const {
1004     return !operator==(Other);
1005   }
1006 
Profile(llvm::FoldingSetNodeID & ID,uintptr_t CID)1007   static void Profile(llvm::FoldingSetNodeID &ID, uintptr_t CID) {
1008     ID.AddInteger(CID);
1009   }
1010 
Profile(llvm::FoldingSetNodeID & ID) const1011   void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, this->ID); }
1012 
1013 private:
EquivalenceClass(SymbolRef Sym)1014   /* implicit */ EquivalenceClass(SymbolRef Sym)
1015       : ID(reinterpret_cast<uintptr_t>(Sym)) {}
1016 
1017   /// This function is intended to be used ONLY within the class.
1018   /// The fact that ID is a pointer to a symbol is an implementation detail
1019   /// and should stay that way.
1020   /// In the current implementation, we use it to retrieve the only member
1021   /// of the trivial class.
getRepresentativeSymbol() const1022   SymbolRef getRepresentativeSymbol() const {
1023     return reinterpret_cast<SymbolRef>(ID);
1024   }
1025   static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State);
1026 
1027   inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
1028                                    SymbolSet Members, EquivalenceClass Other,
1029                                    SymbolSet OtherMembers);
1030 
1031   static inline bool
1032   addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
1033                        RangeSet::Factory &F, ProgramStateRef State,
1034                        EquivalenceClass First, EquivalenceClass Second);
1035 
1036   /// This is a unique identifier of the class.
1037   uintptr_t ID;
1038 };
1039 
1040 //===----------------------------------------------------------------------===//
1041 //                             Constraint functions
1042 //===----------------------------------------------------------------------===//
1043 
1044 [[nodiscard]] LLVM_ATTRIBUTE_UNUSED bool
areFeasible(ConstraintRangeTy Constraints)1045 areFeasible(ConstraintRangeTy Constraints) {
1046   return llvm::none_of(
1047       Constraints,
1048       [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) {
1049         return ClassConstraint.second.isEmpty();
1050       });
1051 }
1052 
getConstraint(ProgramStateRef State,EquivalenceClass Class)1053 [[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
1054                                                    EquivalenceClass Class) {
1055   return State->get<ConstraintRange>(Class);
1056 }
1057 
getConstraint(ProgramStateRef State,SymbolRef Sym)1058 [[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
1059                                                    SymbolRef Sym) {
1060   return getConstraint(State, EquivalenceClass::find(State, Sym));
1061 }
1062 
setConstraint(ProgramStateRef State,EquivalenceClass Class,RangeSet Constraint)1063 [[nodiscard]] ProgramStateRef setConstraint(ProgramStateRef State,
1064                                             EquivalenceClass Class,
1065                                             RangeSet Constraint) {
1066   return State->set<ConstraintRange>(Class, Constraint);
1067 }
1068 
setConstraints(ProgramStateRef State,ConstraintRangeTy Constraints)1069 [[nodiscard]] ProgramStateRef setConstraints(ProgramStateRef State,
1070                                              ConstraintRangeTy Constraints) {
1071   return State->set<ConstraintRange>(Constraints);
1072 }
1073 
1074 //===----------------------------------------------------------------------===//
1075 //                       Equality/disequality abstraction
1076 //===----------------------------------------------------------------------===//
1077 
1078 /// A small helper function for detecting symbolic (dis)equality.
1079 ///
1080 /// Equality check can have different forms (like a == b or a - b) and this
1081 /// class encapsulates those away if the only thing the user wants to check -
1082 /// whether it's equality/diseqiality or not.
1083 ///
1084 /// \returns true if assuming this Sym to be true means equality of operands
1085 ///          false if it means disequality of operands
1086 ///          std::nullopt otherwise
meansEquality(const SymSymExpr * Sym)1087 std::optional<bool> meansEquality(const SymSymExpr *Sym) {
1088   switch (Sym->getOpcode()) {
1089   case BO_Sub:
1090     // This case is: A - B != 0 -> disequality check.
1091     return false;
1092   case BO_EQ:
1093     // This case is: A == B != 0 -> equality check.
1094     return true;
1095   case BO_NE:
1096     // This case is: A != B != 0 -> diseqiality check.
1097     return false;
1098   default:
1099     return std::nullopt;
1100   }
1101 }
1102 
1103 //===----------------------------------------------------------------------===//
1104 //                            Intersection functions
1105 //===----------------------------------------------------------------------===//
1106 
1107 template <class SecondTy, class... RestTy>
1108 [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1109                                         SecondTy Second, RestTy... Tail);
1110 
1111 template <class... RangeTy> struct IntersectionTraits;
1112 
1113 template <class... TailTy> struct IntersectionTraits<RangeSet, TailTy...> {
1114   // Found RangeSet, no need to check any further
1115   using Type = RangeSet;
1116 };
1117 
1118 template <> struct IntersectionTraits<> {
1119   // We ran out of types, and we didn't find any RangeSet, so the result should
1120   // be optional.
1121   using Type = std::optional<RangeSet>;
1122 };
1123 
1124 template <class OptionalOrPointer, class... TailTy>
1125 struct IntersectionTraits<OptionalOrPointer, TailTy...> {
1126   // If current type is Optional or a raw pointer, we should keep looking.
1127   using Type = typename IntersectionTraits<TailTy...>::Type;
1128 };
1129 
1130 template <class EndTy>
intersect(RangeSet::Factory & F,EndTy End)1131 [[nodiscard]] inline EndTy intersect(RangeSet::Factory &F, EndTy End) {
1132   // If the list contains only RangeSet or std::optional<RangeSet>, simply
1133   // return that range set.
1134   return End;
1135 }
1136 
1137 [[nodiscard]] LLVM_ATTRIBUTE_UNUSED inline std::optional<RangeSet>
intersect(RangeSet::Factory & F,const RangeSet * End)1138 intersect(RangeSet::Factory &F, const RangeSet *End) {
1139   // This is an extraneous conversion from a raw pointer into
1140   // std::optional<RangeSet>
1141   if (End) {
1142     return *End;
1143   }
1144   return std::nullopt;
1145 }
1146 
1147 template <class... RestTy>
intersect(RangeSet::Factory & F,RangeSet Head,RangeSet Second,RestTy...Tail)1148 [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1149                                         RangeSet Second, RestTy... Tail) {
1150   // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version
1151   // of the function and can be sure that the result is RangeSet.
1152   return intersect(F, F.intersect(Head, Second), Tail...);
1153 }
1154 
1155 template <class SecondTy, class... RestTy>
intersect(RangeSet::Factory & F,RangeSet Head,SecondTy Second,RestTy...Tail)1156 [[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1157                                         SecondTy Second, RestTy... Tail) {
1158   if (Second) {
1159     // Here we call the <RangeSet,RangeSet,...> version of the function...
1160     return intersect(F, Head, *Second, Tail...);
1161   }
1162   // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which
1163   // means that the result is definitely RangeSet.
1164   return intersect(F, Head, Tail...);
1165 }
1166 
1167 /// Main generic intersect function.
1168 /// It intersects all of the given range sets.  If some of the given arguments
1169 /// don't hold a range set (nullptr or std::nullopt), the function will skip
1170 /// them.
1171 ///
1172 /// Available representations for the arguments are:
1173 ///   * RangeSet
1174 ///   * std::optional<RangeSet>
1175 ///   * RangeSet *
1176 /// Pointer to a RangeSet is automatically assumed to be nullable and will get
1177 /// checked as well as the optional version.  If this behaviour is undesired,
1178 /// please dereference the pointer in the call.
1179 ///
1180 /// Return type depends on the arguments' types.  If we can be sure in compile
1181 /// time that there will be a range set as a result, the returning type is
1182 /// simply RangeSet, in other cases we have to back off to
1183 /// std::optional<RangeSet>.
1184 ///
1185 /// Please, prefer optional range sets to raw pointers.  If the last argument is
1186 /// a raw pointer and all previous arguments are std::nullopt, it will cost one
1187 /// additional check to convert RangeSet * into std::optional<RangeSet>.
1188 template <class HeadTy, class SecondTy, class... RestTy>
1189 [[nodiscard]] inline
1190     typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type
intersect(RangeSet::Factory & F,HeadTy Head,SecondTy Second,RestTy...Tail)1191     intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second,
1192               RestTy... Tail) {
1193   if (Head) {
1194     return intersect(F, *Head, Second, Tail...);
1195   }
1196   return intersect(F, Second, Tail...);
1197 }
1198 
1199 //===----------------------------------------------------------------------===//
1200 //                           Symbolic reasoning logic
1201 //===----------------------------------------------------------------------===//
1202 
1203 /// A little component aggregating all of the reasoning we have about
1204 /// the ranges of symbolic expressions.
1205 ///
1206 /// Even when we don't know the exact values of the operands, we still
1207 /// can get a pretty good estimate of the result's range.
1208 class SymbolicRangeInferrer
1209     : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> {
1210 public:
1211   template <class SourceType>
inferRange(RangeSet::Factory & F,ProgramStateRef State,SourceType Origin)1212   static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State,
1213                              SourceType Origin) {
1214     SymbolicRangeInferrer Inferrer(F, State);
1215     return Inferrer.infer(Origin);
1216   }
1217 
VisitSymExpr(SymbolRef Sym)1218   RangeSet VisitSymExpr(SymbolRef Sym) {
1219     if (std::optional<RangeSet> RS = getRangeForNegatedSym(Sym))
1220       return *RS;
1221     // If we've reached this line, the actual type of the symbolic
1222     // expression is not supported for advanced inference.
1223     // In this case, we simply backoff to the default "let's simply
1224     // infer the range from the expression's type".
1225     return infer(Sym->getType());
1226   }
1227 
VisitUnarySymExpr(const UnarySymExpr * USE)1228   RangeSet VisitUnarySymExpr(const UnarySymExpr *USE) {
1229     if (std::optional<RangeSet> RS = getRangeForNegatedUnarySym(USE))
1230       return *RS;
1231     return infer(USE->getType());
1232   }
1233 
VisitSymIntExpr(const SymIntExpr * Sym)1234   RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
1235     return VisitBinaryOperator(Sym);
1236   }
1237 
VisitIntSymExpr(const IntSymExpr * Sym)1238   RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
1239     return VisitBinaryOperator(Sym);
1240   }
1241 
VisitSymSymExpr(const SymSymExpr * SSE)1242   RangeSet VisitSymSymExpr(const SymSymExpr *SSE) {
1243     return intersect(
1244         RangeFactory,
1245         // If Sym is a difference of symbols A - B, then maybe we have range
1246         // set stored for B - A.
1247         //
1248         // If we have range set stored for both A - B and B - A then
1249         // calculate the effective range set by intersecting the range set
1250         // for A - B and the negated range set of B - A.
1251         getRangeForNegatedSymSym(SSE),
1252         // If Sym is a comparison expression (except <=>),
1253         // find any other comparisons with the same operands.
1254         // See function description.
1255         getRangeForComparisonSymbol(SSE),
1256         // If Sym is (dis)equality, we might have some information
1257         // on that in our equality classes data structure.
1258         getRangeForEqualities(SSE),
1259         // And we should always check what we can get from the operands.
1260         VisitBinaryOperator(SSE));
1261   }
1262 
1263 private:
/// Bind the inferrer to a range factory (and its value factory) and to the
/// state the ranges are inferred in.
SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
    : ValueFactory(F.getValueFactory()),
      RangeFactory(F),
      State(S) {
}
1266 
1267   /// Infer range information from the given integer constant.
1268   ///
1269   /// It's not a real "inference", but is here for operating with
1270   /// sub-expressions in a more polymorphic manner.
inferAs(const llvm::APSInt & Val,QualType)1271   RangeSet inferAs(const llvm::APSInt &Val, QualType) {
1272     return {RangeFactory, Val};
1273   }
1274 
1275   /// Infer range information from symbol in the context of the given type.
inferAs(SymbolRef Sym,QualType DestType)1276   RangeSet inferAs(SymbolRef Sym, QualType DestType) {
1277     QualType ActualType = Sym->getType();
1278     // Check that we can reason about the symbol at all.
1279     if (ActualType->isIntegralOrEnumerationType() ||
1280         Loc::isLocType(ActualType)) {
1281       return infer(Sym);
1282     }
1283     // Otherwise, let's simply infer from the destination type.
1284     // We couldn't figure out nothing else about that expression.
1285     return infer(DestType);
1286   }
1287 
infer(SymbolRef Sym)1288   RangeSet infer(SymbolRef Sym) {
1289     return intersect(RangeFactory,
1290                      // Of course, we should take the constraint directly
1291                      // associated with this symbol into consideration.
1292                      getConstraint(State, Sym),
1293                      // Apart from the Sym itself, we can infer quite a lot if
1294                      // we look into subexpressions of Sym.
1295                      Visit(Sym));
1296   }
1297 
infer(EquivalenceClass Class)1298   RangeSet infer(EquivalenceClass Class) {
1299     if (const RangeSet *AssociatedConstraint = getConstraint(State, Class))
1300       return *AssociatedConstraint;
1301 
1302     return infer(Class.getType());
1303   }
1304 
1305   /// Infer range information solely from the type.
infer(QualType T)1306   RangeSet infer(QualType T) {
1307     // Lazily generate a new RangeSet representing all possible values for the
1308     // given symbol type.
1309     RangeSet Result(RangeFactory, ValueFactory.getMinValue(T),
1310                     ValueFactory.getMaxValue(T));
1311 
1312     // References are known to be non-zero.
1313     if (T->isReferenceType())
1314       return assumeNonZero(Result, T);
1315 
1316     return Result;
1317   }
1318 
1319   template <class BinarySymExprTy>
VisitBinaryOperator(const BinarySymExprTy * Sym)1320   RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) {
1321     // TODO #1: VisitBinaryOperator implementation might not make a good
1322     // use of the inferred ranges.  In this case, we might be calculating
1323     // everything for nothing.  This being said, we should introduce some
1324     // sort of laziness mechanism here.
1325     //
1326     // TODO #2: We didn't go into the nested expressions before, so it
1327     // might cause us spending much more time doing the inference.
1328     // This can be a problem for deeply nested expressions that are
1329     // involved in conditions and get tested continuously.  We definitely
1330     // need to address this issue and introduce some sort of caching
1331     // in here.
1332     QualType ResultType = Sym->getType();
1333     return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType),
1334                                Sym->getOpcode(),
1335                                inferAs(Sym->getRHS(), ResultType), ResultType);
1336   }
1337 
1338   RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
1339                                RangeSet RHS, QualType T);
1340 
1341   //===----------------------------------------------------------------------===//
1342   //                         Ranges and operators
1343   //===----------------------------------------------------------------------===//
1344 
1345   /// Return a rough approximation of the given range set.
1346   ///
1347   /// For the range set:
1348   ///   { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
1349   /// it will return the range [x_0, y_N].
fillGaps(RangeSet Origin)1350   static Range fillGaps(RangeSet Origin) {
1351     assert(!Origin.isEmpty());
1352     return {Origin.getMinValue(), Origin.getMaxValue()};
1353   }
1354 
1355   /// Try to convert given range into the given type.
1356   ///
1357   /// It will return std::nullopt only when the trivial conversion is possible.
convert(const Range & Origin,APSIntType To)1358   std::optional<Range> convert(const Range &Origin, APSIntType To) {
1359     if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within ||
1360         To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) {
1361       return std::nullopt;
1362     }
1363     return Range(ValueFactory.Convert(To, Origin.From()),
1364                  ValueFactory.Convert(To, Origin.To()));
1365   }
1366 
1367   template <BinaryOperator::Opcode Op>
VisitBinaryOperator(RangeSet LHS,RangeSet RHS,QualType T)1368   RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) {
1369     assert(!LHS.isEmpty() && !RHS.isEmpty());
1370 
1371     Range CoarseLHS = fillGaps(LHS);
1372     Range CoarseRHS = fillGaps(RHS);
1373 
1374     APSIntType ResultType = ValueFactory.getAPSIntType(T);
1375 
1376     // We need to convert ranges to the resulting type, so we can compare values
1377     // and combine them in a meaningful (in terms of the given operation) way.
1378     auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
1379     auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType);
1380 
1381     // It is hard to reason about ranges when conversion changes
1382     // borders of the ranges.
1383     if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
1384       return infer(T);
1385     }
1386 
1387     return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T);
1388   }
1389 
1390   template <BinaryOperator::Opcode Op>
VisitBinaryOperator(Range LHS,Range RHS,QualType T)1391   RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) {
1392     return infer(T);
1393   }
1394 
1395   /// Return a symmetrical range for the given range and type.
1396   ///
1397   /// If T is signed, return the smallest range [-x..x] that covers the original
1398   /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't
1399   /// exist due to original range covering min(T)).
1400   ///
1401   /// If T is unsigned, return the smallest range [0..x] that covers the
1402   /// original range.
getSymmetricalRange(Range Origin,QualType T)1403   Range getSymmetricalRange(Range Origin, QualType T) {
1404     APSIntType RangeType = ValueFactory.getAPSIntType(T);
1405 
1406     if (RangeType.isUnsigned()) {
1407       return Range(ValueFactory.getMinValue(RangeType), Origin.To());
1408     }
1409 
1410     if (Origin.From().isMinSignedValue()) {
1411       // If mini is a minimal signed value, absolute value of it is greater
1412       // than the maximal signed value.  In order to avoid these
1413       // complications, we simply return the whole range.
1414       return {ValueFactory.getMinValue(RangeType),
1415               ValueFactory.getMaxValue(RangeType)};
1416     }
1417 
1418     // At this point, we are sure that the type is signed and we can safely
1419     // use unary - operator.
1420     //
1421     // While calculating absolute maximum, we can use the following formula
1422     // because of these reasons:
1423     //   * If From >= 0 then To >= From and To >= -From.
1424     //     AbsMax == To == max(To, -From)
1425     //   * If To <= 0 then -From >= -To and -From >= From.
1426     //     AbsMax == -From == max(-From, To)
1427     //   * Otherwise, From <= 0, To >= 0, and
1428     //     AbsMax == max(abs(From), abs(To))
1429     llvm::APSInt AbsMax = std::max(-Origin.From(), Origin.To());
1430 
1431     // Intersection is guaranteed to be non-empty.
1432     return {ValueFactory.getValue(-AbsMax), ValueFactory.getValue(AbsMax)};
1433   }
1434 
1435   /// Return a range set subtracting zero from \p Domain.
assumeNonZero(RangeSet Domain,QualType T)1436   RangeSet assumeNonZero(RangeSet Domain, QualType T) {
1437     APSIntType IntType = ValueFactory.getAPSIntType(T);
1438     return RangeFactory.deletePoint(Domain, IntType.getZeroValue());
1439   }
1440 
1441   template <typename ProduceNegatedSymFunc>
getRangeForNegatedExpr(ProduceNegatedSymFunc F,QualType T)1442   std::optional<RangeSet> getRangeForNegatedExpr(ProduceNegatedSymFunc F,
1443                                                  QualType T) {
1444     // Do not negate if the type cannot be meaningfully negated.
1445     if (!T->isUnsignedIntegerOrEnumerationType() &&
1446         !T->isSignedIntegerOrEnumerationType())
1447       return std::nullopt;
1448 
1449     if (SymbolRef NegatedSym = F())
1450       if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym))
1451         return RangeFactory.negate(*NegatedRange);
1452 
1453     return std::nullopt;
1454   }
1455 
getRangeForNegatedUnarySym(const UnarySymExpr * USE)1456   std::optional<RangeSet> getRangeForNegatedUnarySym(const UnarySymExpr *USE) {
1457     // Just get the operand when we negate a symbol that is already negated.
1458     // -(-a) == a
1459     return getRangeForNegatedExpr(
1460         [USE]() -> SymbolRef {
1461           if (USE->getOpcode() == UO_Minus)
1462             return USE->getOperand();
1463           return nullptr;
1464         },
1465         USE->getType());
1466   }
1467 
getRangeForNegatedSymSym(const SymSymExpr * SSE)1468   std::optional<RangeSet> getRangeForNegatedSymSym(const SymSymExpr *SSE) {
1469     return getRangeForNegatedExpr(
1470         [SSE, State = this->State]() -> SymbolRef {
1471           if (SSE->getOpcode() == BO_Sub)
1472             return State->getSymbolManager().getSymSymExpr(
1473                 SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType());
1474           return nullptr;
1475         },
1476         SSE->getType());
1477   }
1478 
getRangeForNegatedSym(SymbolRef Sym)1479   std::optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) {
1480     return getRangeForNegatedExpr(
1481         [Sym, State = this->State]() {
1482           return State->getSymbolManager().getUnarySymExpr(Sym, UO_Minus,
1483                                                            Sym->getType());
1484         },
1485         Sym->getType());
1486   }
1487 
1488   // Returns ranges only for binary comparison operators (except <=>)
1489   // when left and right operands are symbolic values.
1490   // Finds any other comparisons with the same operands.
1491   // Then do logical calculations and refuse impossible branches.
1492   // E.g. (x < y) and (x > y) at the same time are impossible.
1493   // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only.
1494   // E.g. (x == y) and (y == x) are just reversed but the same.
1495   // It covers all possible combinations (see CmpOpTable description).
1496   // Note that `x` and `y` can also stand for subexpressions,
1497   // not only for actual symbols.
getRangeForComparisonSymbol(const SymSymExpr * SSE)1498   std::optional<RangeSet> getRangeForComparisonSymbol(const SymSymExpr *SSE) {
1499     const BinaryOperatorKind CurrentOP = SSE->getOpcode();
1500 
1501     // We currently do not support <=> (C++20).
1502     if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp))
1503       return std::nullopt;
1504 
1505     static const OperatorRelationsTable CmpOpTable{};
1506 
1507     const SymExpr *LHS = SSE->getLHS();
1508     const SymExpr *RHS = SSE->getRHS();
1509     QualType T = SSE->getType();
1510 
1511     SymbolManager &SymMgr = State->getSymbolManager();
1512 
1513     // We use this variable to store the last queried operator (`QueriedOP`)
1514     // for which the `getCmpOpState` returned with `Unknown`. If there are two
1515     // different OPs that returned `Unknown` then we have to query the special
1516     // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)`
1517     // never returns `Unknown`, so `CurrentOP` is a good initial value.
1518     BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP;
1519 
1520     // Loop goes through all of the columns exept the last one ('UnknownX2').
1521     // We treat `UnknownX2` column separately at the end of the loop body.
1522     for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) {
1523 
1524       // Let's find an expression e.g. (x < y).
1525       BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i);
1526       const SymSymExpr *SymSym = SymMgr.getSymSymExpr(LHS, QueriedOP, RHS, T);
1527       const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);
1528 
1529       // If ranges were not previously found,
1530       // try to find a reversed expression (y > x).
1531       if (!QueriedRangeSet) {
1532         const BinaryOperatorKind ROP =
1533             BinaryOperator::reverseComparisonOp(QueriedOP);
1534         SymSym = SymMgr.getSymSymExpr(RHS, ROP, LHS, T);
1535         QueriedRangeSet = getConstraint(State, SymSym);
1536       }
1537 
1538       if (!QueriedRangeSet || QueriedRangeSet->isEmpty())
1539         continue;
1540 
1541       const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue();
1542       const bool isInFalseBranch =
1543           ConcreteValue ? (*ConcreteValue == 0) : false;
1544 
1545       // If it is a false branch, we shall be guided by opposite operator,
1546       // because the table is made assuming we are in the true branch.
1547       // E.g. when (x <= y) is false, then (x > y) is true.
1548       if (isInFalseBranch)
1549         QueriedOP = BinaryOperator::negateComparisonOp(QueriedOP);
1550 
1551       OperatorRelationsTable::TriStateKind BranchState =
1552           CmpOpTable.getCmpOpState(CurrentOP, QueriedOP);
1553 
1554       if (BranchState == OperatorRelationsTable::Unknown) {
1555         if (LastQueriedOpToUnknown != CurrentOP &&
1556             LastQueriedOpToUnknown != QueriedOP) {
1557           // If we got the Unknown state for both different operators.
1558           // if (x <= y)    // assume true
1559           //   if (x != y)  // assume true
1560           //     if (x < y) // would be also true
1561           // Get a state from `UnknownX2` column.
1562           BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP);
1563         } else {
1564           LastQueriedOpToUnknown = QueriedOP;
1565           continue;
1566         }
1567       }
1568 
1569       return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T)
1570                                                            : getFalseRange(T);
1571     }
1572 
1573     return std::nullopt;
1574   }
1575 
getRangeForEqualities(const SymSymExpr * Sym)1576   std::optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) {
1577     std::optional<bool> Equality = meansEquality(Sym);
1578 
1579     if (!Equality)
1580       return std::nullopt;
1581 
1582     if (std::optional<bool> AreEqual =
1583             EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) {
1584       // Here we cover two cases at once:
1585       //   * if Sym is equality and its operands are known to be equal -> true
1586       //   * if Sym is disequality and its operands are disequal -> true
1587       if (*AreEqual == *Equality) {
1588         return getTrueRange(Sym->getType());
1589       }
1590       // Opposite combinations result in false.
1591       return getFalseRange(Sym->getType());
1592     }
1593 
1594     return std::nullopt;
1595   }
1596 
getTrueRange(QualType T)1597   RangeSet getTrueRange(QualType T) {
1598     RangeSet TypeRange = infer(T);
1599     return assumeNonZero(TypeRange, T);
1600   }
1601 
getFalseRange(QualType T)1602   RangeSet getFalseRange(QualType T) {
1603     const llvm::APSInt &Zero = ValueFactory.getValue(0, T);
1604     return RangeSet(RangeFactory, Zero);
1605   }
1606 
1607   BasicValueFactory &ValueFactory;
1608   RangeSet::Factory &RangeFactory;
1609   ProgramStateRef State;
1610 };
1611 
1612 //===----------------------------------------------------------------------===//
1613 //               Range-based reasoning about symbolic operations
1614 //===----------------------------------------------------------------------===//
1615 
1616 template <>
VisitBinaryOperator(RangeSet LHS,RangeSet RHS,QualType T)1617 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_NE>(RangeSet LHS,
1618                                                            RangeSet RHS,
1619                                                            QualType T) {
1620   assert(!LHS.isEmpty() && !RHS.isEmpty());
1621 
1622   if (LHS.getAPSIntType() == RHS.getAPSIntType()) {
1623     if (intersect(RangeFactory, LHS, RHS).isEmpty())
1624       return getTrueRange(T);
1625 
1626   } else {
1627     // We can only lose information if we are casting smaller signed type to
1628     // bigger unsigned type. For e.g.,
1629     //    LHS (unsigned short): [2, USHRT_MAX]
1630     //    RHS   (signed short): [SHRT_MIN, 0]
1631     //
1632     // Casting RHS to LHS type will leave us with overlapping values
1633     //    CastedRHS : [0, 0] U [SHRT_MAX + 1, USHRT_MAX]
1634     //
1635     // We can avoid this by checking if signed type's maximum value is lesser
1636     // than unsigned type's minimum value.
1637 
1638     // If both have different signs then only we can get more information.
1639     if (LHS.isUnsigned() != RHS.isUnsigned()) {
1640       if (LHS.isUnsigned() && (LHS.getBitWidth() >= RHS.getBitWidth())) {
1641         if (RHS.getMaxValue().isNegative() ||
1642             LHS.getAPSIntType().convert(RHS.getMaxValue()) < LHS.getMinValue())
1643           return getTrueRange(T);
1644 
1645       } else if (RHS.isUnsigned() && (LHS.getBitWidth() <= RHS.getBitWidth())) {
1646         if (LHS.getMaxValue().isNegative() ||
1647             RHS.getAPSIntType().convert(LHS.getMaxValue()) < RHS.getMinValue())
1648           return getTrueRange(T);
1649       }
1650     }
1651 
1652     // Both RangeSets should be casted to bigger unsigned type.
1653     APSIntType CastingType(std::max(LHS.getBitWidth(), RHS.getBitWidth()),
1654                            LHS.isUnsigned() || RHS.isUnsigned());
1655 
1656     RangeSet CastedLHS = RangeFactory.castTo(LHS, CastingType);
1657     RangeSet CastedRHS = RangeFactory.castTo(RHS, CastingType);
1658 
1659     if (intersect(RangeFactory, CastedLHS, CastedRHS).isEmpty())
1660       return getTrueRange(T);
1661   }
1662 
1663   // In all other cases, the resulting range cannot be deduced.
1664   return infer(T);
1665 }
1666 
1667 template <>
VisitBinaryOperator(Range LHS,Range RHS,QualType T)1668 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS,
1669                                                            QualType T) {
1670   APSIntType ResultType = ValueFactory.getAPSIntType(T);
1671   llvm::APSInt Zero = ResultType.getZeroValue();
1672 
1673   bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1674   bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1675 
1676   bool IsLHSNegative = LHS.To() < Zero;
1677   bool IsRHSNegative = RHS.To() < Zero;
1678 
1679   // Check if both ranges have the same sign.
1680   if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1681       (IsLHSNegative && IsRHSNegative)) {
1682     // The result is definitely greater or equal than any of the operands.
1683     const llvm::APSInt &Min = std::max(LHS.From(), RHS.From());
1684 
1685     // We estimate maximal value for positives as the maximal value for the
1686     // given type.  For negatives, we estimate it with -1 (e.g. 0x11111111).
1687     //
1688     // TODO: We basically, limit the resulting range from below, but don't do
1689     //       anything with the upper bound.
1690     //
1691     //       For positive operands, it can be done as follows: for the upper
1692     //       bound of LHS and RHS we calculate the most significant bit set.
1693     //       Let's call it the N-th bit.  Then we can estimate the maximal
1694     //       number to be 2^(N+1)-1, i.e. the number with all the bits up to
1695     //       the N-th bit set.
1696     const llvm::APSInt &Max = IsLHSNegative
1697                                   ? ValueFactory.getValue(--Zero)
1698                                   : ValueFactory.getMaxValue(ResultType);
1699 
1700     return {RangeFactory, ValueFactory.getValue(Min), Max};
1701   }
1702 
1703   // Otherwise, let's check if at least one of the operands is negative.
1704   if (IsLHSNegative || IsRHSNegative) {
1705     // This means that the result is definitely negative as well.
1706     return {RangeFactory, ValueFactory.getMinValue(ResultType),
1707             ValueFactory.getValue(--Zero)};
1708   }
1709 
1710   RangeSet DefaultRange = infer(T);
1711 
1712   // It is pretty hard to reason about operands with different signs
1713   // (and especially with possibly different signs).  We simply check if it
1714   // can be zero.  In order to conclude that the result could not be zero,
1715   // at least one of the operands should be definitely not zero itself.
1716   if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) {
1717     return assumeNonZero(DefaultRange, T);
1718   }
1719 
1720   // Nothing much else to do here.
1721   return DefaultRange;
1722 }
1723 
1724 template <>
VisitBinaryOperator(Range LHS,Range RHS,QualType T)1725 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS,
1726                                                             Range RHS,
1727                                                             QualType T) {
1728   APSIntType ResultType = ValueFactory.getAPSIntType(T);
1729   llvm::APSInt Zero = ResultType.getZeroValue();
1730 
1731   bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1732   bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1733 
1734   bool IsLHSNegative = LHS.To() < Zero;
1735   bool IsRHSNegative = RHS.To() < Zero;
1736 
1737   // Check if both ranges have the same sign.
1738   if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1739       (IsLHSNegative && IsRHSNegative)) {
1740     // The result is definitely less or equal than any of the operands.
1741     const llvm::APSInt &Max = std::min(LHS.To(), RHS.To());
1742 
1743     // We conservatively estimate lower bound to be the smallest positive
1744     // or negative value corresponding to the sign of the operands.
1745     const llvm::APSInt &Min = IsLHSNegative
1746                                   ? ValueFactory.getMinValue(ResultType)
1747                                   : ValueFactory.getValue(Zero);
1748 
1749     return {RangeFactory, Min, Max};
1750   }
1751 
1752   // Otherwise, let's check if at least one of the operands is positive.
1753   if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) {
1754     // This makes result definitely positive.
1755     //
1756     // We can also reason about a maximal value by finding the maximal
1757     // value of the positive operand.
1758     const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To();
1759 
1760     // The minimal value on the other hand is much harder to reason about.
1761     // The only thing we know for sure is that the result is positive.
1762     return {RangeFactory, ValueFactory.getValue(Zero),
1763             ValueFactory.getValue(Max)};
1764   }
1765 
1766   // Nothing much else to do here.
1767   return infer(T);
1768 }
1769 
1770 template <>
VisitBinaryOperator(Range LHS,Range RHS,QualType T)1771 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
1772                                                             Range RHS,
1773                                                             QualType T) {
1774   llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();
1775 
1776   Range ConservativeRange = getSymmetricalRange(RHS, T);
1777 
1778   llvm::APSInt Max = ConservativeRange.To();
1779   llvm::APSInt Min = ConservativeRange.From();
1780 
1781   if (Max == Zero) {
1782     // It's an undefined behaviour to divide by 0 and it seems like we know
1783     // for sure that RHS is 0.  Let's say that the resulting range is
1784     // simply infeasible for that matter.
1785     return RangeFactory.getEmptySet();
1786   }
1787 
1788   // At this point, our conservative range is closed.  The result, however,
1789   // couldn't be greater than the RHS' maximal absolute value.  Because of
1790   // this reason, we turn the range into open (or half-open in case of
1791   // unsigned integers).
1792   //
1793   // While we operate on integer values, an open interval (a, b) can be easily
1794   // represented by the closed interval [a + 1, b - 1].  And this is exactly
1795   // what we do next.
1796   //
1797   // If we are dealing with unsigned case, we shouldn't move the lower bound.
1798   if (Min.isSigned()) {
1799     ++Min;
1800   }
1801   --Max;
1802 
1803   bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1804   bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1805 
1806   // Remainder operator results with negative operands is implementation
1807   // defined.  Positive cases are much easier to reason about though.
1808   if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) {
1809     // If maximal value of LHS is less than maximal value of RHS,
1810     // the result won't get greater than LHS.To().
1811     Max = std::min(LHS.To(), Max);
1812     // We want to check if it is a situation similar to the following:
1813     //
1814     // <------------|---[  LHS  ]--------[  RHS  ]----->
1815     //  -INF        0                              +INF
1816     //
1817     // In this situation, we can conclude that (LHS / RHS) == 0 and
1818     // (LHS % RHS) == LHS.
1819     Min = LHS.To() < RHS.From() ? LHS.From() : Zero;
1820   }
1821 
1822   // Nevertheless, the symmetrical range for RHS is a conservative estimate
1823   // for any sign of either LHS, or RHS.
1824   return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
1825 }
1826 
VisitBinaryOperator(RangeSet LHS,BinaryOperator::Opcode Op,RangeSet RHS,QualType T)1827 RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS,
1828                                                     BinaryOperator::Opcode Op,
1829                                                     RangeSet RHS, QualType T) {
1830   // We should propagate information about unfeasbility of one of the
1831   // operands to the resulting range.
1832   if (LHS.isEmpty() || RHS.isEmpty()) {
1833     return RangeFactory.getEmptySet();
1834   }
1835 
1836   switch (Op) {
1837   case BO_NE:
1838     return VisitBinaryOperator<BO_NE>(LHS, RHS, T);
1839   case BO_Or:
1840     return VisitBinaryOperator<BO_Or>(LHS, RHS, T);
1841   case BO_And:
1842     return VisitBinaryOperator<BO_And>(LHS, RHS, T);
1843   case BO_Rem:
1844     return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
1845   default:
1846     return infer(T);
1847   }
1848 }
1849 
1850 //===----------------------------------------------------------------------===//
1851 //                  Constraint manager implementation details
1852 //===----------------------------------------------------------------------===//
1853 
1854 class RangeConstraintManager : public RangedConstraintManager {
1855 public:
RangeConstraintManager(ExprEngine * EE,SValBuilder & SVB)1856   RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
1857       : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
1858 
1859   //===------------------------------------------------------------------===//
1860   // Implementation for interface from ConstraintManager.
1861   //===------------------------------------------------------------------===//
1862 
haveEqualConstraints(ProgramStateRef S1,ProgramStateRef S2) const1863   bool haveEqualConstraints(ProgramStateRef S1,
1864                             ProgramStateRef S2) const override {
1865     // NOTE: ClassMembers are as simple as back pointers for ClassMap,
1866     //       so comparing constraint ranges and class maps should be
1867     //       sufficient.
1868     return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
1869            S1->get<ClassMap>() == S2->get<ClassMap>();
1870   }
1871 
1872   bool canReasonAbout(SVal X) const override;
1873 
1874   ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
1875 
1876   const llvm::APSInt *getSymVal(ProgramStateRef State,
1877                                 SymbolRef Sym) const override;
1878 
1879   const llvm::APSInt *getSymMinVal(ProgramStateRef State,
1880                                    SymbolRef Sym) const override;
1881 
1882   const llvm::APSInt *getSymMaxVal(ProgramStateRef State,
1883                                    SymbolRef Sym) const override;
1884 
1885   ProgramStateRef removeDeadBindings(ProgramStateRef State,
1886                                      SymbolReaper &SymReaper) override;
1887 
1888   void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
1889                  unsigned int Space = 0, bool IsDot = false) const override;
1890   void printValue(raw_ostream &Out, ProgramStateRef State,
1891                   SymbolRef Sym) override;
1892   void printConstraints(raw_ostream &Out, ProgramStateRef State,
1893                         const char *NL = "\n", unsigned int Space = 0,
1894                         bool IsDot = false) const;
1895   void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
1896                                const char *NL = "\n", unsigned int Space = 0,
1897                                bool IsDot = false) const;
1898   void printDisequalities(raw_ostream &Out, ProgramStateRef State,
1899                           const char *NL = "\n", unsigned int Space = 0,
1900                           bool IsDot = false) const;
1901 
1902   //===------------------------------------------------------------------===//
1903   // Implementation for interface from RangedConstraintManager.
1904   //===------------------------------------------------------------------===//
1905 
1906   ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
1907                               const llvm::APSInt &V,
1908                               const llvm::APSInt &Adjustment) override;
1909 
1910   ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
1911                               const llvm::APSInt &V,
1912                               const llvm::APSInt &Adjustment) override;
1913 
1914   ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
1915                               const llvm::APSInt &V,
1916                               const llvm::APSInt &Adjustment) override;
1917 
1918   ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
1919                               const llvm::APSInt &V,
1920                               const llvm::APSInt &Adjustment) override;
1921 
1922   ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
1923                               const llvm::APSInt &V,
1924                               const llvm::APSInt &Adjustment) override;
1925 
1926   ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
1927                               const llvm::APSInt &V,
1928                               const llvm::APSInt &Adjustment) override;
1929 
1930   ProgramStateRef assumeSymWithinInclusiveRange(
1931       ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1932       const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1933 
1934   ProgramStateRef assumeSymOutsideInclusiveRange(
1935       ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1936       const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1937 
1938 private:
1939   RangeSet::Factory F;
1940 
1941   RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
1942   RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
1943   ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
1944                            RangeSet Range);
1945   ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
1946                            RangeSet Range);
1947 
1948   RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
1949                          const llvm::APSInt &Int,
1950                          const llvm::APSInt &Adjustment);
1951   RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
1952                          const llvm::APSInt &Int,
1953                          const llvm::APSInt &Adjustment);
1954   RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
1955                          const llvm::APSInt &Int,
1956                          const llvm::APSInt &Adjustment);
1957   RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
1958                          const llvm::APSInt &Int,
1959                          const llvm::APSInt &Adjustment);
1960   RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
1961                          const llvm::APSInt &Int,
1962                          const llvm::APSInt &Adjustment);
1963 };
1964 
1965 //===----------------------------------------------------------------------===//
1966 //                         Constraint assignment logic
1967 //===----------------------------------------------------------------------===//
1968 
1969 /// ConstraintAssignorBase is a small utility class that unifies visitor
1970 /// for ranges with a visitor for constraints (rangeset/range/constant).
1971 ///
1972 /// It is designed to have one derived class, but generally it can have more.
1973 /// Derived class can control which types we handle by defining methods of the
1974 /// following form:
1975 ///
1976 ///   bool assign${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
1977 ///                                       CONSTRAINT Constraint);
1978 ///
1979 /// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
1980 ///       CONSTRAINT is the type of constraint (RangeSet/Range/Const)
1981 ///       return value signifies whether we should try other assign methods
1982 ///          (i.e. false would mean to stop right after calling this method)
1983 template <class Derived> class ConstraintAssignorBase {
1984 public:
1985   using Const = const llvm::APSInt &;
1986 
  // DISPATCH(CLASS) expands to a `return` of assign<CLASS>Impl applied to Sym
  // downcast to CLASS.  It expects `Sym` and `Constraint` to be in scope.
1987 #define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint)
1988 
  // ASSIGN(CLASS, TO, SYM, CONSTRAINT) calls assign<CLASS>To<TO> through the
  // Derived type and makes the enclosing function return false as soon as that
  // call returns false.
1989 #define ASSIGN(CLASS, TO, SYM, CONSTRAINT)                                     \
1990   if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT))   \
1991   return false
1992 
  // Entry point: runs the dispatch for Sym and drops its boolean result.
assign(SymbolRef Sym,RangeSet Constraint)1993   void assign(SymbolRef Sym, RangeSet Constraint) {
1994     assignImpl(Sym, Constraint);
1995   }
1996 
  // Picks the assign<Id>Impl matching Sym->getKind(): every SYMBOL(Id, Parent)
  // entry pulled in from Symbols.def becomes one `case` of the switch.
assignImpl(SymbolRef Sym,RangeSet Constraint)1997   bool assignImpl(SymbolRef Sym, RangeSet Constraint) {
1998     switch (Sym->getKind()) {
1999 #define SYMBOL(Id, Parent)                                                     \
2000   case SymExpr::Id##Kind:                                                      \
2001     DISPATCH(Id);
2002 #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
2003     }
2004     llvm_unreachable("Unknown SymExpr kind!");
2005   }
2006 
  // DEFAULT_ASSIGN(Id) defines the three methods assign<Id>ToRangeSet,
  // assign<Id>ToRange and assign<Id>ToConst; each does nothing and returns
  // true.
2007 #define DEFAULT_ASSIGN(Id)                                                     \
2008   bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) {          \
2009     return true;                                                               \
2010   }                                                                            \
2011   bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \
2012   bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; }
2013 
2014   // When we dispatch for constraint types, we first try to check
2015   // if the new constraint is the constant and try the corresponding
2016   // assignor methods.  If it didn't interrupt, we can proceed to the
2017   // range, and finally to the range set.
2018 #define CONSTRAINT_DISPATCH(Id)                                                \
2019   if (const llvm::APSInt *Const = Constraint.getConcreteValue()) {             \
2020     ASSIGN(Id, Const, Sym, *Const);                                            \
2021   }                                                                            \
2022   if (Constraint.size() == 1) {                                                \
2023     ASSIGN(Id, Range, Sym, *Constraint.begin());                               \
2024   }                                                                            \
2025   ASSIGN(Id, RangeSet, Sym, Constraint)
2026 
2027   // Our internal assign method first tries to call assignor methods for all
2028   // constraint types that apply.  And if not interrupted, continues with its
2029   // parent class.
2030 #define SYMBOL(Id, Parent)                                                     \
2031   bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) {                  \
2032     CONSTRAINT_DISPATCH(Id);                                                   \
2033     DISPATCH(Parent);                                                          \
2034   }                                                                            \
2035   DEFAULT_ASSIGN(Id)
2036 #define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent)
2037 #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
2038 
2039   // Default implementations for the top class that doesn't have parents.
assignSymExprImpl(const SymExpr * Sym,RangeSet Constraint)2040   bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) {
2041     CONSTRAINT_DISPATCH(SymExpr);
2042     return true;
2043   }
2044   DEFAULT_ASSIGN(SymExpr);
2045 
  // The helper macros are only meant for this class, so they are removed
  // again.  SYMBOL and ABSTRACT_SYMBOL are not undefined here; presumably
  // Symbols.def takes care of that itself -- TODO confirm.
2046 #undef DISPATCH
2047 #undef CONSTRAINT_DISPATCH
2048 #undef DEFAULT_ASSIGN
2049 #undef ASSIGN
2050 };
2051 
2052 /// A little component aggregating all of the reasoning we have about
2053 /// assigning new constraints to symbols.
2054 ///
2055 /// The main purpose of this class is to associate constraints to symbols,
2056 /// and impose additional constraints on other symbols, when we can imply
2057 /// them.
2058 ///
2059 /// It has a nice symmetry with SymbolicRangeInferrer.  When the latter
2060 /// can provide more precise ranges by looking into the operands of the
2061 /// expression in question, ConstraintAssignor looks into the operands
2062 /// to see if we can imply more from the new constraint.
2063 class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
2064 public:
2065   template <class ClassOrSymbol>
2066   [[nodiscard]] static ProgramStateRef
assign(ProgramStateRef State,SValBuilder & Builder,RangeSet::Factory & F,ClassOrSymbol CoS,RangeSet NewConstraint)2067   assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F,
2068          ClassOrSymbol CoS, RangeSet NewConstraint) {
2069     if (!State || NewConstraint.isEmpty())
2070       return nullptr;
2071 
2072     ConstraintAssignor Assignor{State, Builder, F};
2073     return Assignor.assign(CoS, NewConstraint);
2074   }
2075 
2076   /// Handle expressions like: a % b != 0.
2077   template <typename SymT>
handleRemainderOp(const SymT * Sym,RangeSet Constraint)2078   bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) {
2079     if (Sym->getOpcode() != BO_Rem)
2080       return true;
2081     // a % b != 0 implies that a != 0.
2082     if (!Constraint.containsZero()) {
2083       SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
2084       if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
2085         State = State->assume(*NonLocSymSVal, true);
2086         if (!State)
2087           return false;
2088       }
2089     }
2090     return true;
2091   }
2092 
2093   inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
assignSymIntExprToRangeSet(const SymIntExpr * Sym,RangeSet Constraint)2094   inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
2095                                          RangeSet Constraint) {
2096     return handleRemainderOp(Sym, Constraint);
2097   }
2098   inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2099                                          RangeSet Constraint);
2100 
2101 private:
ConstraintAssignor(ProgramStateRef State,SValBuilder & Builder,RangeSet::Factory & F)2102   ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder,
2103                      RangeSet::Factory &F)
2104       : State(State), Builder(Builder), RangeFactory(F) {}
2105   using Base = ConstraintAssignorBase<ConstraintAssignor>;
2106 
2107   /// Base method for handling new constraints for symbols.
assign(SymbolRef Sym,RangeSet NewConstraint)2108   [[nodiscard]] ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) {
2109     // All constraints are actually associated with equivalence classes, and
2110     // that's what we are going to do first.
2111     State = assign(EquivalenceClass::find(State, Sym), NewConstraint);
2112     if (!State)
2113       return nullptr;
2114 
2115     // And after that we can check what other things we can get from this
2116     // constraint.
2117     Base::assign(Sym, NewConstraint);
2118     return State;
2119   }
2120 
2121   /// Base method for handling new constraints for classes.
assign(EquivalenceClass Class,RangeSet NewConstraint)2122   [[nodiscard]] ProgramStateRef assign(EquivalenceClass Class,
2123                                        RangeSet NewConstraint) {
2124     // There is a chance that we might need to update constraints for the
2125     // classes that are known to be disequal to Class.
2126     //
2127     // In order for this to be even possible, the new constraint should
2128     // be simply a constant because we can't reason about range disequalities.
2129     if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) {
2130 
2131       ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2132       ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>();
2133 
2134       // Add new constraint.
2135       Constraints = CF.add(Constraints, Class, NewConstraint);
2136 
2137       for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) {
2138         RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange(
2139             RangeFactory, State, DisequalClass);
2140 
2141         UpdatedConstraint = RangeFactory.deletePoint(UpdatedConstraint, *Point);
2142 
2143         // If we end up with at least one of the disequal classes to be
2144         // constrained with an empty range-set, the state is infeasible.
2145         if (UpdatedConstraint.isEmpty())
2146           return nullptr;
2147 
2148         Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint);
2149       }
2150       assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2151                                          "a state with infeasible constraints");
2152 
2153       return setConstraints(State, Constraints);
2154     }
2155 
2156     return setConstraint(State, Class, NewConstraint);
2157   }
2158 
trackDisequality(ProgramStateRef State,SymbolRef LHS,SymbolRef RHS)2159   ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS,
2160                                    SymbolRef RHS) {
2161     return EquivalenceClass::markDisequal(RangeFactory, State, LHS, RHS);
2162   }
2163 
trackEquality(ProgramStateRef State,SymbolRef LHS,SymbolRef RHS)2164   ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS,
2165                                 SymbolRef RHS) {
2166     return EquivalenceClass::merge(RangeFactory, State, LHS, RHS);
2167   }
2168 
interpreteAsBool(RangeSet Constraint)2169   [[nodiscard]] std::optional<bool> interpreteAsBool(RangeSet Constraint) {
2170     assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
2171 
2172     if (Constraint.getConcreteValue())
2173       return !Constraint.getConcreteValue()->isZero();
2174 
2175     if (!Constraint.containsZero())
2176       return true;
2177 
2178     return std::nullopt;
2179   }
2180 
2181   ProgramStateRef State;
2182   SValBuilder &Builder;
2183   RangeSet::Factory &RangeFactory;
2184 };
2185 
assignSymExprToConst(const SymExpr * Sym,const llvm::APSInt & Constraint)2186 bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
2187                                               const llvm::APSInt &Constraint) {
2188   llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
2189   // Iterate over all equivalence classes and try to simplify them.
2190   ClassMembersTy Members = State->get<ClassMembers>();
2191   for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
2192     EquivalenceClass Class = ClassToSymbolSet.first;
2193     State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2194     if (!State)
2195       return false;
2196     SimplifiedClasses.insert(Class);
2197   }
2198 
2199   // Trivial equivalence classes (those that have only one symbol member) are
2200   // not stored in the State. Thus, we must skim through the constraints as
2201   // well. And we try to simplify symbols in the constraints.
2202   ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2203   for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2204     EquivalenceClass Class = ClassConstraint.first;
2205     if (SimplifiedClasses.count(Class)) // Already simplified.
2206       continue;
2207     State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2208     if (!State)
2209       return false;
2210   }
2211 
2212   // We may have trivial equivalence classes in the disequality info as
2213   // well, and we need to simplify them.
2214   DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2215   for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
2216        DisequalityInfo) {
2217     EquivalenceClass Class = DisequalityEntry.first;
2218     ClassSet DisequalClasses = DisequalityEntry.second;
2219     State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2220     if (!State)
2221       return false;
2222   }
2223 
2224   return true;
2225 }
2226 
assignSymSymExprToRangeSet(const SymSymExpr * Sym,RangeSet Constraint)2227 bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2228                                                     RangeSet Constraint) {
2229   if (!handleRemainderOp(Sym, Constraint))
2230     return false;
2231 
2232   std::optional<bool> ConstraintAsBool = interpreteAsBool(Constraint);
2233 
2234   if (!ConstraintAsBool)
2235     return true;
2236 
2237   if (std::optional<bool> Equality = meansEquality(Sym)) {
2238     // Here we cover two cases:
2239     //   * if Sym is equality and the new constraint is true -> Sym's operands
2240     //     should be marked as equal
2241     //   * if Sym is disequality and the new constraint is false -> Sym's
2242     //     operands should be also marked as equal
2243     if (*Equality == *ConstraintAsBool) {
2244       State = trackEquality(State, Sym->getLHS(), Sym->getRHS());
2245     } else {
2246       // Other combinations leave as with disequal operands.
2247       State = trackDisequality(State, Sym->getLHS(), Sym->getRHS());
2248     }
2249 
2250     if (!State)
2251       return false;
2252   }
2253 
2254   return true;
2255 }
2256 
2257 } // end anonymous namespace
2258 
2259 std::unique_ptr<ConstraintManager>
CreateRangeConstraintManager(ProgramStateManager & StMgr,ExprEngine * Eng)2260 ento::CreateRangeConstraintManager(ProgramStateManager &StMgr,
2261                                    ExprEngine *Eng) {
2262   return std::make_unique<RangeConstraintManager>(Eng, StMgr.getSValBuilder());
2263 }
2264 
getConstraintMap(ProgramStateRef State)2265 ConstraintMap ento::getConstraintMap(ProgramStateRef State) {
2266   ConstraintMap::Factory &F = State->get_context<ConstraintMap>();
2267   ConstraintMap Result = F.getEmptyMap();
2268 
2269   ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2270   for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2271     EquivalenceClass Class = ClassConstraint.first;
2272     SymbolSet ClassMembers = Class.getClassMembers(State);
2273     assert(!ClassMembers.isEmpty() &&
2274            "Class must always have at least one member!");
2275 
2276     SymbolRef Representative = *ClassMembers.begin();
2277     Result = F.add(Result, Representative, ClassConstraint.second);
2278   }
2279 
2280   return Result;
2281 }
2282 
2283 //===----------------------------------------------------------------------===//
//                   EquivalenceClass implementation details
2285 //===----------------------------------------------------------------------===//
2286 
dumpToStream(ProgramStateRef State,raw_ostream & os) const2287 LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State,
2288                                                      raw_ostream &os) const {
2289   SymbolSet ClassMembers = getClassMembers(State);
2290   for (const SymbolRef &MemberSym : ClassMembers) {
2291     MemberSym->dump();
2292     os << "\n";
2293   }
2294 }
2295 
find(ProgramStateRef State,SymbolRef Sym)2296 inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State,
2297                                                SymbolRef Sym) {
2298   assert(State && "State should not be null");
2299   assert(Sym && "Symbol should not be null");
2300   // We store far from all Symbol -> Class mappings
2301   if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym))
2302     return *NontrivialClass;
2303 
2304   // This is a trivial class of Sym.
2305   return Sym;
2306 }
2307 
merge(RangeSet::Factory & F,ProgramStateRef State,SymbolRef First,SymbolRef Second)2308 inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2309                                                ProgramStateRef State,
2310                                                SymbolRef First,
2311                                                SymbolRef Second) {
2312   EquivalenceClass FirstClass = find(State, First);
2313   EquivalenceClass SecondClass = find(State, Second);
2314 
2315   return FirstClass.merge(F, State, SecondClass);
2316 }
2317 
merge(RangeSet::Factory & F,ProgramStateRef State,EquivalenceClass Other)2318 inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2319                                                ProgramStateRef State,
2320                                                EquivalenceClass Other) {
2321   // It is already the same class.
2322   if (*this == Other)
2323     return State;
2324 
2325   // FIXME: As of now, we support only equivalence classes of the same type.
2326   //        This limitation is connected to the lack of explicit casts in
2327   //        our symbolic expression model.
2328   //
2329   //        That means that for `int x` and `char y` we don't distinguish
2330   //        between these two very different cases:
2331   //          * `x == y`
2332   //          * `(char)x == y`
2333   //
2334   //        The moment we introduce symbolic casts, this restriction can be
2335   //        lifted.
2336   if (getType()->getCanonicalTypeUnqualified() !=
2337       Other.getType()->getCanonicalTypeUnqualified())
2338     return State;
2339 
2340   SymbolSet Members = getClassMembers(State);
2341   SymbolSet OtherMembers = Other.getClassMembers(State);
2342 
2343   // We estimate the size of the class by the height of tree containing
2344   // its members.  Merging is not a trivial operation, so it's easier to
2345   // merge the smaller class into the bigger one.
2346   if (Members.getHeight() >= OtherMembers.getHeight()) {
2347     return mergeImpl(F, State, Members, Other, OtherMembers);
2348   } else {
2349     return Other.mergeImpl(F, State, OtherMembers, *this, Members);
2350   }
2351 }
2352 
2353 inline ProgramStateRef
mergeImpl(RangeSet::Factory & RangeFactory,ProgramStateRef State,SymbolSet MyMembers,EquivalenceClass Other,SymbolSet OtherMembers)2354 EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory,
2355                             ProgramStateRef State, SymbolSet MyMembers,
2356                             EquivalenceClass Other, SymbolSet OtherMembers) {
2357   // Essentially what we try to recreate here is some kind of union-find
2358   // data structure.  It does have certain limitations due to persistence
2359   // and the need to remove elements from classes.
2360   //
2361   // In this setting, EquialityClass object is the representative of the class
2362   // or the parent element.  ClassMap is a mapping of class members to their
2363   // parent. Unlike the union-find structure, they all point directly to the
2364   // class representative because we don't have an opportunity to actually do
2365   // path compression when dealing with immutability.  This means that we
2366   // compress paths every time we do merges.  It also means that we lose
2367   // the main amortized complexity benefit from the original data structure.
2368   ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2369   ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
2370 
2371   // 1. If the merged classes have any constraints associated with them, we
2372   //    need to transfer them to the class we have left.
2373   //
2374   // Intersection here makes perfect sense because both of these constraints
2375   // must hold for the whole new class.
2376   if (std::optional<RangeSet> NewClassConstraint =
2377           intersect(RangeFactory, getConstraint(State, *this),
2378                     getConstraint(State, Other))) {
2379     // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because
2380     //       range inferrer shouldn't generate ranges incompatible with
2381     //       equivalence classes. However, at the moment, due to imperfections
2382     //       in the solver, it is possible and the merge function can also
2383     //       return infeasible states aka null states.
2384     if (NewClassConstraint->isEmpty())
2385       // Infeasible state
2386       return nullptr;
2387 
2388     // No need in tracking constraints of a now-dissolved class.
2389     Constraints = CRF.remove(Constraints, Other);
2390     // Assign new constraints for this class.
2391     Constraints = CRF.add(Constraints, *this, *NewClassConstraint);
2392 
2393     assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2394                                        "a state with infeasible constraints");
2395 
2396     State = State->set<ConstraintRange>(Constraints);
2397   }
2398 
2399   // 2. Get ALL equivalence-related maps
2400   ClassMapTy Classes = State->get<ClassMap>();
2401   ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
2402 
2403   ClassMembersTy Members = State->get<ClassMembers>();
2404   ClassMembersTy::Factory &MF = State->get_context<ClassMembers>();
2405 
2406   DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2407   DisequalityMapTy::Factory &DF = State->get_context<DisequalityMap>();
2408 
2409   ClassSet::Factory &CF = State->get_context<ClassSet>();
2410   SymbolSet::Factory &F = getMembersFactory(State);
2411 
2412   // 2. Merge members of the Other class into the current class.
2413   SymbolSet NewClassMembers = MyMembers;
2414   for (SymbolRef Sym : OtherMembers) {
2415     NewClassMembers = F.add(NewClassMembers, Sym);
2416     // *this is now the class for all these new symbols.
2417     Classes = CMF.add(Classes, Sym, *this);
2418   }
2419 
2420   // 3. Adjust member mapping.
2421   //
2422   // No need in tracking members of a now-dissolved class.
2423   Members = MF.remove(Members, Other);
2424   // Now only the current class is mapped to all the symbols.
2425   Members = MF.add(Members, *this, NewClassMembers);
2426 
2427   // 4. Update disequality relations
2428   ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF);
2429   // We are about to merge two classes but they are already known to be
2430   // non-equal. This is a contradiction.
2431   if (DisequalToOther.contains(*this))
2432     return nullptr;
2433 
2434   if (!DisequalToOther.isEmpty()) {
2435     ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF);
2436     DisequalityInfo = DF.remove(DisequalityInfo, Other);
2437 
2438     for (EquivalenceClass DisequalClass : DisequalToOther) {
2439       DisequalToThis = CF.add(DisequalToThis, DisequalClass);
2440 
2441       // Disequality is a symmetric relation meaning that if
2442       // DisequalToOther not null then the set for DisequalClass is not
2443       // empty and has at least Other.
2444       ClassSet OriginalSetLinkedToOther =
2445           *DisequalityInfo.lookup(DisequalClass);
2446 
2447       // Other will be eliminated and we should replace it with the bigger
2448       // united class.
2449       ClassSet NewSet = CF.remove(OriginalSetLinkedToOther, Other);
2450       NewSet = CF.add(NewSet, *this);
2451 
2452       DisequalityInfo = DF.add(DisequalityInfo, DisequalClass, NewSet);
2453     }
2454 
2455     DisequalityInfo = DF.add(DisequalityInfo, *this, DisequalToThis);
2456     State = State->set<DisequalityMap>(DisequalityInfo);
2457   }
2458 
2459   // 5. Update the state
2460   State = State->set<ClassMap>(Classes);
2461   State = State->set<ClassMembers>(Members);
2462 
2463   return State;
2464 }
2465 
2466 inline SymbolSet::Factory &
getMembersFactory(ProgramStateRef State)2467 EquivalenceClass::getMembersFactory(ProgramStateRef State) {
2468   return State->get_context<SymbolSet>();
2469 }
2470 
getClassMembers(ProgramStateRef State) const2471 SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const {
2472   if (const SymbolSet *Members = State->get<ClassMembers>(*this))
2473     return *Members;
2474 
2475   // This class is trivial, so we need to construct a set
2476   // with just that one symbol from the class.
2477   SymbolSet::Factory &F = getMembersFactory(State);
2478   return F.add(F.getEmptySet(), getRepresentativeSymbol());
2479 }
2480 
isTrivial(ProgramStateRef State) const2481 bool EquivalenceClass::isTrivial(ProgramStateRef State) const {
2482   return State->get<ClassMembers>(*this) == nullptr;
2483 }
2484 
isTriviallyDead(ProgramStateRef State,SymbolReaper & Reaper) const2485 bool EquivalenceClass::isTriviallyDead(ProgramStateRef State,
2486                                        SymbolReaper &Reaper) const {
2487   return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol());
2488 }
2489 
markDisequal(RangeSet::Factory & RF,ProgramStateRef State,SymbolRef First,SymbolRef Second)2490 inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2491                                                       ProgramStateRef State,
2492                                                       SymbolRef First,
2493                                                       SymbolRef Second) {
2494   return markDisequal(RF, State, find(State, First), find(State, Second));
2495 }
2496 
markDisequal(RangeSet::Factory & RF,ProgramStateRef State,EquivalenceClass First,EquivalenceClass Second)2497 inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2498                                                       ProgramStateRef State,
2499                                                       EquivalenceClass First,
2500                                                       EquivalenceClass Second) {
2501   return First.markDisequal(RF, State, Second);
2502 }
2503 
2504 inline ProgramStateRef
markDisequal(RangeSet::Factory & RF,ProgramStateRef State,EquivalenceClass Other) const2505 EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State,
2506                                EquivalenceClass Other) const {
2507   // If we know that two classes are equal, we can only produce an infeasible
2508   // state.
2509   if (*this == Other) {
2510     return nullptr;
2511   }
2512 
2513   DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2514   ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2515 
2516   // Disequality is a symmetric relation, so if we mark A as disequal to B,
2517   // we should also mark B as disequalt to A.
2518   if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this,
2519                             Other) ||
2520       !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other,
2521                             *this))
2522     return nullptr;
2523 
2524   assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2525                                      "a state with infeasible constraints");
2526 
2527   State = State->set<DisequalityMap>(DisequalityInfo);
2528   State = State->set<ConstraintRange>(Constraints);
2529 
2530   return State;
2531 }
2532 
addToDisequalityInfo(DisequalityMapTy & Info,ConstraintRangeTy & Constraints,RangeSet::Factory & RF,ProgramStateRef State,EquivalenceClass First,EquivalenceClass Second)2533 inline bool EquivalenceClass::addToDisequalityInfo(
2534     DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
2535     RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First,
2536     EquivalenceClass Second) {
2537 
2538   // 1. Get all of the required factories.
2539   DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>();
2540   ClassSet::Factory &CF = State->get_context<ClassSet>();
2541   ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
2542 
2543   // 2. Add Second to the set of classes disequal to First.
2544   const ClassSet *CurrentSet = Info.lookup(First);
2545   ClassSet NewSet = CurrentSet ? *CurrentSet : CF.getEmptySet();
2546   NewSet = CF.add(NewSet, Second);
2547 
2548   Info = F.add(Info, First, NewSet);
2549 
2550   // 3. If Second is known to be a constant, we can delete this point
2551   //    from the constraint asociated with First.
2552   //
2553   //    So, if Second == 10, it means that First != 10.
2554   //    At the same time, the same logic does not apply to ranges.
2555   if (const RangeSet *SecondConstraint = Constraints.lookup(Second))
2556     if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) {
2557 
2558       RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange(
2559           RF, State, First.getRepresentativeSymbol());
2560 
2561       FirstConstraint = RF.deletePoint(FirstConstraint, *Point);
2562 
2563       // If the First class is about to be constrained with an empty
2564       // range-set, the state is infeasible.
2565       if (FirstConstraint.isEmpty())
2566         return false;
2567 
2568       Constraints = CRF.add(Constraints, First, FirstConstraint);
2569     }
2570 
2571   return true;
2572 }
2573 
areEqual(ProgramStateRef State,SymbolRef FirstSym,SymbolRef SecondSym)2574 inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2575                                                       SymbolRef FirstSym,
2576                                                       SymbolRef SecondSym) {
2577   return EquivalenceClass::areEqual(State, find(State, FirstSym),
2578                                     find(State, SecondSym));
2579 }
2580 
areEqual(ProgramStateRef State,EquivalenceClass First,EquivalenceClass Second)2581 inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2582                                                       EquivalenceClass First,
2583                                                       EquivalenceClass Second) {
2584   // The same equivalence class => symbols are equal.
2585   if (First == Second)
2586     return true;
2587 
2588   // Let's check if we know anything about these two classes being not equal to
2589   // each other.
2590   ClassSet DisequalToFirst = First.getDisequalClasses(State);
2591   if (DisequalToFirst.contains(Second))
2592     return false;
2593 
2594   // It is not clear.
2595   return std::nullopt;
2596 }
2597 
2598 [[nodiscard]] ProgramStateRef
removeMember(ProgramStateRef State,const SymbolRef Old)2599 EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
2600 
2601   SymbolSet ClsMembers = getClassMembers(State);
2602   assert(ClsMembers.contains(Old));
2603 
2604   // Remove `Old`'s Class->Sym relation.
2605   SymbolSet::Factory &F = getMembersFactory(State);
2606   ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2607   ClsMembers = F.remove(ClsMembers, Old);
2608   // Ensure another precondition of the removeMember function (we can check
2609   // this only with isEmpty, thus we have to do the remove first).
2610   assert(!ClsMembers.isEmpty() &&
2611          "Class should have had at least two members before member removal");
2612   // Overwrite the existing members assigned to this class.
2613   ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2614   ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
2615   State = State->set<ClassMembers>(ClassMembersMap);
2616 
2617   // Remove `Old`'s Sym->Class relation.
2618   ClassMapTy Classes = State->get<ClassMap>();
2619   ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
2620   Classes = CMF.remove(Classes, Old);
2621   State = State->set<ClassMap>(Classes);
2622 
2623   return State;
2624 }
2625 
2626 // Re-evaluate an SVal with top-level `State->assume` logic.
2627 [[nodiscard]] ProgramStateRef
reAssume(ProgramStateRef State,const RangeSet * Constraint,SVal TheValue)2628 reAssume(ProgramStateRef State, const RangeSet *Constraint, SVal TheValue) {
2629   if (!Constraint)
2630     return State;
2631 
2632   const auto DefinedVal = TheValue.castAs<DefinedSVal>();
2633 
2634   // If the SVal is 0, we can simply interpret that as `false`.
2635   if (Constraint->encodesFalseRange())
2636     return State->assume(DefinedVal, false);
2637 
2638   // If the constraint does not encode 0 then we can interpret that as `true`
2639   // AND as a Range(Set).
2640   if (Constraint->encodesTrueRange()) {
2641     State = State->assume(DefinedVal, true);
2642     if (!State)
2643       return nullptr;
2644     // Fall through, re-assume based on the range values as well.
2645   }
2646   // Overestimate the individual Ranges with the RangeSet' lowest and
2647   // highest values.
2648   return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
2649                                      Constraint->getMaxValue(), true);
2650 }
2651 
2652 // Iterate over all symbols and try to simplify them. Once a symbol is
2653 // simplified then we check if we can merge the simplified symbol's equivalence
2654 // class to this class. This way, we simplify not just the symbols but the
2655 // classes as well: we strive to keep the number of the classes to be the
2656 // absolute minimum.
2657 [[nodiscard]] ProgramStateRef
simplify(SValBuilder & SVB,RangeSet::Factory & F,ProgramStateRef State,EquivalenceClass Class)2658 EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
2659                            ProgramStateRef State, EquivalenceClass Class) {
2660   SymbolSet ClassMembers = Class.getClassMembers(State);
2661   for (const SymbolRef &MemberSym : ClassMembers) {
2662 
2663     const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
2664     const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
2665 
2666     // The symbol is collapsed to a constant, check if the current State is
2667     // still feasible.
2668     if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) {
2669       const llvm::APSInt &SV = CI->getValue();
2670       const RangeSet *ClassConstraint = getConstraint(State, Class);
2671       // We have found a contradiction.
2672       if (ClassConstraint && !ClassConstraint->contains(SV))
2673         return nullptr;
2674     }
2675 
2676     if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
2677       // The simplified symbol should be the member of the original Class,
2678       // however, it might be in another existing class at the moment. We
2679       // have to merge these classes.
2680       ProgramStateRef OldState = State;
2681       State = merge(F, State, MemberSym, SimplifiedMemberSym);
2682       if (!State)
2683         return nullptr;
2684       // No state change, no merge happened actually.
2685       if (OldState == State)
2686         continue;
2687 
2688       // Be aware that `SimplifiedMemberSym` might refer to an already dead
2689       // symbol. In that case, the eqclass of that might not be the same as the
2690       // eqclass of `MemberSym`. This is because the dead symbols are not
2691       // preserved in the `ClassMap`, hence
2692       // `find(State, SimplifiedMemberSym)` will result in a trivial eqclass
2693       // compared to the eqclass of `MemberSym`.
2694       // These eqclasses should be the same if `SimplifiedMemberSym` is alive.
2695       // --> assert(find(State, MemberSym) == find(State, SimplifiedMemberSym))
2696       //
2697       // Note that `MemberSym` must be alive here since that is from the
2698       // `ClassMembers` where all the symbols are alive.
2699 
2700       // Remove the old and more complex symbol.
2701       State = find(State, MemberSym).removeMember(State, MemberSym);
2702 
2703       // Query the class constraint again b/c that may have changed during the
2704       // merge above.
2705       const RangeSet *ClassConstraint = getConstraint(State, Class);
2706 
2707       // Re-evaluate an SVal with top-level `State->assume`, this ignites
2708       // a RECURSIVE algorithm that will reach a FIXPOINT.
2709       //
2710       // About performance and complexity: Let us assume that in a State we
2711       // have N non-trivial equivalence classes and that all constraints and
2712       // disequality info is related to non-trivial classes. In the worst case,
2713       // we can simplify only one symbol of one class in each iteration. The
2714       // number of symbols in one class cannot grow b/c we replace the old
2715       // symbol with the simplified one. Also, the number of the equivalence
2716       // classes can decrease only, b/c the algorithm does a merge operation
2717       // optionally. We need N iterations in this case to reach the fixpoint.
2718       // Thus, the steps needed to be done in the worst case is proportional to
2719       // N*N.
2720       //
2721       // This worst case scenario can be extended to that case when we have
2722       // trivial classes in the constraints and in the disequality map. This
2723       // case can be reduced to the case with a State where there are only
2724       // non-trivial classes. This is because a merge operation on two trivial
2725       // classes results in one non-trivial class.
2726       State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
2727       if (!State)
2728         return nullptr;
2729     }
2730   }
2731   return State;
2732 }
2733 
getDisequalClasses(ProgramStateRef State,SymbolRef Sym)2734 inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State,
2735                                                      SymbolRef Sym) {
2736   return find(State, Sym).getDisequalClasses(State);
2737 }
2738 
2739 inline ClassSet
getDisequalClasses(ProgramStateRef State) const2740 EquivalenceClass::getDisequalClasses(ProgramStateRef State) const {
2741   return getDisequalClasses(State->get<DisequalityMap>(),
2742                             State->get_context<ClassSet>());
2743 }
2744 
2745 inline ClassSet
getDisequalClasses(DisequalityMapTy Map,ClassSet::Factory & Factory) const2746 EquivalenceClass::getDisequalClasses(DisequalityMapTy Map,
2747                                      ClassSet::Factory &Factory) const {
2748   if (const ClassSet *DisequalClasses = Map.lookup(*this))
2749     return *DisequalClasses;
2750 
2751   return Factory.getEmptySet();
2752 }
2753 
isClassDataConsistent(ProgramStateRef State)2754 bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State) {
2755   ClassMembersTy Members = State->get<ClassMembers>();
2756 
2757   for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : Members) {
2758     for (SymbolRef Member : ClassMembersPair.second) {
2759       // Every member of the class should have a mapping back to the class.
2760       if (find(State, Member) == ClassMembersPair.first) {
2761         continue;
2762       }
2763 
2764       return false;
2765     }
2766   }
2767 
2768   DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2769   for (std::pair<EquivalenceClass, ClassSet> DisequalityInfo : Disequalities) {
2770     EquivalenceClass Class = DisequalityInfo.first;
2771     ClassSet DisequalClasses = DisequalityInfo.second;
2772 
2773     // There is no use in keeping empty sets in the map.
2774     if (DisequalClasses.isEmpty())
2775       return false;
2776 
2777     // Disequality is symmetrical, i.e. for every Class A and B that A != B,
2778     // B != A should also be true.
2779     for (EquivalenceClass DisequalClass : DisequalClasses) {
2780       const ClassSet *DisequalToDisequalClasses =
2781           Disequalities.lookup(DisequalClass);
2782 
2783       // It should be a set of at least one element: Class
2784       if (!DisequalToDisequalClasses ||
2785           !DisequalToDisequalClasses->contains(Class))
2786         return false;
2787     }
2788   }
2789 
2790   return true;
2791 }
2792 
2793 //===----------------------------------------------------------------------===//
2794 //                    RangeConstraintManager implementation
2795 //===----------------------------------------------------------------------===//
2796 
canReasonAbout(SVal X) const2797 bool RangeConstraintManager::canReasonAbout(SVal X) const {
2798   std::optional<nonloc::SymbolVal> SymVal = X.getAs<nonloc::SymbolVal>();
2799   if (SymVal && SymVal->isExpression()) {
2800     const SymExpr *SE = SymVal->getSymbol();
2801 
2802     if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SE)) {
2803       switch (SIE->getOpcode()) {
2804       // We don't reason yet about bitwise-constraints on symbolic values.
2805       case BO_And:
2806       case BO_Or:
2807       case BO_Xor:
2808         return false;
2809       // We don't reason yet about these arithmetic constraints on
2810       // symbolic values.
2811       case BO_Mul:
2812       case BO_Div:
2813       case BO_Rem:
2814       case BO_Shl:
2815       case BO_Shr:
2816         return false;
2817       // All other cases.
2818       default:
2819         return true;
2820       }
2821     }
2822 
2823     if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(SE)) {
2824       // FIXME: Handle <=> here.
2825       if (BinaryOperator::isEqualityOp(SSE->getOpcode()) ||
2826           BinaryOperator::isRelationalOp(SSE->getOpcode())) {
2827         // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
2828         // We've recently started producing Loc <> NonLoc comparisons (that
2829         // result from casts of one of the operands between eg. intptr_t and
2830         // void *), but we can't reason about them yet.
2831         if (Loc::isLocType(SSE->getLHS()->getType())) {
2832           return Loc::isLocType(SSE->getRHS()->getType());
2833         }
2834       }
2835     }
2836 
2837     return false;
2838   }
2839 
2840   return true;
2841 }
2842 
checkNull(ProgramStateRef State,SymbolRef Sym)2843 ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
2844                                                     SymbolRef Sym) {
2845   const RangeSet *Ranges = getConstraint(State, Sym);
2846 
2847   // If we don't have any information about this symbol, it's underconstrained.
2848   if (!Ranges)
2849     return ConditionTruthVal();
2850 
2851   // If we have a concrete value, see if it's zero.
2852   if (const llvm::APSInt *Value = Ranges->getConcreteValue())
2853     return *Value == 0;
2854 
2855   BasicValueFactory &BV = getBasicVals();
2856   APSIntType IntType = BV.getAPSIntType(Sym->getType());
2857   llvm::APSInt Zero = IntType.getZeroValue();
2858 
2859   // Check if zero is in the set of possible values.
2860   if (!Ranges->contains(Zero))
2861     return false;
2862 
2863   // Zero is a possible value, but it is not the /only/ possible value.
2864   return ConditionTruthVal();
2865 }
2866 
getSymVal(ProgramStateRef St,SymbolRef Sym) const2867 const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St,
2868                                                       SymbolRef Sym) const {
2869   const RangeSet *T = getConstraint(St, Sym);
2870   return T ? T->getConcreteValue() : nullptr;
2871 }
2872 
getSymMinVal(ProgramStateRef St,SymbolRef Sym) const2873 const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St,
2874                                                          SymbolRef Sym) const {
2875   const RangeSet *T = getConstraint(St, Sym);
2876   if (!T || T->isEmpty())
2877     return nullptr;
2878   return &T->getMinValue();
2879 }
2880 
getSymMaxVal(ProgramStateRef St,SymbolRef Sym) const2881 const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St,
2882                                                          SymbolRef Sym) const {
2883   const RangeSet *T = getConstraint(St, Sym);
2884   if (!T || T->isEmpty())
2885     return nullptr;
2886   return &T->getMaxValue();
2887 }
2888 
2889 //===----------------------------------------------------------------------===//
2890 //                Remove dead symbols from existing constraints
2891 //===----------------------------------------------------------------------===//
2892 
2893 /// Scan all symbols referenced by the constraints. If the symbol is not alive
2894 /// as marked in LSymbols, mark it as dead in DSymbols.
2895 ProgramStateRef
removeDeadBindings(ProgramStateRef State,SymbolReaper & SymReaper)2896 RangeConstraintManager::removeDeadBindings(ProgramStateRef State,
2897                                            SymbolReaper &SymReaper) {
2898   ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2899   ClassMembersTy NewClassMembersMap = ClassMembersMap;
2900   ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2901   SymbolSet::Factory &SetFactory = State->get_context<SymbolSet>();
2902 
2903   ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2904   ConstraintRangeTy NewConstraints = Constraints;
2905   ConstraintRangeTy::Factory &ConstraintFactory =
2906       State->get_context<ConstraintRange>();
2907 
2908   ClassMapTy Map = State->get<ClassMap>();
2909   ClassMapTy NewMap = Map;
2910   ClassMapTy::Factory &ClassFactory = State->get_context<ClassMap>();
2911 
2912   DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2913   DisequalityMapTy::Factory &DisequalityFactory =
2914       State->get_context<DisequalityMap>();
2915   ClassSet::Factory &ClassSetFactory = State->get_context<ClassSet>();
2916 
2917   bool ClassMapChanged = false;
2918   bool MembersMapChanged = false;
2919   bool ConstraintMapChanged = false;
2920   bool DisequalitiesChanged = false;
2921 
2922   auto removeDeadClass = [&](EquivalenceClass Class) {
2923     // Remove associated constraint ranges.
2924     Constraints = ConstraintFactory.remove(Constraints, Class);
2925     ConstraintMapChanged = true;
2926 
2927     // Update disequality information to not hold any information on the
2928     // removed class.
2929     ClassSet DisequalClasses =
2930         Class.getDisequalClasses(Disequalities, ClassSetFactory);
2931     if (!DisequalClasses.isEmpty()) {
2932       for (EquivalenceClass DisequalClass : DisequalClasses) {
2933         ClassSet DisequalToDisequalSet =
2934             DisequalClass.getDisequalClasses(Disequalities, ClassSetFactory);
2935         // DisequalToDisequalSet is guaranteed to be non-empty for consistent
2936         // disequality info.
2937         assert(!DisequalToDisequalSet.isEmpty());
2938         ClassSet NewSet = ClassSetFactory.remove(DisequalToDisequalSet, Class);
2939 
2940         // No need in keeping an empty set.
2941         if (NewSet.isEmpty()) {
2942           Disequalities =
2943               DisequalityFactory.remove(Disequalities, DisequalClass);
2944         } else {
2945           Disequalities =
2946               DisequalityFactory.add(Disequalities, DisequalClass, NewSet);
2947         }
2948       }
2949       // Remove the data for the class
2950       Disequalities = DisequalityFactory.remove(Disequalities, Class);
2951       DisequalitiesChanged = true;
2952     }
2953   };
2954 
2955   // 1. Let's see if dead symbols are trivial and have associated constraints.
2956   for (std::pair<EquivalenceClass, RangeSet> ClassConstraintPair :
2957        Constraints) {
2958     EquivalenceClass Class = ClassConstraintPair.first;
2959     if (Class.isTriviallyDead(State, SymReaper)) {
2960       // If this class is trivial, we can remove its constraints right away.
2961       removeDeadClass(Class);
2962     }
2963   }
2964 
2965   // 2. We don't need to track classes for dead symbols.
2966   for (std::pair<SymbolRef, EquivalenceClass> SymbolClassPair : Map) {
2967     SymbolRef Sym = SymbolClassPair.first;
2968 
2969     if (SymReaper.isDead(Sym)) {
2970       ClassMapChanged = true;
2971       NewMap = ClassFactory.remove(NewMap, Sym);
2972     }
2973   }
2974 
2975   // 3. Remove dead members from classes and remove dead non-trivial classes
2976   //    and their constraints.
2977   for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair :
2978        ClassMembersMap) {
2979     EquivalenceClass Class = ClassMembersPair.first;
2980     SymbolSet LiveMembers = ClassMembersPair.second;
2981     bool MembersChanged = false;
2982 
2983     for (SymbolRef Member : ClassMembersPair.second) {
2984       if (SymReaper.isDead(Member)) {
2985         MembersChanged = true;
2986         LiveMembers = SetFactory.remove(LiveMembers, Member);
2987       }
2988     }
2989 
2990     // Check if the class changed.
2991     if (!MembersChanged)
2992       continue;
2993 
2994     MembersMapChanged = true;
2995 
2996     if (LiveMembers.isEmpty()) {
2997       // The class is dead now, we need to wipe it out of the members map...
2998       NewClassMembersMap = EMFactory.remove(NewClassMembersMap, Class);
2999 
3000       // ...and remove all of its constraints.
3001       removeDeadClass(Class);
3002     } else {
3003       // We need to change the members associated with the class.
3004       NewClassMembersMap =
3005           EMFactory.add(NewClassMembersMap, Class, LiveMembers);
3006     }
3007   }
3008 
3009   // 4. Update the state with new maps.
3010   //
3011   // Here we try to be humble and update a map only if it really changed.
3012   if (ClassMapChanged)
3013     State = State->set<ClassMap>(NewMap);
3014 
3015   if (MembersMapChanged)
3016     State = State->set<ClassMembers>(NewClassMembersMap);
3017 
3018   if (ConstraintMapChanged)
3019     State = State->set<ConstraintRange>(Constraints);
3020 
3021   if (DisequalitiesChanged)
3022     State = State->set<DisequalityMap>(Disequalities);
3023 
3024   assert(EquivalenceClass::isClassDataConsistent(State));
3025 
3026   return State;
3027 }
3028 
getRange(ProgramStateRef State,SymbolRef Sym)3029 RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
3030                                           SymbolRef Sym) {
3031   return SymbolicRangeInferrer::inferRange(F, State, Sym);
3032 }
3033 
setRange(ProgramStateRef State,SymbolRef Sym,RangeSet Range)3034 ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State,
3035                                                  SymbolRef Sym,
3036                                                  RangeSet Range) {
3037   return ConstraintAssignor::assign(State, getSValBuilder(), F, Sym, Range);
3038 }
3039 
3040 //===------------------------------------------------------------------------===
3041 // assumeSymX methods: protected interface for RangeConstraintManager.
3042 //===------------------------------------------------------------------------===
3043 
// The syntax for ranges below is mathematical, using [x, y] for closed ranges
// and (x, y) for open ranges. These ranges are modular, corresponding to the
// common treatment of C integer overflow. This means that these methods do
// not have to worry about overflow: the range factory's intersect() (called
// as F.intersect below) can handle such a "wraparound" range.
// As an example, the half-open range [UINT_MAX-1, 3) contains five values:
// UINT_MAX-1, UINT_MAX, 0, 1, and 2.
3051 
3052 ProgramStateRef
assumeSymNE(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3053 RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
3054                                     const llvm::APSInt &Int,
3055                                     const llvm::APSInt &Adjustment) {
3056   // Before we do any real work, see if the value can even show up.
3057   APSIntType AdjustmentType(Adjustment);
3058   if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
3059     return St;
3060 
3061   llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment;
3062   RangeSet New = getRange(St, Sym);
3063   New = F.deletePoint(New, Point);
3064 
3065   return setRange(St, Sym, New);
3066 }
3067 
3068 ProgramStateRef
assumeSymEQ(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3069 RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
3070                                     const llvm::APSInt &Int,
3071                                     const llvm::APSInt &Adjustment) {
3072   // Before we do any real work, see if the value can even show up.
3073   APSIntType AdjustmentType(Adjustment);
3074   if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
3075     return nullptr;
3076 
3077   // [Int-Adjustment, Int-Adjustment]
3078   llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
3079   RangeSet New = getRange(St, Sym);
3080   New = F.intersect(New, AdjInt);
3081 
3082   return setRange(St, Sym, New);
3083 }
3084 
getSymLTRange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3085 RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St,
3086                                                SymbolRef Sym,
3087                                                const llvm::APSInt &Int,
3088                                                const llvm::APSInt &Adjustment) {
3089   // Before we do any real work, see if the value can even show up.
3090   APSIntType AdjustmentType(Adjustment);
3091   switch (AdjustmentType.testInRange(Int, true)) {
3092   case APSIntType::RTR_Below:
3093     return F.getEmptySet();
3094   case APSIntType::RTR_Within:
3095     break;
3096   case APSIntType::RTR_Above:
3097     return getRange(St, Sym);
3098   }
3099 
3100   // Special case for Int == Min. This is always false.
3101   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3102   llvm::APSInt Min = AdjustmentType.getMinValue();
3103   if (ComparisonVal == Min)
3104     return F.getEmptySet();
3105 
3106   llvm::APSInt Lower = Min - Adjustment;
3107   llvm::APSInt Upper = ComparisonVal - Adjustment;
3108   --Upper;
3109 
3110   RangeSet Result = getRange(St, Sym);
3111   return F.intersect(Result, Lower, Upper);
3112 }
3113 
3114 ProgramStateRef
assumeSymLT(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3115 RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
3116                                     const llvm::APSInt &Int,
3117                                     const llvm::APSInt &Adjustment) {
3118   RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
3119   return setRange(St, Sym, New);
3120 }
3121 
getSymGTRange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3122 RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St,
3123                                                SymbolRef Sym,
3124                                                const llvm::APSInt &Int,
3125                                                const llvm::APSInt &Adjustment) {
3126   // Before we do any real work, see if the value can even show up.
3127   APSIntType AdjustmentType(Adjustment);
3128   switch (AdjustmentType.testInRange(Int, true)) {
3129   case APSIntType::RTR_Below:
3130     return getRange(St, Sym);
3131   case APSIntType::RTR_Within:
3132     break;
3133   case APSIntType::RTR_Above:
3134     return F.getEmptySet();
3135   }
3136 
3137   // Special case for Int == Max. This is always false.
3138   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3139   llvm::APSInt Max = AdjustmentType.getMaxValue();
3140   if (ComparisonVal == Max)
3141     return F.getEmptySet();
3142 
3143   llvm::APSInt Lower = ComparisonVal - Adjustment;
3144   llvm::APSInt Upper = Max - Adjustment;
3145   ++Lower;
3146 
3147   RangeSet SymRange = getRange(St, Sym);
3148   return F.intersect(SymRange, Lower, Upper);
3149 }
3150 
3151 ProgramStateRef
assumeSymGT(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3152 RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
3153                                     const llvm::APSInt &Int,
3154                                     const llvm::APSInt &Adjustment) {
3155   RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
3156   return setRange(St, Sym, New);
3157 }
3158 
getSymGERange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3159 RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St,
3160                                                SymbolRef Sym,
3161                                                const llvm::APSInt &Int,
3162                                                const llvm::APSInt &Adjustment) {
3163   // Before we do any real work, see if the value can even show up.
3164   APSIntType AdjustmentType(Adjustment);
3165   switch (AdjustmentType.testInRange(Int, true)) {
3166   case APSIntType::RTR_Below:
3167     return getRange(St, Sym);
3168   case APSIntType::RTR_Within:
3169     break;
3170   case APSIntType::RTR_Above:
3171     return F.getEmptySet();
3172   }
3173 
3174   // Special case for Int == Min. This is always feasible.
3175   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3176   llvm::APSInt Min = AdjustmentType.getMinValue();
3177   if (ComparisonVal == Min)
3178     return getRange(St, Sym);
3179 
3180   llvm::APSInt Max = AdjustmentType.getMaxValue();
3181   llvm::APSInt Lower = ComparisonVal - Adjustment;
3182   llvm::APSInt Upper = Max - Adjustment;
3183 
3184   RangeSet SymRange = getRange(St, Sym);
3185   return F.intersect(SymRange, Lower, Upper);
3186 }
3187 
3188 ProgramStateRef
assumeSymGE(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3189 RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
3190                                     const llvm::APSInt &Int,
3191                                     const llvm::APSInt &Adjustment) {
3192   RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
3193   return setRange(St, Sym, New);
3194 }
3195 
3196 RangeSet
getSymLERange(llvm::function_ref<RangeSet ()> RS,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3197 RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS,
3198                                       const llvm::APSInt &Int,
3199                                       const llvm::APSInt &Adjustment) {
3200   // Before we do any real work, see if the value can even show up.
3201   APSIntType AdjustmentType(Adjustment);
3202   switch (AdjustmentType.testInRange(Int, true)) {
3203   case APSIntType::RTR_Below:
3204     return F.getEmptySet();
3205   case APSIntType::RTR_Within:
3206     break;
3207   case APSIntType::RTR_Above:
3208     return RS();
3209   }
3210 
3211   // Special case for Int == Max. This is always feasible.
3212   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3213   llvm::APSInt Max = AdjustmentType.getMaxValue();
3214   if (ComparisonVal == Max)
3215     return RS();
3216 
3217   llvm::APSInt Min = AdjustmentType.getMinValue();
3218   llvm::APSInt Lower = Min - Adjustment;
3219   llvm::APSInt Upper = ComparisonVal - Adjustment;
3220 
3221   RangeSet Default = RS();
3222   return F.intersect(Default, Lower, Upper);
3223 }
3224 
getSymLERange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3225 RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St,
3226                                                SymbolRef Sym,
3227                                                const llvm::APSInt &Int,
3228                                                const llvm::APSInt &Adjustment) {
3229   return getSymLERange([&] { return getRange(St, Sym); }, Int, Adjustment);
3230 }
3231 
3232 ProgramStateRef
assumeSymLE(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3233 RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
3234                                     const llvm::APSInt &Int,
3235                                     const llvm::APSInt &Adjustment) {
3236   RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
3237   return setRange(St, Sym, New);
3238 }
3239 
assumeSymWithinInclusiveRange(ProgramStateRef State,SymbolRef Sym,const llvm::APSInt & From,const llvm::APSInt & To,const llvm::APSInt & Adjustment)3240 ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
3241     ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3242     const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3243   RangeSet New = getSymGERange(State, Sym, From, Adjustment);
3244   if (New.isEmpty())
3245     return nullptr;
3246   RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment);
3247   return setRange(State, Sym, Out);
3248 }
3249 
assumeSymOutsideInclusiveRange(ProgramStateRef State,SymbolRef Sym,const llvm::APSInt & From,const llvm::APSInt & To,const llvm::APSInt & Adjustment)3250 ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
3251     ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3252     const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3253   RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment);
3254   RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment);
3255   RangeSet New(F.add(RangeLT, RangeGT));
3256   return setRange(State, Sym, New);
3257 }
3258 
3259 //===----------------------------------------------------------------------===//
3260 // Pretty-printing.
3261 //===----------------------------------------------------------------------===//
3262 
printJson(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3263 void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State,
3264                                        const char *NL, unsigned int Space,
3265                                        bool IsDot) const {
3266   printConstraints(Out, State, NL, Space, IsDot);
3267   printEquivalenceClasses(Out, State, NL, Space, IsDot);
3268   printDisequalities(Out, State, NL, Space, IsDot);
3269 }
3270 
printValue(raw_ostream & Out,ProgramStateRef State,SymbolRef Sym)3271 void RangeConstraintManager::printValue(raw_ostream &Out, ProgramStateRef State,
3272                                         SymbolRef Sym) {
3273   const RangeSet RS = getRange(State, Sym);
3274   if (RS.isEmpty()) {
3275     Out << "<empty rangeset>";
3276     return;
3277   }
3278   Out << RS.getBitWidth() << (RS.isUnsigned() ? "u:" : "s:");
3279   RS.dump(Out);
3280 }
3281 
toString(const SymbolRef & Sym)3282 static std::string toString(const SymbolRef &Sym) {
3283   std::string S;
3284   llvm::raw_string_ostream O(S);
3285   Sym->dumpToStream(O);
3286   return S;
3287 }
3288 
printConstraints(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3289 void RangeConstraintManager::printConstraints(raw_ostream &Out,
3290                                               ProgramStateRef State,
3291                                               const char *NL,
3292                                               unsigned int Space,
3293                                               bool IsDot) const {
3294   ConstraintRangeTy Constraints = State->get<ConstraintRange>();
3295 
3296   Indent(Out, Space, IsDot) << "\"constraints\": ";
3297   if (Constraints.isEmpty()) {
3298     Out << "null," << NL;
3299     return;
3300   }
3301 
3302   std::map<std::string, RangeSet> OrderedConstraints;
3303   for (std::pair<EquivalenceClass, RangeSet> P : Constraints) {
3304     SymbolSet ClassMembers = P.first.getClassMembers(State);
3305     for (const SymbolRef &ClassMember : ClassMembers) {
3306       bool insertion_took_place;
3307       std::tie(std::ignore, insertion_took_place) =
3308           OrderedConstraints.insert({toString(ClassMember), P.second});
3309       assert(insertion_took_place &&
3310              "two symbols should not have the same dump");
3311     }
3312   }
3313 
3314   ++Space;
3315   Out << '[' << NL;
3316   bool First = true;
3317   for (std::pair<std::string, RangeSet> P : OrderedConstraints) {
3318     if (First) {
3319       First = false;
3320     } else {
3321       Out << ',';
3322       Out << NL;
3323     }
3324     Indent(Out, Space, IsDot)
3325         << "{ \"symbol\": \"" << P.first << "\", \"range\": \"";
3326     P.second.dump(Out);
3327     Out << "\" }";
3328   }
3329   Out << NL;
3330 
3331   --Space;
3332   Indent(Out, Space, IsDot) << "]," << NL;
3333 }
3334 
toString(ProgramStateRef State,EquivalenceClass Class)3335 static std::string toString(ProgramStateRef State, EquivalenceClass Class) {
3336   SymbolSet ClassMembers = Class.getClassMembers(State);
3337   llvm::SmallVector<SymbolRef, 8> ClassMembersSorted(ClassMembers.begin(),
3338                                                      ClassMembers.end());
3339   llvm::sort(ClassMembersSorted,
3340              [](const SymbolRef &LHS, const SymbolRef &RHS) {
3341                return toString(LHS) < toString(RHS);
3342              });
3343 
3344   bool FirstMember = true;
3345 
3346   std::string Str;
3347   llvm::raw_string_ostream Out(Str);
3348   Out << "[ ";
3349   for (SymbolRef ClassMember : ClassMembersSorted) {
3350     if (FirstMember)
3351       FirstMember = false;
3352     else
3353       Out << ", ";
3354     Out << "\"" << ClassMember << "\"";
3355   }
3356   Out << " ]";
3357   return Str;
3358 }
3359 
printEquivalenceClasses(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3360 void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out,
3361                                                      ProgramStateRef State,
3362                                                      const char *NL,
3363                                                      unsigned int Space,
3364                                                      bool IsDot) const {
3365   ClassMembersTy Members = State->get<ClassMembers>();
3366 
3367   Indent(Out, Space, IsDot) << "\"equivalence_classes\": ";
3368   if (Members.isEmpty()) {
3369     Out << "null," << NL;
3370     return;
3371   }
3372 
3373   std::set<std::string> MembersStr;
3374   for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members)
3375     MembersStr.insert(toString(State, ClassToSymbolSet.first));
3376 
3377   ++Space;
3378   Out << '[' << NL;
3379   bool FirstClass = true;
3380   for (const std::string &Str : MembersStr) {
3381     if (FirstClass) {
3382       FirstClass = false;
3383     } else {
3384       Out << ',';
3385       Out << NL;
3386     }
3387     Indent(Out, Space, IsDot);
3388     Out << Str;
3389   }
3390   Out << NL;
3391 
3392   --Space;
3393   Indent(Out, Space, IsDot) << "]," << NL;
3394 }
3395 
printDisequalities(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3396 void RangeConstraintManager::printDisequalities(raw_ostream &Out,
3397                                                 ProgramStateRef State,
3398                                                 const char *NL,
3399                                                 unsigned int Space,
3400                                                 bool IsDot) const {
3401   DisequalityMapTy Disequalities = State->get<DisequalityMap>();
3402 
3403   Indent(Out, Space, IsDot) << "\"disequality_info\": ";
3404   if (Disequalities.isEmpty()) {
3405     Out << "null," << NL;
3406     return;
3407   }
3408 
3409   // Transform the disequality info to an ordered map of
3410   // [string -> (ordered set of strings)]
3411   using EqClassesStrTy = std::set<std::string>;
3412   using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>;
3413   DisequalityInfoStrTy DisequalityInfoStr;
3414   for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) {
3415     EquivalenceClass Class = ClassToDisEqSet.first;
3416     ClassSet DisequalClasses = ClassToDisEqSet.second;
3417     EqClassesStrTy MembersStr;
3418     for (EquivalenceClass DisEqClass : DisequalClasses)
3419       MembersStr.insert(toString(State, DisEqClass));
3420     DisequalityInfoStr.insert({toString(State, Class), MembersStr});
3421   }
3422 
3423   ++Space;
3424   Out << '[' << NL;
3425   bool FirstClass = true;
3426   for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet :
3427        DisequalityInfoStr) {
3428     const std::string &Class = ClassToDisEqSet.first;
3429     if (FirstClass) {
3430       FirstClass = false;
3431     } else {
3432       Out << ',';
3433       Out << NL;
3434     }
3435     Indent(Out, Space, IsDot) << "{" << NL;
3436     unsigned int DisEqSpace = Space + 1;
3437     Indent(Out, DisEqSpace, IsDot) << "\"class\": ";
3438     Out << Class;
3439     const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second;
3440     if (!DisequalClasses.empty()) {
3441       Out << "," << NL;
3442       Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL;
3443       unsigned int DisEqClassSpace = DisEqSpace + 1;
3444       Indent(Out, DisEqClassSpace, IsDot);
3445       bool FirstDisEqClass = true;
3446       for (const std::string &DisEqClass : DisequalClasses) {
3447         if (FirstDisEqClass) {
3448           FirstDisEqClass = false;
3449         } else {
3450           Out << ',' << NL;
3451           Indent(Out, DisEqClassSpace, IsDot);
3452         }
3453         Out << DisEqClass;
3454       }
3455       Out << "]" << NL;
3456     }
3457     Indent(Out, Space, IsDot) << "}";
3458   }
3459   Out << NL;
3460 
3461   --Space;
3462   Indent(Out, Space, IsDot) << "]," << NL;
3463 }
3464