1 //== RangedConstraintManager.h ----------------------------------*- C++ -*--==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Ranged constraint manager, built on SimpleConstraintManager. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H 14 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H 15 16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" 17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 18 #include "clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h" 19 #include "llvm/ADT/APSInt.h" 20 #include "llvm/Support/Allocator.h" 21 22 namespace clang { 23 24 namespace ento { 25 26 /// A Range represents the closed range [from, to]. The caller must 27 /// guarantee that from <= to. Note that Range is immutable, so as not 28 /// to subvert RangeSet's immutability. 29 class Range { 30 public: Range(const llvm::APSInt & From,const llvm::APSInt & To)31 Range(const llvm::APSInt &From, const llvm::APSInt &To) : Impl(&From, &To) { 32 assert(From <= To); 33 } 34 Range(const llvm::APSInt & Point)35 Range(const llvm::APSInt &Point) : Range(Point, Point) {} 36 Includes(const llvm::APSInt & Point)37 bool Includes(const llvm::APSInt &Point) const { 38 return From() <= Point && Point <= To(); 39 } From()40 const llvm::APSInt &From() const { return *Impl.first; } To()41 const llvm::APSInt &To() const { return *Impl.second; } getConcreteValue()42 const llvm::APSInt *getConcreteValue() const { 43 return &From() == &To() ? &From() : nullptr; 44 } 45 Profile(llvm::FoldingSetNodeID & ID)46 void Profile(llvm::FoldingSetNodeID &ID) const { 47 ID.AddPointer(&From()); 48 ID.AddPointer(&To()); 49 } 50 void dump(raw_ostream &OS) const; 51 void dump() const; 52 53 // In order to keep non-overlapping ranges sorted, we can compare only From 54 // points. 55 bool operator<(const Range &RHS) const { return From() < RHS.From(); } 56 57 bool operator==(const Range &RHS) const { return Impl == RHS.Impl; } 58 bool operator!=(const Range &RHS) const { return !operator==(RHS); } 59 60 private: 61 std::pair<const llvm::APSInt *, const llvm::APSInt *> Impl; 62 }; 63 64 /// @class RangeSet is a persistent set of non-overlapping ranges. 65 /// 66 /// New RangeSet objects can be ONLY produced by RangeSet::Factory object, which 67 /// also supports the most common operations performed on range sets. 68 /// 69 /// Empty set corresponds to an overly constrained symbol meaning that there 70 /// are no possible values for that symbol. 71 class RangeSet { 72 public: 73 class Factory; 74 75 private: 76 // We use llvm::SmallVector as the underlying container for the following 77 // reasons: 78 // 79 // * Range sets are usually very simple, 1 or 2 ranges. 80 // That's why llvm::ImmutableSet is not perfect. 81 // 82 // * Ranges in sets are NOT overlapping, so it is natural to keep them 83 // sorted for efficient operations and queries. For this reason, 84 // llvm::SmallSet doesn't fit the requirements, it is not sorted when it 85 // is a vector. 86 // 87 // * Range set operations usually a bit harder than add/remove a range. 88 // Complex operations might do many of those for just one range set. 89 // Formerly it used to be llvm::ImmutableSet, which is inefficient for our 90 // purposes as we want to make these operations BOTH immutable AND 91 // efficient. 92 // 93 // * Iteration over ranges is widespread and a more cache-friendly 94 // structure is preferred. 95 using ImplType = llvm::SmallVector<Range, 4>; 96 97 struct ContainerType : public ImplType, public llvm::FoldingSetNode { ProfileContainerType98 void Profile(llvm::FoldingSetNodeID &ID) const { 99 for (const Range &It : *this) { 100 It.Profile(ID); 101 } 102 } 103 }; 104 // This is a non-owning pointer to an actual container. 105 // The memory is fully managed by the factory and is alive as long as the 106 // factory itself is alive. 107 // It is a pointer as opposed to a reference, so we can easily reassign 108 // RangeSet objects. 109 using UnderlyingType = const ContainerType *; 110 UnderlyingType Impl; 111 112 public: 113 using const_iterator = ImplType::const_iterator; 114 begin()115 const_iterator begin() const { return Impl->begin(); } end()116 const_iterator end() const { return Impl->end(); } size()117 size_t size() const { return Impl->size(); } 118 isEmpty()119 bool isEmpty() const { return Impl->empty(); } 120 121 class Factory { 122 public: Factory(BasicValueFactory & BV)123 Factory(BasicValueFactory &BV) : ValueFactory(BV) {} 124 125 /// Create a new set with all ranges from both LHS and RHS. 126 /// Possible intersections are not checked here. 127 /// 128 /// Complexity: O(N + M) 129 /// where N = size(LHS), M = size(RHS) 130 RangeSet add(RangeSet LHS, RangeSet RHS); 131 /// Create a new set with all ranges from the original set plus the new one. 132 /// Possible intersections are not checked here. 133 /// 134 /// Complexity: O(N) 135 /// where N = size(Original) 136 RangeSet add(RangeSet Original, Range Element); 137 /// Create a new set with all ranges from the original set plus the point. 138 /// Possible intersections are not checked here. 139 /// 140 /// Complexity: O(N) 141 /// where N = size(Original) 142 RangeSet add(RangeSet Original, const llvm::APSInt &Point); 143 /// Create a new set which is a union of two given ranges. 144 /// Possible intersections are not checked here. 145 /// 146 /// Complexity: O(N + M) 147 /// where N = size(LHS), M = size(RHS) 148 RangeSet unite(RangeSet LHS, RangeSet RHS); 149 /// Create a new set by uniting given range set with the given range. 150 /// All intersections and adjacent ranges are handled here. 151 /// 152 /// Complexity: O(N) 153 /// where N = size(Original) 154 RangeSet unite(RangeSet Original, Range Element); 155 /// Create a new set by uniting given range set with the given point. 156 /// All intersections and adjacent ranges are handled here. 157 /// 158 /// Complexity: O(N) 159 /// where N = size(Original) 160 RangeSet unite(RangeSet Original, llvm::APSInt Point); 161 /// Create a new set by uniting given range set with the given range 162 /// between points. All intersections and adjacent ranges are handled here. 163 /// 164 /// Complexity: O(N) 165 /// where N = size(Original) 166 RangeSet unite(RangeSet Original, llvm::APSInt From, llvm::APSInt To); 167 getEmptySet()168 RangeSet getEmptySet() { return &EmptySet; } 169 170 /// Create a new set with just one range. 171 /// @{ 172 RangeSet getRangeSet(Range Origin); getRangeSet(const llvm::APSInt & From,const llvm::APSInt & To)173 RangeSet getRangeSet(const llvm::APSInt &From, const llvm::APSInt &To) { 174 return getRangeSet(Range(From, To)); 175 } getRangeSet(const llvm::APSInt & Origin)176 RangeSet getRangeSet(const llvm::APSInt &Origin) { 177 return getRangeSet(Origin, Origin); 178 } 179 /// @} 180 181 /// Intersect the given range sets. 182 /// 183 /// Complexity: O(N + M) 184 /// where N = size(LHS), M = size(RHS) 185 RangeSet intersect(RangeSet LHS, RangeSet RHS); 186 /// Intersect the given set with the closed range [Lower, Upper]. 187 /// 188 /// Unlike the Range type, this range uses modular arithmetic, corresponding 189 /// to the common treatment of C integer overflow. Thus, if the Lower bound 190 /// is greater than the Upper bound, the range is taken to wrap around. This 191 /// is equivalent to taking the intersection with the two ranges [Min, 192 /// Upper] and [Lower, Max], or, alternatively, /removing/ all integers 193 /// between Upper and Lower. 194 /// 195 /// Complexity: O(N) 196 /// where N = size(What) 197 RangeSet intersect(RangeSet What, llvm::APSInt Lower, llvm::APSInt Upper); 198 /// Intersect the given range with the given point. 199 /// 200 /// The result can be either an empty set or a set containing the given 201 /// point depending on whether the point is in the range set. 202 /// 203 /// Complexity: O(logN) 204 /// where N = size(What) 205 RangeSet intersect(RangeSet What, llvm::APSInt Point); 206 207 /// Delete the given point from the range set. 208 /// 209 /// Complexity: O(N) 210 /// where N = size(From) 211 RangeSet deletePoint(RangeSet From, const llvm::APSInt &Point); 212 /// Negate the given range set. 213 /// 214 /// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus 215 /// operation under the values of the type. 216 /// 217 /// We also handle MIN because applying unary minus to MIN does not change 218 /// it. 219 /// Example 1: 220 /// char x = -128; // -128 is a MIN value in a range of 'char' 221 /// char y = -x; // y: -128 222 /// 223 /// Example 2: 224 /// unsigned char x = 0; // 0 is a MIN value in a range of 'unsigned char' 225 /// unsigned char y = -x; // y: 0 226 /// 227 /// And it makes us to separate the range 228 /// like [MIN, N] to [MIN, MIN] U [-N, MAX]. 229 /// For instance, whole range is {-128..127} and subrange is [-128,-126], 230 /// thus [-128,-127,-126,...] negates to [-128,...,126,127]. 231 /// 232 /// Negate restores disrupted ranges on bounds, 233 /// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B]. 234 /// 235 /// Negate is a self-inverse function, i.e. negate(negate(R)) == R. 236 /// 237 /// Complexity: O(N) 238 /// where N = size(What) 239 RangeSet negate(RangeSet What); 240 /// Performs promotions, truncations and conversions of the given set. 241 /// 242 /// This function is optimized for each of the six cast cases: 243 /// - noop 244 /// - conversion 245 /// - truncation 246 /// - truncation-conversion 247 /// - promotion 248 /// - promotion-conversion 249 /// 250 /// NOTE: This function is NOT self-inverse for truncations, because of 251 /// the higher bits loss: 252 /// - castTo(castTo(OrigRangeOfInt, char), int) != OrigRangeOfInt. 253 /// - castTo(castTo(OrigRangeOfChar, int), char) == OrigRangeOfChar. 254 /// But it is self-inverse for all the rest casts. 255 /// 256 /// Complexity: 257 /// - Noop O(1); 258 /// - Truncation O(N^2); 259 /// - Another case O(N); 260 /// where N = size(What) 261 RangeSet castTo(RangeSet What, APSIntType Ty); 262 RangeSet castTo(RangeSet What, QualType T); 263 264 /// Return associated value factory. getValueFactory()265 BasicValueFactory &getValueFactory() const { return ValueFactory; } 266 267 private: 268 /// Return a persistent version of the given container. 269 RangeSet makePersistent(ContainerType &&From); 270 /// Construct a new persistent version of the given container. 271 ContainerType *construct(ContainerType &&From); 272 273 RangeSet intersect(const ContainerType &LHS, const ContainerType &RHS); 274 /// NOTE: This function relies on the fact that all values in the 275 /// containers are persistent (created via BasicValueFactory::getValue). 276 ContainerType unite(const ContainerType &LHS, const ContainerType &RHS); 277 278 /// This is a helper function for `castTo` method. Implies not to be used 279 /// separately. 280 /// Performs a truncation case of a cast operation. 281 ContainerType truncateTo(RangeSet What, APSIntType Ty); 282 283 /// This is a helper function for `castTo` method. Implies not to be used 284 /// separately. 285 /// Performs a conversion case and a promotion-conversion case for signeds 286 /// of a cast operation. 287 ContainerType convertTo(RangeSet What, APSIntType Ty); 288 289 /// This is a helper function for `castTo` method. Implies not to be used 290 /// separately. 291 /// Performs a promotion for unsigneds only. 292 ContainerType promoteTo(RangeSet What, APSIntType Ty); 293 294 // Many operations include producing new APSInt values and that's why 295 // we need this factory. 296 BasicValueFactory &ValueFactory; 297 // Allocator for all the created containers. 298 // Containers might own their own memory and that's why it is specific 299 // for the type, so it calls container destructors upon deletion. 300 llvm::SpecificBumpPtrAllocator<ContainerType> Arena; 301 // Usually we deal with the same ranges and range sets over and over. 302 // Here we track all created containers and try not to repeat ourselves. 303 llvm::FoldingSet<ContainerType> Cache; 304 static ContainerType EmptySet; 305 }; 306 307 RangeSet(const RangeSet &) = default; 308 RangeSet &operator=(const RangeSet &) = default; 309 RangeSet(RangeSet &&) = default; 310 RangeSet &operator=(RangeSet &&) = default; 311 ~RangeSet() = default; 312 313 /// Construct a new RangeSet representing '{ [From, To] }'. RangeSet(Factory & F,const llvm::APSInt & From,const llvm::APSInt & To)314 RangeSet(Factory &F, const llvm::APSInt &From, const llvm::APSInt &To) 315 : RangeSet(F.getRangeSet(From, To)) {} 316 317 /// Construct a new RangeSet representing the given point as a range. RangeSet(Factory & F,const llvm::APSInt & Point)318 RangeSet(Factory &F, const llvm::APSInt &Point) 319 : RangeSet(F.getRangeSet(Point)) {} 320 Profile(llvm::FoldingSetNodeID & ID,const RangeSet & RS)321 static void Profile(llvm::FoldingSetNodeID &ID, const RangeSet &RS) { 322 ID.AddPointer(RS.Impl); 323 } 324 325 /// Profile - Generates a hash profile of this RangeSet for use 326 /// by FoldingSet. Profile(llvm::FoldingSetNodeID & ID)327 void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, *this); } 328 329 /// getConcreteValue - If a symbol is constrained to equal a specific integer 330 /// constant then this method returns that value. Otherwise, it returns 331 /// NULL. getConcreteValue()332 const llvm::APSInt *getConcreteValue() const { 333 return Impl->size() == 1 ? begin()->getConcreteValue() : nullptr; 334 } 335 336 /// Get the minimal value covered by the ranges in the set. 337 /// 338 /// Complexity: O(1) 339 const llvm::APSInt &getMinValue() const; 340 /// Get the maximal value covered by the ranges in the set. 341 /// 342 /// Complexity: O(1) 343 const llvm::APSInt &getMaxValue() const; 344 345 bool isUnsigned() const; 346 uint32_t getBitWidth() const; 347 APSIntType getAPSIntType() const; 348 349 /// Test whether the given point is contained by any of the ranges. 350 /// 351 /// Complexity: O(logN) 352 /// where N = size(this) contains(llvm::APSInt Point)353 bool contains(llvm::APSInt Point) const { return containsImpl(Point); } 354 containsZero()355 bool containsZero() const { 356 APSIntType T{getMinValue()}; 357 return contains(T.getZeroValue()); 358 } 359 360 /// Test if the range is the [0,0] range. 361 /// 362 /// Complexity: O(1) encodesFalseRange()363 bool encodesFalseRange() const { 364 const llvm::APSInt *Constant = getConcreteValue(); 365 return Constant && Constant->isZero(); 366 } 367 368 /// Test if the range doesn't contain zero. 369 /// 370 /// Complexity: O(logN) 371 /// where N = size(this) encodesTrueRange()372 bool encodesTrueRange() const { return !containsZero(); } 373 374 void dump(raw_ostream &OS) const; 375 void dump() const; 376 377 bool operator==(const RangeSet &Other) const { return *Impl == *Other.Impl; } 378 bool operator!=(const RangeSet &Other) const { return !(*this == Other); } 379 380 private: RangeSet(ContainerType * RawContainer)381 /* implicit */ RangeSet(ContainerType *RawContainer) : Impl(RawContainer) {} RangeSet(UnderlyingType Ptr)382 /* implicit */ RangeSet(UnderlyingType Ptr) : Impl(Ptr) {} 383 384 /// Pin given points to the type represented by the current range set. 385 /// 386 /// This makes parameter points to be in-out parameters. 387 /// In order to maintain consistent types across all of the ranges in the set 388 /// and to keep all the operations to compare ONLY points of the same type, we 389 /// need to pin every point before any operation. 390 /// 391 /// @Returns true if the given points can be converted to the target type 392 /// without changing the values (i.e. trivially) and false otherwise. 393 /// @{ 394 bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const; 395 bool pin(llvm::APSInt &Point) const; 396 /// @} 397 398 // This version of this function modifies its arguments (pins it). 399 bool containsImpl(llvm::APSInt &Point) const; 400 401 friend class Factory; 402 }; 403 404 using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet>; 405 ConstraintMap getConstraintMap(ProgramStateRef State); 406 407 class RangedConstraintManager : public SimpleConstraintManager { 408 public: RangedConstraintManager(ExprEngine * EE,SValBuilder & SB)409 RangedConstraintManager(ExprEngine *EE, SValBuilder &SB) 410 : SimpleConstraintManager(EE, SB) {} 411 412 ~RangedConstraintManager() override; 413 414 //===------------------------------------------------------------------===// 415 // Implementation for interface from SimpleConstraintManager. 416 //===------------------------------------------------------------------===// 417 418 ProgramStateRef assumeSym(ProgramStateRef State, SymbolRef Sym, 419 bool Assumption) override; 420 421 ProgramStateRef assumeSymInclusiveRange(ProgramStateRef State, SymbolRef Sym, 422 const llvm::APSInt &From, 423 const llvm::APSInt &To, 424 bool InRange) override; 425 426 ProgramStateRef assumeSymUnsupported(ProgramStateRef State, SymbolRef Sym, 427 bool Assumption) override; 428 429 protected: 430 /// Assume a constraint between a symbolic expression and a concrete integer. 431 virtual ProgramStateRef assumeSymRel(ProgramStateRef State, SymbolRef Sym, 432 BinaryOperator::Opcode op, 433 const llvm::APSInt &Int); 434 435 //===------------------------------------------------------------------===// 436 // Interface that subclasses must implement. 437 //===------------------------------------------------------------------===// 438 439 // Each of these is of the form "$Sym+Adj <> V", where "<>" is the comparison 440 // operation for the method being invoked. 441 442 virtual ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym, 443 const llvm::APSInt &V, 444 const llvm::APSInt &Adjustment) = 0; 445 446 virtual ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym, 447 const llvm::APSInt &V, 448 const llvm::APSInt &Adjustment) = 0; 449 450 virtual ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym, 451 const llvm::APSInt &V, 452 const llvm::APSInt &Adjustment) = 0; 453 454 virtual ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym, 455 const llvm::APSInt &V, 456 const llvm::APSInt &Adjustment) = 0; 457 458 virtual ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym, 459 const llvm::APSInt &V, 460 const llvm::APSInt &Adjustment) = 0; 461 462 virtual ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym, 463 const llvm::APSInt &V, 464 const llvm::APSInt &Adjustment) = 0; 465 466 virtual ProgramStateRef assumeSymWithinInclusiveRange( 467 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, 468 const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0; 469 470 virtual ProgramStateRef assumeSymOutsideInclusiveRange( 471 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, 472 const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0; 473 474 //===------------------------------------------------------------------===// 475 // Internal implementation. 476 //===------------------------------------------------------------------===// 477 private: 478 static void computeAdjustment(SymbolRef &Sym, llvm::APSInt &Adjustment); 479 }; 480 481 /// Try to simplify a given symbolic expression based on the constraints in 482 /// State. This is needed because the Environment bindings are not getting 483 /// updated when a new constraint is added to the State. If the symbol is 484 /// simplified to a non-symbol (e.g. to a constant) then the original symbol 485 /// is returned. We use this function in the family of assumeSymNE/EQ/LT/../GE 486 /// functions where we can work only with symbols. Use the other function 487 /// (simplifyToSVal) if you are interested in a simplification that may yield 488 /// a concrete constant value. 489 SymbolRef simplify(ProgramStateRef State, SymbolRef Sym); 490 491 /// Try to simplify a given symbolic expression's associated `SVal` based on the 492 /// constraints in State. This is very similar to `simplify`, but this function 493 /// always returns the simplified SVal. The simplified SVal might be a single 494 /// constant (i.e. `ConcreteInt`). 495 SVal simplifyToSVal(ProgramStateRef State, SymbolRef Sym); 496 497 } // namespace ento 498 } // namespace clang 499 500 REGISTER_FACTORY_WITH_PROGRAMSTATE(ConstraintMap) 501 502 #endif 503