1 //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines ArrayBoundCheckerV2, which is a path-sensitive check 10 // which looks for an out-of-bound array element access. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/AST/CharUnits.h" 15 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 16 #include "clang/StaticAnalyzer/Checkers/Taint.h" 17 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 18 #include "clang/StaticAnalyzer/Core/Checker.h" 19 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 20 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <optional> 27 28 using namespace clang; 29 using namespace ento; 30 using namespace taint; 31 32 namespace { 33 class ArrayBoundCheckerV2 : 34 public Checker<check::Location> { 35 mutable std::unique_ptr<BuiltinBug> BT; 36 37 enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted }; 38 39 void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind, 40 std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const; 41 42 public: 43 void checkLocation(SVal l, bool isLoad, const Stmt*S, 44 CheckerContext &C) const; 45 }; 46 47 // FIXME: Eventually replace RegionRawOffset with this class. 48 class RegionRawOffsetV2 { 49 private: 50 const SubRegion *baseRegion; 51 SVal byteOffset; 52 53 RegionRawOffsetV2() 54 : baseRegion(nullptr), byteOffset(UnknownVal()) {} 55 56 public: 57 RegionRawOffsetV2(const SubRegion* base, SVal offset) 58 : baseRegion(base), byteOffset(offset) {} 59 60 NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); } 61 const SubRegion *getRegion() const { return baseRegion; } 62 63 static RegionRawOffsetV2 computeOffset(ProgramStateRef state, 64 SValBuilder &svalBuilder, 65 SVal location); 66 67 void dump() const; 68 void dumpToStream(raw_ostream &os) const; 69 }; 70 } 71 72 static SVal computeExtentBegin(SValBuilder &svalBuilder, 73 const MemRegion *region) { 74 const MemSpaceRegion *SR = region->getMemorySpace(); 75 if (SR->getKind() == MemRegion::UnknownSpaceRegionKind) 76 return UnknownVal(); 77 else 78 return svalBuilder.makeZeroArrayIndex(); 79 } 80 81 // TODO: once the constraint manager is smart enough to handle non simplified 82 // symbolic expressions remove this function. Note that this can not be used in 83 // the constraint manager as is, since this does not handle overflows. It is 84 // safe to assume, however, that memory offsets will not overflow. 85 static std::pair<NonLoc, nonloc::ConcreteInt> 86 getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, 87 SValBuilder &svalBuilder) { 88 std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>(); 89 if (SymVal && SymVal->isExpression()) { 90 if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) { 91 llvm::APSInt constant = 92 APSIntType(extent.getValue()).convert(SIE->getRHS()); 93 switch (SIE->getOpcode()) { 94 case BO_Mul: 95 // The constant should never be 0 here, since it the result of scaling 96 // based on the size of a type which is never 0. 97 if ((extent.getValue() % constant) != 0) 98 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 99 else 100 return getSimplifiedOffsets( 101 nonloc::SymbolVal(SIE->getLHS()), 102 svalBuilder.makeIntVal(extent.getValue() / constant), 103 svalBuilder); 104 case BO_Add: 105 return getSimplifiedOffsets( 106 nonloc::SymbolVal(SIE->getLHS()), 107 svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder); 108 default: 109 break; 110 } 111 } 112 } 113 114 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 115 } 116 117 void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, 118 const Stmt* LoadS, 119 CheckerContext &checkerContext) const { 120 121 // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping 122 // some new logic here that reasons directly about memory region extents. 123 // Once that logic is more mature, we can bring it back to assumeInBound() 124 // for all clients to use. 125 // 126 // The algorithm we are using here for bounds checking is to see if the 127 // memory access is within the extent of the base region. Since we 128 // have some flexibility in defining the base region, we can achieve 129 // various levels of conservatism in our buffer overflow checking. 130 ProgramStateRef state = checkerContext.getState(); 131 132 SValBuilder &svalBuilder = checkerContext.getSValBuilder(); 133 const RegionRawOffsetV2 &rawOffset = 134 RegionRawOffsetV2::computeOffset(state, svalBuilder, location); 135 136 if (!rawOffset.getRegion()) 137 return; 138 139 NonLoc rawOffsetVal = rawOffset.getByteOffset(); 140 141 // CHECK LOWER BOUND: Is byteOffset < extent begin? 142 // If so, we are doing a load/store 143 // before the first valid offset in the memory region. 144 145 SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion()); 146 147 if (std::optional<NonLoc> NV = extentBegin.getAs<NonLoc>()) { 148 if (auto ConcreteNV = NV->getAs<nonloc::ConcreteInt>()) { 149 std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets = 150 getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteNV, 151 svalBuilder); 152 rawOffsetVal = simplifiedOffsets.first; 153 *NV = simplifiedOffsets.second; 154 } 155 156 SVal lowerBound = svalBuilder.evalBinOpNN(state, BO_LT, rawOffsetVal, *NV, 157 svalBuilder.getConditionType()); 158 159 std::optional<NonLoc> lowerBoundToCheck = lowerBound.getAs<NonLoc>(); 160 if (!lowerBoundToCheck) 161 return; 162 163 ProgramStateRef state_precedesLowerBound, state_withinLowerBound; 164 std::tie(state_precedesLowerBound, state_withinLowerBound) = 165 state->assume(*lowerBoundToCheck); 166 167 // Are we constrained enough to definitely precede the lower bound? 168 if (state_precedesLowerBound && !state_withinLowerBound) { 169 reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes); 170 return; 171 } 172 173 // Otherwise, assume the constraint of the lower bound. 174 assert(state_withinLowerBound); 175 state = state_withinLowerBound; 176 } 177 178 do { 179 // CHECK UPPER BOUND: Is byteOffset >= size(baseRegion)? If so, 180 // we are doing a load/store after the last valid offset. 181 const MemRegion *MR = rawOffset.getRegion(); 182 DefinedOrUnknownSVal Size = getDynamicExtent(state, MR, svalBuilder); 183 if (!isa<NonLoc>(Size)) 184 break; 185 186 if (auto ConcreteSize = Size.getAs<nonloc::ConcreteInt>()) { 187 std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets = 188 getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteSize, 189 svalBuilder); 190 rawOffsetVal = simplifiedOffsets.first; 191 Size = simplifiedOffsets.second; 192 } 193 194 SVal upperbound = svalBuilder.evalBinOpNN(state, BO_GE, rawOffsetVal, 195 Size.castAs<NonLoc>(), 196 svalBuilder.getConditionType()); 197 198 std::optional<NonLoc> upperboundToCheck = upperbound.getAs<NonLoc>(); 199 if (!upperboundToCheck) 200 break; 201 202 ProgramStateRef state_exceedsUpperBound, state_withinUpperBound; 203 std::tie(state_exceedsUpperBound, state_withinUpperBound) = 204 state->assume(*upperboundToCheck); 205 206 // If we are under constrained and the index variables are tainted, report. 207 if (state_exceedsUpperBound && state_withinUpperBound) { 208 SVal ByteOffset = rawOffset.getByteOffset(); 209 if (isTainted(state, ByteOffset)) { 210 reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, 211 std::make_unique<TaintBugVisitor>(ByteOffset)); 212 return; 213 } 214 } else if (state_exceedsUpperBound) { 215 // If we are constrained enough to definitely exceed the upper bound, 216 // report. 217 assert(!state_withinUpperBound); 218 reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes); 219 return; 220 } 221 222 assert(state_withinUpperBound); 223 state = state_withinUpperBound; 224 } 225 while (false); 226 227 checkerContext.addTransition(state); 228 } 229 230 void ArrayBoundCheckerV2::reportOOB( 231 CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind, 232 std::unique_ptr<BugReporterVisitor> Visitor) const { 233 234 ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState); 235 if (!errorNode) 236 return; 237 238 if (!BT) 239 BT.reset(new BuiltinBug(this, "Out-of-bound access")); 240 241 // FIXME: This diagnostics are preliminary. We should get far better 242 // diagnostics for explaining buffer overruns. 243 244 SmallString<256> buf; 245 llvm::raw_svector_ostream os(buf); 246 os << "Out of bound memory access "; 247 switch (kind) { 248 case OOB_Precedes: 249 os << "(accessed memory precedes memory block)"; 250 break; 251 case OOB_Excedes: 252 os << "(access exceeds upper limit of memory block)"; 253 break; 254 case OOB_Tainted: 255 os << "(index is tainted)"; 256 break; 257 } 258 259 auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode); 260 BR->addVisitor(std::move(Visitor)); 261 checkerContext.emitReport(std::move(BR)); 262 } 263 264 #ifndef NDEBUG 265 LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const { 266 dumpToStream(llvm::errs()); 267 } 268 269 void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const { 270 os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}'; 271 } 272 #endif 273 274 // Lazily computes a value to be used by 'computeOffset'. If 'val' 275 // is unknown or undefined, we lazily substitute '0'. Otherwise, 276 // return 'val'. 277 static inline SVal getValue(SVal val, SValBuilder &svalBuilder) { 278 return val.isUndef() ? svalBuilder.makeZeroArrayIndex() : val; 279 } 280 281 // Scale a base value by a scaling factor, and return the scaled 282 // value as an SVal. Used by 'computeOffset'. 283 static inline SVal scaleValue(ProgramStateRef state, 284 NonLoc baseVal, CharUnits scaling, 285 SValBuilder &sb) { 286 return sb.evalBinOpNN(state, BO_Mul, baseVal, 287 sb.makeArrayIndex(scaling.getQuantity()), 288 sb.getArrayIndexType()); 289 } 290 291 // Add an SVal to another, treating unknown and undefined values as 292 // summing to UnknownVal. Used by 'computeOffset'. 293 static SVal addValue(ProgramStateRef state, SVal x, SVal y, 294 SValBuilder &svalBuilder) { 295 // We treat UnknownVals and UndefinedVals the same here because we 296 // only care about computing offsets. 297 if (x.isUnknownOrUndef() || y.isUnknownOrUndef()) 298 return UnknownVal(); 299 300 return svalBuilder.evalBinOpNN(state, BO_Add, x.castAs<NonLoc>(), 301 y.castAs<NonLoc>(), 302 svalBuilder.getArrayIndexType()); 303 } 304 305 /// Compute a raw byte offset from a base region. Used for array bounds 306 /// checking. 307 RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state, 308 SValBuilder &svalBuilder, 309 SVal location) 310 { 311 const MemRegion *region = location.getAsRegion(); 312 SVal offset = UndefinedVal(); 313 314 while (region) { 315 switch (region->getKind()) { 316 default: { 317 if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) { 318 offset = getValue(offset, svalBuilder); 319 if (!offset.isUnknownOrUndef()) 320 return RegionRawOffsetV2(subReg, offset); 321 } 322 return RegionRawOffsetV2(); 323 } 324 case MemRegion::ElementRegionKind: { 325 const ElementRegion *elemReg = cast<ElementRegion>(region); 326 SVal index = elemReg->getIndex(); 327 if (!isa<NonLoc>(index)) 328 return RegionRawOffsetV2(); 329 QualType elemType = elemReg->getElementType(); 330 // If the element is an incomplete type, go no further. 331 ASTContext &astContext = svalBuilder.getContext(); 332 if (elemType->isIncompleteType()) 333 return RegionRawOffsetV2(); 334 335 // Update the offset. 336 offset = addValue(state, 337 getValue(offset, svalBuilder), 338 scaleValue(state, 339 index.castAs<NonLoc>(), 340 astContext.getTypeSizeInChars(elemType), 341 svalBuilder), 342 svalBuilder); 343 344 if (offset.isUnknownOrUndef()) 345 return RegionRawOffsetV2(); 346 347 region = elemReg->getSuperRegion(); 348 continue; 349 } 350 } 351 } 352 return RegionRawOffsetV2(); 353 } 354 355 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { 356 mgr.registerChecker<ArrayBoundCheckerV2>(); 357 } 358 359 bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) { 360 return true; 361 } 362