//== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines ArrayBoundCheckerV2, which is a path-sensitive checker
// that looks for out-of-bounds array element accesses.
//
//===----------------------------------------------------------------------===//

#include "clang/AST/CharUnits.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;
using namespace ento;
using namespace taint;

namespace {
class ArrayBoundCheckerV2 : public Checker<check::Location> {
  mutable std::unique_ptr<BuiltinBug> BT;

  enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Tainted };

  void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind,
                 std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;

public:
  void checkLocation(SVal l, bool isLoad, const Stmt *S,
                     CheckerContext &C) const;
};

// FIXME: Eventually replace RegionRawOffset with this class.
class RegionRawOffsetV2 {
private:
  const SubRegion *baseRegion;
  SVal byteOffset;

  RegionRawOffsetV2() : baseRegion(nullptr), byteOffset(UnknownVal()) {}

public:
  RegionRawOffsetV2(const SubRegion *base, SVal offset)
      : baseRegion(base), byteOffset(offset) {}

  NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); }
  const SubRegion *getRegion() const { return baseRegion; }

  static RegionRawOffsetV2 computeOffset(ProgramStateRef state,
                                         SValBuilder &svalBuilder,
                                         SVal location);

  void dump() const;
  void dumpToStream(raw_ostream &os) const;
};
} // end anonymous namespace

static SVal computeExtentBegin(SValBuilder &svalBuilder,
                               const MemRegion *region) {
  const MemSpaceRegion *SR = region->getMemorySpace();
  if (SR->getKind() == MemRegion::UnknownSpaceRegionKind)
    return UnknownVal();
  else
    return svalBuilder.makeZeroArrayIndex();
}

// TODO: Once the constraint manager is smart enough to handle non-simplified
// symbolic expressions, remove this function. Note that this cannot be used in
// the constraint manager as-is, since it does not handle overflows. It is
// safe to assume, however, that memory offsets will not overflow.
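// For illustration (hypothetical values): a symbolic offset of the form
// ($i * 4) + 8 checked against a concrete extent of 20 is rewritten by the
// helper below to $i checked against (20 - 8) / 4 == 3, a form the
// constraint manager can reason about directly.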
static std::pair<NonLoc, nonloc::ConcreteInt>
getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
                     SValBuilder &svalBuilder) {
  Optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
  if (SymVal && SymVal->isExpression()) {
    if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
      llvm::APSInt constant =
          APSIntType(extent.getValue()).convert(SIE->getRHS());
      switch (SIE->getOpcode()) {
      case BO_Mul:
        // The constant should never be 0 here, since it is the result of
        // scaling based on the size of a type, which is never 0.
        if ((extent.getValue() % constant) != 0)
          return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
        else
          return getSimplifiedOffsets(
              nonloc::SymbolVal(SIE->getLHS()),
              svalBuilder.makeIntVal(extent.getValue() / constant),
              svalBuilder);
      case BO_Add:
        return getSimplifiedOffsets(
            nonloc::SymbolVal(SIE->getLHS()),
            svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
      default:
        break;
      }
    }
  }

  return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
}

void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
                                        const Stmt *LoadS,
                                        CheckerContext &checkerContext) const {

  // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
  // some new logic here that reasons directly about memory region extents.
  // Once that logic is more mature, we can bring it back to assumeInBound()
  // for all clients to use.
  //
  // The algorithm we are using here for bounds checking is to see if the
  // memory access is within the extent of the base region. Since we
  // have some flexibility in defining the base region, we can achieve
  // various levels of conservatism in our buffer overflow checking.
  ProgramStateRef state = checkerContext.getState();

  SValBuilder &svalBuilder = checkerContext.getSValBuilder();
  const RegionRawOffsetV2 &rawOffset =
      RegionRawOffsetV2::computeOffset(state, svalBuilder, location);

  if (!rawOffset.getRegion())
    return;

  NonLoc rawOffsetVal = rawOffset.getByteOffset();

  // CHECK LOWER BOUND: Is byteOffset < extent begin? If so, we are doing a
  // load/store before the first valid offset in the memory region.

  SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion());

  if (Optional<NonLoc> NV = extentBegin.getAs<NonLoc>()) {
    if (auto ConcreteNV = NV->getAs<nonloc::ConcreteInt>()) {
      std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
          getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteNV,
                               svalBuilder);
      rawOffsetVal = simplifiedOffsets.first;
      *NV = simplifiedOffsets.second;
    }

    SVal lowerBound = svalBuilder.evalBinOpNN(state, BO_LT, rawOffsetVal, *NV,
                                              svalBuilder.getConditionType());

    Optional<NonLoc> lowerBoundToCheck = lowerBound.getAs<NonLoc>();
    if (!lowerBoundToCheck)
      return;

    ProgramStateRef state_precedesLowerBound, state_withinLowerBound;
    std::tie(state_precedesLowerBound, state_withinLowerBound) =
        state->assume(*lowerBoundToCheck);

    // Are we constrained enough to definitely precede the lower bound?
    if (state_precedesLowerBound && !state_withinLowerBound) {
      reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
      return;
    }

    // Otherwise, assume the constraint of the lower bound.
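    // (The assume() call above yields at least one feasible state, and the
    // case where only the "precedes" state is feasible has already returned,
    // so the "within" state is expected to be non-null here.)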
    assert(state_withinLowerBound);
    state = state_withinLowerBound;
  }

  do {
    // CHECK UPPER BOUND: Is byteOffset >= size(baseRegion)? If so, we are
    // doing a load/store after the last valid offset.
    const MemRegion *MR = rawOffset.getRegion();
    DefinedOrUnknownSVal Size = getDynamicExtent(state, MR, svalBuilder);
    if (!isa<NonLoc>(Size))
      break;

    if (auto ConcreteSize = Size.getAs<nonloc::ConcreteInt>()) {
      std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
          getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteSize,
                               svalBuilder);
      rawOffsetVal = simplifiedOffsets.first;
      Size = simplifiedOffsets.second;
    }

    SVal upperbound = svalBuilder.evalBinOpNN(state, BO_GE, rawOffsetVal,
                                              Size.castAs<NonLoc>(),
                                              svalBuilder.getConditionType());

    Optional<NonLoc> upperboundToCheck = upperbound.getAs<NonLoc>();
    if (!upperboundToCheck)
      break;

    ProgramStateRef state_exceedsUpperBound, state_withinUpperBound;
    std::tie(state_exceedsUpperBound, state_withinUpperBound) =
        state->assume(*upperboundToCheck);

    // If we are under-constrained and the index variables are tainted, report.
    if (state_exceedsUpperBound && state_withinUpperBound) {
      SVal ByteOffset = rawOffset.getByteOffset();
      if (isTainted(state, ByteOffset)) {
        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted,
                  std::make_unique<TaintBugVisitor>(ByteOffset));
        return;
      }
    } else if (state_exceedsUpperBound) {
      // If we are constrained enough to definitely exceed the upper bound,
      // report.
      assert(!state_withinUpperBound);
      reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
      return;
    }

    assert(state_withinUpperBound);
    state = state_withinUpperBound;
  } while (false);

  checkerContext.addTransition(state);
}

void ArrayBoundCheckerV2::reportOOB(
    CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind,
    std::unique_ptr<BugReporterVisitor> Visitor) const {

  ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
  if (!errorNode)
    return;

  if (!BT)
    BT.reset(new BuiltinBug(this, "Out-of-bound access"));

  // FIXME: These diagnostics are preliminary. We should get far better
  // diagnostics for explaining buffer overruns.

  SmallString<256> buf;
  llvm::raw_svector_ostream os(buf);
  os << "Out of bound memory access ";
  switch (kind) {
  case OOB_Precedes:
    os << "(accessed memory precedes memory block)";
    break;
  case OOB_Exceeds:
    os << "(access exceeds upper limit of memory block)";
    break;
  case OOB_Tainted:
    os << "(index is tainted)";
    break;
  }

  auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
  BR->addVisitor(std::move(Visitor));
  checkerContext.emitReport(std::move(BR));
}

#ifndef NDEBUG
LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
  dumpToStream(llvm::errs());
}

void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
  os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
}
#endif

// Lazily computes a value to be used by 'computeOffset'. If 'val' is
// undefined, substitute '0'; otherwise return 'val'.
static inline SVal getValue(SVal val, SValBuilder &svalBuilder) {
  return val.isUndef() ? svalBuilder.makeZeroArrayIndex() : val;
}

// Scale a base value by a scaling factor, and return the scaled
// value as an SVal. Used by 'computeOffset'.
static inline SVal scaleValue(ProgramStateRef state,
                              NonLoc baseVal, CharUnits scaling,
                              SValBuilder &sb) {
  return sb.evalBinOpNN(state, BO_Mul, baseVal,
                        sb.makeArrayIndex(scaling.getQuantity()),
                        sb.getArrayIndexType());
}

// Add an SVal to another, treating unknown and undefined values as
// summing to UnknownVal. Used by 'computeOffset'.
static SVal addValue(ProgramStateRef state, SVal x, SVal y,
                     SValBuilder &svalBuilder) {
  // We treat UnknownVals and UndefinedVals the same here because we
  // only care about computing offsets.
  if (x.isUnknownOrUndef() || y.isUnknownOrUndef())
    return UnknownVal();

  return svalBuilder.evalBinOpNN(state, BO_Add, x.castAs<NonLoc>(),
                                 y.castAs<NonLoc>(),
                                 svalBuilder.getArrayIndexType());
}
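// For illustration (hypothetical declaration): with `int buf[5][10];`, an
// access to `buf[i][3]` is modeled as nested ElementRegions. The walk below
// peels them one by one, accumulating 3 * sizeof(int) for the inner index and
// i * sizeof(int[10]) for the outer one, and returns the VarRegion for `buf`
// as the base region together with a byte offset of i * 40 + 12 (assuming a
// 4-byte int).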
/// Compute a raw byte offset from a base region. Used for array bounds
/// checking.
RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state,
                                                   SValBuilder &svalBuilder,
                                                   SVal location) {
  const MemRegion *region = location.getAsRegion();
  SVal offset = UndefinedVal();

  while (region) {
    switch (region->getKind()) {
      default: {
        if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) {
          offset = getValue(offset, svalBuilder);
          if (!offset.isUnknownOrUndef())
            return RegionRawOffsetV2(subReg, offset);
        }
        return RegionRawOffsetV2();
      }
      case MemRegion::ElementRegionKind: {
        const ElementRegion *elemReg = cast<ElementRegion>(region);
        SVal index = elemReg->getIndex();
        if (!isa<NonLoc>(index))
          return RegionRawOffsetV2();
        QualType elemType = elemReg->getElementType();
        // If the element is an incomplete type, go no further.
        ASTContext &astContext = svalBuilder.getContext();
        if (elemType->isIncompleteType())
          return RegionRawOffsetV2();

        // Update the offset.
        offset = addValue(state,
                          getValue(offset, svalBuilder),
                          scaleValue(state,
                                     index.castAs<NonLoc>(),
                                     astContext.getTypeSizeInChars(elemType),
                                     svalBuilder),
                          svalBuilder);

        if (offset.isUnknownOrUndef())
          return RegionRawOffsetV2();

        region = elemReg->getSuperRegion();
        continue;
      }
    }
  }
  return RegionRawOffsetV2();
}

void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
  mgr.registerChecker<ArrayBoundCheckerV2>();
}

bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
  return true;
}