1 //=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a check that marks a raw pointer to a C++ container's 10 // inner buffer released when the object is destroyed. This information can 11 // be used by MallocChecker to detect use-after-free problems. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AllocationState.h" 16 #include "InterCheckerAPI.h" 17 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 19 #include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" 20 #include "clang/StaticAnalyzer/Core/Checker.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24 25 using namespace clang; 26 using namespace ento; 27 28 // Associate container objects with a set of raw pointer symbols. 29 REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef) 30 REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet) 31 32 33 namespace { 34 35 class InnerPointerChecker 36 : public Checker<check::DeadSymbols, check::PostCall> { 37 38 CallDescriptionSet InvalidatingMemberFunctions{ 39 CallDescription(CDM::CXXMethod, {"std", "basic_string", "append"}), 40 CallDescription(CDM::CXXMethod, {"std", "basic_string", "assign"}), 41 CallDescription(CDM::CXXMethod, {"std", "basic_string", "clear"}), 42 CallDescription(CDM::CXXMethod, {"std", "basic_string", "erase"}), 43 CallDescription(CDM::CXXMethod, {"std", "basic_string", "insert"}), 44 CallDescription(CDM::CXXMethod, {"std", "basic_string", "pop_back"}), 45 CallDescription(CDM::CXXMethod, {"std", "basic_string", "push_back"}), 46 CallDescription(CDM::CXXMethod, {"std", "basic_string", "replace"}), 47 CallDescription(CDM::CXXMethod, {"std", "basic_string", "reserve"}), 48 CallDescription(CDM::CXXMethod, {"std", "basic_string", "resize"}), 49 CallDescription(CDM::CXXMethod, {"std", "basic_string", "shrink_to_fit"}), 50 CallDescription(CDM::CXXMethod, {"std", "basic_string", "swap"})}; 51 52 CallDescriptionSet AddressofFunctions{ 53 CallDescription(CDM::SimpleFunc, {"std", "addressof"}), 54 CallDescription(CDM::SimpleFunc, {"std", "__addressof"})}; 55 56 CallDescriptionSet InnerPointerAccessFunctions{ 57 CallDescription(CDM::CXXMethod, {"std", "basic_string", "c_str"}), 58 CallDescription(CDM::SimpleFunc, {"std", "data"}, 1), 59 CallDescription(CDM::CXXMethod, {"std", "basic_string", "data"})}; 60 61 public: 62 class InnerPointerBRVisitor : public BugReporterVisitor { 63 SymbolRef PtrToBuf; 64 65 public: 66 InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {} 67 68 static void *getTag() { 69 static int Tag = 0; 70 return &Tag; 71 } 72 73 void Profile(llvm::FoldingSetNodeID &ID) const override { 74 ID.AddPointer(getTag()); 75 } 76 77 PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, 78 BugReporterContext &BRC, 79 PathSensitiveBugReport &BR) override; 80 81 // FIXME: Scan the map once in the visitor's constructor and do a direct 82 // lookup by region. 83 bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) { 84 RawPtrMapTy Map = State->get<RawPtrMap>(); 85 for (const auto &Entry : Map) { 86 if (Entry.second.contains(Sym)) 87 return true; 88 } 89 return false; 90 } 91 }; 92 93 /// Check whether the called member function potentially invalidates 94 /// pointers referring to the container object's inner buffer. 95 bool isInvalidatingMemberFunction(const CallEvent &Call) const; 96 97 /// Mark pointer symbols associated with the given memory region released 98 /// in the program state. 99 void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State, 100 const MemRegion *ObjRegion, 101 CheckerContext &C) const; 102 103 /// Standard library functions that take a non-const `basic_string` argument by 104 /// reference may invalidate its inner pointers. Check for these cases and 105 /// mark the pointers released. 106 void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State, 107 CheckerContext &C) const; 108 109 /// Record the connection between raw pointers referring to a container 110 /// object's inner buffer and the object's memory region in the program state. 111 /// Mark potentially invalidated pointers released. 112 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 113 114 /// Clean up the program state map. 115 void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; 116 }; 117 118 } // end anonymous namespace 119 120 bool InnerPointerChecker::isInvalidatingMemberFunction( 121 const CallEvent &Call) const { 122 if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(&Call)) { 123 OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator(); 124 if (Opc == OO_Equal || Opc == OO_PlusEqual) 125 return true; 126 return false; 127 } 128 return isa<CXXDestructorCall>(Call) || 129 InvalidatingMemberFunctions.contains(Call); 130 } 131 132 void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call, 133 ProgramStateRef State, 134 const MemRegion *MR, 135 CheckerContext &C) const { 136 if (const PtrSet *PS = State->get<RawPtrMap>(MR)) { 137 const Expr *Origin = Call.getOriginExpr(); 138 for (const auto Symbol : *PS) { 139 // NOTE: `Origin` may be null, and will be stored so in the symbol's 140 // `RefState` in MallocChecker's `RegionState` program state map. 141 State = allocation_state::markReleased(State, Symbol, Origin); 142 } 143 State = State->remove<RawPtrMap>(MR); 144 C.addTransition(State); 145 return; 146 } 147 } 148 149 void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call, 150 ProgramStateRef State, 151 CheckerContext &C) const { 152 if (const auto *FC = dyn_cast<AnyFunctionCall>(&Call)) { 153 const FunctionDecl *FD = FC->getDecl(); 154 if (!FD || !FD->isInStdNamespace()) 155 return; 156 157 for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) { 158 QualType ParamTy = FD->getParamDecl(I)->getType(); 159 if (!ParamTy->isReferenceType() || 160 ParamTy->getPointeeType().isConstQualified()) 161 continue; 162 163 // In case of member operator calls, `this` is counted as an 164 // argument but not as a parameter. 165 bool isaMemberOpCall = isa<CXXMemberOperatorCall>(FC); 166 unsigned ArgI = isaMemberOpCall ? I+1 : I; 167 168 SVal Arg = FC->getArgSVal(ArgI); 169 const auto *ArgRegion = 170 dyn_cast_or_null<TypedValueRegion>(Arg.getAsRegion()); 171 if (!ArgRegion) 172 continue; 173 174 // std::addressof functions accepts a non-const reference as an argument, 175 // but doesn't modify it. 176 if (AddressofFunctions.contains(Call)) 177 continue; 178 179 markPtrSymbolsReleased(Call, State, ArgRegion, C); 180 } 181 } 182 } 183 184 // [string.require] 185 // 186 // "References, pointers, and iterators referring to the elements of a 187 // basic_string sequence may be invalidated by the following uses of that 188 // basic_string object: 189 // 190 // -- As an argument to any standard library function taking a reference 191 // to non-const basic_string as an argument. For example, as an argument to 192 // non-member functions swap(), operator>>(), and getline(), or as an argument 193 // to basic_string::swap(). 194 // 195 // -- Calling non-const member functions, except operator[], at, front, back, 196 // begin, rbegin, end, and rend." 197 198 void InnerPointerChecker::checkPostCall(const CallEvent &Call, 199 CheckerContext &C) const { 200 ProgramStateRef State = C.getState(); 201 202 // TODO: Do we need these to be typed? 203 const TypedValueRegion *ObjRegion = nullptr; 204 205 if (const auto *ICall = dyn_cast<CXXInstanceCall>(&Call)) { 206 ObjRegion = dyn_cast_or_null<TypedValueRegion>( 207 ICall->getCXXThisVal().getAsRegion()); 208 209 // Check [string.require] / second point. 210 if (isInvalidatingMemberFunction(Call)) { 211 markPtrSymbolsReleased(Call, State, ObjRegion, C); 212 return; 213 } 214 } 215 216 if (InnerPointerAccessFunctions.contains(Call)) { 217 218 if (isa<SimpleFunctionCall>(Call)) { 219 // NOTE: As of now, we only have one free access function: std::data. 220 // If we add more functions like this in the list, hardcoded 221 // argument index should be changed. 222 ObjRegion = 223 dyn_cast_or_null<TypedValueRegion>(Call.getArgSVal(0).getAsRegion()); 224 } 225 226 if (!ObjRegion) 227 return; 228 229 SVal RawPtr = Call.getReturnValue(); 230 if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) { 231 // Start tracking this raw pointer by adding it to the set of symbols 232 // associated with this container object in the program state map. 233 234 PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); 235 const PtrSet *SetPtr = State->get<RawPtrMap>(ObjRegion); 236 PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet(); 237 assert(C.wasInlined || !Set.contains(Sym)); 238 Set = F.add(Set, Sym); 239 240 State = State->set<RawPtrMap>(ObjRegion, Set); 241 C.addTransition(State); 242 } 243 244 return; 245 } 246 247 // Check [string.require] / first point. 248 checkFunctionArguments(Call, State, C); 249 } 250 251 void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper, 252 CheckerContext &C) const { 253 ProgramStateRef State = C.getState(); 254 PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); 255 RawPtrMapTy RPM = State->get<RawPtrMap>(); 256 for (const auto &Entry : RPM) { 257 if (!SymReaper.isLiveRegion(Entry.first)) { 258 // Due to incomplete destructor support, some dead regions might 259 // remain in the program state map. Clean them up. 260 State = State->remove<RawPtrMap>(Entry.first); 261 } 262 if (const PtrSet *OldSet = State->get<RawPtrMap>(Entry.first)) { 263 PtrSet CleanedUpSet = *OldSet; 264 for (const auto Symbol : Entry.second) { 265 if (!SymReaper.isLive(Symbol)) 266 CleanedUpSet = F.remove(CleanedUpSet, Symbol); 267 } 268 State = CleanedUpSet.isEmpty() 269 ? State->remove<RawPtrMap>(Entry.first) 270 : State->set<RawPtrMap>(Entry.first, CleanedUpSet); 271 } 272 } 273 C.addTransition(State); 274 } 275 276 namespace clang { 277 namespace ento { 278 namespace allocation_state { 279 280 std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) { 281 return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(Sym); 282 } 283 284 const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) { 285 RawPtrMapTy Map = State->get<RawPtrMap>(); 286 for (const auto &Entry : Map) { 287 if (Entry.second.contains(Sym)) { 288 return Entry.first; 289 } 290 } 291 return nullptr; 292 } 293 294 } // end namespace allocation_state 295 } // end namespace ento 296 } // end namespace clang 297 298 PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode( 299 const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) { 300 if (!isSymbolTracked(N->getState(), PtrToBuf) || 301 isSymbolTracked(N->getFirstPred()->getState(), PtrToBuf)) 302 return nullptr; 303 304 const Stmt *S = N->getStmtForDiagnostics(); 305 if (!S) 306 return nullptr; 307 308 const MemRegion *ObjRegion = 309 allocation_state::getContainerObjRegion(N->getState(), PtrToBuf); 310 const auto *TypedRegion = cast<TypedValueRegion>(ObjRegion); 311 QualType ObjTy = TypedRegion->getValueType(); 312 313 SmallString<256> Buf; 314 llvm::raw_svector_ostream OS(Buf); 315 OS << "Pointer to inner buffer of '" << ObjTy << "' obtained here"; 316 PathDiagnosticLocation Pos(S, BRC.getSourceManager(), 317 N->getLocationContext()); 318 return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true); 319 } 320 321 void ento::registerInnerPointerChecker(CheckerManager &Mgr) { 322 registerInnerPointerCheckerAux(Mgr); 323 Mgr.registerChecker<InnerPointerChecker>(); 324 } 325 326 bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) { 327 return true; 328 } 329