1 //=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a check that marks a raw pointer to a C++ container's 10 // inner buffer released when the object is destroyed. This information can 11 // be used by MallocChecker to detect use-after-free problems. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AllocationState.h" 16 #include "InterCheckerAPI.h" 17 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 18 #include "clang/StaticAnalyzer/Core/Checker.h" 19 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" 20 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 22 23 using namespace clang; 24 using namespace ento; 25 26 // Associate container objects with a set of raw pointer symbols. 27 REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef) 28 REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet) 29 30 31 namespace { 32 33 class InnerPointerChecker 34 : public Checker<check::DeadSymbols, check::PostCall> { 35 36 CallDescriptionSet InvalidatingMemberFunctions{ 37 CallDescription(CDM::CXXMethod, {"std", "basic_string", "append"}), 38 CallDescription(CDM::CXXMethod, {"std", "basic_string", "assign"}), 39 CallDescription(CDM::CXXMethod, {"std", "basic_string", "clear"}), 40 CallDescription(CDM::CXXMethod, {"std", "basic_string", "erase"}), 41 CallDescription(CDM::CXXMethod, {"std", "basic_string", "insert"}), 42 CallDescription(CDM::CXXMethod, {"std", "basic_string", "pop_back"}), 43 CallDescription(CDM::CXXMethod, {"std", "basic_string", "push_back"}), 44 CallDescription(CDM::CXXMethod, {"std", "basic_string", "replace"}), 45 CallDescription(CDM::CXXMethod, {"std", "basic_string", "reserve"}), 46 CallDescription(CDM::CXXMethod, {"std", "basic_string", "resize"}), 47 CallDescription(CDM::CXXMethod, {"std", "basic_string", "shrink_to_fit"}), 48 CallDescription(CDM::CXXMethod, {"std", "basic_string", "swap"})}; 49 50 CallDescriptionSet AddressofFunctions{ 51 CallDescription(CDM::SimpleFunc, {"std", "addressof"}), 52 CallDescription(CDM::SimpleFunc, {"std", "__addressof"})}; 53 54 CallDescriptionSet InnerPointerAccessFunctions{ 55 CallDescription(CDM::CXXMethod, {"std", "basic_string", "c_str"}), 56 CallDescription(CDM::SimpleFunc, {"std", "data"}, 1), 57 CallDescription(CDM::CXXMethod, {"std", "basic_string", "data"})}; 58 59 public: 60 class InnerPointerBRVisitor : public BugReporterVisitor { 61 SymbolRef PtrToBuf; 62 63 public: 64 InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {} 65 66 static void *getTag() { 67 static int Tag = 0; 68 return &Tag; 69 } 70 71 void Profile(llvm::FoldingSetNodeID &ID) const override { 72 ID.AddPointer(getTag()); 73 } 74 75 PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, 76 BugReporterContext &BRC, 77 PathSensitiveBugReport &BR) override; 78 79 // FIXME: Scan the map once in the visitor's constructor and do a direct 80 // lookup by region. 81 bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) { 82 RawPtrMapTy Map = State->get<RawPtrMap>(); 83 for (const auto &Entry : Map) { 84 if (Entry.second.contains(Sym)) 85 return true; 86 } 87 return false; 88 } 89 }; 90 91 /// Check whether the called member function potentially invalidates 92 /// pointers referring to the container object's inner buffer. 93 bool isInvalidatingMemberFunction(const CallEvent &Call) const; 94 95 /// Mark pointer symbols associated with the given memory region released 96 /// in the program state. 97 void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State, 98 const MemRegion *ObjRegion, 99 CheckerContext &C) const; 100 101 /// Standard library functions that take a non-const `basic_string` argument by 102 /// reference may invalidate its inner pointers. Check for these cases and 103 /// mark the pointers released. 104 void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State, 105 CheckerContext &C) const; 106 107 /// Record the connection between raw pointers referring to a container 108 /// object's inner buffer and the object's memory region in the program state. 109 /// Mark potentially invalidated pointers released. 110 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 111 112 /// Clean up the program state map. 113 void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; 114 }; 115 116 } // end anonymous namespace 117 118 bool InnerPointerChecker::isInvalidatingMemberFunction( 119 const CallEvent &Call) const { 120 if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(&Call)) { 121 OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator(); 122 if (Opc == OO_Equal || Opc == OO_PlusEqual) 123 return true; 124 return false; 125 } 126 return isa<CXXDestructorCall>(Call) || 127 InvalidatingMemberFunctions.contains(Call); 128 } 129 130 void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call, 131 ProgramStateRef State, 132 const MemRegion *MR, 133 CheckerContext &C) const { 134 if (const PtrSet *PS = State->get<RawPtrMap>(MR)) { 135 const Expr *Origin = Call.getOriginExpr(); 136 for (const auto Symbol : *PS) { 137 // NOTE: `Origin` may be null, and will be stored so in the symbol's 138 // `RefState` in MallocChecker's `RegionState` program state map. 139 State = allocation_state::markReleased(State, Symbol, Origin); 140 } 141 State = State->remove<RawPtrMap>(MR); 142 C.addTransition(State); 143 return; 144 } 145 } 146 147 void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call, 148 ProgramStateRef State, 149 CheckerContext &C) const { 150 if (const auto *FC = dyn_cast<AnyFunctionCall>(&Call)) { 151 const FunctionDecl *FD = FC->getDecl(); 152 if (!FD || !FD->isInStdNamespace()) 153 return; 154 155 for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) { 156 QualType ParamTy = FD->getParamDecl(I)->getType(); 157 if (!ParamTy->isReferenceType() || 158 ParamTy->getPointeeType().isConstQualified()) 159 continue; 160 161 // In case of member operator calls, `this` is counted as an 162 // argument but not as a parameter. 163 bool isaMemberOpCall = isa<CXXMemberOperatorCall>(FC); 164 unsigned ArgI = isaMemberOpCall ? I+1 : I; 165 166 SVal Arg = FC->getArgSVal(ArgI); 167 const auto *ArgRegion = 168 dyn_cast_or_null<TypedValueRegion>(Arg.getAsRegion()); 169 if (!ArgRegion) 170 continue; 171 172 // std::addressof functions accepts a non-const reference as an argument, 173 // but doesn't modify it. 174 if (AddressofFunctions.contains(Call)) 175 continue; 176 177 markPtrSymbolsReleased(Call, State, ArgRegion, C); 178 } 179 } 180 } 181 182 // [string.require] 183 // 184 // "References, pointers, and iterators referring to the elements of a 185 // basic_string sequence may be invalidated by the following uses of that 186 // basic_string object: 187 // 188 // -- As an argument to any standard library function taking a reference 189 // to non-const basic_string as an argument. For example, as an argument to 190 // non-member functions swap(), operator>>(), and getline(), or as an argument 191 // to basic_string::swap(). 192 // 193 // -- Calling non-const member functions, except operator[], at, front, back, 194 // begin, rbegin, end, and rend." 195 196 void InnerPointerChecker::checkPostCall(const CallEvent &Call, 197 CheckerContext &C) const { 198 ProgramStateRef State = C.getState(); 199 200 // TODO: Do we need these to be typed? 201 const TypedValueRegion *ObjRegion = nullptr; 202 203 if (const auto *ICall = dyn_cast<CXXInstanceCall>(&Call)) { 204 ObjRegion = dyn_cast_or_null<TypedValueRegion>( 205 ICall->getCXXThisVal().getAsRegion()); 206 207 // Check [string.require] / second point. 208 if (isInvalidatingMemberFunction(Call)) { 209 markPtrSymbolsReleased(Call, State, ObjRegion, C); 210 return; 211 } 212 } 213 214 if (InnerPointerAccessFunctions.contains(Call)) { 215 216 if (isa<SimpleFunctionCall>(Call)) { 217 // NOTE: As of now, we only have one free access function: std::data. 218 // If we add more functions like this in the list, hardcoded 219 // argument index should be changed. 220 ObjRegion = 221 dyn_cast_or_null<TypedValueRegion>(Call.getArgSVal(0).getAsRegion()); 222 } 223 224 if (!ObjRegion) 225 return; 226 227 SVal RawPtr = Call.getReturnValue(); 228 if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) { 229 // Start tracking this raw pointer by adding it to the set of symbols 230 // associated with this container object in the program state map. 231 232 PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); 233 const PtrSet *SetPtr = State->get<RawPtrMap>(ObjRegion); 234 PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet(); 235 assert(C.wasInlined || !Set.contains(Sym)); 236 Set = F.add(Set, Sym); 237 238 State = State->set<RawPtrMap>(ObjRegion, Set); 239 C.addTransition(State); 240 } 241 242 return; 243 } 244 245 // Check [string.require] / first point. 246 checkFunctionArguments(Call, State, C); 247 } 248 249 void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper, 250 CheckerContext &C) const { 251 ProgramStateRef State = C.getState(); 252 PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); 253 RawPtrMapTy RPM = State->get<RawPtrMap>(); 254 for (const auto &Entry : RPM) { 255 if (!SymReaper.isLiveRegion(Entry.first)) { 256 // Due to incomplete destructor support, some dead regions might 257 // remain in the program state map. Clean them up. 258 State = State->remove<RawPtrMap>(Entry.first); 259 } 260 if (const PtrSet *OldSet = State->get<RawPtrMap>(Entry.first)) { 261 PtrSet CleanedUpSet = *OldSet; 262 for (const auto Symbol : Entry.second) { 263 if (!SymReaper.isLive(Symbol)) 264 CleanedUpSet = F.remove(CleanedUpSet, Symbol); 265 } 266 State = CleanedUpSet.isEmpty() 267 ? State->remove<RawPtrMap>(Entry.first) 268 : State->set<RawPtrMap>(Entry.first, CleanedUpSet); 269 } 270 } 271 C.addTransition(State); 272 } 273 274 namespace clang { 275 namespace ento { 276 namespace allocation_state { 277 278 std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) { 279 return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(Sym); 280 } 281 282 const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) { 283 RawPtrMapTy Map = State->get<RawPtrMap>(); 284 for (const auto &Entry : Map) { 285 if (Entry.second.contains(Sym)) { 286 return Entry.first; 287 } 288 } 289 return nullptr; 290 } 291 292 } // end namespace allocation_state 293 } // end namespace ento 294 } // end namespace clang 295 296 PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode( 297 const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) { 298 if (!isSymbolTracked(N->getState(), PtrToBuf) || 299 isSymbolTracked(N->getFirstPred()->getState(), PtrToBuf)) 300 return nullptr; 301 302 const Stmt *S = N->getStmtForDiagnostics(); 303 if (!S) 304 return nullptr; 305 306 const MemRegion *ObjRegion = 307 allocation_state::getContainerObjRegion(N->getState(), PtrToBuf); 308 const auto *TypedRegion = cast<TypedValueRegion>(ObjRegion); 309 QualType ObjTy = TypedRegion->getValueType(); 310 311 SmallString<256> Buf; 312 llvm::raw_svector_ostream OS(Buf); 313 OS << "Pointer to inner buffer of '" << ObjTy << "' obtained here"; 314 PathDiagnosticLocation Pos(S, BRC.getSourceManager(), 315 N->getLocationContext()); 316 return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true); 317 } 318 319 void ento::registerInnerPointerChecker(CheckerManager &Mgr) { 320 registerInnerPointerCheckerAux(Mgr); 321 Mgr.registerChecker<InnerPointerChecker>(); 322 } 323 324 bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) { 325 return true; 326 } 327