//===- SymbolManager.cpp - Management of Symbolic Values ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines SymbolManager, a class that manages symbolic values
10 // created for use by ExprEngine and related classes.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
15 #include "clang/AST/ASTContext.h"
16 #include "clang/AST/Expr.h"
17 #include "clang/AST/StmtObjC.h"
18 #include "clang/Analysis/Analyses/LiveVariables.h"
19 #include "clang/Analysis/AnalysisDeclContext.h"
20 #include "clang/Basic/LLVM.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
25 #include "llvm/ADT/FoldingSet.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Casting.h"
28 #include "llvm/Support/Compiler.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <cassert>
32
33 using namespace clang;
34 using namespace ento;
35
anchor()36 void SymExpr::anchor() {}
37
getKindStr() const38 StringRef SymbolConjured::getKindStr() const { return "conj_$"; }
getKindStr() const39 StringRef SymbolDerived::getKindStr() const { return "derived_$"; }
getKindStr() const40 StringRef SymbolExtent::getKindStr() const { return "extent_$"; }
getKindStr() const41 StringRef SymbolMetadata::getKindStr() const { return "meta_$"; }
getKindStr() const42 StringRef SymbolRegionValue::getKindStr() const { return "reg_$"; }
43
dump() const44 LLVM_DUMP_METHOD void SymExpr::dump() const { dumpToStream(llvm::errs()); }
45
dumpToStreamImpl(raw_ostream & OS,const SymExpr * Sym)46 void BinarySymExpr::dumpToStreamImpl(raw_ostream &OS, const SymExpr *Sym) {
47 OS << '(';
48 Sym->dumpToStream(OS);
49 OS << ')';
50 }
51
dumpToStreamImpl(raw_ostream & OS,const llvm::APSInt & Value)52 void BinarySymExpr::dumpToStreamImpl(raw_ostream &OS,
53 const llvm::APSInt &Value) {
54 if (Value.isUnsigned())
55 OS << Value.getZExtValue();
56 else
57 OS << Value.getSExtValue();
58 if (Value.isUnsigned())
59 OS << 'U';
60 }
61
dumpToStreamImpl(raw_ostream & OS,BinaryOperator::Opcode Op)62 void BinarySymExpr::dumpToStreamImpl(raw_ostream &OS,
63 BinaryOperator::Opcode Op) {
64 OS << ' ' << BinaryOperator::getOpcodeStr(Op) << ' ';
65 }
66
dumpToStream(raw_ostream & os) const67 void SymbolCast::dumpToStream(raw_ostream &os) const {
68 os << '(' << ToTy << ") (";
69 Operand->dumpToStream(os);
70 os << ')';
71 }
72
dumpToStream(raw_ostream & os) const73 void UnarySymExpr::dumpToStream(raw_ostream &os) const {
74 os << UnaryOperator::getOpcodeStr(Op);
75 bool Binary = isa<BinarySymExpr>(Operand);
76 if (Binary)
77 os << '(';
78 Operand->dumpToStream(os);
79 if (Binary)
80 os << ')';
81 }
82
dumpToStream(raw_ostream & os) const83 void SymbolConjured::dumpToStream(raw_ostream &os) const {
84 os << getKindStr() << getSymbolID() << '{' << T << ", LC" << LCtx->getID();
85 if (S)
86 os << ", S" << S->getID(LCtx->getDecl()->getASTContext());
87 else
88 os << ", no stmt";
89 os << ", #" << Count << '}';
90 }
91
dumpToStream(raw_ostream & os) const92 void SymbolDerived::dumpToStream(raw_ostream &os) const {
93 os << getKindStr() << getSymbolID() << '{' << getParentSymbol() << ','
94 << getRegion() << '}';
95 }
96
dumpToStream(raw_ostream & os) const97 void SymbolExtent::dumpToStream(raw_ostream &os) const {
98 os << getKindStr() << getSymbolID() << '{' << getRegion() << '}';
99 }
100
dumpToStream(raw_ostream & os) const101 void SymbolMetadata::dumpToStream(raw_ostream &os) const {
102 os << getKindStr() << getSymbolID() << '{' << getRegion() << ',' << T << '}';
103 }
104
anchor()105 void SymbolData::anchor() {}
106
dumpToStream(raw_ostream & os) const107 void SymbolRegionValue::dumpToStream(raw_ostream &os) const {
108 os << getKindStr() << getSymbolID() << '<' << getType() << ' ' << R << '>';
109 }
110
operator ==(const symbol_iterator & X) const111 bool SymExpr::symbol_iterator::operator==(const symbol_iterator &X) const {
112 return itr == X.itr;
113 }
114
operator !=(const symbol_iterator & X) const115 bool SymExpr::symbol_iterator::operator!=(const symbol_iterator &X) const {
116 return itr != X.itr;
117 }
118
// Starts a traversal rooted at SE: the root is the only entry initially on
// the pending-symbol stack.
SymExpr::symbol_iterator::symbol_iterator(const SymExpr *SE) {
  itr.push_back(SE);
}
122
operator ++()123 SymExpr::symbol_iterator &SymExpr::symbol_iterator::operator++() {
124 assert(!itr.empty() && "attempting to iterate on an 'end' iterator");
125 expand();
126 return *this;
127 }
128
operator *()129 SymbolRef SymExpr::symbol_iterator::operator*() {
130 assert(!itr.empty() && "attempting to dereference an 'end' iterator");
131 return itr.back();
132 }
133
expand()134 void SymExpr::symbol_iterator::expand() {
135 const SymExpr *SE = itr.pop_back_val();
136
137 switch (SE->getKind()) {
138 case SymExpr::SymbolRegionValueKind:
139 case SymExpr::SymbolConjuredKind:
140 case SymExpr::SymbolDerivedKind:
141 case SymExpr::SymbolExtentKind:
142 case SymExpr::SymbolMetadataKind:
143 return;
144 case SymExpr::SymbolCastKind:
145 itr.push_back(cast<SymbolCast>(SE)->getOperand());
146 return;
147 case SymExpr::UnarySymExprKind:
148 itr.push_back(cast<UnarySymExpr>(SE)->getOperand());
149 return;
150 case SymExpr::SymIntExprKind:
151 itr.push_back(cast<SymIntExpr>(SE)->getLHS());
152 return;
153 case SymExpr::IntSymExprKind:
154 itr.push_back(cast<IntSymExpr>(SE)->getRHS());
155 return;
156 case SymExpr::SymSymExprKind: {
157 const auto *x = cast<SymSymExpr>(SE);
158 itr.push_back(x->getLHS());
159 itr.push_back(x->getRHS());
160 return;
161 }
162 }
163 llvm_unreachable("unhandled expansion case");
164 }
165
166 const SymbolRegionValue*
getRegionValueSymbol(const TypedValueRegion * R)167 SymbolManager::getRegionValueSymbol(const TypedValueRegion* R) {
168 llvm::FoldingSetNodeID profile;
169 SymbolRegionValue::Profile(profile, R);
170 void *InsertPos;
171 SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
172 if (!SD) {
173 SD = new (BPAlloc) SymbolRegionValue(SymbolCounter, R);
174 DataSet.InsertNode(SD, InsertPos);
175 ++SymbolCounter;
176 }
177
178 return cast<SymbolRegionValue>(SD);
179 }
180
conjureSymbol(const Stmt * E,const LocationContext * LCtx,QualType T,unsigned Count,const void * SymbolTag)181 const SymbolConjured* SymbolManager::conjureSymbol(const Stmt *E,
182 const LocationContext *LCtx,
183 QualType T,
184 unsigned Count,
185 const void *SymbolTag) {
186 llvm::FoldingSetNodeID profile;
187 SymbolConjured::Profile(profile, E, T, Count, LCtx, SymbolTag);
188 void *InsertPos;
189 SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
190 if (!SD) {
191 SD = new (BPAlloc) SymbolConjured(SymbolCounter, E, LCtx, T, Count, SymbolTag);
192 DataSet.InsertNode(SD, InsertPos);
193 ++SymbolCounter;
194 }
195
196 return cast<SymbolConjured>(SD);
197 }
198
199 const SymbolDerived*
getDerivedSymbol(SymbolRef parentSymbol,const TypedValueRegion * R)200 SymbolManager::getDerivedSymbol(SymbolRef parentSymbol,
201 const TypedValueRegion *R) {
202 llvm::FoldingSetNodeID profile;
203 SymbolDerived::Profile(profile, parentSymbol, R);
204 void *InsertPos;
205 SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
206 if (!SD) {
207 SD = new (BPAlloc) SymbolDerived(SymbolCounter, parentSymbol, R);
208 DataSet.InsertNode(SD, InsertPos);
209 ++SymbolCounter;
210 }
211
212 return cast<SymbolDerived>(SD);
213 }
214
215 const SymbolExtent*
getExtentSymbol(const SubRegion * R)216 SymbolManager::getExtentSymbol(const SubRegion *R) {
217 llvm::FoldingSetNodeID profile;
218 SymbolExtent::Profile(profile, R);
219 void *InsertPos;
220 SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
221 if (!SD) {
222 SD = new (BPAlloc) SymbolExtent(SymbolCounter, R);
223 DataSet.InsertNode(SD, InsertPos);
224 ++SymbolCounter;
225 }
226
227 return cast<SymbolExtent>(SD);
228 }
229
230 const SymbolMetadata *
getMetadataSymbol(const MemRegion * R,const Stmt * S,QualType T,const LocationContext * LCtx,unsigned Count,const void * SymbolTag)231 SymbolManager::getMetadataSymbol(const MemRegion* R, const Stmt *S, QualType T,
232 const LocationContext *LCtx,
233 unsigned Count, const void *SymbolTag) {
234 llvm::FoldingSetNodeID profile;
235 SymbolMetadata::Profile(profile, R, S, T, LCtx, Count, SymbolTag);
236 void *InsertPos;
237 SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
238 if (!SD) {
239 SD = new (BPAlloc) SymbolMetadata(SymbolCounter, R, S, T, LCtx, Count, SymbolTag);
240 DataSet.InsertNode(SD, InsertPos);
241 ++SymbolCounter;
242 }
243
244 return cast<SymbolMetadata>(SD);
245 }
246
247 const SymbolCast*
getCastSymbol(const SymExpr * Op,QualType From,QualType To)248 SymbolManager::getCastSymbol(const SymExpr *Op,
249 QualType From, QualType To) {
250 llvm::FoldingSetNodeID ID;
251 SymbolCast::Profile(ID, Op, From, To);
252 void *InsertPos;
253 SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
254 if (!data) {
255 data = new (BPAlloc) SymbolCast(Op, From, To);
256 DataSet.InsertNode(data, InsertPos);
257 }
258
259 return cast<SymbolCast>(data);
260 }
261
getSymIntExpr(const SymExpr * lhs,BinaryOperator::Opcode op,const llvm::APSInt & v,QualType t)262 const SymIntExpr *SymbolManager::getSymIntExpr(const SymExpr *lhs,
263 BinaryOperator::Opcode op,
264 const llvm::APSInt& v,
265 QualType t) {
266 llvm::FoldingSetNodeID ID;
267 SymIntExpr::Profile(ID, lhs, op, v, t);
268 void *InsertPos;
269 SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
270
271 if (!data) {
272 data = new (BPAlloc) SymIntExpr(lhs, op, v, t);
273 DataSet.InsertNode(data, InsertPos);
274 }
275
276 return cast<SymIntExpr>(data);
277 }
278
getIntSymExpr(const llvm::APSInt & lhs,BinaryOperator::Opcode op,const SymExpr * rhs,QualType t)279 const IntSymExpr *SymbolManager::getIntSymExpr(const llvm::APSInt& lhs,
280 BinaryOperator::Opcode op,
281 const SymExpr *rhs,
282 QualType t) {
283 llvm::FoldingSetNodeID ID;
284 IntSymExpr::Profile(ID, lhs, op, rhs, t);
285 void *InsertPos;
286 SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
287
288 if (!data) {
289 data = new (BPAlloc) IntSymExpr(lhs, op, rhs, t);
290 DataSet.InsertNode(data, InsertPos);
291 }
292
293 return cast<IntSymExpr>(data);
294 }
295
getSymSymExpr(const SymExpr * lhs,BinaryOperator::Opcode op,const SymExpr * rhs,QualType t)296 const SymSymExpr *SymbolManager::getSymSymExpr(const SymExpr *lhs,
297 BinaryOperator::Opcode op,
298 const SymExpr *rhs,
299 QualType t) {
300 llvm::FoldingSetNodeID ID;
301 SymSymExpr::Profile(ID, lhs, op, rhs, t);
302 void *InsertPos;
303 SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
304
305 if (!data) {
306 data = new (BPAlloc) SymSymExpr(lhs, op, rhs, t);
307 DataSet.InsertNode(data, InsertPos);
308 }
309
310 return cast<SymSymExpr>(data);
311 }
312
getUnarySymExpr(const SymExpr * Operand,UnaryOperator::Opcode Opc,QualType T)313 const UnarySymExpr *SymbolManager::getUnarySymExpr(const SymExpr *Operand,
314 UnaryOperator::Opcode Opc,
315 QualType T) {
316 llvm::FoldingSetNodeID ID;
317 UnarySymExpr::Profile(ID, Operand, Opc, T);
318 void *InsertPos;
319 SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
320 if (!data) {
321 data = new (BPAlloc) UnarySymExpr(Operand, Opc, T);
322 DataSet.InsertNode(data, InsertPos);
323 }
324
325 return cast<UnarySymExpr>(data);
326 }
327
getType() const328 QualType SymbolConjured::getType() const {
329 return T;
330 }
331
getType() const332 QualType SymbolDerived::getType() const {
333 return R->getValueType();
334 }
335
getType() const336 QualType SymbolExtent::getType() const {
337 ASTContext &Ctx = R->getMemRegionManager().getContext();
338 return Ctx.getSizeType();
339 }
340
getType() const341 QualType SymbolMetadata::getType() const {
342 return T;
343 }
344
getType() const345 QualType SymbolRegionValue::getType() const {
346 return R->getValueType();
347 }
348
canSymbolicate(QualType T)349 bool SymbolManager::canSymbolicate(QualType T) {
350 T = T.getCanonicalType();
351
352 if (Loc::isLocType(T))
353 return true;
354
355 if (T->isIntegralOrEnumerationType())
356 return true;
357
358 if (T->isRecordType() && !T->isUnionType())
359 return true;
360
361 return false;
362 }
363
addSymbolDependency(const SymbolRef Primary,const SymbolRef Dependent)364 void SymbolManager::addSymbolDependency(const SymbolRef Primary,
365 const SymbolRef Dependent) {
366 auto &dependencies = SymbolDependencies[Primary];
367 if (!dependencies) {
368 dependencies = std::make_unique<SymbolRefSmallVectorTy>();
369 }
370 dependencies->push_back(Dependent);
371 }
372
getDependentSymbols(const SymbolRef Primary)373 const SymbolRefSmallVectorTy *SymbolManager::getDependentSymbols(
374 const SymbolRef Primary) {
375 SymbolDependTy::const_iterator I = SymbolDependencies.find(Primary);
376 if (I == SymbolDependencies.end())
377 return nullptr;
378 return I->second.get();
379 }
380
markDependentsLive(SymbolRef sym)381 void SymbolReaper::markDependentsLive(SymbolRef sym) {
382 // Do not mark dependents more then once.
383 SymbolMapTy::iterator LI = TheLiving.find(sym);
384 assert(LI != TheLiving.end() && "The primary symbol is not live.");
385 if (LI->second == HaveMarkedDependents)
386 return;
387 LI->second = HaveMarkedDependents;
388
389 if (const SymbolRefSmallVectorTy *Deps = SymMgr.getDependentSymbols(sym)) {
390 for (const auto I : *Deps) {
391 if (TheLiving.contains(I))
392 continue;
393 markLive(I);
394 }
395 }
396 }
397
markLive(SymbolRef sym)398 void SymbolReaper::markLive(SymbolRef sym) {
399 TheLiving[sym] = NotProcessed;
400 markDependentsLive(sym);
401 }
402
markLive(const MemRegion * region)403 void SymbolReaper::markLive(const MemRegion *region) {
404 LiveRegionRoots.insert(region->getBaseRegion());
405 markElementIndicesLive(region);
406 }
407
markLazilyCopied(const clang::ento::MemRegion * region)408 void SymbolReaper::markLazilyCopied(const clang::ento::MemRegion *region) {
409 LazilyCopiedRegionRoots.insert(region->getBaseRegion());
410 }
411
markElementIndicesLive(const MemRegion * region)412 void SymbolReaper::markElementIndicesLive(const MemRegion *region) {
413 for (auto SR = dyn_cast<SubRegion>(region); SR;
414 SR = dyn_cast<SubRegion>(SR->getSuperRegion())) {
415 if (const auto ER = dyn_cast<ElementRegion>(SR)) {
416 SVal Idx = ER->getIndex();
417 for (SymbolRef Sym : Idx.symbols())
418 markLive(Sym);
419 }
420 }
421 }
422
markInUse(SymbolRef sym)423 void SymbolReaper::markInUse(SymbolRef sym) {
424 if (isa<SymbolMetadata>(sym))
425 MetadataInUse.insert(sym);
426 }
427
isLiveRegion(const MemRegion * MR)428 bool SymbolReaper::isLiveRegion(const MemRegion *MR) {
429 // TODO: For now, liveness of a memory region is equivalent to liveness of its
430 // base region. In fact we can do a bit better: say, if a particular FieldDecl
431 // is not used later in the path, we can diagnose a leak of a value within
432 // that field earlier than, say, the variable that contains the field dies.
433 MR = MR->getBaseRegion();
434 if (LiveRegionRoots.count(MR))
435 return true;
436
437 if (const auto *SR = dyn_cast<SymbolicRegion>(MR))
438 return isLive(SR->getSymbol());
439
440 if (const auto *VR = dyn_cast<VarRegion>(MR))
441 return isLive(VR, true);
442
443 // FIXME: This is a gross over-approximation. What we really need is a way to
444 // tell if anything still refers to this region. Unlike SymbolicRegions,
445 // AllocaRegions don't have associated symbols, though, so we don't actually
446 // have a way to track their liveness.
447 return isa<AllocaRegion, CXXThisRegion, MemSpaceRegion, CodeTextRegion>(MR);
448 }
449
isLazilyCopiedRegion(const MemRegion * MR) const450 bool SymbolReaper::isLazilyCopiedRegion(const MemRegion *MR) const {
451 // TODO: See comment in isLiveRegion.
452 return LazilyCopiedRegionRoots.count(MR->getBaseRegion());
453 }
454
isReadableRegion(const MemRegion * MR)455 bool SymbolReaper::isReadableRegion(const MemRegion *MR) {
456 return isLiveRegion(MR) || isLazilyCopiedRegion(MR);
457 }
458
isLive(SymbolRef sym)459 bool SymbolReaper::isLive(SymbolRef sym) {
460 if (TheLiving.count(sym)) {
461 markDependentsLive(sym);
462 return true;
463 }
464
465 bool KnownLive;
466
467 switch (sym->getKind()) {
468 case SymExpr::SymbolRegionValueKind:
469 KnownLive = isReadableRegion(cast<SymbolRegionValue>(sym)->getRegion());
470 break;
471 case SymExpr::SymbolConjuredKind:
472 KnownLive = false;
473 break;
474 case SymExpr::SymbolDerivedKind:
475 KnownLive = isLive(cast<SymbolDerived>(sym)->getParentSymbol());
476 break;
477 case SymExpr::SymbolExtentKind:
478 KnownLive = isLiveRegion(cast<SymbolExtent>(sym)->getRegion());
479 break;
480 case SymExpr::SymbolMetadataKind:
481 KnownLive = MetadataInUse.count(sym) &&
482 isLiveRegion(cast<SymbolMetadata>(sym)->getRegion());
483 if (KnownLive)
484 MetadataInUse.erase(sym);
485 break;
486 case SymExpr::SymIntExprKind:
487 KnownLive = isLive(cast<SymIntExpr>(sym)->getLHS());
488 break;
489 case SymExpr::IntSymExprKind:
490 KnownLive = isLive(cast<IntSymExpr>(sym)->getRHS());
491 break;
492 case SymExpr::SymSymExprKind:
493 KnownLive = isLive(cast<SymSymExpr>(sym)->getLHS()) &&
494 isLive(cast<SymSymExpr>(sym)->getRHS());
495 break;
496 case SymExpr::SymbolCastKind:
497 KnownLive = isLive(cast<SymbolCast>(sym)->getOperand());
498 break;
499 case SymExpr::UnarySymExprKind:
500 KnownLive = isLive(cast<UnarySymExpr>(sym)->getOperand());
501 break;
502 }
503
504 if (KnownLive)
505 markLive(sym);
506
507 return KnownLive;
508 }
509
510 bool
isLive(const Expr * ExprVal,const LocationContext * ELCtx) const511 SymbolReaper::isLive(const Expr *ExprVal, const LocationContext *ELCtx) const {
512 if (LCtx == nullptr)
513 return false;
514
515 if (LCtx != ELCtx) {
516 // If the reaper's location context is a parent of the expression's
517 // location context, then the expression value is now "out of scope".
518 if (LCtx->isParentOf(ELCtx))
519 return false;
520 return true;
521 }
522
523 // If no statement is provided, everything in this and parent contexts is
524 // live.
525 if (!Loc)
526 return true;
527
528 return LCtx->getAnalysis<RelaxedLiveVariables>()->isLive(Loc, ExprVal);
529 }
530
isLive(const VarRegion * VR,bool includeStoreBindings) const531 bool SymbolReaper::isLive(const VarRegion *VR, bool includeStoreBindings) const{
532 const StackFrameContext *VarContext = VR->getStackFrame();
533
534 if (!VarContext)
535 return true;
536
537 if (!LCtx)
538 return false;
539 const StackFrameContext *CurrentContext = LCtx->getStackFrame();
540
541 if (VarContext == CurrentContext) {
542 // If no statement is provided, everything is live.
543 if (!Loc)
544 return true;
545
546 // Anonymous parameters of an inheriting constructor are live for the entire
547 // duration of the constructor.
548 if (isa<CXXInheritedCtorInitExpr>(Loc))
549 return true;
550
551 if (LCtx->getAnalysis<RelaxedLiveVariables>()->isLive(Loc, VR->getDecl()))
552 return true;
553
554 if (!includeStoreBindings)
555 return false;
556
557 unsigned &cachedQuery =
558 const_cast<SymbolReaper *>(this)->includedRegionCache[VR];
559
560 if (cachedQuery) {
561 return cachedQuery == 1;
562 }
563
564 // Query the store to see if the region occurs in any live bindings.
565 if (Store store = reapedStore.getStore()) {
566 bool hasRegion =
567 reapedStore.getStoreManager().includedInBindings(store, VR);
568 cachedQuery = hasRegion ? 1 : 2;
569 return hasRegion;
570 }
571
572 return false;
573 }
574
575 return VarContext->isParentOf(CurrentContext);
576 }
577