xref: /freebsd/contrib/llvm-project/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1 //===-- IRDynamicChecks.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/IR/Constants.h"
10 #include "llvm/IR/DataLayout.h"
11 #include "llvm/IR/Function.h"
12 #include "llvm/IR/Instructions.h"
13 #include "llvm/IR/Module.h"
14 #include "llvm/IR/Value.h"
15 #include "llvm/Support/raw_ostream.h"
16 
17 #include "IRDynamicChecks.h"
18 
19 #include "lldb/Expression/UtilityFunction.h"
20 #include "lldb/Target/ExecutionContext.h"
21 #include "lldb/Target/Process.h"
22 #include "lldb/Target/StackFrame.h"
23 #include "lldb/Target/Target.h"
24 #include "lldb/Utility/ConstString.h"
25 #include "lldb/Utility/LLDBLog.h"
26 #include "lldb/Utility/Log.h"
27 
28 #include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
29 
30 using namespace llvm;
31 using namespace lldb_private;
32 
// Pass identifier; the IRDynamicChecks constructor in this file hands it to
// the ModulePass base class.
static char ID;

// Names under which the checker utility functions are created in
// ClangDynamicCheckerFunctions::Install().
#define VALID_POINTER_CHECK_NAME "_$__lldb_valid_pointer_check"
#define VALID_OBJC_OBJECT_CHECK_NAME "$__lldb_objc_object_check"

// Source text of the pointer-validity checker.  The function it defines reads
// one byte through the pointer it receives and discards the value.
static const char g_valid_pointer_check_text[] =
    "extern \"C\" void\n"
    "_$__lldb_valid_pointer_check (unsigned char *$__lldb_arg_ptr)\n"
    "{\n"
    "    unsigned char $__lldb_local_val = *$__lldb_arg_ptr;\n"
    "}";
44 
/// Construct the Clang checker set; the DynamicCheckerFunctions base class is
/// initialised with the DCF_Clang kind.
ClangDynamicCheckerFunctions::ClangDynamicCheckerFunctions()
    : DynamicCheckerFunctions(DCF_Clang) {}

/// Defaulted destructor: only the members are destroyed.
ClangDynamicCheckerFunctions::~ClangDynamicCheckerFunctions() = default;
49 
50 llvm::Error ClangDynamicCheckerFunctions::Install(
51     DiagnosticManager &diagnostic_manager, ExecutionContext &exe_ctx) {
52   Expected<std::unique_ptr<UtilityFunction>> utility_fn =
53       exe_ctx.GetTargetRef().CreateUtilityFunction(
54           g_valid_pointer_check_text, VALID_POINTER_CHECK_NAME,
55           lldb::eLanguageTypeC, exe_ctx);
56   if (!utility_fn)
57     return utility_fn.takeError();
58   m_valid_pointer_check = std::move(*utility_fn);
59 
60   if (Process *process = exe_ctx.GetProcessPtr()) {
61     ObjCLanguageRuntime *objc_language_runtime =
62         ObjCLanguageRuntime::Get(*process);
63 
64     if (objc_language_runtime) {
65       Expected<std::unique_ptr<UtilityFunction>> checker_fn =
66           objc_language_runtime->CreateObjectChecker(VALID_OBJC_OBJECT_CHECK_NAME, exe_ctx);
67       if (!checker_fn)
68         return checker_fn.takeError();
69       m_objc_object_check = std::move(*checker_fn);
70     }
71   }
72 
73   return Error::success();
74 }
75 
76 bool ClangDynamicCheckerFunctions::DoCheckersExplainStop(lldb::addr_t addr,
77                                                          Stream &message) {
78   // FIXME: We have to get the checkers to know why they scotched the call in
79   // more detail,
80   // so we can print a better message here.
81   if (m_valid_pointer_check && m_valid_pointer_check->ContainsAddress(addr)) {
82     message.Printf("Attempted to dereference an invalid pointer.");
83     return true;
84   } else if (m_objc_object_check &&
85              m_objc_object_check->ContainsAddress(addr)) {
86     message.Printf("Attempted to dereference an invalid ObjC Object or send it "
87                    "an unrecognized selector");
88     return true;
89   }
90   return false;
91 }
92 
93 static std::string PrintValue(llvm::Value *V, bool truncate = false) {
94   std::string s;
95   raw_string_ostream rso(s);
96   V->print(rso);
97   rso.flush();
98   if (truncate)
99     s.resize(s.length() - 1);
100   return s;
101 }
102 
103 /// \class Instrumenter IRDynamicChecks.cpp
104 /// Finds and instruments individual LLVM IR instructions
105 ///
106 /// When instrumenting LLVM IR, it is frequently desirable to first search for
107 /// instructions, and then later modify them.  This way iterators remain
108 /// intact, and multiple passes can look at the same code base without
109 /// treading on each other's toes.
110 ///
111 /// The Instrumenter class implements this functionality.  A client first
112 /// calls Inspect on a function, which populates a list of instructions to be
113 /// instrumented.  Then, later, when all passes' Inspect functions have been
114 /// called, the client calls Instrument, which adds the desired
115 /// instrumentation.
116 ///
117 /// A subclass of Instrumenter must override InstrumentInstruction, which
118 /// is responsible for adding whatever instrumentation is necessary.
119 ///
120 /// A subclass of Instrumenter may override:
121 ///
122 /// - InspectInstruction [default: does nothing]
123 ///
124 /// - InspectBasicBlock [default: iterates through the instructions in a
125 ///   basic block calling InspectInstruction]
126 ///
127 /// - InspectFunction [default: iterates through the basic blocks in a
128 ///   function calling InspectBasicBlock]
129 class Instrumenter {
130 public:
131   /// Constructor
132   ///
133   /// \param[in] module
134   ///     The module being instrumented.
135   Instrumenter(llvm::Module &module,
136                std::shared_ptr<UtilityFunction> checker_function)
137       : m_module(module), m_checker_function(checker_function) {}
138 
139   virtual ~Instrumenter() = default;
140 
141   /// Inspect a function to find instructions to instrument
142   ///
143   /// \param[in] function
144   ///     The function to inspect.
145   ///
146   /// \return
147   ///     True on success; false on error.
148   bool Inspect(llvm::Function &function) { return InspectFunction(function); }
149 
150   /// Instrument all the instructions found by Inspect()
151   ///
152   /// \return
153   ///     True on success; false on error.
154   bool Instrument() {
155     for (InstIterator ii = m_to_instrument.begin(),
156                       last_ii = m_to_instrument.end();
157          ii != last_ii; ++ii) {
158       if (!InstrumentInstruction(*ii))
159         return false;
160     }
161 
162     return true;
163   }
164 
165 protected:
166   /// Add instrumentation to a single instruction
167   ///
168   /// \param[in] inst
169   ///     The instruction to be instrumented.
170   ///
171   /// \return
172   ///     True on success; false otherwise.
173   virtual bool InstrumentInstruction(llvm::Instruction *inst) = 0;
174 
175   /// Register a single instruction to be instrumented
176   ///
177   /// \param[in] inst
178   ///     The instruction to be instrumented.
179   void RegisterInstruction(llvm::Instruction &inst) {
180     m_to_instrument.push_back(&inst);
181   }
182 
183   /// Determine whether a single instruction is interesting to instrument,
184   /// and, if so, call RegisterInstruction
185   ///
186   /// \param[in] i
187   ///     The instruction to be inspected.
188   ///
189   /// \return
190   ///     False if there was an error scanning; true otherwise.
191   virtual bool InspectInstruction(llvm::Instruction &i) { return true; }
192 
193   /// Scan a basic block to see if any instructions are interesting
194   ///
195   /// \param[in] bb
196   ///     The basic block to be inspected.
197   ///
198   /// \return
199   ///     False if there was an error scanning; true otherwise.
200   virtual bool InspectBasicBlock(llvm::BasicBlock &bb) {
201     for (llvm::BasicBlock::iterator ii = bb.begin(), last_ii = bb.end();
202          ii != last_ii; ++ii) {
203       if (!InspectInstruction(*ii))
204         return false;
205     }
206 
207     return true;
208   }
209 
210   /// Scan a function to see if any instructions are interesting
211   ///
212   /// \param[in] f
213   ///     The function to be inspected.
214   ///
215   /// \return
216   ///     False if there was an error scanning; true otherwise.
217   virtual bool InspectFunction(llvm::Function &f) {
218     for (llvm::Function::iterator bbi = f.begin(), last_bbi = f.end();
219          bbi != last_bbi; ++bbi) {
220       if (!InspectBasicBlock(*bbi))
221         return false;
222     }
223 
224     return true;
225   }
226 
227   /// Build a function pointer for a function with signature void
228   /// (*)(uint8_t*) with a given address
229   ///
230   /// \param[in] start_address
231   ///     The address of the function.
232   ///
233   /// \return
234   ///     The function pointer, for use in a CallInst.
235   llvm::FunctionCallee BuildPointerValidatorFunc(lldb::addr_t start_address) {
236     llvm::Type *param_array[1];
237 
238     param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
239 
240     ArrayRef<llvm::Type *> params(param_array, 1);
241 
242     FunctionType *fun_ty = FunctionType::get(
243         llvm::Type::getVoidTy(m_module.getContext()), params, true);
244     PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
245     Constant *fun_addr_int =
246         ConstantInt::get(GetIntptrTy(), start_address, false);
247     return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
248   }
249 
250   /// Build a function pointer for a function with signature void
251   /// (*)(uint8_t*, uint8_t*) with a given address
252   ///
253   /// \param[in] start_address
254   ///     The address of the function.
255   ///
256   /// \return
257   ///     The function pointer, for use in a CallInst.
258   llvm::FunctionCallee BuildObjectCheckerFunc(lldb::addr_t start_address) {
259     llvm::Type *param_array[2];
260 
261     param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
262     param_array[1] = const_cast<llvm::PointerType *>(GetI8PtrTy());
263 
264     ArrayRef<llvm::Type *> params(param_array, 2);
265 
266     FunctionType *fun_ty = FunctionType::get(
267         llvm::Type::getVoidTy(m_module.getContext()), params, true);
268     PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
269     Constant *fun_addr_int =
270         ConstantInt::get(GetIntptrTy(), start_address, false);
271     return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
272   }
273 
274   PointerType *GetI8PtrTy() {
275     if (!m_i8ptr_ty)
276       m_i8ptr_ty = llvm::PointerType::getUnqual(m_module.getContext());
277 
278     return m_i8ptr_ty;
279   }
280 
281   IntegerType *GetIntptrTy() {
282     if (!m_intptr_ty) {
283       llvm::DataLayout data_layout(&m_module);
284 
285       m_intptr_ty = llvm::Type::getIntNTy(m_module.getContext(),
286                                           data_layout.getPointerSizeInBits());
287     }
288 
289     return m_intptr_ty;
290   }
291 
292   typedef std::vector<llvm::Instruction *> InstVector;
293   typedef InstVector::iterator InstIterator;
294 
295   InstVector m_to_instrument; ///< List of instructions the inspector found
296   llvm::Module &m_module;     ///< The module which is being instrumented
297   std::shared_ptr<UtilityFunction>
298       m_checker_function; ///< The dynamic checker function for the process
299 
300 private:
301   PointerType *m_i8ptr_ty = nullptr;
302   IntegerType *m_intptr_ty = nullptr;
303 };
304 
305 class ValidPointerChecker : public Instrumenter {
306 public:
307   ValidPointerChecker(llvm::Module &module,
308                       std::shared_ptr<UtilityFunction> checker_function)
309       : Instrumenter(module, checker_function),
310         m_valid_pointer_check_func(nullptr) {}
311 
312   ~ValidPointerChecker() override = default;
313 
314 protected:
315   bool InstrumentInstruction(llvm::Instruction *inst) override {
316     Log *log = GetLog(LLDBLog::Expressions);
317 
318     LLDB_LOGF(log, "Instrumenting load/store instruction: %s\n",
319               PrintValue(inst).c_str());
320 
321     if (!m_valid_pointer_check_func)
322       m_valid_pointer_check_func =
323           BuildPointerValidatorFunc(m_checker_function->StartAddress());
324 
325     llvm::Value *dereferenced_ptr = nullptr;
326 
327     if (llvm::LoadInst *li = dyn_cast<llvm::LoadInst>(inst))
328       dereferenced_ptr = li->getPointerOperand();
329     else if (llvm::StoreInst *si = dyn_cast<llvm::StoreInst>(inst))
330       dereferenced_ptr = si->getPointerOperand();
331     else
332       return false;
333 
334     // Insert an instruction to call the helper with the result
335     CallInst::Create(m_valid_pointer_check_func, dereferenced_ptr, "", inst);
336 
337     return true;
338   }
339 
340   bool InspectInstruction(llvm::Instruction &i) override {
341     if (isa<llvm::LoadInst>(&i) || isa<llvm::StoreInst>(&i))
342       RegisterInstruction(i);
343 
344     return true;
345   }
346 
347 private:
348   llvm::FunctionCallee m_valid_pointer_check_func;
349 };
350 
351 class ObjcObjectChecker : public Instrumenter {
352 public:
353   ObjcObjectChecker(llvm::Module &module,
354                     std::shared_ptr<UtilityFunction> checker_function)
355       : Instrumenter(module, checker_function),
356         m_objc_object_check_func(nullptr) {}
357 
358   ~ObjcObjectChecker() override = default;
359 
360   enum msgSend_type {
361     eMsgSend = 0,
362     eMsgSendSuper,
363     eMsgSendSuper_stret,
364     eMsgSend_fpret,
365     eMsgSend_stret
366   };
367 
368   std::map<llvm::Instruction *, msgSend_type> msgSend_types;
369 
370 protected:
371   bool InstrumentInstruction(llvm::Instruction *inst) override {
372     CallInst *call_inst = dyn_cast<CallInst>(inst);
373 
374     if (!call_inst)
375       return false; // call_inst really shouldn't be nullptr, because otherwise
376                     // InspectInstruction wouldn't have registered it
377 
378     if (!m_objc_object_check_func)
379       m_objc_object_check_func =
380           BuildObjectCheckerFunc(m_checker_function->StartAddress());
381 
382     // id objc_msgSend(id theReceiver, SEL theSelector, ...)
383 
384     llvm::Value *target_object;
385     llvm::Value *selector;
386 
387     switch (msgSend_types[inst]) {
388     case eMsgSend:
389     case eMsgSend_fpret:
390       // On arm64, clang uses objc_msgSend for scalar and struct return
391       // calls.  The call instruction will record which was used.
392       if (call_inst->hasStructRetAttr()) {
393         target_object = call_inst->getArgOperand(1);
394         selector = call_inst->getArgOperand(2);
395       } else {
396         target_object = call_inst->getArgOperand(0);
397         selector = call_inst->getArgOperand(1);
398       }
399       break;
400     case eMsgSend_stret:
401       target_object = call_inst->getArgOperand(1);
402       selector = call_inst->getArgOperand(2);
403       break;
404     case eMsgSendSuper:
405     case eMsgSendSuper_stret:
406       return true;
407     }
408 
409     // These objects should always be valid according to Sean Calannan
410     assert(target_object);
411     assert(selector);
412 
413     // Insert an instruction to call the helper with the result
414 
415     llvm::Value *arg_array[2];
416 
417     arg_array[0] = target_object;
418     arg_array[1] = selector;
419 
420     ArrayRef<llvm::Value *> args(arg_array, 2);
421 
422     CallInst::Create(m_objc_object_check_func, args, "", inst);
423 
424     return true;
425   }
426 
427   static llvm::Function *GetFunction(llvm::Value *value) {
428     if (llvm::Function *function = llvm::dyn_cast<llvm::Function>(value)) {
429       return function;
430     }
431 
432     if (llvm::ConstantExpr *const_expr =
433             llvm::dyn_cast<llvm::ConstantExpr>(value)) {
434       switch (const_expr->getOpcode()) {
435       default:
436         return nullptr;
437       case llvm::Instruction::BitCast:
438         return GetFunction(const_expr->getOperand(0));
439       }
440     }
441 
442     return nullptr;
443   }
444 
445   static llvm::Function *GetCalledFunction(llvm::CallInst *inst) {
446     return GetFunction(inst->getCalledOperand());
447   }
448 
449   bool InspectInstruction(llvm::Instruction &i) override {
450     Log *log = GetLog(LLDBLog::Expressions);
451 
452     CallInst *call_inst = dyn_cast<CallInst>(&i);
453 
454     if (call_inst) {
455       const llvm::Function *called_function = GetCalledFunction(call_inst);
456 
457       if (!called_function)
458         return true;
459 
460       std::string name_str = called_function->getName().str();
461       const char *name_cstr = name_str.c_str();
462 
463       LLDB_LOGF(log, "Found call to %s: %s\n", name_cstr,
464                 PrintValue(call_inst).c_str());
465 
466       if (name_str.find("objc_msgSend") == std::string::npos)
467         return true;
468 
469       if (!strcmp(name_cstr, "objc_msgSend")) {
470         RegisterInstruction(i);
471         msgSend_types[&i] = eMsgSend;
472         return true;
473       }
474 
475       if (!strcmp(name_cstr, "objc_msgSend_stret")) {
476         RegisterInstruction(i);
477         msgSend_types[&i] = eMsgSend_stret;
478         return true;
479       }
480 
481       if (!strcmp(name_cstr, "objc_msgSend_fpret")) {
482         RegisterInstruction(i);
483         msgSend_types[&i] = eMsgSend_fpret;
484         return true;
485       }
486 
487       if (!strcmp(name_cstr, "objc_msgSendSuper")) {
488         RegisterInstruction(i);
489         msgSend_types[&i] = eMsgSendSuper;
490         return true;
491       }
492 
493       if (!strcmp(name_cstr, "objc_msgSendSuper_stret")) {
494         RegisterInstruction(i);
495         msgSend_types[&i] = eMsgSendSuper_stret;
496         return true;
497       }
498 
499       LLDB_LOGF(log,
500                 "Function name '%s' contains 'objc_msgSend' but is not handled",
501                 name_str.c_str());
502 
503       return true;
504     }
505 
506     return true;
507   }
508 
509 private:
510   llvm::FunctionCallee m_objc_object_check_func;
511 };
512 
/// Construct the pass.
///
/// \param[in] checker_functions
///     The checker set whose utility functions runOnModule() reads; it is
///     stored in m_checker_functions.
///
/// \param[in] func_name
///     Name of the function to instrument; copied into m_func_name and
///     looked up in the module by runOnModule().
IRDynamicChecks::IRDynamicChecks(
    ClangDynamicCheckerFunctions &checker_functions, const char *func_name)
    : ModulePass(ID), m_func_name(func_name),
      m_checker_functions(checker_functions) {}

/// Defaulted destructor: only the members are destroyed.
IRDynamicChecks::~IRDynamicChecks() = default;
519 
520 bool IRDynamicChecks::runOnModule(llvm::Module &M) {
521   Log *log = GetLog(LLDBLog::Expressions);
522 
523   llvm::Function *function = M.getFunction(StringRef(m_func_name));
524 
525   if (!function) {
526     LLDB_LOGF(log, "Couldn't find %s() in the module", m_func_name.c_str());
527 
528     return false;
529   }
530 
531   if (m_checker_functions.m_valid_pointer_check) {
532     ValidPointerChecker vpc(M, m_checker_functions.m_valid_pointer_check);
533 
534     if (!vpc.Inspect(*function))
535       return false;
536 
537     if (!vpc.Instrument())
538       return false;
539   }
540 
541   if (m_checker_functions.m_objc_object_check) {
542     ObjcObjectChecker ooc(M, m_checker_functions.m_objc_object_check);
543 
544     if (!ooc.Inspect(*function))
545       return false;
546 
547     if (!ooc.Instrument())
548       return false;
549   }
550 
551   if (log && log->GetVerbose()) {
552     std::string s;
553     raw_string_ostream oss(s);
554 
555     M.print(oss, nullptr);
556 
557     oss.flush();
558 
559     LLDB_LOGF(log, "Module after dynamic checks: \n%s", s.c_str());
560   }
561 
562   return true;
563 }
564 
/// Empty override: nothing is done with the supplied pass-manager stack or
/// pass-manager type.
void IRDynamicChecks::assignPassManager(PMStack &PMS, PassManagerType T) {}

/// Report the pass-manager type for this pass.
///
/// \return
///     Always PMT_ModulePassManager.
PassManagerType IRDynamicChecks::getPotentialPassManagerType() const {
  return PMT_ModulePassManager;
}
570