// SPDX-License-Identifier: GPL-2.0-only

#define pr_fmt(fmt) "callthunks: " fmt

#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/moduleloader.h>
#include <linux/static_call.h>

#include <asm/alternative.h>
#include <asm/asm-offsets.h>
#include <asm/cpu.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
#include <asm/kexec.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
#include <asm/sections.h>
#include <asm/switch_to.h>
#include <asm/sync_core.h>
#include <asm/text-patching.h>
#include <asm/xen/hypercall.h>

static int __initdata_or_module debug_callthunks;

#define MAX_PATCH_LEN (255-1)

#define prdbg(fmt, args...)					\
do {								\
	if (debug_callthunks)					\
		printk(KERN_DEBUG pr_fmt(fmt), ##args);		\
} while(0)

static int __init debug_thunks(char *str)
{
	debug_callthunks = 1;
	return 1;
}
__setup("debug-callthunks", debug_thunks);

#ifdef CONFIG_CALL_THUNKS_DEBUG
DEFINE_PER_CPU(u64, __x86_call_count);
DEFINE_PER_CPU(u64, __x86_ret_count);
DEFINE_PER_CPU(u64, __x86_stuffs_count);
DEFINE_PER_CPU(u64, __x86_ctxsw_count);
EXPORT_PER_CPU_SYMBOL_GPL(__x86_ctxsw_count);
EXPORT_PER_CPU_SYMBOL_GPL(__x86_call_count);
#endif

extern s32 __call_sites[], __call_sites_end[];

struct core_text {
	unsigned long	base;
	unsigned long	end;
	const char	*name;
};

static bool thunks_initialized __ro_after_init;

static const struct core_text builtin_coretext = {
	.base = (unsigned long)_text,
	.end  = (unsigned long)_etext,
	.name = "builtin",
};

asm (
	".pushsection .rodata \n"
	".global skl_call_thunk_template \n"
	"skl_call_thunk_template: \n"
	__stringify(INCREMENT_CALL_DEPTH)" \n"
	".global skl_call_thunk_tail \n"
	"skl_call_thunk_tail: \n"
	".popsection \n"
);

extern u8 skl_call_thunk_template[];
extern u8 skl_call_thunk_tail[];

#define SKL_TMPL_SIZE \
	((unsigned int)(skl_call_thunk_tail - skl_call_thunk_template))

extern void error_entry(void);
extern void xen_error_entry(void);
extern void paranoid_entry(void);

static inline bool within_coretext(const struct core_text *ct, void *addr)
{
	unsigned long p = (unsigned long)addr;

	return ct->base <= p && p < ct->end;
}

static inline bool within_module_coretext(void *addr)
{
	bool ret = false;

#ifdef CONFIG_MODULES
	struct module *mod;

	preempt_disable();
	mod = __module_address((unsigned long)addr);
	if (mod && within_module_core((unsigned long)addr, mod))
		ret = true;
	preempt_enable();
#endif
	return ret;
}

static bool is_coretext(const struct core_text *ct, void *addr)
{
	if (ct && within_coretext(ct, addr))
		return true;
	if (within_coretext(&builtin_coretext, addr))
		return true;
	return within_module_coretext(addr);
}

static bool skip_addr(void *dest)
{
	if (dest == error_entry)
		return true;
	if (dest == paranoid_entry)
		return true;
	if (dest == xen_error_entry)
		return true;
	/* Does FILL_RSB... */
	if (dest == __switch_to_asm)
		return true;
	/* Accounts directly */
	if (dest == ret_from_fork)
		return true;
#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT)
	if (dest == soft_restart_cpu)
		return true;
#endif
#ifdef CONFIG_FUNCTION_TRACER
	if (dest == __fentry__)
		return true;
#endif
#ifdef CONFIG_KEXEC_CORE
# ifdef CONFIG_X86_64
	if (dest >= (void *)__relocate_kernel_start &&
	    dest < (void *)__relocate_kernel_end)
		return true;
# else
	if (dest >= (void *)relocate_kernel &&
	    dest < (void *)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE)
		return true;
# endif
#endif
	return false;
}

static __init_or_module void *call_get_dest(void *addr)
{
	struct insn insn;
	void *dest;
	int ret;

	ret = insn_decode_kernel(&insn, addr);
	if (ret)
		return ERR_PTR(ret);

	/* Patched out call? */
	if (insn.opcode.bytes[0] != CALL_INSN_OPCODE)
		return NULL;

	dest = addr + insn.length + insn.immediate.value;
	if (skip_addr(dest))
		return NULL;
	return dest;
}

static const u8 nops[] = {
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
};

/*
 * Patch the call depth accounting template into the padding area in
 * front of the destination function.
 */
static void *patch_dest(void *dest, bool direct)
{
	unsigned int tsize = SKL_TMPL_SIZE;
	u8 insn_buff[MAX_PATCH_LEN];
	u8 *pad = dest - tsize;

	memcpy(insn_buff, skl_call_thunk_template, tsize);
	apply_relocation(insn_buff, pad, tsize, skl_call_thunk_template, tsize);

	/* Already patched? */
	if (!bcmp(pad, insn_buff, tsize))
		return pad;

	/* Ensure there are nops */
	if (bcmp(pad, nops, tsize)) {
		pr_warn_once("Invalid padding area for %pS\n", dest);
		return NULL;
	}

	if (direct)
		memcpy(pad, insn_buff, tsize);
	else
		text_poke_copy_locked(pad, insn_buff, tsize, true);
	return pad;
}

static __init_or_module void patch_call(void *addr, const struct core_text *ct)
{
	void *pad, *dest;
	u8 bytes[8];

	if (!within_coretext(ct, addr))
		return;

	dest = call_get_dest(addr);
	if (!dest || WARN_ON_ONCE(IS_ERR(dest)))
		return;

	if (!is_coretext(ct, dest))
		return;

	pad = patch_dest(dest, within_coretext(ct, dest));
	if (!pad)
		return;

	prdbg("Patch call at: %pS %px to %pS %px -> %px\n", addr, addr,
	      dest, dest, pad);
	__text_gen_insn(bytes, CALL_INSN_OPCODE, addr, pad, CALL_INSN_SIZE);
	text_poke_early(addr, bytes, CALL_INSN_SIZE);
}

static __init_or_module void
patch_call_sites(s32 *start, s32 *end, const struct core_text *ct)
{
	s32 *s;

	for (s = start; s < end; s++)
		patch_call((void *)s + *s, ct);
}

static __init_or_module void
callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct)
{
	prdbg("Patching call sites %s\n", ct->name);
	patch_call_sites(cs->call_start, cs->call_end, ct);
	prdbg("Patching call sites done %s\n", ct->name);
}

void __init callthunks_patch_builtin_calls(void)
{
	struct callthunk_sites cs = {
		.call_start	= __call_sites,
		.call_end	= __call_sites_end,
	};

	if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
		return;

	pr_info("Setting up call depth tracking\n");
	mutex_lock(&text_mutex);
	callthunks_setup(&cs, &builtin_coretext);
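	/*
	 * Set thunks_initialized only after the builtin core text has been
	 * patched; later callers (module loading, static calls, the BPF JIT)
	 * check it before touching any padding areas. text_mutex is still
	 * held at this point.
	 */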
	thunks_initialized = true;
	mutex_unlock(&text_mutex);
}

void *callthunks_translate_call_dest(void *dest)
{
	void *target;

	lockdep_assert_held(&text_mutex);

	if (!thunks_initialized || skip_addr(dest))
		return dest;

	if (!is_coretext(NULL, dest))
		return dest;

	target = patch_dest(dest, false);
	return target ? : dest;
}

#ifdef CONFIG_BPF_JIT
/*
 * Check whether the padding area in front of the function containing
 * @addr already holds the relocated accounting thunk template.
 */
static bool is_callthunk(void *addr)
{
	unsigned int tmpl_size = SKL_TMPL_SIZE;
	u8 insn_buff[MAX_PATCH_LEN];
	unsigned long dest;
	u8 *pad;

	dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
	if (!thunks_initialized || skip_addr((void *)dest))
		return false;

	pad = (void *)(dest - tmpl_size);

	memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
	apply_relocation(insn_buff, pad, tmpl_size, skl_call_thunk_template, tmpl_size);

	return !bcmp(pad, insn_buff, tmpl_size);
}

int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
{
	unsigned int tmpl_size = SKL_TMPL_SIZE;
	u8 insn_buff[MAX_PATCH_LEN];

	if (!thunks_initialized)
		return 0;

	/* Is function call target a thunk? */
	if (func && is_callthunk(func))
		return 0;

	memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
	apply_relocation(insn_buff, ip, tmpl_size, skl_call_thunk_template, tmpl_size);

	memcpy(*pprog, insn_buff, tmpl_size);
	*pprog += tmpl_size;
	return tmpl_size;
}
#endif

#ifdef CONFIG_MODULES
void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
					    struct module *mod)
{
	struct core_text ct = {
		.base = (unsigned long)mod->mem[MOD_TEXT].base,
		.end  = (unsigned long)mod->mem[MOD_TEXT].base + mod->mem[MOD_TEXT].size,
		.name = mod->name,
	};

	if (!thunks_initialized)
		return;

	mutex_lock(&text_mutex);
	callthunks_setup(cs, &ct);
	mutex_unlock(&text_mutex);
}
#endif /* CONFIG_MODULES */

#if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS)
static int callthunks_debug_show(struct seq_file *m, void *p)
{
	unsigned long cpu = (unsigned long)m->private;

	seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n",
		   per_cpu(__x86_call_count, cpu),
		   per_cpu(__x86_ret_count, cpu),
		   per_cpu(__x86_stuffs_count, cpu),
		   per_cpu(__x86_ctxsw_count, cpu));
	return 0;
}

static int callthunks_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, callthunks_debug_show, inode->i_private);
}

static const struct file_operations dfs_ops = {
	.open		= callthunks_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init callthunks_debugfs_init(void)
{
	struct dentry *dir;
	unsigned long cpu;

	dir = debugfs_create_dir("callthunks", NULL);
	for_each_possible_cpu(cpu) {
		void *arg = (void *)cpu;
		char name[10];

		sprintf(name, "cpu%lu", cpu);
		debugfs_create_file(name, 0644, dir, arg, &dfs_ops);
	}
	return 0;
}
__initcall(callthunks_debugfs_init);
#endif