xref: /linux/arch/powerpc/lib/code-patching.c (revision 90d32e92011eaae8e70a9169b4e7acf4ca8f9d3a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *  Copyright 2008 Michael Ellerman, IBM Corporation.
4  */
5 
6 #include <linux/kprobes.h>
7 #include <linux/mmu_context.h>
8 #include <linux/random.h>
9 #include <linux/vmalloc.h>
10 #include <linux/init.h>
11 #include <linux/cpuhotplug.h>
12 #include <linux/uaccess.h>
13 #include <linux/jump_label.h>
14 
15 #include <asm/debug.h>
16 #include <asm/pgalloc.h>
17 #include <asm/tlb.h>
18 #include <asm/tlbflush.h>
19 #include <asm/page.h>
20 #include <asm/code-patching.h>
21 #include <asm/inst.h>
22 
23 static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
24 {
25 	if (!ppc_inst_prefixed(instr)) {
26 		u32 val = ppc_inst_val(instr);
27 
28 		__put_kernel_nofault(patch_addr, &val, u32, failed);
29 	} else {
30 		u64 val = ppc_inst_as_ulong(instr);
31 
32 		__put_kernel_nofault(patch_addr, &val, u64, failed);
33 	}
34 
35 	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
36 							    "r" (exec_addr));
37 
38 	return 0;
39 
40 failed:
41 	mb();  /* sync */
42 	return -EPERM;
43 }
44 
45 int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
46 {
47 	return __patch_instruction(addr, instr, addr);
48 }
49 
50 struct patch_context {
51 	union {
52 		struct vm_struct *area;
53 		struct mm_struct *mm;
54 	};
55 	unsigned long addr;
56 	pte_t *pte;
57 };
58 
59 static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
60 
61 static int map_patch_area(void *addr, unsigned long text_poke_addr);
62 static void unmap_patch_area(unsigned long addr);
63 
64 static bool mm_patch_enabled(void)
65 {
66 	return IS_ENABLED(CONFIG_SMP) && radix_enabled();
67 }
68 
69 /*
70  * The following applies for Radix MMU. Hash MMU has different requirements,
71  * and so is not supported.
72  *
73  * Changing mm requires context synchronising instructions on both sides of
74  * the context switch, as well as a hwsync between the last instruction for
75  * which the address of an associated storage access was translated using
76  * the current context.
77  *
78  * switch_mm_irqs_off() performs an isync after the context switch. It is
79  * the responsibility of the caller to perform the CSI and hwsync before
80  * starting/stopping the temp mm.
81  */
82 static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
83 {
84 	struct mm_struct *orig_mm = current->active_mm;
85 
86 	lockdep_assert_irqs_disabled();
87 	switch_mm_irqs_off(orig_mm, temp_mm, current);
88 
89 	WARN_ON(!mm_is_thread_local(temp_mm));
90 
91 	suspend_breakpoints();
92 	return orig_mm;
93 }
94 
95 static void stop_using_temp_mm(struct mm_struct *temp_mm,
96 			       struct mm_struct *orig_mm)
97 {
98 	lockdep_assert_irqs_disabled();
99 	switch_mm_irqs_off(temp_mm, orig_mm, current);
100 	restore_breakpoints();
101 }
102 
103 static int text_area_cpu_up(unsigned int cpu)
104 {
105 	struct vm_struct *area;
106 	unsigned long addr;
107 	int err;
108 
109 	area = get_vm_area(PAGE_SIZE, VM_ALLOC);
110 	if (!area) {
111 		WARN_ONCE(1, "Failed to create text area for cpu %d\n",
112 			cpu);
113 		return -1;
114 	}
115 
116 	// Map/unmap the area to ensure all page tables are pre-allocated
117 	addr = (unsigned long)area->addr;
118 	err = map_patch_area(empty_zero_page, addr);
119 	if (err)
120 		return err;
121 
122 	unmap_patch_area(addr);
123 
124 	this_cpu_write(cpu_patching_context.area, area);
125 	this_cpu_write(cpu_patching_context.addr, addr);
126 	this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr));
127 
128 	return 0;
129 }
130 
131 static int text_area_cpu_down(unsigned int cpu)
132 {
133 	free_vm_area(this_cpu_read(cpu_patching_context.area));
134 	this_cpu_write(cpu_patching_context.area, NULL);
135 	this_cpu_write(cpu_patching_context.addr, 0);
136 	this_cpu_write(cpu_patching_context.pte, NULL);
137 	return 0;
138 }
139 
140 static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
141 {
142 	struct mmu_gather tlb;
143 
144 	tlb_gather_mmu(&tlb, mm);
145 	free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
146 	mmput(mm);
147 }
148 
149 static int text_area_cpu_up_mm(unsigned int cpu)
150 {
151 	struct mm_struct *mm;
152 	unsigned long addr;
153 	pte_t *pte;
154 	spinlock_t *ptl;
155 
156 	mm = mm_alloc();
157 	if (WARN_ON(!mm))
158 		goto fail_no_mm;
159 
160 	/*
161 	 * Choose a random page-aligned address from the interval
162 	 * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
163 	 * The lower address bound is PAGE_SIZE to avoid the zero-page.
164 	 */
165 	addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;
166 
167 	/*
168 	 * PTE allocation uses GFP_KERNEL which means we need to
169 	 * pre-allocate the PTE here because we cannot do the
170 	 * allocation during patching when IRQs are disabled.
171 	 *
172 	 * Using get_locked_pte() to avoid open coding, the lock
173 	 * is unnecessary.
174 	 */
175 	pte = get_locked_pte(mm, addr, &ptl);
176 	if (!pte)
177 		goto fail_no_pte;
178 	pte_unmap_unlock(pte, ptl);
179 
180 	this_cpu_write(cpu_patching_context.mm, mm);
181 	this_cpu_write(cpu_patching_context.addr, addr);
182 
183 	return 0;
184 
185 fail_no_pte:
186 	put_patching_mm(mm, addr);
187 fail_no_mm:
188 	return -ENOMEM;
189 }
190 
191 static int text_area_cpu_down_mm(unsigned int cpu)
192 {
193 	put_patching_mm(this_cpu_read(cpu_patching_context.mm),
194 			this_cpu_read(cpu_patching_context.addr));
195 
196 	this_cpu_write(cpu_patching_context.mm, NULL);
197 	this_cpu_write(cpu_patching_context.addr, 0);
198 
199 	return 0;
200 }
201 
202 static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);
203 
204 void __init poking_init(void)
205 {
206 	int ret;
207 
208 	if (mm_patch_enabled())
209 		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
210 					"powerpc/text_poke_mm:online",
211 					text_area_cpu_up_mm,
212 					text_area_cpu_down_mm);
213 	else
214 		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
215 					"powerpc/text_poke:online",
216 					text_area_cpu_up,
217 					text_area_cpu_down);
218 
219 	/* cpuhp_setup_state returns >= 0 on success */
220 	if (WARN_ON(ret < 0))
221 		return;
222 
223 	static_branch_enable(&poking_init_done);
224 }
225 
226 static unsigned long get_patch_pfn(void *addr)
227 {
228 	if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr))
229 		return vmalloc_to_pfn(addr);
230 	else
231 		return __pa_symbol(addr) >> PAGE_SHIFT;
232 }
233 
234 /*
235  * This can be called for kernel text or a module.
236  */
237 static int map_patch_area(void *addr, unsigned long text_poke_addr)
238 {
239 	unsigned long pfn = get_patch_pfn(addr);
240 
241 	return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
242 }
243 
244 static void unmap_patch_area(unsigned long addr)
245 {
246 	pte_t *ptep;
247 	pmd_t *pmdp;
248 	pud_t *pudp;
249 	p4d_t *p4dp;
250 	pgd_t *pgdp;
251 
252 	pgdp = pgd_offset_k(addr);
253 	if (WARN_ON(pgd_none(*pgdp)))
254 		return;
255 
256 	p4dp = p4d_offset(pgdp, addr);
257 	if (WARN_ON(p4d_none(*p4dp)))
258 		return;
259 
260 	pudp = pud_offset(p4dp, addr);
261 	if (WARN_ON(pud_none(*pudp)))
262 		return;
263 
264 	pmdp = pmd_offset(pudp, addr);
265 	if (WARN_ON(pmd_none(*pmdp)))
266 		return;
267 
268 	ptep = pte_offset_kernel(pmdp, addr);
269 	if (WARN_ON(pte_none(*ptep)))
270 		return;
271 
272 	/*
273 	 * In hash, pte_clear flushes the tlb, in radix, we have to
274 	 */
275 	pte_clear(&init_mm, addr, ptep);
276 	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
277 }
278 
279 static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
280 {
281 	int err;
282 	u32 *patch_addr;
283 	unsigned long text_poke_addr;
284 	pte_t *pte;
285 	unsigned long pfn = get_patch_pfn(addr);
286 	struct mm_struct *patching_mm;
287 	struct mm_struct *orig_mm;
288 	spinlock_t *ptl;
289 
290 	patching_mm = __this_cpu_read(cpu_patching_context.mm);
291 	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
292 	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
293 
294 	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
295 	if (!pte)
296 		return -ENOMEM;
297 
298 	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
299 
300 	/* order PTE update before use, also serves as the hwsync */
301 	asm volatile("ptesync": : :"memory");
302 
303 	/* order context switch after arbitrary prior code */
304 	isync();
305 
306 	orig_mm = start_using_temp_mm(patching_mm);
307 
308 	err = __patch_instruction(addr, instr, patch_addr);
309 
310 	/* context synchronisation performed by __patch_instruction (isync or exception) */
311 	stop_using_temp_mm(patching_mm, orig_mm);
312 
313 	pte_clear(patching_mm, text_poke_addr, pte);
314 	/*
315 	 * ptesync to order PTE update before TLB invalidation done
316 	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
317 	 */
318 	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
319 
320 	pte_unmap_unlock(pte, ptl);
321 
322 	return err;
323 }
324 
325 static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
326 {
327 	int err;
328 	u32 *patch_addr;
329 	unsigned long text_poke_addr;
330 	pte_t *pte;
331 	unsigned long pfn = get_patch_pfn(addr);
332 
333 	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
334 	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
335 
336 	pte = __this_cpu_read(cpu_patching_context.pte);
337 	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
338 	/* See ptesync comment in radix__set_pte_at() */
339 	if (radix_enabled())
340 		asm volatile("ptesync": : :"memory");
341 
342 	err = __patch_instruction(addr, instr, patch_addr);
343 
344 	pte_clear(&init_mm, text_poke_addr, pte);
345 	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
346 
347 	return err;
348 }
349 
350 int patch_instruction(u32 *addr, ppc_inst_t instr)
351 {
352 	int err;
353 	unsigned long flags;
354 
355 	/*
356 	 * During early early boot patch_instruction is called
357 	 * when text_poke_area is not ready, but we still need
358 	 * to allow patching. We just do the plain old patching
359 	 */
360 	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
361 	    !static_branch_likely(&poking_init_done))
362 		return raw_patch_instruction(addr, instr);
363 
364 	local_irq_save(flags);
365 	if (mm_patch_enabled())
366 		err = __do_patch_instruction_mm(addr, instr);
367 	else
368 		err = __do_patch_instruction(addr, instr);
369 	local_irq_restore(flags);
370 
371 	return err;
372 }
373 NOKPROBE_SYMBOL(patch_instruction);
374 
375 static int patch_memset64(u64 *addr, u64 val, size_t count)
376 {
377 	for (u64 *end = addr + count; addr < end; addr++)
378 		__put_kernel_nofault(addr, &val, u64, failed);
379 
380 	return 0;
381 
382 failed:
383 	return -EPERM;
384 }
385 
386 static int patch_memset32(u32 *addr, u32 val, size_t count)
387 {
388 	for (u32 *end = addr + count; addr < end; addr++)
389 		__put_kernel_nofault(addr, &val, u32, failed);
390 
391 	return 0;
392 
393 failed:
394 	return -EPERM;
395 }
396 
397 static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr)
398 {
399 	unsigned long start = (unsigned long)patch_addr;
400 	int err;
401 
402 	/* Repeat instruction */
403 	if (repeat_instr) {
404 		ppc_inst_t instr = ppc_inst_read(code);
405 
406 		if (ppc_inst_prefixed(instr)) {
407 			u64 val = ppc_inst_as_ulong(instr);
408 
409 			err = patch_memset64((u64 *)patch_addr, val, len / 8);
410 		} else {
411 			u32 val = ppc_inst_val(instr);
412 
413 			err = patch_memset32(patch_addr, val, len / 4);
414 		}
415 	} else {
416 		err = copy_to_kernel_nofault(patch_addr, code, len);
417 	}
418 
419 	smp_wmb();	/* smp write barrier */
420 	flush_icache_range(start, start + len);
421 	return err;
422 }
423 
424 /*
425  * A page is mapped and instructions that fit the page are patched.
426  * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
427  */
428 static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr)
429 {
430 	struct mm_struct *patching_mm, *orig_mm;
431 	unsigned long pfn = get_patch_pfn(addr);
432 	unsigned long text_poke_addr;
433 	spinlock_t *ptl;
434 	u32 *patch_addr;
435 	pte_t *pte;
436 	int err;
437 
438 	patching_mm = __this_cpu_read(cpu_patching_context.mm);
439 	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
440 	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
441 
442 	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
443 	if (!pte)
444 		return -ENOMEM;
445 
446 	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
447 
448 	/* order PTE update before use, also serves as the hwsync */
449 	asm volatile("ptesync" ::: "memory");
450 
451 	/* order context switch after arbitrary prior code */
452 	isync();
453 
454 	orig_mm = start_using_temp_mm(patching_mm);
455 
456 	err = __patch_instructions(patch_addr, code, len, repeat_instr);
457 
458 	/* context synchronisation performed by __patch_instructions */
459 	stop_using_temp_mm(patching_mm, orig_mm);
460 
461 	pte_clear(patching_mm, text_poke_addr, pte);
462 	/*
463 	 * ptesync to order PTE update before TLB invalidation done
464 	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
465 	 */
466 	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
467 
468 	pte_unmap_unlock(pte, ptl);
469 
470 	return err;
471 }
472 
473 /*
474  * A page is mapped and instructions that fit the page are patched.
475  * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
476  */
477 static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
478 {
479 	unsigned long pfn = get_patch_pfn(addr);
480 	unsigned long text_poke_addr;
481 	u32 *patch_addr;
482 	pte_t *pte;
483 	int err;
484 
485 	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
486 	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
487 
488 	pte = __this_cpu_read(cpu_patching_context.pte);
489 	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
490 	/* See ptesync comment in radix__set_pte_at() */
491 	if (radix_enabled())
492 		asm volatile("ptesync" ::: "memory");
493 
494 	err = __patch_instructions(patch_addr, code, len, repeat_instr);
495 
496 	pte_clear(&init_mm, text_poke_addr, pte);
497 	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
498 
499 	return err;
500 }
501 
502 /*
503  * Patch 'addr' with 'len' bytes of instructions from 'code'.
504  *
505  * If repeat_instr is true, the same instruction is filled for
506  * 'len' bytes.
507  */
508 int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
509 {
510 	while (len > 0) {
511 		unsigned long flags;
512 		size_t plen;
513 		int err;
514 
515 		plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
516 
517 		local_irq_save(flags);
518 		if (mm_patch_enabled())
519 			err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
520 		else
521 			err = __do_patch_instructions(addr, code, plen, repeat_instr);
522 		local_irq_restore(flags);
523 		if (err)
524 			return err;
525 
526 		len -= plen;
527 		addr = (u32 *)((unsigned long)addr + plen);
528 		if (!repeat_instr)
529 			code = (u32 *)((unsigned long)code + plen);
530 	}
531 
532 	return 0;
533 }
534 NOKPROBE_SYMBOL(patch_instructions);
535 
536 int patch_branch(u32 *addr, unsigned long target, int flags)
537 {
538 	ppc_inst_t instr;
539 
540 	if (create_branch(&instr, addr, target, flags))
541 		return -ERANGE;
542 
543 	return patch_instruction(addr, instr);
544 }
545 
546 /*
547  * Helper to check if a given instruction is a conditional branch
548  * Derived from the conditional checks in analyse_instr()
549  */
550 bool is_conditional_branch(ppc_inst_t instr)
551 {
552 	unsigned int opcode = ppc_inst_primary_opcode(instr);
553 
554 	if (opcode == 16)       /* bc, bca, bcl, bcla */
555 		return true;
556 	if (opcode == 19) {
557 		switch ((ppc_inst_val(instr) >> 1) & 0x3ff) {
558 		case 16:        /* bclr, bclrl */
559 		case 528:       /* bcctr, bcctrl */
560 		case 560:       /* bctar, bctarl */
561 			return true;
562 		}
563 	}
564 	return false;
565 }
566 NOKPROBE_SYMBOL(is_conditional_branch);
567 
568 int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
569 		       unsigned long target, int flags)
570 {
571 	long offset;
572 
573 	offset = target;
574 	if (! (flags & BRANCH_ABSOLUTE))
575 		offset = offset - (unsigned long)addr;
576 
577 	/* Check we can represent the target in the instruction format */
578 	if (!is_offset_in_cond_branch_range(offset))
579 		return 1;
580 
581 	/* Mask out the flags and target, so they don't step on each other. */
582 	*instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC));
583 
584 	return 0;
585 }
586 
587 int instr_is_relative_branch(ppc_inst_t instr)
588 {
589 	if (ppc_inst_val(instr) & BRANCH_ABSOLUTE)
590 		return 0;
591 
592 	return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
593 }
594 
595 int instr_is_relative_link_branch(ppc_inst_t instr)
596 {
597 	return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK);
598 }
599 
600 static unsigned long branch_iform_target(const u32 *instr)
601 {
602 	signed long imm;
603 
604 	imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC;
605 
606 	/* If the top bit of the immediate value is set this is negative */
607 	if (imm & 0x2000000)
608 		imm -= 0x4000000;
609 
610 	if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
611 		imm += (unsigned long)instr;
612 
613 	return (unsigned long)imm;
614 }
615 
616 static unsigned long branch_bform_target(const u32 *instr)
617 {
618 	signed long imm;
619 
620 	imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC;
621 
622 	/* If the top bit of the immediate value is set this is negative */
623 	if (imm & 0x8000)
624 		imm -= 0x10000;
625 
626 	if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
627 		imm += (unsigned long)instr;
628 
629 	return (unsigned long)imm;
630 }
631 
632 unsigned long branch_target(const u32 *instr)
633 {
634 	if (instr_is_branch_iform(ppc_inst_read(instr)))
635 		return branch_iform_target(instr);
636 	else if (instr_is_branch_bform(ppc_inst_read(instr)))
637 		return branch_bform_target(instr);
638 
639 	return 0;
640 }
641 
642 int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src)
643 {
644 	unsigned long target;
645 	target = branch_target(src);
646 
647 	if (instr_is_branch_iform(ppc_inst_read(src)))
648 		return create_branch(instr, dest, target,
649 				     ppc_inst_val(ppc_inst_read(src)));
650 	else if (instr_is_branch_bform(ppc_inst_read(src)))
651 		return create_cond_branch(instr, dest, target,
652 					  ppc_inst_val(ppc_inst_read(src)));
653 
654 	return 1;
655 }
656