// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Copyright 2008 Michael Ellerman, IBM Corporation.
 */

#include <linux/kprobes.h>
#include <linux/mmu_context.h>
#include <linux/random.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/cpuhotplug.h>
#include <linux/uaccess.h>
#include <linux/jump_label.h>

#include <asm/debug.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/code-patching.h>
#include <asm/inst.h>

static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
{
	if (!ppc_inst_prefixed(instr)) {
		u32 val = ppc_inst_val(instr);

		__put_kernel_nofault(patch_addr, &val, u32, failed);
	} else {
		u64 val = ppc_inst_as_ulong(instr);

		__put_kernel_nofault(patch_addr, &val, u64, failed);
	}

	/*
	 * Push the store out of the data cache (dcbst), invalidate any stale
	 * copy of the executed address in the instruction cache (icbi), and
	 * order the sequence with sync/isync so the new instruction is seen
	 * by subsequent fetches.
	 */
	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
							    "r" (exec_addr));

	return 0;

failed:
	mb();  /* sync */
	return -EPERM;
}

int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
{
	return __patch_instruction(addr, instr, addr);
}

struct patch_context {
	union {
		struct vm_struct *area;
		struct mm_struct *mm;
	};
	unsigned long addr;
	pte_t *pte;
};

static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);

static int map_patch_area(void *addr, unsigned long text_poke_addr);
static void unmap_patch_area(unsigned long addr);

static bool mm_patch_enabled(void)
{
	return IS_ENABLED(CONFIG_SMP) && radix_enabled();
}

/*
 * The following applies for Radix MMU. Hash MMU has different requirements,
 * and so is not supported.
 *
 * Changing mm requires context synchronising instructions on both sides of
 * the context switch, as well as a hwsync between the context switch and the
 * last instruction for which the address of an associated storage access was
 * translated using the current context.
 *
 * switch_mm_irqs_off() performs an isync after the context switch. It is
 * the responsibility of the caller to perform the CSI and hwsync before
 * starting/stopping the temp mm.
 */
static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
{
	struct mm_struct *orig_mm = current->active_mm;

	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(orig_mm, temp_mm, current);

	WARN_ON(!mm_is_thread_local(temp_mm));

	suspend_breakpoints();
	return orig_mm;
}

static void stop_using_temp_mm(struct mm_struct *temp_mm,
			       struct mm_struct *orig_mm)
{
	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(temp_mm, orig_mm, current);
	restore_breakpoints();
}

static int text_area_cpu_up(unsigned int cpu)
{
	struct vm_struct *area;
	unsigned long addr;
	int err;

	area = get_vm_area(PAGE_SIZE, VM_ALLOC);
	if (!area) {
		WARN_ONCE(1, "Failed to create text area for cpu %d\n",
			cpu);
		return -1;
	}

	// Map/unmap the area to ensure all page tables are pre-allocated
	addr = (unsigned long)area->addr;
	err = map_patch_area(empty_zero_page, addr);
	if (err)
		return err;

	unmap_patch_area(addr);

	this_cpu_write(cpu_patching_context.area, area);
	this_cpu_write(cpu_patching_context.addr, addr);
	this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr));

	return 0;
}

static int text_area_cpu_down(unsigned int cpu)
{
	free_vm_area(this_cpu_read(cpu_patching_context.area));
	this_cpu_write(cpu_patching_context.area, NULL);
	this_cpu_write(cpu_patching_context.addr, 0);
	this_cpu_write(cpu_patching_context.pte, NULL);
	return 0;
}

static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
{
	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, mm);
	free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
	mmput(mm);
}

static int text_area_cpu_up_mm(unsigned int cpu)
{
	struct mm_struct *mm;
	unsigned long addr;
	pte_t *pte;
	spinlock_t *ptl;

	mm = mm_alloc();
	if (WARN_ON(!mm))
		goto fail_no_mm;

	/*
	 * Choose a random page-aligned address from the interval
	 * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
	 * The lower address bound is PAGE_SIZE to avoid the zero-page.
	 */
	addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;

	/*
	 * PTE allocation uses GFP_KERNEL which means we need to
	 * pre-allocate the PTE here because we cannot do the
	 * allocation during patching when IRQs are disabled.
	 *
	 * get_locked_pte() is used to avoid open coding the page table
	 * walk; the lock it takes is not actually needed here.
	 */
	pte = get_locked_pte(mm, addr, &ptl);
	if (!pte)
		goto fail_no_pte;
	pte_unmap_unlock(pte, ptl);

	this_cpu_write(cpu_patching_context.mm, mm);
	this_cpu_write(cpu_patching_context.addr, addr);

	return 0;

fail_no_pte:
	put_patching_mm(mm, addr);
fail_no_mm:
	return -ENOMEM;
}

static int text_area_cpu_down_mm(unsigned int cpu)
{
	put_patching_mm(this_cpu_read(cpu_patching_context.mm),
			this_cpu_read(cpu_patching_context.addr));

	this_cpu_write(cpu_patching_context.mm, NULL);
	this_cpu_write(cpu_patching_context.addr, 0);

	return 0;
}

static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);

void __init poking_init(void)
{
	int ret;

	if (mm_patch_enabled())
		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
					"powerpc/text_poke_mm:online",
					text_area_cpu_up_mm,
					text_area_cpu_down_mm);
	else
		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
					"powerpc/text_poke:online",
					text_area_cpu_up,
					text_area_cpu_down);

	/* cpuhp_setup_state returns >= 0 on success */
	if (WARN_ON(ret < 0))
		return;

	static_branch_enable(&poking_init_done);
}

static unsigned long get_patch_pfn(void *addr)
{
	if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
		return vmalloc_to_pfn(addr);
	else
		return __pa_symbol(addr) >> PAGE_SHIFT;
}

/*
 * This can be called for kernel text or a module.
 */
static int map_patch_area(void *addr, unsigned long text_poke_addr)
{
	unsigned long pfn = get_patch_pfn(addr);

	return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
}

static void unmap_patch_area(unsigned long addr)
{
	pte_t *ptep;
	pmd_t *pmdp;
	pud_t *pudp;
	p4d_t *p4dp;
	pgd_t *pgdp;

	pgdp = pgd_offset_k(addr);
	if (WARN_ON(pgd_none(*pgdp)))
		return;

	p4dp = p4d_offset(pgdp, addr);
	if (WARN_ON(p4d_none(*p4dp)))
		return;

	pudp = pud_offset(p4dp, addr);
	if (WARN_ON(pud_none(*pudp)))
		return;

	pmdp = pmd_offset(pudp, addr);
	if (WARN_ON(pmd_none(*pmdp)))
		return;

	ptep = pte_offset_kernel(pmdp, addr);
	if (WARN_ON(pte_none(*ptep)))
		return;

	/*
	 * On hash, pte_clear() flushes the TLB itself; on radix, we have to
	 * flush it explicitly.
	 */
	pte_clear(&init_mm, addr, ptep);
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}

static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
{
	int err;
	u32 *patch_addr;
	unsigned long text_poke_addr;
	pte_t *pte;
	unsigned long pfn = get_patch_pfn(addr);
	struct mm_struct *patching_mm;
	struct mm_struct *orig_mm;
	spinlock_t *ptl;

	patching_mm = __this_cpu_read(cpu_patching_context.mm);
	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
	if (!pte)
		return -ENOMEM;

	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);

	/* order PTE update before use, also serves as the hwsync */
	asm volatile("ptesync": : :"memory");

	/* order context switch after arbitrary prior code */
	isync();

	orig_mm = start_using_temp_mm(patching_mm);

	err = __patch_instruction(addr, instr, patch_addr);

	/* context synchronisation performed by __patch_instruction (isync or exception) */
	stop_using_temp_mm(patching_mm, orig_mm);

	pte_clear(patching_mm, text_poke_addr, pte);
	/*
	 * ptesync to order PTE update before TLB invalidation done
	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
	 */
	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);

	pte_unmap_unlock(pte, ptl);

	return err;
}

static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
	int err;
	u32 *patch_addr;
	unsigned long text_poke_addr;
	pte_t *pte;
	unsigned long pfn = get_patch_pfn(addr);

	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

	pte = __this_cpu_read(cpu_patching_context.pte);
	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
	/* See ptesync comment in radix__set_pte_at() */
	if (radix_enabled())
		asm volatile("ptesync": : :"memory");

	err = __patch_instruction(addr, instr, patch_addr);

	pte_clear(&init_mm, text_poke_addr, pte);
	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);

	return err;
}

int patch_instruction(u32 *addr, ppc_inst_t instr)
{
	int err;
	unsigned long flags;

	/*
	 * During very early boot, patch_instruction() is called before the
	 * per-CPU patching context is set up, but we still need to allow
	 * patching, so just patch the text directly.
	 */
	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
	    !static_branch_likely(&poking_init_done))
		return raw_patch_instruction(addr, instr);

	local_irq_save(flags);
	if (mm_patch_enabled())
		err = __do_patch_instruction_mm(addr, instr);
	else
		err = __do_patch_instruction(addr, instr);
	local_irq_restore(flags);

	return err;
}
NOKPROBE_SYMBOL(patch_instruction);

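/*
 * Usage sketch (illustrative, not part of this file): a caller that wants to
 * overwrite a single kernel or module text word, e.g. with a nop, would
 * typically do:
 *
 *	err = patch_instruction(addr, ppc_inst(PPC_RAW_NOP()));
 *
 * ppc_inst() comes from <asm/inst.h> and PPC_RAW_NOP() from
 * <asm/ppc-opcode.h>; 'addr' must point at kernel or module text.
 */
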
static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr)
{
	unsigned long start = (unsigned long)patch_addr;

	/* Repeat instruction */
	if (repeat_instr) {
		ppc_inst_t instr = ppc_inst_read(code);

		if (ppc_inst_prefixed(instr)) {
			u64 val = ppc_inst_as_ulong(instr);

			memset64((u64 *)patch_addr, val, len / 8);
		} else {
			u32 val = ppc_inst_val(instr);

			memset32(patch_addr, val, len / 4);
		}
	} else {
		memcpy(patch_addr, code, len);
	}

	smp_wmb();	/* smp write barrier */
	flush_icache_range(start, start + len);
	return 0;
}

/*
 * A page is mapped and instructions that fit the page are patched.
 * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
 */
static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr)
{
	struct mm_struct *patching_mm, *orig_mm;
	unsigned long pfn = get_patch_pfn(addr);
	unsigned long text_poke_addr;
	spinlock_t *ptl;
	u32 *patch_addr;
	pte_t *pte;
	int err;

	patching_mm = __this_cpu_read(cpu_patching_context.mm);
	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
	if (!pte)
		return -ENOMEM;

	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);

	/* order PTE update before use, also serves as the hwsync */
	asm volatile("ptesync" ::: "memory");

	/* order context switch after arbitrary prior code */
	isync();

	orig_mm = start_using_temp_mm(patching_mm);

	err = __patch_instructions(patch_addr, code, len, repeat_instr);

	/* context synchronisation performed by __patch_instructions */
	stop_using_temp_mm(patching_mm, orig_mm);

	pte_clear(patching_mm, text_poke_addr, pte);
	/*
	 * ptesync to order PTE update before TLB invalidation done
	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
	 */
	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);

	pte_unmap_unlock(pte, ptl);

	return err;
}

/*
 * A page is mapped and instructions that fit the page are patched.
 * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
 */
static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
{
	unsigned long pfn = get_patch_pfn(addr);
	unsigned long text_poke_addr;
	u32 *patch_addr;
	pte_t *pte;
	int err;

	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

	pte = __this_cpu_read(cpu_patching_context.pte);
	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
	/* See ptesync comment in radix__set_pte_at() */
	if (radix_enabled())
		asm volatile("ptesync" ::: "memory");

	err = __patch_instructions(patch_addr, code, len, repeat_instr);

	pte_clear(&init_mm, text_poke_addr, pte);
	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);

	return err;
}

/*
 * Patch 'addr' with 'len' bytes of instructions from 'code'.
 *
 * If repeat_instr is true, the single instruction at 'code' is replicated
 * to fill 'len' bytes.
 */
int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
{
	while (len > 0) {
		unsigned long flags;
		size_t plen;
		int err;

		plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);

		local_irq_save(flags);
		if (mm_patch_enabled())
			err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
		else
			err = __do_patch_instructions(addr, code, plen, repeat_instr);
		local_irq_restore(flags);
		if (err)
			return err;

		len -= plen;
		addr = (u32 *)((unsigned long)addr + plen);
		if (!repeat_instr)
			code = (u32 *)((unsigned long)code + plen);
	}

	return 0;
}
NOKPROBE_SYMBOL(patch_instructions);

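/*
 * Usage sketch (illustrative, not part of this file): to fill a region with
 * copies of one instruction, e.g. a trap, pass repeat_instr = true:
 *
 *	u32 insn = PPC_RAW_TRAP();
 *
 *	err = patch_instructions(dst, &insn, size, true);
 *
 * PPC_RAW_TRAP() is from <asm/ppc-opcode.h>; 'dst' and 'size' are the
 * caller's text region and byte length (a multiple of the instruction size).
 */
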
int patch_branch(u32 *addr, unsigned long target, int flags)
{
	ppc_inst_t instr;

	if (create_branch(&instr, addr, target, flags))
		return -ERANGE;

	return patch_instruction(addr, instr);
}

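/*
 * Usage sketch (illustrative): redirect the instruction at a code site to a
 * handler with a relative branch, or a branch-and-link by passing
 * BRANCH_SET_LINK ('site' and 'handler' are hypothetical names):
 *
 *	err = patch_branch(site, (unsigned long)handler, 0);
 *	err = patch_branch(site, (unsigned long)handler, BRANCH_SET_LINK);
 *
 * -ERANGE is returned if the target is outside the +/- 32 MB I-form range.
 */
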
/*
 * Helper to check if a given instruction is a conditional branch
 * Derived from the conditional checks in analyse_instr()
 */
bool is_conditional_branch(ppc_inst_t instr)
{
	unsigned int opcode = ppc_inst_primary_opcode(instr);

	if (opcode == 16)       /* bc, bca, bcl, bcla */
		return true;
	if (opcode == 19) {
		switch ((ppc_inst_val(instr) >> 1) & 0x3ff) {
		case 16:        /* bclr, bclrl */
		case 528:       /* bcctr, bcctrl */
		case 560:       /* bctar, bctarl */
			return true;
		}
	}
	return false;
}
NOKPROBE_SYMBOL(is_conditional_branch);

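/*
 * Note (illustrative): "conditional" here means the whole bc/bclr/bcctr/bctar
 * families, including their branch-always encodings. For example, blr is
 * bclr 20,0: primary opcode 19 with extended opcode 16, so
 * is_conditional_branch() reports it as true.
 */
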
int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
		       unsigned long target, int flags)
{
	long offset;

	offset = target;
	if (!(flags & BRANCH_ABSOLUTE))
		offset = offset - (unsigned long)addr;

	/* Check we can represent the target in the instruction format */
	if (!is_offset_in_cond_branch_range(offset))
		return 1;

	/* Mask out the flags and target, so they don't step on each other. */
	*instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC));

	return 0;
}

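/*
 * Usage sketch (illustrative): build a conditional branch from 'addr' to
 * 'target' and patch it in, giving up if the +/- 32 kB B-form displacement
 * range is exceeded:
 *
 *	ppc_inst_t instr;
 *
 *	if (create_cond_branch(&instr, addr, target, flags))
 *		return -ERANGE;
 *	return patch_instruction(addr, instr);
 */
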
int instr_is_relative_branch(ppc_inst_t instr)
{
	if (ppc_inst_val(instr) & BRANCH_ABSOLUTE)
		return 0;

	return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
}

int instr_is_relative_link_branch(ppc_inst_t instr)
{
	return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK);
}

static unsigned long branch_iform_target(const u32 *instr)
{
	signed long imm;

	imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC;

	/* If the top bit of the immediate value is set this is negative */
	if (imm & 0x2000000)
		imm -= 0x4000000;

	if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
		imm += (unsigned long)instr;

	return (unsigned long)imm;
}

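/*
 * Worked example (illustrative): a relative 'b' at 0xc000000000001000 that
 * branches back 0x100 bytes has LI/AA/LK bits 0x3FFFF00. The top bit
 * (0x2000000) is set, so imm becomes 0x3FFFF00 - 0x4000000 = -0x100, and
 * adding the instruction address gives the target 0xc000000000000f00.
 */
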
static unsigned long branch_bform_target(const u32 *instr)
{
	signed long imm;

	imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC;

	/* If the top bit of the immediate value is set this is negative */
	if (imm & 0x8000)
		imm -= 0x10000;

	if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
		imm += (unsigned long)instr;

	return (unsigned long)imm;
}

unsigned long branch_target(const u32 *instr)
{
	if (instr_is_branch_iform(ppc_inst_read(instr)))
		return branch_iform_target(instr);
	else if (instr_is_branch_bform(ppc_inst_read(instr)))
		return branch_bform_target(instr);

	return 0;
}

int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src)
{
	unsigned long target;
	target = branch_target(src);

	if (instr_is_branch_iform(ppc_inst_read(src)))
		return create_branch(instr, dest, target,
				     ppc_inst_val(ppc_inst_read(src)));
	else if (instr_is_branch_bform(ppc_inst_read(src)))
		return create_cond_branch(instr, dest, target,
					  ppc_inst_val(ppc_inst_read(src)));

	return 1;
}
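
/*
 * Usage sketch (illustrative): when an instruction is copied from 'src' to
 * 'dest' (e.g. when relocating code), a relative branch must be re-encoded
 * so it still reaches its original target:
 *
 *	ppc_inst_t instr;
 *
 *	if (instr_is_relative_branch(ppc_inst_read(src))) {
 *		if (translate_branch(&instr, dest, src))
 *			return -ERANGE;
 *		patch_instruction(dest, instr);
 *	}
 */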