xref: /linux/arch/um/os-Linux/skas/process.c (revision cbb8e65e234e0139c0c516bb6b9110d210eecd3f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
4  * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
5  */
6 
7 #include <stdlib.h>
8 #include <stdbool.h>
9 #include <unistd.h>
10 #include <sched.h>
11 #include <errno.h>
12 #include <string.h>
13 #include <sys/mman.h>
14 #include <sys/wait.h>
15 #include <asm/unistd.h>
16 #include <as-layout.h>
17 #include <init.h>
18 #include <kern_util.h>
19 #include <mem.h>
20 #include <os.h>
21 #include <ptrace_user.h>
22 #include <registers.h>
23 #include <skas.h>
24 #include <sysdep/stub.h>
25 #include <linux/threads.h>
26 #include <timetravel.h>
27 #include "../internal.h"
28 
/*
 * Report whether @pid equals the process group ID of the calling process.
 * @fd and @data are accepted but not looked at here.
 *
 * Return: 1 when @pid matches getpgrp(), 0 otherwise.
 */
int is_skas_winch(int pid, int fd, void *data)
{
	const int own_pgrp = getpgrp();

	return own_pgrp == pid;
}
33 
34 static const char *ptrace_reg_name(int idx)
35 {
36 #define R(n) case HOST_##n: return #n
37 
38 	switch (idx) {
39 #ifdef __x86_64__
40 	R(BX);
41 	R(CX);
42 	R(DI);
43 	R(SI);
44 	R(DX);
45 	R(BP);
46 	R(AX);
47 	R(R8);
48 	R(R9);
49 	R(R10);
50 	R(R11);
51 	R(R12);
52 	R(R13);
53 	R(R14);
54 	R(R15);
55 	R(ORIG_AX);
56 	R(CS);
57 	R(SS);
58 	R(EFLAGS);
59 #elif defined(__i386__)
60 	R(IP);
61 	R(SP);
62 	R(EFLAGS);
63 	R(AX);
64 	R(BX);
65 	R(CX);
66 	R(DX);
67 	R(SI);
68 	R(DI);
69 	R(BP);
70 	R(CS);
71 	R(SS);
72 	R(DS);
73 	R(FS);
74 	R(ES);
75 	R(GS);
76 	R(ORIG_AX);
77 #endif
78 	}
79 	return "";
80 }
81 
82 static int ptrace_dump_regs(int pid)
83 {
84 	unsigned long regs[MAX_REG_NR];
85 	int i;
86 
87 	if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
88 		return -errno;
89 
90 	printk(UM_KERN_ERR "Stub registers -\n");
91 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
92 		const char *regname = ptrace_reg_name(i);
93 
94 		printk(UM_KERN_ERR "\t%s\t(%2d): %lx\n", regname, i, regs[i]);
95 	}
96 
97 	return 0;
98 }
99 
100 /*
101  * Signals that are OK to receive in the stub - we'll just continue it.
102  * SIGWINCH will happen when UML is inside a detached screen.
103  */
104 #define STUB_SIG_MASK ((1 << SIGALRM) | (1 << SIGWINCH))
105 
106 /* Signals that the stub will finish with - anything else is an error */
107 #define STUB_DONE_MASK (1 << SIGTRAP)
108 
109 void wait_stub_done(int pid)
110 {
111 	int n, status, err;
112 
113 	while (1) {
114 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
115 		if ((n < 0) || !WIFSTOPPED(status))
116 			goto bad_wait;
117 
118 		if (((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0)
119 			break;
120 
121 		err = ptrace(PTRACE_CONT, pid, 0, 0);
122 		if (err) {
123 			printk(UM_KERN_ERR "%s : continue failed, errno = %d\n",
124 			       __func__, errno);
125 			fatal_sigsegv();
126 		}
127 	}
128 
129 	if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0)
130 		return;
131 
132 bad_wait:
133 	err = ptrace_dump_regs(pid);
134 	if (err)
135 		printk(UM_KERN_ERR "Failed to get registers from stub, errno = %d\n",
136 		       -err);
137 	printk(UM_KERN_ERR "%s : failed to wait for SIGTRAP, pid = %d, n = %d, errno = %d, status = 0x%x\n",
138 	       __func__, pid, n, errno, status);
139 	fatal_sigsegv();
140 }
141 
142 extern unsigned long current_stub_stack(void);
143 
144 static void get_skas_faultinfo(int pid, struct faultinfo *fi)
145 {
146 	int err;
147 
148 	err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV);
149 	if (err) {
150 		printk(UM_KERN_ERR "Failed to continue stub, pid = %d, "
151 		       "errno = %d\n", pid, errno);
152 		fatal_sigsegv();
153 	}
154 	wait_stub_done(pid);
155 
156 	/*
157 	 * faultinfo is prepared by the stub_segv_handler at start of
158 	 * the stub stack page. We just have to copy it.
159 	 */
160 	memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
161 }
162 
163 static void handle_segv(int pid, struct uml_pt_regs *regs)
164 {
165 	get_skas_faultinfo(pid, &regs->faultinfo);
166 	segv(regs->faultinfo, 0, 1, NULL);
167 }
168 
169 static void handle_trap(int pid, struct uml_pt_regs *regs)
170 {
171 	if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
172 		fatal_sigsegv();
173 
174 	handle_syscall(regs);
175 }
176 
177 extern char __syscall_stub_start[];
178 
179 /**
180  * userspace_tramp() - userspace trampoline
181  * @stack:	pointer to the new userspace stack page
182  *
183  * The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed.
184  * This function will run on a temporary stack page.
185  * It ptrace()'es itself, then
186  * Two pages are mapped into the userspace address space:
187  * - STUB_CODE (with EXEC), which contains the skas stub code
188  * - STUB_DATA (with R/W), which contains a data page that is used to transfer certain data between the UML userspace process and the UML kernel.
189  * Also for the userspace process a SIGSEGV handler is installed to catch pagefaults in the userspace process.
190  * And last the process stops itself to give control to the UML kernel for this userspace process.
191  *
192  * Return: Always zero, otherwise the current userspace process is ended with non null exit() call
193  */
194 static int userspace_tramp(void *stack)
195 {
196 	struct sigaction sa;
197 	void *addr;
198 	int fd;
199 	unsigned long long offset;
200 	unsigned long segv_handler = STUB_CODE +
201 				     (unsigned long) stub_segv_handler -
202 				     (unsigned long) __syscall_stub_start;
203 
204 	ptrace(PTRACE_TRACEME, 0, 0, 0);
205 
206 	signal(SIGTERM, SIG_DFL);
207 	signal(SIGWINCH, SIG_IGN);
208 
209 	fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset);
210 	addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
211 		      PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
212 	if (addr == MAP_FAILED) {
213 		os_info("mapping mmap stub at 0x%lx failed, errno = %d\n",
214 			STUB_CODE, errno);
215 		exit(1);
216 	}
217 
218 	fd = phys_mapping(uml_to_phys(stack), &offset);
219 	addr = mmap((void *) STUB_DATA,
220 		    STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
221 		    MAP_FIXED | MAP_SHARED, fd, offset);
222 	if (addr == MAP_FAILED) {
223 		os_info("mapping segfault stack at 0x%lx failed, errno = %d\n",
224 			STUB_DATA, errno);
225 		exit(1);
226 	}
227 
228 	set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
229 	sigemptyset(&sa.sa_mask);
230 	sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
231 	sa.sa_sigaction = (void *) segv_handler;
232 	sa.sa_restorer = NULL;
233 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
234 		os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
235 			__func__, errno);
236 		exit(1);
237 	}
238 
239 	kill(os_getpid(), SIGSTOP);
240 	return 0;
241 }
242 
243 int userspace_pid[NR_CPUS];
244 
245 /**
246  * start_userspace() - prepare a new userspace process
247  * @stub_stack:	pointer to the stub stack.
248  *
249  * Setups a new temporary stack page that is used while userspace_tramp() runs
250  * Clones the kernel process into a new userspace process, with FDs only.
251  *
252  * Return: When positive: the process id of the new userspace process,
253  *         when negative: an error number.
254  * FIXME: can PIDs become negative?!
255  */
256 int start_userspace(unsigned long stub_stack)
257 {
258 	void *stack;
259 	unsigned long sp;
260 	int pid, status, n, flags, err;
261 
262 	/* setup a temporary stack page */
263 	stack = mmap(NULL, UM_KERN_PAGE_SIZE,
264 		     PROT_READ | PROT_WRITE | PROT_EXEC,
265 		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
266 	if (stack == MAP_FAILED) {
267 		err = -errno;
268 		printk(UM_KERN_ERR "%s : mmap failed, errno = %d\n",
269 		       __func__, errno);
270 		return err;
271 	}
272 
273 	/* set stack pointer to the end of the stack page, so it can grow downwards */
274 	sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
275 
276 	flags = CLONE_FILES | SIGCHLD;
277 
278 	/* clone into new userspace process */
279 	pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
280 	if (pid < 0) {
281 		err = -errno;
282 		printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
283 		       __func__, errno);
284 		return err;
285 	}
286 
287 	do {
288 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
289 		if (n < 0) {
290 			err = -errno;
291 			printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
292 			       __func__, errno);
293 			goto out_kill;
294 		}
295 	} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
296 
297 	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
298 		err = -EINVAL;
299 		printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
300 		       __func__, status);
301 		goto out_kill;
302 	}
303 
304 	if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
305 		   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
306 		err = -errno;
307 		printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
308 		       __func__, errno);
309 		goto out_kill;
310 	}
311 
312 	if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
313 		err = -errno;
314 		printk(UM_KERN_ERR "%s : munmap failed, errno = %d\n",
315 		       __func__, errno);
316 		goto out_kill;
317 	}
318 
319 	return pid;
320 
321  out_kill:
322 	os_kill_ptraced_process(pid, 1);
323 	return err;
324 }
325 
326 void userspace(struct uml_pt_regs *regs)
327 {
328 	int err, status, op, pid = userspace_pid[0];
329 	siginfo_t si;
330 
331 	/* Handle any immediate reschedules or signals */
332 	interrupt_end();
333 
334 	while (1) {
335 		time_travel_print_bc_msg();
336 
337 		current_mm_sync();
338 
339 		/* Flush out any pending syscalls */
340 		err = syscall_stub_flush(current_mm_id());
341 		if (err) {
342 			if (err == -ENOMEM)
343 				report_enomem();
344 
345 			printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
346 				__func__, -err);
347 			fatal_sigsegv();
348 		}
349 
350 		/*
351 		 * This can legitimately fail if the process loads a
352 		 * bogus value into a segment register.  It will
353 		 * segfault and PTRACE_GETREGS will read that value
354 		 * out of the process.  However, PTRACE_SETREGS will
355 		 * fail.  In this case, there is nothing to do but
356 		 * just kill the process.
357 		 */
358 		if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
359 			printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
360 			       __func__, errno);
361 			fatal_sigsegv();
362 		}
363 
364 		if (put_fp_registers(pid, regs->fp)) {
365 			printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
366 			       __func__, errno);
367 			fatal_sigsegv();
368 		}
369 
370 		if (singlestepping())
371 			op = PTRACE_SYSEMU_SINGLESTEP;
372 		else
373 			op = PTRACE_SYSEMU;
374 
375 		if (ptrace(op, pid, 0, 0)) {
376 			printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
377 			       __func__, op, errno);
378 			fatal_sigsegv();
379 		}
380 
381 		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
382 		if (err < 0) {
383 			printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
384 			       __func__, errno);
385 			fatal_sigsegv();
386 		}
387 
388 		regs->is_user = 1;
389 		if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
390 			printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
391 			       __func__, errno);
392 			fatal_sigsegv();
393 		}
394 
395 		if (get_fp_registers(pid, regs->fp)) {
396 			printk(UM_KERN_ERR "%s -  get_fp_registers failed, errno = %d\n",
397 			       __func__, errno);
398 			fatal_sigsegv();
399 		}
400 
401 		UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
402 
403 		if (WIFSTOPPED(status)) {
404 			int sig = WSTOPSIG(status);
405 
406 			/* These signal handlers need the si argument.
407 			 * The SIGIO and SIGALARM handlers which constitute the
408 			 * majority of invocations, do not use it.
409 			 */
410 			switch (sig) {
411 			case SIGSEGV:
412 			case SIGTRAP:
413 			case SIGILL:
414 			case SIGBUS:
415 			case SIGFPE:
416 			case SIGWINCH:
417 				ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
418 				break;
419 			}
420 
421 			switch (sig) {
422 			case SIGSEGV:
423 				if (PTRACE_FULL_FAULTINFO) {
424 					get_skas_faultinfo(pid,
425 							   &regs->faultinfo);
426 					(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
427 							     regs);
428 				}
429 				else handle_segv(pid, regs);
430 				break;
431 			case SIGTRAP + 0x80:
432 				handle_trap(pid, regs);
433 				break;
434 			case SIGTRAP:
435 				relay_signal(SIGTRAP, (struct siginfo *)&si, regs);
436 				break;
437 			case SIGALRM:
438 				break;
439 			case SIGIO:
440 			case SIGILL:
441 			case SIGBUS:
442 			case SIGFPE:
443 			case SIGWINCH:
444 				block_signals_trace();
445 				(*sig_info[sig])(sig, (struct siginfo *)&si, regs);
446 				unblock_signals_trace();
447 				break;
448 			default:
449 				printk(UM_KERN_ERR "%s - child stopped with signal %d\n",
450 				       __func__, sig);
451 				fatal_sigsegv();
452 			}
453 			pid = userspace_pid[0];
454 			interrupt_end();
455 
456 			/* Avoid -ERESTARTSYS handling in host */
457 			if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET)
458 				PT_SYSCALL_NR(regs->gp) = -1;
459 		}
460 	}
461 }
462 
463 void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
464 {
465 	(*buf)[0].JB_IP = (unsigned long) handler;
466 	(*buf)[0].JB_SP = (unsigned long) stack + UM_THREAD_SIZE -
467 		sizeof(void *);
468 }
469 
/* Values delivered to the setjmp() on initial_jmpbuf in start_idle_thread() */
#define INIT_JMP_NEW_THREAD 0
#define INIT_JMP_CALLBACK 1
#define INIT_JMP_HALT 2
#define INIT_JMP_REBOOT 3

/*
 * Save the current context in @me and continue at the context saved in
 * @you.  The call returns when something later jumps back to @me.
 */
void switch_threads(jmp_buf *me, jmp_buf *you)
{
	if (UML_SETJMP(me) != 0)
		return;	/* somebody switched back to us */

	UML_LONGJMP(you, 1);
}
480 
481 static jmp_buf initial_jmpbuf;
482 
483 /* XXX Make these percpu */
484 static void (*cb_proc)(void *arg);
485 static void *cb_arg;
486 static jmp_buf *cb_back;
487 
488 int start_idle_thread(void *stack, jmp_buf *switch_buf)
489 {
490 	int n;
491 
492 	set_handler(SIGWINCH);
493 
494 	/*
495 	 * Can't use UML_SETJMP or UML_LONGJMP here because they save
496 	 * and restore signals, with the possible side-effect of
497 	 * trying to handle any signals which came when they were
498 	 * blocked, which can't be done on this stack.
499 	 * Signals must be blocked when jumping back here and restored
500 	 * after returning to the jumper.
501 	 */
502 	n = setjmp(initial_jmpbuf);
503 	switch (n) {
504 	case INIT_JMP_NEW_THREAD:
505 		(*switch_buf)[0].JB_IP = (unsigned long) uml_finishsetup;
506 		(*switch_buf)[0].JB_SP = (unsigned long) stack +
507 			UM_THREAD_SIZE - sizeof(void *);
508 		break;
509 	case INIT_JMP_CALLBACK:
510 		(*cb_proc)(cb_arg);
511 		longjmp(*cb_back, 1);
512 		break;
513 	case INIT_JMP_HALT:
514 		kmalloc_ok = 0;
515 		return 0;
516 	case INIT_JMP_REBOOT:
517 		kmalloc_ok = 0;
518 		return 1;
519 	default:
520 		printk(UM_KERN_ERR "Bad sigsetjmp return in %s - %d\n",
521 		       __func__, n);
522 		fatal_sigsegv();
523 	}
524 	longjmp(*switch_buf, 1);
525 
526 	/* unreachable */
527 	printk(UM_KERN_ERR "impossible long jump!");
528 	fatal_sigsegv();
529 	return 0;
530 }
531 
532 void initial_thread_cb_skas(void (*proc)(void *), void *arg)
533 {
534 	jmp_buf here;
535 
536 	cb_proc = proc;
537 	cb_arg = arg;
538 	cb_back = &here;
539 
540 	block_signals_trace();
541 	if (UML_SETJMP(&here) == 0)
542 		UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK);
543 	unblock_signals_trace();
544 
545 	cb_proc = NULL;
546 	cb_arg = NULL;
547 	cb_back = NULL;
548 }
549 
550 void halt_skas(void)
551 {
552 	block_signals_trace();
553 	UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT);
554 }
555 
556 static bool noreboot;
557 
558 static int __init noreboot_cmd_param(char *str, int *add)
559 {
560 	noreboot = true;
561 	return 0;
562 }
563 
564 __uml_setup("noreboot", noreboot_cmd_param,
565 "noreboot\n"
566 "    Rather than rebooting, exit always, akin to QEMU's -no-reboot option.\n"
567 "    This is useful if you're using CONFIG_PANIC_TIMEOUT in order to catch\n"
568 "    crashes in CI\n");
569 
570 void reboot_skas(void)
571 {
572 	block_signals_trace();
573 	UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT);
574 }
575 
576 void __switch_mm(struct mm_id *mm_idp)
577 {
578 	userspace_pid[0] = mm_idp->pid;
579 }
580