xref: /freebsd/sys/kern/sys_process.c (revision 7660b554bc59a07be0431c17e0e33815818baa69)
1 /*
2  * Copyright (c) 1994, Sean Eric Fagan
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by Sean Eric Fagan.
16  * 4. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/syscallsubr.h>
40 #include <sys/sysproto.h>
41 #include <sys/proc.h>
42 #include <sys/vnode.h>
43 #include <sys/ptrace.h>
44 #include <sys/sx.h>
45 #include <sys/user.h>
46 
47 #include <machine/reg.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_extern.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 
/*
 * Functions implemented using PROC_ACTION():
 *
 * proc_read_regs(td, regs)
 *	Get the current user-visible register set from the thread
 *	and copy it into the regs structure (<machine/reg.h>).
 *	The process is stopped at the time read_regs is called.
 *
 * proc_write_regs(td, regs)
 *	Update the current register set from the passed in regs
 *	structure.  Take care to avoid clobbering special CPU
 *	registers or privileged bits in the PSL.
 *	Depending on the architecture this may have fix-up work to do,
 *	especially if the IAR or PCW are modified.
 *	The process is stopped at the time write_regs is called.
 *
 * proc_read_fpregs, proc_write_fpregs
 *	deal with the floating point register set, otherwise as above.
 *
 * proc_read_dbregs, proc_write_dbregs
 *	deal with the processor debug register set, otherwise as above.
 *
 * proc_sstep(td)
 *	Arrange for the thread to trap after executing a single instruction.
 *
 * All of them take the thread whose state is accessed, require the
 * owning process to be locked, and fail with EIO if that process is
 * not resident in memory.
 */
82 
/*
 * PROC_ACTION(action): shared body of the register-access helpers.
 *
 * The macro expects a `struct thread *td' to be in scope and RETURNS from
 * the enclosing function: EIO if td's process is not resident in memory
 * (PS_INMEM clear), otherwise the value of `action'.  The process lock
 * must already be held; this is asserted, not acquired.
 */
#define	PROC_ACTION(action) do {					\
	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);			\
	if ((td->td_proc->p_sflag & PS_INMEM) == 0)			\
		return (EIO);						\
	return (action);						\
} while (0)
93 
/*
 * Copy the user-visible register set of thread td into *regs by way of
 * fill_regs().  PROC_ACTION() supplies the lock assertion, the
 * resident-in-memory check (EIO on failure) and the return statement.
 */
int
proc_read_regs(struct thread *td, struct reg *regs)
{
	PROC_ACTION(fill_regs(td, regs));
}
100 
/*
 * Load the register set of thread td from *regs by way of set_regs().
 * PROC_ACTION() supplies the lock assertion, the resident-in-memory
 * check (EIO on failure) and the return statement.
 */
int
proc_write_regs(struct thread *td, struct reg *regs)
{
	PROC_ACTION(set_regs(td, regs));
}
107 
/*
 * Copy the debug register set of thread td into *dbregs by way of
 * fill_dbregs().  PROC_ACTION() supplies the lock assertion, the
 * resident-in-memory check (EIO on failure) and the return statement.
 */
int
proc_read_dbregs(struct thread *td, struct dbreg *dbregs)
{
	PROC_ACTION(fill_dbregs(td, dbregs));
}
114 
/*
 * Load the debug register set of thread td from *dbregs by way of
 * set_dbregs().  PROC_ACTION() supplies the lock assertion, the
 * resident-in-memory check (EIO on failure) and the return statement.
 */
int
proc_write_dbregs(struct thread *td, struct dbreg *dbregs)
{
	PROC_ACTION(set_dbregs(td, dbregs));
}
121 
/*
 * Copy the floating point register set of thread td into *fpregs by way
 * of fill_fpregs().  There are no security holes or translations for
 * fpregs, so they are simply copied.  PROC_ACTION() supplies the lock
 * assertion, the resident-in-memory check (EIO on failure) and the
 * return statement.
 */
int
proc_read_fpregs(struct thread *td, struct fpreg *fpregs)
{
	PROC_ACTION(fill_fpregs(td, fpregs));
}
132 
/*
 * Load the floating point register set of thread td from *fpregs by way
 * of set_fpregs().  PROC_ACTION() supplies the lock assertion, the
 * resident-in-memory check (EIO on failure) and the return statement.
 */
int
proc_write_fpregs(struct thread *td, struct fpreg *fpregs)
{
	PROC_ACTION(set_fpregs(td, fpregs));
}
139 
/*
 * Request a single-step of thread td through ptrace_single_step().
 * PROC_ACTION() supplies the lock assertion, the resident-in-memory
 * check (EIO on failure) and the return statement.
 */
int
proc_sstep(struct thread *td)
{
	PROC_ACTION(ptrace_single_step(td));
}
146 
147 int
148 proc_rwmem(struct proc *p, struct uio *uio)
149 {
150 	struct vmspace *vm;
151 	vm_map_t map;
152 	vm_object_t backing_object, object = NULL;
153 	vm_offset_t pageno = 0;		/* page number */
154 	vm_prot_t reqprot;
155 	vm_offset_t kva;
156 	int error, writing;
157 
158 	GIANT_REQUIRED;
159 
160 	/*
161 	 * if the vmspace is in the midst of being deallocated or the
162 	 * process is exiting, don't try to grab anything.  The page table
163 	 * usage in that process can be messed up.
164 	 */
165 	vm = p->p_vmspace;
166 	if ((p->p_flag & P_WEXIT))
167 		return (EFAULT);
168 	if (vm->vm_refcnt < 1)
169 		return (EFAULT);
170 	++vm->vm_refcnt;
171 	/*
172 	 * The map we want...
173 	 */
174 	map = &vm->vm_map;
175 
176 	writing = uio->uio_rw == UIO_WRITE;
177 	reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) :
178 	    VM_PROT_READ;
179 
180 	kva = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
181 
182 	/*
183 	 * Only map in one page at a time.  We don't have to, but it
184 	 * makes things easier.  This way is trivial - right?
185 	 */
186 	do {
187 		vm_map_t tmap;
188 		vm_offset_t uva;
189 		int page_offset;		/* offset into page */
190 		vm_map_entry_t out_entry;
191 		vm_prot_t out_prot;
192 		boolean_t wired;
193 		vm_pindex_t pindex;
194 		u_int len;
195 		vm_page_t m;
196 
197 		object = NULL;
198 
199 		uva = (vm_offset_t)uio->uio_offset;
200 
201 		/*
202 		 * Get the page number of this segment.
203 		 */
204 		pageno = trunc_page(uva);
205 		page_offset = uva - pageno;
206 
207 		/*
208 		 * How many bytes to copy
209 		 */
210 		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
211 
212 		/*
213 		 * Fault the page on behalf of the process
214 		 */
215 		error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL);
216 		if (error) {
217 			error = EFAULT;
218 			break;
219 		}
220 
221 		/*
222 		 * Now we need to get the page.  out_entry, out_prot, wired,
223 		 * and single_use aren't used.  One would think the vm code
224 		 * would be a *bit* nicer...  We use tmap because
225 		 * vm_map_lookup() can change the map argument.
226 		 */
227 		tmap = map;
228 		error = vm_map_lookup(&tmap, pageno, reqprot, &out_entry,
229 		    &object, &pindex, &out_prot, &wired);
230 		if (error) {
231 			error = EFAULT;
232 			break;
233 		}
234 		VM_OBJECT_LOCK(object);
235 		while ((m = vm_page_lookup(object, pindex)) == NULL &&
236 		    !writing &&
237 		    (backing_object = object->backing_object) != NULL) {
238 			/*
239 			 * Allow fallback to backing objects if we are reading.
240 			 */
241 			VM_OBJECT_LOCK(backing_object);
242 			pindex += OFF_TO_IDX(object->backing_object_offset);
243 			VM_OBJECT_UNLOCK(object);
244 			object = backing_object;
245 		}
246 		VM_OBJECT_UNLOCK(object);
247 		if (m == NULL) {
248 			vm_map_lookup_done(tmap, out_entry);
249 			error = EFAULT;
250 			break;
251 		}
252 
253 		/*
254 		 * Hold the page in memory.
255 		 */
256 		vm_page_lock_queues();
257 		vm_page_hold(m);
258 		vm_page_unlock_queues();
259 
260 		/*
261 		 * We're done with tmap now.
262 		 */
263 		vm_map_lookup_done(tmap, out_entry);
264 
265 		pmap_qenter(kva, &m, 1);
266 
267 		/*
268 		 * Now do the i/o move.
269 		 */
270 		error = uiomove((caddr_t)(kva + page_offset), len, uio);
271 
272 		pmap_qremove(kva, 1);
273 
274 		/*
275 		 * Release the page.
276 		 */
277 		vm_page_lock_queues();
278 		vm_page_unhold(m);
279 		vm_page_unlock_queues();
280 
281 	} while (error == 0 && uio->uio_resid > 0);
282 
283 	kmem_free(kernel_map, kva, PAGE_SIZE);
284 	vmspace_free(vm);
285 	return (error);
286 }
287 
288 /*
289  * Process debugging system call.
290  */
#ifndef _SYS_SYSPROTO_H_
/*
 * Argument layout of the ptrace system call.  Only defined here when
 * <sys/sysproto.h> has not already supplied it.
 */
struct ptrace_args {
	int	req;	/* PT_* request code */
	pid_t	pid;	/* target process id */
	caddr_t	addr;	/* request-specific address argument */
	int	data;	/* request-specific integer argument */
};
#endif
299 
300 /*
301  * MPSAFE
302  */
303 int
304 ptrace(struct thread *td, struct ptrace_args *uap)
305 {
306 	/*
307 	 * XXX this obfuscation is to reduce stack usage, but the register
308 	 * structs may be too large to put on the stack anyway.
309 	 */
310 	union {
311 		struct ptrace_io_desc piod;
312 		struct dbreg dbreg;
313 		struct fpreg fpreg;
314 		struct reg reg;
315 	} r;
316 	void *addr;
317 	int error = 0;
318 
319 	addr = &r;
320 	switch (uap->req) {
321 	case PT_GETREGS:
322 	case PT_GETFPREGS:
323 	case PT_GETDBREGS:
324 		break;
325 	case PT_SETREGS:
326 		error = copyin(uap->addr, &r.reg, sizeof r.reg);
327 		break;
328 	case PT_SETFPREGS:
329 		error = copyin(uap->addr, &r.fpreg, sizeof r.fpreg);
330 		break;
331 	case PT_SETDBREGS:
332 		error = copyin(uap->addr, &r.dbreg, sizeof r.dbreg);
333 		break;
334 	case PT_IO:
335 		error = copyin(uap->addr, &r.piod, sizeof r.piod);
336 		break;
337 	default:
338 		addr = uap->addr;
339 		break;
340 	}
341 	if (error)
342 		return (error);
343 
344 	error = kern_ptrace(td, uap->req, uap->pid, addr, uap->data);
345 	if (error)
346 		return (error);
347 
348 	switch (uap->req) {
349 	case PT_IO:
350 		(void)copyout(&r.piod, uap->addr, sizeof r.piod);
351 		break;
352 	case PT_GETREGS:
353 		error = copyout(&r.reg, uap->addr, sizeof r.reg);
354 		break;
355 	case PT_GETFPREGS:
356 		error = copyout(&r.fpreg, uap->addr, sizeof r.fpreg);
357 		break;
358 	case PT_GETDBREGS:
359 		error = copyout(&r.dbreg, uap->addr, sizeof r.dbreg);
360 		break;
361 	}
362 
363 	return (error);
364 }
365 
366 int
367 kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
368 {
369 	struct iovec iov;
370 	struct uio uio;
371 	struct proc *curp, *p, *pp;
372 	struct thread *td2;
373 	struct ptrace_io_desc *piod;
374 	int error, write, tmp;
375 	int proctree_locked = 0;
376 
377 	curp = td->td_proc;
378 
379 	/* Lock proctree before locking the process. */
380 	switch (req) {
381 	case PT_TRACE_ME:
382 	case PT_ATTACH:
383 	case PT_STEP:
384 	case PT_CONTINUE:
385 	case PT_DETACH:
386 		sx_xlock(&proctree_lock);
387 		proctree_locked = 1;
388 		break;
389 	default:
390 		break;
391 	}
392 
393 	write = 0;
394 	if (req == PT_TRACE_ME) {
395 		p = td->td_proc;
396 		PROC_LOCK(p);
397 	} else {
398 		if ((p = pfind(pid)) == NULL) {
399 			if (proctree_locked)
400 				sx_xunlock(&proctree_lock);
401 			return (ESRCH);
402 		}
403 	}
404 	if ((error = p_cansee(td, p)) != 0)
405 		goto fail;
406 
407 	if ((error = p_candebug(td, p)) != 0)
408 		goto fail;
409 
410 	/*
411 	 * System processes can't be debugged.
412 	 */
413 	if ((p->p_flag & P_SYSTEM) != 0) {
414 		error = EINVAL;
415 		goto fail;
416 	}
417 
418 	/*
419 	 * Permissions check
420 	 */
421 	switch (req) {
422 	case PT_TRACE_ME:
423 		/* Always legal. */
424 		break;
425 
426 	case PT_ATTACH:
427 		/* Self */
428 		if (p->p_pid == td->td_proc->p_pid) {
429 			error = EINVAL;
430 			goto fail;
431 		}
432 
433 		/* Already traced */
434 		if (p->p_flag & P_TRACED) {
435 			error = EBUSY;
436 			goto fail;
437 		}
438 
439 		/* Can't trace an ancestor if you're being traced. */
440 		if (curp->p_flag & P_TRACED) {
441 			for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr) {
442 				if (pp == p) {
443 					error = EINVAL;
444 					goto fail;
445 				}
446 			}
447 		}
448 
449 
450 		/* OK */
451 		break;
452 
453 	default:
454 		/* not being traced... */
455 		if ((p->p_flag & P_TRACED) == 0) {
456 			error = EPERM;
457 			goto fail;
458 		}
459 
460 		/* not being traced by YOU */
461 		if (p->p_pptr != td->td_proc) {
462 			error = EBUSY;
463 			goto fail;
464 		}
465 
466 		/* not currently stopped */
467 		if (!P_SHOULDSTOP(p) || (p->p_flag & P_WAITED) == 0) {
468 			error = EBUSY;
469 			goto fail;
470 		}
471 
472 		/* OK */
473 		break;
474 	}
475 
476 	td2 = FIRST_THREAD_IN_PROC(p);
477 #ifdef FIX_SSTEP
478 	/*
479 	 * Single step fixup ala procfs
480 	 */
481 	FIX_SSTEP(td2);			/* XXXKSE */
482 #endif
483 
484 	/*
485 	 * Actually do the requests
486 	 */
487 
488 	td->td_retval[0] = 0;
489 
490 	switch (req) {
491 	case PT_TRACE_ME:
492 		/* set my trace flag and "owner" so it can read/write me */
493 		p->p_flag |= P_TRACED;
494 		p->p_oppid = p->p_pptr->p_pid;
495 		PROC_UNLOCK(p);
496 		sx_xunlock(&proctree_lock);
497 		return (0);
498 
499 	case PT_ATTACH:
500 		/* security check done above */
501 		p->p_flag |= P_TRACED;
502 		p->p_oppid = p->p_pptr->p_pid;
503 		if (p->p_pptr != td->td_proc)
504 			proc_reparent(p, td->td_proc);
505 		data = SIGSTOP;
506 		goto sendsig;	/* in PT_CONTINUE below */
507 
508 	case PT_STEP:
509 	case PT_CONTINUE:
510 	case PT_DETACH:
511 		/* Zero means do not send any signal */
512 		if (data < 0 || data > _SIG_MAXSIG) {
513 			error = EINVAL;
514 			goto fail;
515 		}
516 
517 		_PHOLD(p);
518 
519 		if (req == PT_STEP) {
520 			error = ptrace_single_step(td2);
521 			if (error) {
522 				_PRELE(p);
523 				goto fail;
524 			}
525 		}
526 
527 		if (addr != (void *)1) {
528 			error = ptrace_set_pc(td2, (u_long)(uintfptr_t)addr);
529 			if (error) {
530 				_PRELE(p);
531 				goto fail;
532 			}
533 		}
534 		_PRELE(p);
535 
536 		if (req == PT_DETACH) {
537 			/* reset process parent */
538 			if (p->p_oppid != p->p_pptr->p_pid) {
539 				struct proc *pp;
540 
541 				PROC_UNLOCK(p);
542 				pp = pfind(p->p_oppid);
543 				if (pp == NULL)
544 					pp = initproc;
545 				else
546 					PROC_UNLOCK(pp);
547 				PROC_LOCK(p);
548 				proc_reparent(p, pp);
549 			}
550 			p->p_flag &= ~(P_TRACED | P_WAITED);
551 			p->p_oppid = 0;
552 
553 			/* should we send SIGCHLD? */
554 		}
555 
556 	sendsig:
557 		if (proctree_locked)
558 			sx_xunlock(&proctree_lock);
559 		/* deliver or queue signal */
560 		if (P_SHOULDSTOP(p)) {
561 			p->p_xstat = data;
562 			p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG);
563 			mtx_lock_spin(&sched_lock);
564 			thread_unsuspend(p);
565 			setrunnable(td2);	/* XXXKSE */
566 			/* Need foreach kse in proc, ... make_kse_queued(). */
567 			mtx_unlock_spin(&sched_lock);
568 		} else if (data)
569 			psignal(p, data);
570 		PROC_UNLOCK(p);
571 
572 		return (0);
573 
574 	case PT_WRITE_I:
575 	case PT_WRITE_D:
576 		write = 1;
577 		/* FALLTHROUGH */
578 	case PT_READ_I:
579 	case PT_READ_D:
580 		PROC_UNLOCK(p);
581 		tmp = 0;
582 		/* write = 0 set above */
583 		iov.iov_base = write ? (caddr_t)&data : (caddr_t)&tmp;
584 		iov.iov_len = sizeof(int);
585 		uio.uio_iov = &iov;
586 		uio.uio_iovcnt = 1;
587 		uio.uio_offset = (off_t)(uintptr_t)addr;
588 		uio.uio_resid = sizeof(int);
589 		uio.uio_segflg = UIO_SYSSPACE;	/* i.e.: the uap */
590 		uio.uio_rw = write ? UIO_WRITE : UIO_READ;
591 		uio.uio_td = td;
592 		mtx_lock(&Giant);
593 		error = proc_rwmem(p, &uio);
594 		mtx_unlock(&Giant);
595 		if (uio.uio_resid != 0) {
596 			/*
597 			 * XXX proc_rwmem() doesn't currently return ENOSPC,
598 			 * so I think write() can bogusly return 0.
599 			 * XXX what happens for short writes?  We don't want
600 			 * to write partial data.
601 			 * XXX proc_rwmem() returns EPERM for other invalid
602 			 * addresses.  Convert this to EINVAL.  Does this
603 			 * clobber returns of EPERM for other reasons?
604 			 */
605 			if (error == 0 || error == ENOSPC || error == EPERM)
606 				error = EINVAL;	/* EOF */
607 		}
608 		if (!write)
609 			td->td_retval[0] = tmp;
610 		return (error);
611 
612 	case PT_IO:
613 		PROC_UNLOCK(p);
614 		piod = addr;
615 		iov.iov_base = piod->piod_addr;
616 		iov.iov_len = piod->piod_len;
617 		uio.uio_iov = &iov;
618 		uio.uio_iovcnt = 1;
619 		uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs;
620 		uio.uio_resid = piod->piod_len;
621 		uio.uio_segflg = UIO_USERSPACE;
622 		uio.uio_td = td;
623 		switch (piod->piod_op) {
624 		case PIOD_READ_D:
625 		case PIOD_READ_I:
626 			uio.uio_rw = UIO_READ;
627 			break;
628 		case PIOD_WRITE_D:
629 		case PIOD_WRITE_I:
630 			uio.uio_rw = UIO_WRITE;
631 			break;
632 		default:
633 			return (EINVAL);
634 		}
635 		mtx_lock(&Giant);
636 		error = proc_rwmem(p, &uio);
637 		mtx_unlock(&Giant);
638 		piod->piod_len -= uio.uio_resid;
639 		return (error);
640 
641 	case PT_KILL:
642 		data = SIGKILL;
643 		goto sendsig;	/* in PT_CONTINUE above */
644 
645 	case PT_SETREGS:
646 		_PHOLD(p);
647 		error = proc_write_regs(td2, addr);
648 		_PRELE(p);
649 		PROC_UNLOCK(p);
650 		return (error);
651 
652 	case PT_GETREGS:
653 		_PHOLD(p);
654 		error = proc_read_regs(td2, addr);
655 		_PRELE(p);
656 		PROC_UNLOCK(p);
657 		return (error);
658 
659 	case PT_SETFPREGS:
660 		_PHOLD(p);
661 		error = proc_write_fpregs(td2, addr);
662 		_PRELE(p);
663 		PROC_UNLOCK(p);
664 		return (error);
665 
666 	case PT_GETFPREGS:
667 		_PHOLD(p);
668 		error = proc_read_fpregs(td2, addr);
669 		_PRELE(p);
670 		PROC_UNLOCK(p);
671 		return (error);
672 
673 	case PT_SETDBREGS:
674 		_PHOLD(p);
675 		error = proc_write_dbregs(td2, addr);
676 		_PRELE(p);
677 		PROC_UNLOCK(p);
678 		return (error);
679 
680 	case PT_GETDBREGS:
681 		_PHOLD(p);
682 		error = proc_read_dbregs(td2, addr);
683 		_PRELE(p);
684 		PROC_UNLOCK(p);
685 		return (error);
686 
687 	default:
688 #ifdef __HAVE_PTRACE_MACHDEP
689 		if (req >= PT_FIRSTMACH) {
690 			_PHOLD(p);
691 			error = cpu_ptrace(td2, req, addr, data);
692 			_PRELE(p);
693 			PROC_UNLOCK(p);
694 			return (error);
695 		}
696 #endif
697 		break;
698 	}
699 
700 	/* Unknown request. */
701 	error = EINVAL;
702 
703 fail:
704 	PROC_UNLOCK(p);
705 	if (proctree_locked)
706 		sx_xunlock(&proctree_lock);
707 	return (error);
708 }
709 
710 /*
711  * Stop a process because of a debugging event;
712  * stay stopped until p->p_step is cleared
713  * (cleared by PIOCCONT in procfs).
714  */
715 void
716 stopevent(struct proc *p, unsigned int event, unsigned int val)
717 {
718 
719 	PROC_LOCK_ASSERT(p, MA_OWNED);
720 	p->p_step = 1;
721 	do {
722 		p->p_xstat = val;
723 		p->p_stype = event;	/* Which event caused the stop? */
724 		wakeup(&p->p_stype);	/* Wake up any PIOCWAIT'ing procs */
725 		msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0);
726 	} while (p->p_step);
727 }
728