xref: /freebsd/sys/kern/kern_ktrace.c (revision 3193579b66fd7067f898dbc54bdea81a0e6f9bd0)
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/ktrace.h>
#include <sys/sema.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

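/*
 * Each trace event is packaged in one of these requests.  The header
 * is written first, followed by the fixed per-type payload in ktr_data
 * and/or the variable-length buffer hung off the header.  The cred and
 * vnode references are taken when the request is filled in and dropped
 * after the worker thread has written the record.
 */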
struct ktr_request {
	struct	ktr_header ktr_header;
	struct	ucred *ktr_cred;
	struct	vnode *ktr_vp;
	union {
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

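/*
 * Fixed payload size for each record type, indexed by KTR_* value.
 * Types that carry only a variable-length buffer (KTR_NAMEI, KTR_USER)
 * have no fixed payload, and KTR_SYSCALL counts only the fields that
 * precede the variable argument array.
 */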
static int data_lengths[] = {
	0,					/* none */
	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
	0,					/* KTR_NAMEI */
	sizeof(struct ktr_genio),		/* KTR_GENIO */
	sizeof(struct ktr_psig),		/* KTR_PSIG */
	sizeof(struct ktr_csw),			/* KTR_CSW */
	0					/* KTR_USER */
};

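/* Requests queued for the worker thread, and the pool of free requests. */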
static STAILQ_HEAD(, ktr_request) ktr_todo;
static STAILQ_HEAD(, ktr_request) ktr_free;

SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

static u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");
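
/*
 * Both knobs may also be set from the loader; for example (the values
 * here are illustrative only):
 *
 *	kern.ktrace.request_pool="200"
 *	kern.ktrace.genio_size="65536"
 *
 * genio_size caps how much of each read/write is copied into a
 * KTR_GENIO record; see ktrgenio() below.
 */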

static int print_message = 1;
struct mtx ktrace_mtx;
static struct sema ktrace_sema;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int newsize);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct ktr_request *req);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_loop(void *dummy);
static void ktr_writerequest(struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);

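/*
 * Pre-allocate the request pool at boot and start the worker thread
 * that drains the ktr_todo queue.
 */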
static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sema_init(&ktrace_sema, 0, "ktrace");
	STAILQ_INIT(&ktr_todo);
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
	kthread_create(ktr_loop, NULL, NULL, RFHIGHPID, 0, "ktrace");
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

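/*
 * Handler for the kern.ktrace.request_pool sysctl: report the current
 * pool size and, on a write, resize the pool, returning ENOSPC if the
 * pool could not be grown to the requested size.
 */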
static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		mtx_lock(&ktrace_mtx);
		oldsize = ktr_requestpool;
		mtx_unlock(&ktrace_mtx);
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	td->td_pflags |= TDP_INKTRACE;
	mtx_lock(&ktrace_mtx);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(wantsize);
	mtx_unlock(&ktrace_mtx);
	td->td_pflags &= ~TDP_INKTRACE;
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", "");

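/*
 * Resize the free pool toward newsize and return the resulting size.
 * Called with ktrace_mtx held; the mutex is dropped around malloc()
 * and free(), since both may sleep.  A shrink stops early once the
 * free list runs out of requests to release.
 */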
static u_int
ktrace_resize_pool(u_int newsize)
{
	struct ktr_request *req;
	int bound;

	mtx_assert(&ktrace_mtx, MA_OWNED);
	print_message = 1;
	bound = newsize - ktr_requestpool;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0)
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				return (ktr_requestpool);
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			mtx_unlock(&ktrace_mtx);
			free(req, M_KTRACE);
			mtx_lock(&ktrace_mtx);
		}
	else
		/* Grow pool up to newsize. */
		while (bound-- > 0) {
			mtx_unlock(&ktrace_mtx);
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			mtx_lock(&ktrace_mtx);
			STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
			ktr_requestpool++;
		}
	return (ktr_requestpool);
}

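/*
 * Allocate a request from the free pool and fill in the common header
 * fields for the current process.  Returns NULL if this event type is
 * not being traced; if the pool is exhausted, the process is marked so
 * that its next record carries the KTR_DROP flag.
 */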
static struct ktr_request *
ktr_getrequest(int type)
{
	struct ktr_request *req;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int pm;

	td->td_pflags |= TDP_INKTRACE;
	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		td->td_pflags &= ~TDP_INKTRACE;
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		KASSERT(p->p_tracevp != NULL, ("ktrace: no trace vnode"));
		KASSERT(p->p_tracecred != NULL, ("ktrace: no trace cred"));
		req->ktr_vp = p->p_tracevp;
		VREF(p->p_tracevp);
		req->ktr_cred = crhold(p->p_tracecred);
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		bcopy(p->p_comm, req->ktr_header.ktr_comm, MAXCOMLEN + 1);
		req->ktr_header.ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		p->p_traceflag |= KTRFAC_DROP;
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
		td->td_pflags &= ~TDP_INKTRACE;
	}
	return (req);
}

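/* Queue a completed request for the worker thread and wake it up. */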
static void
ktr_submitrequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&ktr_todo, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
	sema_post(&ktrace_sema);
	curthread->td_pflags &= ~TDP_INKTRACE;
}

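/*
 * Drop the references held by a written (or aborted) request and
 * return it to the free pool.
 */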
static void
ktr_freerequest(struct ktr_request *req)
{

	crfree(req->ktr_cred);
	if (req->ktr_vp != NULL) {
		mtx_lock(&Giant);
		vrele(req->ktr_vp);
		mtx_unlock(&Giant);
	}
	if (req->ktr_header.ktr_buffer != NULL)
		free(req->ktr_header.ktr_buffer, M_KTRACE);
	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
}

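/*
 * Body of the worker thread: pull requests off ktr_todo and write each
 * one using the credentials of the traced process.
 */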
static void
ktr_loop(void *dummy)
{
	struct ktr_request *req;
	struct thread *td;
	struct ucred *cred;

	/* Only cache these values once. */
	td = curthread;
	cred = td->td_ucred;
	for (;;) {
		sema_wait(&ktrace_sema);
		mtx_lock(&ktrace_mtx);
		req = STAILQ_FIRST(&ktr_todo);
		KASSERT(req != NULL, ("got a NULL request"));
		STAILQ_REMOVE_HEAD(&ktr_todo, ktr_list);
		mtx_unlock(&ktrace_mtx);
		/*
		 * It is not enough just to pass the cached cred
		 * to the VOPs in ktr_writerequest().  Some VFS
		 * operations use curthread->td_ucred, so we need
		 * to modify our thread's credentials as well.
		 * Evil.
		 */
		td->td_ucred = req->ktr_cred;
		ktr_writerequest(req);
		td->td_ucred = cred;
		ktr_freerequest(req);
	}
}


/*
 * MPSAFE
 */
void
ktrsyscall(code, narg, args)
	int code, narg;
	register_t args[];
{
	struct ktr_request *req;
	struct ktr_syscall *ktp;
	size_t buflen;
	char *buf = NULL;

	buflen = sizeof(register_t) * narg;
	if (buflen > 0) {
		buf = malloc(buflen, M_KTRACE, M_WAITOK);
		bcopy(args, buf, buflen);
	}
	req = ktr_getrequest(KTR_SYSCALL);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	ktp = &req->ktr_data.ktr_syscall;
	ktp->ktr_code = code;
	ktp->ktr_narg = narg;
	if (buflen > 0) {
		req->ktr_header.ktr_len = buflen;
		req->ktr_header.ktr_buffer = buf;
	}
	ktr_submitrequest(req);
}

/*
 * MPSAFE
 */
void
ktrsysret(code, error, retval)
	int code, error;
	register_t retval;
{
	struct ktr_request *req;
	struct ktr_sysret *ktp;

	req = ktr_getrequest(KTR_SYSRET);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_sysret;
	ktp->ktr_code = code;
	ktp->ktr_error = error;
	ktp->ktr_retval = retval;		/* what about val2 ? */
	ktr_submitrequest(req);
}

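/*
 * Record a pathname translated by namei(9).  The path is copied up
 * front because the caller's buffer need not remain valid once the
 * request has been queued.
 */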
void
ktrnamei(path)
	char *path;
{
	struct ktr_request *req;
	int namelen;
	char *buf = NULL;

	namelen = strlen(path);
	if (namelen > 0) {
		buf = malloc(namelen, M_KTRACE, M_WAITOK);
		bcopy(path, buf, namelen);
	}
	req = ktr_getrequest(KTR_NAMEI);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	if (namelen > 0) {
		req->ktr_header.ktr_len = namelen;
		req->ktr_header.ktr_buffer = buf;
	}
	ktr_submitrequest(req);
}

/*
 * The uio may not remain valid once we return, so we cannot hand the
 * raw uio off to the worker thread.  Instead, copy up to ktr_geniosize
 * bytes of the I/O data into a malloc'd buffer here and attach that
 * buffer to the request; anything beyond the cap is simply not traced.
 * The worker thread then writes the record asynchronously, as it does
 * for any other event.
 */
void
ktrgenio(fd, rw, uio, error)
	int fd;
	enum uio_rw rw;
	struct uio *uio;
	int error;
{
	struct ktr_request *req;
	struct ktr_genio *ktg;
	int datalen;
	char *buf;

	if (error)
		return;
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	datalen = imin(uio->uio_resid, ktr_geniosize);
	buf = malloc(datalen, M_KTRACE, M_WAITOK);
	if (uiomove(buf, datalen, uio)) {
		free(buf, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_len = datalen;
	req->ktr_header.ktr_buffer = buf;
	ktr_submitrequest(req);
}

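/* Record the delivery of a signal along with the handler, mask and code. */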
void
ktrpsig(sig, action, mask, code)
	int sig;
	sig_t action;
	sigset_t *mask;
	int code;
{
	struct ktr_request *req;
	struct ktr_psig	*kp;

	req = ktr_getrequest(KTR_PSIG);
	if (req == NULL)
		return;
	kp = &req->ktr_data.ktr_psig;
	kp->signo = (char)sig;
	kp->action = action;
	kp->mask = *mask;
	kp->code = code;
	ktr_submitrequest(req);
}

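/* Record a context-switch event (stop/resume, user/kernel). */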
void
ktrcsw(out, user)
	int out, user;
{
	struct ktr_request *req;
	struct ktr_csw *kc;

	req = ktr_getrequest(KTR_CSW);
	if (req == NULL)
		return;
	kc = &req->ktr_data.ktr_csw;
	kc->out = out;
	kc->user = user;
	ktr_submitrequest(req);
}
#endif /* KTRACE */

/* Interface and common routines */

/*
 * ktrace system call
 *
 * MPSAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
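/*
 * A typical invocation from userland (see ktrace(2)) might look like
 * the following, which traces syscalls and their returns for pid and
 * its descendants:
 *
 *	ktrace("ktrace.out", KTROP_SET | KTRFLAG_DESCEND,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET, pid);
 */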
/* ARGSUSED */
int
ktrace(td, uap)
	struct thread *td;
	register struct ktrace_args *uap;
{
#ifdef KTRACE
	register struct vnode *vp = NULL;
	register struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int ret = 0;
	int flags, error = 0;
	struct nameidata nd;
	struct ucred *cred;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	td->td_pflags |= TDP_INKTRACE;
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		mtx_lock(&Giant);
		error = vn_open(&nd, &flags, 0, -1);
		if (error) {
			mtx_unlock(&Giant);
			td->td_pflags &= ~TDP_INKTRACE;
			return (error);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0, td);
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			mtx_unlock(&Giant);
			td->td_pflags &= ~TDP_INKTRACE;
			return (EACCES);
		}
		mtx_unlock(&Giant);
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list) {
			PROC_LOCK(p);
			if (p->p_tracevp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					cred = p->p_tracecred;
					p->p_tracecred = NULL;
					p->p_tracevp = NULL;
					p->p_traceflag = 0;
					mtx_unlock(&ktrace_mtx);
					PROC_UNLOCK(p);
					mtx_lock(&Giant);
					(void) vn_close(vp, FREAD|FWRITE,
						cred, td);
					mtx_unlock(&Giant);
					crfree(cred);
				} else {
					PROC_UNLOCK(p);
					error = EPERM;
				}
			} else
				PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		goto done;
	}
	/*
	 * do it
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele(). Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		LIST_FOREACH(p, &pg->pg_members, p_pglist)
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);
		if (p == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * The slock of the proctree lock will keep this process
		 * from going away, so unlocking the proc here is ok.
		 */
		PROC_UNLOCK(p);
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	sx_sunlock(&proctree_lock);
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL) {
		mtx_lock(&Giant);
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
		mtx_unlock(&Giant);
	}
	td->td_pflags &= ~TDP_INKTRACE;
	return (error);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

/*
 * utrace system call
 *
 * MPSAFE
 */
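/*
 * For example, a program can tag its own trace with up to
 * KTR_USER_MAXLEN bytes of arbitrary data:
 *
 *	utrace("checkpoint", sizeof("checkpoint"));
 */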
/* ARGSUSED */
int
utrace(td, uap)
	struct thread *td;
	register struct utrace_args *uap;
{

#ifdef KTRACE
	struct ktr_request *req;
	void *cp;
	int error;

	if (!KTRPOINT(td, KTR_USER))
		return (0);
	if (uap->len > KTR_USER_MAXLEN)
		return (EINVAL);
	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	error = copyin(uap->addr, cp, uap->len);
	if (error) {
		free(cp, M_KTRACE);
		return (error);
	}
	req = ktr_getrequest(KTR_USER);
	if (req == NULL) {
		free(cp, M_KTRACE);
		return (ENOMEM);
	}
	req->ktr_header.ktr_buffer = cp;
	req->ktr_header.ktr_len = uap->len;
	ktr_submitrequest(req);
	return (0);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

#ifdef KTRACE
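/*
 * Apply a single KTROP_SET or KTROP_CLEAR operation to one process.
 * Returns 1 on success and 0 if the caller may not change the target's
 * tracing state.  Displaced vnode and credential references are
 * released only after all locks have been dropped.
 */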
static int
ktrops(td, p, ops, facs, vp)
	struct thread *td;
	struct proc *p;
	int ops, facs;
	struct vnode *vp;
{
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;

	PROC_LOCK(p);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracevp != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracevp;
			VREF(vp);
			p->p_tracevp = vp;
		}
		if (p->p_tracecred != td->td_ucred) {
			tracecred = p->p_tracecred;
			p->p_tracecred = crhold(td->td_ucred);
		}
		p->p_traceflag |= facs;
		if (td->td_ucred->cr_uid == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
			/* no more tracing */
			p->p_traceflag = 0;
			tracevp = p->p_tracevp;
			p->p_tracevp = NULL;
			tracecred = p->p_tracecred;
			p->p_tracecred = NULL;
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (tracevp != NULL) {
		mtx_lock(&Giant);
		vrele(tracevp);
		mtx_unlock(&Giant);
	}
	if (tracecred != NULL)
		crfree(tracecred);

	return (1);
}

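/*
 * Apply ktrops() to top and, in a pre-order walk, to every process
 * below it in the process tree.
 */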
static int
ktrsetchildren(td, top, ops, facs, vp)
	struct thread *td;
	struct proc *top;
	int ops, facs;
	struct vnode *vp;
{
	register struct proc *p;
	register int ret = 0;

	p = top;
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}
	/*NOTREACHED*/
}

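/*
 * Write a single request to its trace vnode as up to three iovecs:
 * the header, the fixed payload, then any variable-length buffer.  On
 * a write error, tracing to that vnode is disabled system-wide.
 */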
static void
ktr_writerequest(struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct thread *td;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error;

	vp = req->ktr_vp;
	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.
	 */
	if (vp == NULL)
		return;
	kth = &req->ktr_header;
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	cred = req->ktr_cred;
	td = curthread;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(kth->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = kth->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}
	mtx_lock(&Giant);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	(void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
#ifdef MAC
	error = mac_check_vnode_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp, 0, td);
	vn_finished_write(mp);
	mtx_unlock(&Giant);
	if (!error)
		return;
	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 0;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			p->p_tracevp = NULL;
			p->p_traceflag = 0;
			cred = p->p_tracecred;
			p->p_tracecred = NULL;
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);
	/*
	 * Second, clear this vnode from any pending requests.
	 */
	mtx_lock(&ktrace_mtx);
	STAILQ_FOREACH(req, &ktr_todo, ktr_list) {
		if (req->ktr_vp == vp) {
			req->ktr_vp = NULL;
			vrele_count++;
		}
	}
	mtx_unlock(&ktrace_mtx);
	mtx_lock(&Giant);
	while (vrele_count-- > 0)
		vrele(vp);
	mtx_unlock(&Giant);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 */
static int
ktrcanset(td, targetp)
	struct thread *td;
	struct proc *targetp;
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    suser_cred(td->td_ucred, PRISON_ROOT))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */