xref: /titanic_41/usr/src/uts/common/os/watchpoint.c (revision 006fe326e7b85f42c0b2cafbe1567fb3558c3d39)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/t_lock.h>
29 #include <sys/param.h>
30 #include <sys/cred.h>
31 #include <sys/debug.h>
32 #include <sys/inline.h>
33 #include <sys/kmem.h>
34 #include <sys/proc.h>
35 #include <sys/regset.h>
36 #include <sys/sysmacros.h>
37 #include <sys/systm.h>
38 #include <sys/prsystm.h>
39 #include <sys/buf.h>
40 #include <sys/signal.h>
41 #include <sys/user.h>
42 #include <sys/cpuvar.h>
43 
44 #include <sys/fault.h>
45 #include <sys/syscall.h>
46 #include <sys/procfs.h>
47 #include <sys/cmn_err.h>
48 #include <sys/stack.h>
49 #include <sys/watchpoint.h>
50 #include <sys/copyops.h>
51 #include <sys/schedctl.h>
52 
53 #include <sys/mman.h>
54 #include <vm/as.h>
55 #include <vm/seg.h>
56 
57 /*
58  * Copy ops vector for watchpoints.
59  */
60 static int	watch_copyin(const void *, void *, size_t);
61 static int	watch_xcopyin(const void *, void *, size_t);
62 static int	watch_copyout(const void *, void *, size_t);
63 static int	watch_xcopyout(const void *, void *, size_t);
64 static int	watch_copyinstr(const char *, char *, size_t, size_t *);
65 static int	watch_copyoutstr(const char *, char *, size_t, size_t *);
66 static int	watch_fuword8(const void *, uint8_t *);
67 static int	watch_fuword16(const void *, uint16_t *);
68 static int	watch_fuword32(const void *, uint32_t *);
69 static int	watch_suword8(void *, uint8_t);
70 static int	watch_suword16(void *, uint16_t);
71 static int	watch_suword32(void *, uint32_t);
72 static int	watch_physio(int (*)(struct buf *), struct buf *,
73     dev_t, int, void (*)(struct buf *), struct uio *);
74 #ifdef _LP64
75 static int	watch_fuword64(const void *, uint64_t *);
76 static int	watch_suword64(void *, uint64_t);
77 #endif
78 
79 struct copyops watch_copyops = {
80 	watch_copyin,
81 	watch_xcopyin,
82 	watch_copyout,
83 	watch_xcopyout,
84 	watch_copyinstr,
85 	watch_copyoutstr,
86 	watch_fuword8,
87 	watch_fuword16,
88 	watch_fuword32,
89 #ifdef _LP64
90 	watch_fuword64,
91 #else
92 	NULL,
93 #endif
94 	watch_suword8,
95 	watch_suword16,
96 	watch_suword32,
97 #ifdef _LP64
98 	watch_suword64,
99 #else
100 	NULL,
101 #endif
102 	watch_physio
103 };
104 
105 /*
106  * Map the 'rw' argument to a protection flag.
107  */
108 static int
109 rw_to_prot(enum seg_rw rw)
110 {
111 	switch (rw) {
112 	case S_EXEC:
113 		return (PROT_EXEC);
114 	case S_READ:
115 		return (PROT_READ);
116 	case S_WRITE:
117 		return (PROT_WRITE);
118 	default:
119 		return (PROT_NONE);	/* can't happen */
120 	}
121 }
122 
123 /*
124  * Map the 'rw' argument to an index into an array of exec/write/read things.
125  * The index follows the precedence order:  exec .. write .. read
126  */
127 static int
128 rw_to_index(enum seg_rw rw)
129 {
130 	switch (rw) {
131 	default:	/* default case "can't happen" */
132 	case S_EXEC:
133 		return (0);
134 	case S_WRITE:
135 		return (1);
136 	case S_READ:
137 		return (2);
138 	}
139 }
140 
141 /*
142  * Map an index back to a seg_rw.
143  */
144 static enum seg_rw S_rw[4] = {
145 	S_EXEC,
146 	S_WRITE,
147 	S_READ,
148 	S_READ,
149 };
150 
/*
 * Slot numbers for three-element exec/write/read counter arrays.
 * The values match what rw_to_index() returns.
 */
#define	X	0
#define	W	1
#define	R	2

/*
 * Total of the exec, write and read slots of the array 'a'.
 * The argument is parenthesized so that expressions such as *p expand
 * correctly; note that it is evaluated three times.
 */
#define	sum(a)	((a)[X] + (a)[W] + (a)[R])
155 
156 /*
157  * Common code for pr_mappage() and pr_unmappage().
158  */
159 static int
160 pr_do_mappage(caddr_t addr, size_t size, int mapin, enum seg_rw rw, int kernel)
161 {
162 	proc_t *p = curproc;
163 	struct as *as = p->p_as;
164 	char *eaddr = addr + size;
165 	int prot_rw = rw_to_prot(rw);
166 	int xrw = rw_to_index(rw);
167 	int rv = 0;
168 	struct watched_page *pwp;
169 	struct watched_page tpw;
170 	avl_index_t where;
171 	uint_t prot;
172 
173 	ASSERT(as != &kas);
174 
175 startover:
176 	ASSERT(rv == 0);
177 	if (avl_numnodes(&as->a_wpage) == 0)
178 		return (0);
179 
180 	/*
181 	 * as->a_wpage can only be changed while the process is totally stopped.
182 	 * Don't grab p_lock here.  Holding p_lock while grabbing the address
183 	 * space lock leads to deadlocks with the clock thread.  Note that if an
184 	 * as_fault() is servicing a fault to a watched page on behalf of an
185 	 * XHAT provider, watchpoint will be temporarily cleared (and wp_prot
186 	 * will be set to wp_oprot).  Since this is done while holding as writer
187 	 * lock, we need to grab as lock (reader lock is good enough).
188 	 *
189 	 * p_maplock prevents simultaneous execution of this function.  Under
190 	 * normal circumstances, holdwatch() will stop all other threads, so the
191 	 * lock isn't really needed.  But there may be multiple threads within
192 	 * stop() when SWATCHOK is set, so we need to handle multiple threads
193 	 * at once.  See holdwatch() for the details of this dance.
194 	 */
195 
196 	mutex_enter(&p->p_maplock);
197 	AS_LOCK_ENTER(as, RW_READER);
198 
199 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
200 	if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
201 		pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
202 
203 	for (; pwp != NULL && pwp->wp_vaddr < eaddr;
204 	    pwp = AVL_NEXT(&as->a_wpage, pwp)) {
205 
206 		/*
207 		 * If the requested protection has not been
208 		 * removed, we need not remap this page.
209 		 */
210 		prot = pwp->wp_prot;
211 		if (kernel || (prot & PROT_USER))
212 			if (prot & prot_rw)
213 				continue;
214 		/*
215 		 * If the requested access does not exist in the page's
216 		 * original protections, we need not remap this page.
217 		 * If the page does not exist yet, we can't test it.
218 		 */
219 		if ((prot = pwp->wp_oprot) != 0) {
220 			if (!(kernel || (prot & PROT_USER)))
221 				continue;
222 			if (!(prot & prot_rw))
223 				continue;
224 		}
225 
226 		if (mapin) {
227 			/*
228 			 * Before mapping the page in, ensure that
229 			 * all other lwps are held in the kernel.
230 			 */
231 			if (p->p_mapcnt == 0) {
232 				/*
233 				 * Release as lock while in holdwatch()
234 				 * in case other threads need to grab it.
235 				 */
236 				AS_LOCK_EXIT(as);
237 				mutex_exit(&p->p_maplock);
238 				if (holdwatch() != 0) {
239 					/*
240 					 * We stopped in holdwatch().
241 					 * Start all over again because the
242 					 * watched page list may have changed.
243 					 */
244 					goto startover;
245 				}
246 				mutex_enter(&p->p_maplock);
247 				AS_LOCK_ENTER(as, RW_READER);
248 			}
249 			p->p_mapcnt++;
250 		}
251 
252 		addr = pwp->wp_vaddr;
253 		rv++;
254 
255 		prot = pwp->wp_prot;
256 		if (mapin) {
257 			if (kernel)
258 				pwp->wp_kmap[xrw]++;
259 			else
260 				pwp->wp_umap[xrw]++;
261 			pwp->wp_flags |= WP_NOWATCH;
262 			if (pwp->wp_kmap[X] + pwp->wp_umap[X])
263 				/* cannot have exec-only protection */
264 				prot |= PROT_READ|PROT_EXEC;
265 			if (pwp->wp_kmap[R] + pwp->wp_umap[R])
266 				prot |= PROT_READ;
267 			if (pwp->wp_kmap[W] + pwp->wp_umap[W])
268 				/* cannot have write-only protection */
269 				prot |= PROT_READ|PROT_WRITE;
270 #if 0	/* damned broken mmu feature! */
271 			if (sum(pwp->wp_umap) == 0)
272 				prot &= ~PROT_USER;
273 #endif
274 		} else {
275 			ASSERT(pwp->wp_flags & WP_NOWATCH);
276 			if (kernel) {
277 				ASSERT(pwp->wp_kmap[xrw] != 0);
278 				--pwp->wp_kmap[xrw];
279 			} else {
280 				ASSERT(pwp->wp_umap[xrw] != 0);
281 				--pwp->wp_umap[xrw];
282 			}
283 			if (sum(pwp->wp_kmap) + sum(pwp->wp_umap) == 0)
284 				pwp->wp_flags &= ~WP_NOWATCH;
285 			else {
286 				if (pwp->wp_kmap[X] + pwp->wp_umap[X])
287 					/* cannot have exec-only protection */
288 					prot |= PROT_READ|PROT_EXEC;
289 				if (pwp->wp_kmap[R] + pwp->wp_umap[R])
290 					prot |= PROT_READ;
291 				if (pwp->wp_kmap[W] + pwp->wp_umap[W])
292 					/* cannot have write-only protection */
293 					prot |= PROT_READ|PROT_WRITE;
294 #if 0	/* damned broken mmu feature! */
295 				if (sum(pwp->wp_umap) == 0)
296 					prot &= ~PROT_USER;
297 #endif
298 			}
299 		}
300 
301 
302 		if (pwp->wp_oprot != 0) {	/* if page exists */
303 			struct seg *seg;
304 			uint_t oprot;
305 			int err, retrycnt = 0;
306 
307 			AS_LOCK_EXIT(as);
308 			AS_LOCK_ENTER(as, RW_WRITER);
309 		retry:
310 			seg = as_segat(as, addr);
311 			ASSERT(seg != NULL);
312 			SEGOP_GETPROT(seg, addr, 0, &oprot);
313 			if (prot != oprot) {
314 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
315 				if (err == IE_RETRY) {
316 					ASSERT(retrycnt == 0);
317 					retrycnt++;
318 					goto retry;
319 				}
320 			}
321 			AS_LOCK_EXIT(as);
322 		} else
323 			AS_LOCK_EXIT(as);
324 
325 		/*
326 		 * When all pages are mapped back to their normal state,
327 		 * continue the other lwps.
328 		 */
329 		if (!mapin) {
330 			ASSERT(p->p_mapcnt > 0);
331 			p->p_mapcnt--;
332 			if (p->p_mapcnt == 0) {
333 				mutex_exit(&p->p_maplock);
334 				mutex_enter(&p->p_lock);
335 				continuelwps(p);
336 				mutex_exit(&p->p_lock);
337 				mutex_enter(&p->p_maplock);
338 			}
339 		}
340 
341 		AS_LOCK_ENTER(as, RW_READER);
342 	}
343 
344 	AS_LOCK_EXIT(as);
345 	mutex_exit(&p->p_maplock);
346 
347 	return (rv);
348 }
349 
350 /*
351  * Restore the original page protections on an address range.
352  * If 'kernel' is non-zero, just do it for the kernel.
353  * pr_mappage() returns non-zero if it actually changed anything.
354  *
355  * pr_mappage() and pr_unmappage() must be executed in matched pairs,
356  * but pairs may be nested within other pairs.  The reference counts
357  * sort it all out.  See pr_do_mappage(), above.
358  */
359 static int
360 pr_mappage(const caddr_t addr, size_t size, enum seg_rw rw, int kernel)
361 {
362 	return (pr_do_mappage(addr, size, 1, rw, kernel));
363 }
364 
365 /*
366  * Set the modified page protections on a watched page.
367  * Inverse of pr_mappage().
368  * Needs to be called only if pr_mappage() returned non-zero.
369  */
370 static void
371 pr_unmappage(const caddr_t addr, size_t size, enum seg_rw rw, int kernel)
372 {
373 	(void) pr_do_mappage(addr, size, 0, rw, kernel);
374 }
375 
376 /*
377  * Function called by an lwp after it resumes from stop().
378  */
379 void
380 setallwatch(void)
381 {
382 	proc_t *p = curproc;
383 	struct as *as = curproc->p_as;
384 	struct watched_page *pwp, *next;
385 	struct seg *seg;
386 	caddr_t vaddr;
387 	uint_t prot;
388 	int err, retrycnt;
389 
390 	if (p->p_wprot == NULL)
391 		return;
392 
393 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
394 
395 	AS_LOCK_ENTER(as, RW_WRITER);
396 
397 	pwp = p->p_wprot;
398 	while (pwp != NULL) {
399 
400 		vaddr = pwp->wp_vaddr;
401 		retrycnt = 0;
402 	retry:
403 		ASSERT(pwp->wp_flags & WP_SETPROT);
404 		if ((seg = as_segat(as, vaddr)) != NULL &&
405 		    !(pwp->wp_flags & WP_NOWATCH)) {
406 			prot = pwp->wp_prot;
407 			err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
408 			if (err == IE_RETRY) {
409 				ASSERT(retrycnt == 0);
410 				retrycnt++;
411 				goto retry;
412 			}
413 		}
414 
415 		next = pwp->wp_list;
416 
417 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec == 0) {
418 			/*
419 			 * No watched areas remain in this page.
420 			 * Free the watched_page structure.
421 			 */
422 			avl_remove(&as->a_wpage, pwp);
423 			kmem_free(pwp, sizeof (struct watched_page));
424 		} else {
425 			pwp->wp_flags &= ~WP_SETPROT;
426 		}
427 
428 		pwp = next;
429 	}
430 	p->p_wprot = NULL;
431 
432 	AS_LOCK_EXIT(as);
433 }
434 
435 
436 
437 /* Must be called with as lock held */
438 int
439 pr_is_watchpage_as(caddr_t addr, enum seg_rw rw, struct as *as)
440 {
441 	register struct watched_page *pwp;
442 	struct watched_page tpw;
443 	uint_t prot;
444 	int rv = 0;
445 
446 	switch (rw) {
447 	case S_READ:
448 	case S_WRITE:
449 	case S_EXEC:
450 		break;
451 	default:
452 		return (0);
453 	}
454 
455 	/*
456 	 * as->a_wpage can only be modified while the process is totally
457 	 * stopped.  We need, and should use, no locks here.
458 	 */
459 	if (as != &kas && avl_numnodes(&as->a_wpage) != 0) {
460 		tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
461 		pwp = avl_find(&as->a_wpage, &tpw, NULL);
462 		if (pwp != NULL) {
463 			ASSERT(addr >= pwp->wp_vaddr &&
464 			    addr < pwp->wp_vaddr + PAGESIZE);
465 			if (pwp->wp_oprot != 0) {
466 				prot = pwp->wp_prot;
467 				switch (rw) {
468 				case S_READ:
469 					rv = ((prot & (PROT_USER|PROT_READ))
470 					    != (PROT_USER|PROT_READ));
471 					break;
472 				case S_WRITE:
473 					rv = ((prot & (PROT_USER|PROT_WRITE))
474 					    != (PROT_USER|PROT_WRITE));
475 					break;
476 				case S_EXEC:
477 					rv = ((prot & (PROT_USER|PROT_EXEC))
478 					    != (PROT_USER|PROT_EXEC));
479 					break;
480 				default:
481 					/* can't happen! */
482 					break;
483 				}
484 			}
485 		}
486 	}
487 
488 	return (rv);
489 }
490 
491 
492 /*
493  * trap() calls here to determine if a fault is in a watched page.
494  * We return nonzero if this is true and the load/store would fail.
495  */
496 int
497 pr_is_watchpage(caddr_t addr, enum seg_rw rw)
498 {
499 	struct as *as = curproc->p_as;
500 	int rv;
501 
502 	if ((as == &kas) || avl_numnodes(&as->a_wpage) == 0)
503 		return (0);
504 
505 	/* Grab the lock because of XHAT (see comment in pr_mappage()) */
506 	AS_LOCK_ENTER(as, RW_READER);
507 	rv = pr_is_watchpage_as(addr, rw, as);
508 	AS_LOCK_EXIT(as);
509 
510 	return (rv);
511 }
512 
513 
514 
515 /*
516  * trap() calls here to determine if a fault is a watchpoint.
517  */
518 int
519 pr_is_watchpoint(caddr_t *paddr, int *pta, size_t size, size_t *plen,
520 	enum seg_rw rw)
521 {
522 	proc_t *p = curproc;
523 	caddr_t addr = *paddr;
524 	caddr_t eaddr = addr + size;
525 	register struct watched_area *pwa;
526 	struct watched_area twa;
527 	int rv = 0;
528 	int ta = 0;
529 	size_t len = 0;
530 
531 	switch (rw) {
532 	case S_READ:
533 	case S_WRITE:
534 	case S_EXEC:
535 		break;
536 	default:
537 		*pta = 0;
538 		return (0);
539 	}
540 
541 	/*
542 	 * p->p_warea is protected by p->p_lock.
543 	 */
544 	mutex_enter(&p->p_lock);
545 
546 	/* BEGIN CSTYLED */
547 	/*
548 	 * This loop is somewhat complicated because the fault region can span
549 	 * multiple watched areas.  For example:
550 	 *
551 	 *            addr              eaddr
552 	 * 		+-----------------+
553 	 * 		| fault region    |
554 	 * 	+-------+--------+----+---+------------+
555 	 *      | prot not right |    | prot correct   |
556 	 *      +----------------+    +----------------+
557 	 *    wa_vaddr	      wa_eaddr
558 	 *    		      wa_vaddr		wa_eaddr
559 	 *
560 	 * We start at the area greater than or equal to the starting address.
561 	 * As long as some portion of the fault region overlaps the current
562 	 * area, we continue checking permissions until we find an appropriate
563 	 * match.
564 	 */
565 	/* END CSTYLED */
566 	twa.wa_vaddr = addr;
567 	twa.wa_eaddr = eaddr;
568 
569 	for (pwa = pr_find_watched_area(p, &twa, NULL);
570 	    pwa != NULL && eaddr > pwa->wa_vaddr && addr < pwa->wa_eaddr;
571 	    pwa = AVL_NEXT(&p->p_warea, pwa)) {
572 
573 		switch (rw) {
574 		case S_READ:
575 			if (pwa->wa_flags & WA_READ)
576 				rv = TRAP_RWATCH;
577 			break;
578 		case S_WRITE:
579 			if (pwa->wa_flags & WA_WRITE)
580 				rv = TRAP_WWATCH;
581 			break;
582 		case S_EXEC:
583 			if (pwa->wa_flags & WA_EXEC)
584 				rv = TRAP_XWATCH;
585 			break;
586 		default:
587 			/* can't happen */
588 			break;
589 		}
590 
591 		/*
592 		 * If protections didn't match, check the next watched
593 		 * area
594 		 */
595 		if (rv != 0) {
596 			if (addr < pwa->wa_vaddr)
597 				addr = pwa->wa_vaddr;
598 			len = pwa->wa_eaddr - addr;
599 			if (pwa->wa_flags & WA_TRAPAFTER)
600 				ta = 1;
601 			break;
602 		}
603 	}
604 
605 	mutex_exit(&p->p_lock);
606 
607 	*paddr = addr;
608 	*pta = ta;
609 	if (plen != NULL)
610 		*plen = len;
611 	return (rv);
612 }
613 
614 /*
615  * Set up to perform a single-step at user level for the
616  * case of a trapafter watchpoint.  Called from trap().
617  */
618 void
619 do_watch_step(caddr_t vaddr, size_t sz, enum seg_rw rw,
620 	int watchcode, greg_t pc)
621 {
622 	register klwp_t *lwp = ttolwp(curthread);
623 	struct lwp_watch *pw = &lwp->lwp_watch[rw_to_index(rw)];
624 
625 	/*
626 	 * Check to see if we are already performing this special
627 	 * watchpoint single-step.  We must not do pr_mappage() twice.
628 	 */
629 
630 	/* special check for two read traps on the same instruction */
631 	if (rw == S_READ && pw->wpaddr != NULL &&
632 	    !(pw->wpaddr <= vaddr && vaddr < pw->wpaddr + pw->wpsize)) {
633 		ASSERT(lwp->lwp_watchtrap != 0);
634 		pw++;	/* use the extra S_READ struct */
635 	}
636 
637 	if (pw->wpaddr != NULL) {
638 		ASSERT(lwp->lwp_watchtrap != 0);
639 		ASSERT(pw->wpaddr <= vaddr && vaddr < pw->wpaddr + pw->wpsize);
640 		if (pw->wpcode == 0) {
641 			pw->wpcode = watchcode;
642 			pw->wppc = pc;
643 		}
644 	} else {
645 		int mapped = pr_mappage(vaddr, sz, rw, 0);
646 		prstep(lwp, 1);
647 		lwp->lwp_watchtrap = 1;
648 		pw->wpaddr = vaddr;
649 		pw->wpsize = sz;
650 		pw->wpcode = watchcode;
651 		pw->wpmapped = mapped;
652 		pw->wppc = pc;
653 	}
654 }
655 
656 /*
657  * Undo the effects of do_watch_step().
658  * Called from trap() after the single-step is finished.
659  * Also called from issig_forreal() and stop() with a NULL
660  * argument to avoid having these things set more than once.
661  */
662 int
663 undo_watch_step(k_siginfo_t *sip)
664 {
665 	register klwp_t *lwp = ttolwp(curthread);
666 	int fault = 0;
667 
668 	if (lwp->lwp_watchtrap) {
669 		struct lwp_watch *pw = lwp->lwp_watch;
670 		int i;
671 
672 		for (i = 0; i < 4; i++, pw++) {
673 			if (pw->wpaddr == NULL)
674 				continue;
675 			if (pw->wpmapped)
676 				pr_unmappage(pw->wpaddr, pw->wpsize, S_rw[i],
677 				    0);
678 			if (pw->wpcode != 0) {
679 				if (sip != NULL) {
680 					sip->si_signo = SIGTRAP;
681 					sip->si_code = pw->wpcode;
682 					sip->si_addr = pw->wpaddr;
683 					sip->si_trapafter = 1;
684 					sip->si_pc = (caddr_t)pw->wppc;
685 				}
686 				fault = FLTWATCH;
687 				pw->wpcode = 0;
688 			}
689 			pw->wpaddr = NULL;
690 			pw->wpsize = 0;
691 			pw->wpmapped = 0;
692 		}
693 		lwp->lwp_watchtrap = 0;
694 	}
695 
696 	return (fault);
697 }
698 
699 /*
700  * Handle a watchpoint that occurs while doing copyin()
701  * or copyout() in a system call.
702  * Return non-zero if the fault or signal is cleared
703  * by a debugger while the lwp is stopped.
704  */
705 static int
706 sys_watchpoint(caddr_t addr, int watchcode, int ta)
707 {
708 	extern greg_t getuserpc(void);	/* XXX header file */
709 	k_sigset_t smask;
710 	register proc_t *p = ttoproc(curthread);
711 	register klwp_t *lwp = ttolwp(curthread);
712 	register sigqueue_t *sqp;
713 	int rval;
714 
715 	/* assert no locks are held */
716 	/* ASSERT(curthread->t_nlocks == 0); */
717 
718 	sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
719 	sqp->sq_info.si_signo = SIGTRAP;
720 	sqp->sq_info.si_code = watchcode;
721 	sqp->sq_info.si_addr = addr;
722 	sqp->sq_info.si_trapafter = ta;
723 	sqp->sq_info.si_pc = (caddr_t)getuserpc();
724 
725 	mutex_enter(&p->p_lock);
726 
727 	/* this will be tested and cleared by the caller */
728 	lwp->lwp_sysabort = 0;
729 
730 	if (prismember(&p->p_fltmask, FLTWATCH)) {
731 		lwp->lwp_curflt = (uchar_t)FLTWATCH;
732 		lwp->lwp_siginfo = sqp->sq_info;
733 		stop(PR_FAULTED, FLTWATCH);
734 		if (lwp->lwp_curflt == 0) {
735 			mutex_exit(&p->p_lock);
736 			kmem_free(sqp, sizeof (sigqueue_t));
737 			return (1);
738 		}
739 		lwp->lwp_curflt = 0;
740 	}
741 
742 	/*
743 	 * post the SIGTRAP signal.
744 	 * Block all other signals so we only stop showing SIGTRAP.
745 	 */
746 	if (signal_is_blocked(curthread, SIGTRAP) ||
747 	    sigismember(&p->p_ignore, SIGTRAP)) {
748 		/* SIGTRAP is blocked or ignored, forget the rest. */
749 		mutex_exit(&p->p_lock);
750 		kmem_free(sqp, sizeof (sigqueue_t));
751 		return (0);
752 	}
753 	sigdelq(p, curthread, SIGTRAP);
754 	sigaddqa(p, curthread, sqp);
755 	schedctl_finish_sigblock(curthread);
756 	smask = curthread->t_hold;
757 	sigfillset(&curthread->t_hold);
758 	sigdiffset(&curthread->t_hold, &cantmask);
759 	sigdelset(&curthread->t_hold, SIGTRAP);
760 	mutex_exit(&p->p_lock);
761 
762 	rval = ((ISSIG_FAST(curthread, lwp, p, FORREAL))? 0 : 1);
763 
764 	/* restore the original signal mask */
765 	mutex_enter(&p->p_lock);
766 	curthread->t_hold = smask;
767 	mutex_exit(&p->p_lock);
768 
769 	return (rval);
770 }
771 
772 /*
773  * Wrappers for the copyin()/copyout() functions to deal
774  * with watchpoints that fire while in system calls.
775  */
776 
777 static int
778 watch_xcopyin(const void *uaddr, void *kaddr, size_t count)
779 {
780 	klwp_t *lwp = ttolwp(curthread);
781 	caddr_t watch_uaddr = (caddr_t)uaddr;
782 	caddr_t watch_kaddr = (caddr_t)kaddr;
783 	int error = 0;
784 	label_t ljb;
785 	size_t part;
786 	int mapped;
787 
788 	while (count && error == 0) {
789 		int watchcode;
790 		caddr_t vaddr;
791 		size_t len;
792 		int ta;
793 
794 		if ((part = PAGESIZE -
795 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > count)
796 			part = count;
797 
798 		if (!pr_is_watchpage(watch_uaddr, S_READ))
799 			watchcode = 0;
800 		else {
801 			vaddr = watch_uaddr;
802 			watchcode = pr_is_watchpoint(&vaddr, &ta,
803 			    part, &len, S_READ);
804 			if (watchcode && ta == 0)
805 				part = vaddr - watch_uaddr;
806 		}
807 
808 		/*
809 		 * Copy the initial part, up to a watched address, if any.
810 		 */
811 		if (part != 0) {
812 			mapped = pr_mappage(watch_uaddr, part, S_READ, 1);
813 			if (on_fault(&ljb))
814 				error = EFAULT;
815 			else
816 				copyin_noerr(watch_uaddr, watch_kaddr, part);
817 			no_fault();
818 			if (mapped)
819 				pr_unmappage(watch_uaddr, part, S_READ, 1);
820 			watch_uaddr += part;
821 			watch_kaddr += part;
822 			count -= part;
823 		}
824 		/*
825 		 * If trapafter was specified, then copy through the
826 		 * watched area before taking the watchpoint trap.
827 		 */
828 		while (count && watchcode && ta && len > part && error == 0) {
829 			len -= part;
830 			if ((part = PAGESIZE) > count)
831 				part = count;
832 			if (part > len)
833 				part = len;
834 			mapped = pr_mappage(watch_uaddr, part, S_READ, 1);
835 			if (on_fault(&ljb))
836 				error = EFAULT;
837 			else
838 				copyin_noerr(watch_uaddr, watch_kaddr, part);
839 			no_fault();
840 			if (mapped)
841 				pr_unmappage(watch_uaddr, part, S_READ, 1);
842 			watch_uaddr += part;
843 			watch_kaddr += part;
844 			count -= part;
845 		}
846 
847 error:
848 		/* if we hit a watched address, do the watchpoint logic */
849 		if (watchcode &&
850 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
851 		    lwp->lwp_sysabort)) {
852 			lwp->lwp_sysabort = 0;
853 			error = EFAULT;
854 			break;
855 		}
856 	}
857 
858 	return (error);
859 }
860 
/*
 * copyin() flavor of watch_xcopyin(): any failure is reported as -1
 * rather than as an errno value.
 *
 * NOTE: the parameter names here are reversed with respect to
 * watch_xcopyin(); the first argument is forwarded as its user source
 * address and the second as its kernel destination.
 */
static int
watch_copyin(const void *kaddr, void *uaddr, size_t count)
{
	if (watch_xcopyin(kaddr, uaddr, count) != 0)
		return (-1);
	return (0);
}
866 
867 
868 static int
869 watch_xcopyout(const void *kaddr, void *uaddr, size_t count)
870 {
871 	klwp_t *lwp = ttolwp(curthread);
872 	caddr_t watch_uaddr = (caddr_t)uaddr;
873 	caddr_t watch_kaddr = (caddr_t)kaddr;
874 	int error = 0;
875 	label_t ljb;
876 
877 	while (count && error == 0) {
878 		int watchcode;
879 		caddr_t vaddr;
880 		size_t part;
881 		size_t len;
882 		int ta;
883 		int mapped;
884 
885 		if ((part = PAGESIZE -
886 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > count)
887 			part = count;
888 
889 		if (!pr_is_watchpage(watch_uaddr, S_WRITE))
890 			watchcode = 0;
891 		else {
892 			vaddr = watch_uaddr;
893 			watchcode = pr_is_watchpoint(&vaddr, &ta,
894 			    part, &len, S_WRITE);
895 			if (watchcode) {
896 				if (ta == 0)
897 					part = vaddr - watch_uaddr;
898 				else {
899 					len += vaddr - watch_uaddr;
900 					if (part > len)
901 						part = len;
902 				}
903 			}
904 		}
905 
906 		/*
907 		 * Copy the initial part, up to a watched address, if any.
908 		 */
909 		if (part != 0) {
910 			mapped = pr_mappage(watch_uaddr, part, S_WRITE, 1);
911 			if (on_fault(&ljb))
912 				error = EFAULT;
913 			else
914 				copyout_noerr(watch_kaddr, watch_uaddr, part);
915 			no_fault();
916 			if (mapped)
917 				pr_unmappage(watch_uaddr, part, S_WRITE, 1);
918 			watch_uaddr += part;
919 			watch_kaddr += part;
920 			count -= part;
921 		}
922 
923 		/*
924 		 * If trapafter was specified, then copy through the
925 		 * watched area before taking the watchpoint trap.
926 		 */
927 		while (count && watchcode && ta && len > part && error == 0) {
928 			len -= part;
929 			if ((part = PAGESIZE) > count)
930 				part = count;
931 			if (part > len)
932 				part = len;
933 			mapped = pr_mappage(watch_uaddr, part, S_WRITE, 1);
934 			if (on_fault(&ljb))
935 				error = EFAULT;
936 			else
937 				copyout_noerr(watch_kaddr, watch_uaddr, part);
938 			no_fault();
939 			if (mapped)
940 				pr_unmappage(watch_uaddr, part, S_WRITE, 1);
941 			watch_uaddr += part;
942 			watch_kaddr += part;
943 			count -= part;
944 		}
945 
946 		/* if we hit a watched address, do the watchpoint logic */
947 		if (watchcode &&
948 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
949 		    lwp->lwp_sysabort)) {
950 			lwp->lwp_sysabort = 0;
951 			error = EFAULT;
952 			break;
953 		}
954 	}
955 
956 	return (error);
957 }
958 
/*
 * copyout() flavor of watch_xcopyout(): any failure is reported as -1
 * rather than as an errno value.
 */
static int
watch_copyout(const void *kaddr, void *uaddr, size_t count)
{
	if (watch_xcopyout(kaddr, uaddr, count) != 0)
		return (-1);
	return (0);
}
964 
965 static int
966 watch_copyinstr(
967 	const char *uaddr,
968 	char *kaddr,
969 	size_t maxlength,
970 	size_t *lencopied)
971 {
972 	klwp_t *lwp = ttolwp(curthread);
973 	size_t resid;
974 	int error = 0;
975 	label_t ljb;
976 
977 	if ((resid = maxlength) == 0)
978 		return (ENAMETOOLONG);
979 
980 	while (resid && error == 0) {
981 		int watchcode;
982 		caddr_t vaddr;
983 		size_t part;
984 		size_t len;
985 		size_t size;
986 		int ta;
987 		int mapped;
988 
989 		if ((part = PAGESIZE -
990 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > resid)
991 			part = resid;
992 
993 		if (!pr_is_watchpage((caddr_t)uaddr, S_READ))
994 			watchcode = 0;
995 		else {
996 			vaddr = (caddr_t)uaddr;
997 			watchcode = pr_is_watchpoint(&vaddr, &ta,
998 			    part, &len, S_READ);
999 			if (watchcode) {
1000 				if (ta == 0)
1001 					part = vaddr - uaddr;
1002 				else {
1003 					len += vaddr - uaddr;
1004 					if (part > len)
1005 						part = len;
1006 				}
1007 			}
1008 		}
1009 
1010 		/*
1011 		 * Copy the initial part, up to a watched address, if any.
1012 		 */
1013 		if (part != 0) {
1014 			mapped = pr_mappage((caddr_t)uaddr, part, S_READ, 1);
1015 			if (on_fault(&ljb))
1016 				error = EFAULT;
1017 			else
1018 				error = copyinstr_noerr(uaddr, kaddr, part,
1019 				    &size);
1020 			no_fault();
1021 			if (mapped)
1022 				pr_unmappage((caddr_t)uaddr, part, S_READ, 1);
1023 			uaddr += size;
1024 			kaddr += size;
1025 			resid -= size;
1026 			if (error == ENAMETOOLONG && resid > 0)
1027 				error = 0;
1028 			if (error != 0 || (watchcode &&
1029 			    (uaddr < vaddr || kaddr[-1] == '\0')))
1030 				break;	/* didn't reach the watched area */
1031 		}
1032 
1033 		/*
1034 		 * If trapafter was specified, then copy through the
1035 		 * watched area before taking the watchpoint trap.
1036 		 */
1037 		while (resid && watchcode && ta && len > part && error == 0 &&
1038 		    size == part && kaddr[-1] != '\0') {
1039 			len -= part;
1040 			if ((part = PAGESIZE) > resid)
1041 				part = resid;
1042 			if (part > len)
1043 				part = len;
1044 			mapped = pr_mappage((caddr_t)uaddr, part, S_READ, 1);
1045 			if (on_fault(&ljb))
1046 				error = EFAULT;
1047 			else
1048 				error = copyinstr_noerr(uaddr, kaddr, part,
1049 				    &size);
1050 			no_fault();
1051 			if (mapped)
1052 				pr_unmappage((caddr_t)uaddr, part, S_READ, 1);
1053 			uaddr += size;
1054 			kaddr += size;
1055 			resid -= size;
1056 			if (error == ENAMETOOLONG && resid > 0)
1057 				error = 0;
1058 		}
1059 
1060 		/* if we hit a watched address, do the watchpoint logic */
1061 		if (watchcode &&
1062 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1063 		    lwp->lwp_sysabort)) {
1064 			lwp->lwp_sysabort = 0;
1065 			error = EFAULT;
1066 			break;
1067 		}
1068 
1069 		if (error == 0 && part != 0 &&
1070 		    (size < part || kaddr[-1] == '\0'))
1071 			break;
1072 	}
1073 
1074 	if (error != EFAULT && lencopied)
1075 		*lencopied = maxlength - resid;
1076 	return (error);
1077 }
1078 
1079 static int
1080 watch_copyoutstr(
1081 	const char *kaddr,
1082 	char *uaddr,
1083 	size_t maxlength,
1084 	size_t *lencopied)
1085 {
1086 	klwp_t *lwp = ttolwp(curthread);
1087 	size_t resid;
1088 	int error = 0;
1089 	label_t ljb;
1090 
1091 	if ((resid = maxlength) == 0)
1092 		return (ENAMETOOLONG);
1093 
1094 	while (resid && error == 0) {
1095 		int watchcode;
1096 		caddr_t vaddr;
1097 		size_t part;
1098 		size_t len;
1099 		size_t size;
1100 		int ta;
1101 		int mapped;
1102 
1103 		if ((part = PAGESIZE -
1104 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > resid)
1105 			part = resid;
1106 
1107 		if (!pr_is_watchpage(uaddr, S_WRITE)) {
1108 			watchcode = 0;
1109 		} else {
1110 			vaddr = uaddr;
1111 			watchcode = pr_is_watchpoint(&vaddr, &ta,
1112 			    part, &len, S_WRITE);
1113 			if (watchcode && ta == 0)
1114 				part = vaddr - uaddr;
1115 		}
1116 
1117 		/*
1118 		 * Copy the initial part, up to a watched address, if any.
1119 		 */
1120 		if (part != 0) {
1121 			mapped = pr_mappage(uaddr, part, S_WRITE, 1);
1122 			if (on_fault(&ljb))
1123 				error = EFAULT;
1124 			else
1125 				error = copyoutstr_noerr(kaddr, uaddr, part,
1126 				    &size);
1127 			no_fault();
1128 			if (mapped)
1129 				pr_unmappage(uaddr, part, S_WRITE, 1);
1130 			uaddr += size;
1131 			kaddr += size;
1132 			resid -= size;
1133 			if (error == ENAMETOOLONG && resid > 0)
1134 				error = 0;
1135 			if (error != 0 || (watchcode &&
1136 			    (uaddr < vaddr || kaddr[-1] == '\0')))
1137 				break;	/* didn't reach the watched area */
1138 		}
1139 
1140 		/*
1141 		 * If trapafter was specified, then copy through the
1142 		 * watched area before taking the watchpoint trap.
1143 		 */
1144 		while (resid && watchcode && ta && len > part && error == 0 &&
1145 		    size == part && kaddr[-1] != '\0') {
1146 			len -= part;
1147 			if ((part = PAGESIZE) > resid)
1148 				part = resid;
1149 			if (part > len)
1150 				part = len;
1151 			mapped = pr_mappage(uaddr, part, S_WRITE, 1);
1152 			if (on_fault(&ljb))
1153 				error = EFAULT;
1154 			else
1155 				error = copyoutstr_noerr(kaddr, uaddr, part,
1156 				    &size);
1157 			no_fault();
1158 			if (mapped)
1159 				pr_unmappage(uaddr, part, S_WRITE, 1);
1160 			uaddr += size;
1161 			kaddr += size;
1162 			resid -= size;
1163 			if (error == ENAMETOOLONG && resid > 0)
1164 				error = 0;
1165 		}
1166 
1167 		/* if we hit a watched address, do the watchpoint logic */
1168 		if (watchcode &&
1169 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1170 		    lwp->lwp_sysabort)) {
1171 			lwp->lwp_sysabort = 0;
1172 			error = EFAULT;
1173 			break;
1174 		}
1175 
1176 		if (error == 0 && part != 0 &&
1177 		    (size < part || kaddr[-1] == '\0'))
1178 			break;
1179 	}
1180 
1181 	if (error != EFAULT && lencopied)
1182 		*lencopied = maxlength - resid;
1183 	return (error);
1184 }
1185 
1186 typedef int (*fuword_func)(const void *, void *);
1187 
1188 /*
1189  * Generic form of watch_fuword8(), watch_fuword16(), etc.
1190  */
1191 static int
1192 watch_fuword(const void *addr, void *dst, fuword_func func, size_t size)
1193 {
1194 	klwp_t *lwp = ttolwp(curthread);
1195 	int watchcode;
1196 	caddr_t vaddr;
1197 	int mapped;
1198 	int rv = 0;
1199 	int ta;
1200 	label_t ljb;
1201 
1202 	for (;;) {
1203 
1204 		vaddr = (caddr_t)addr;
1205 		watchcode = pr_is_watchpoint(&vaddr, &ta, size, NULL, S_READ);
1206 		if (watchcode == 0 || ta != 0) {
1207 			mapped = pr_mappage((caddr_t)addr, size, S_READ, 1);
1208 			if (on_fault(&ljb))
1209 				rv = -1;
1210 			else
1211 				(*func)(addr, dst);
1212 			no_fault();
1213 			if (mapped)
1214 				pr_unmappage((caddr_t)addr, size, S_READ, 1);
1215 		}
1216 		if (watchcode &&
1217 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1218 		    lwp->lwp_sysabort)) {
1219 			lwp->lwp_sysabort = 0;
1220 			rv = -1;
1221 			break;
1222 		}
1223 		if (watchcode == 0 || ta != 0)
1224 			break;
1225 	}
1226 
1227 	return (rv);
1228 }
1229 
1230 static int
1231 watch_fuword8(const void *addr, uint8_t *dst)
1232 {
1233 	return (watch_fuword(addr, dst, (fuword_func)fuword8_noerr,
1234 	    sizeof (*dst)));
1235 }
1236 
1237 static int
1238 watch_fuword16(const void *addr, uint16_t *dst)
1239 {
1240 	return (watch_fuword(addr, dst, (fuword_func)fuword16_noerr,
1241 	    sizeof (*dst)));
1242 }
1243 
1244 static int
1245 watch_fuword32(const void *addr, uint32_t *dst)
1246 {
1247 	return (watch_fuword(addr, dst, (fuword_func)fuword32_noerr,
1248 	    sizeof (*dst)));
1249 }
1250 
1251 #ifdef _LP64
1252 static int
1253 watch_fuword64(const void *addr, uint64_t *dst)
1254 {
1255 	return (watch_fuword(addr, dst, (fuword_func)fuword64_noerr,
1256 	    sizeof (*dst)));
1257 }
1258 #endif
1259 
1260 
1261 static int
1262 watch_suword8(void *addr, uint8_t value)
1263 {
1264 	klwp_t *lwp = ttolwp(curthread);
1265 	int watchcode;
1266 	caddr_t vaddr;
1267 	int mapped;
1268 	int rv = 0;
1269 	int ta;
1270 	label_t ljb;
1271 
1272 	for (;;) {
1273 
1274 		vaddr = (caddr_t)addr;
1275 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1276 		    S_WRITE);
1277 		if (watchcode == 0 || ta != 0) {
1278 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1279 			    S_WRITE, 1);
1280 			if (on_fault(&ljb))
1281 				rv = -1;
1282 			else
1283 				suword8_noerr(addr, value);
1284 			no_fault();
1285 			if (mapped)
1286 				pr_unmappage((caddr_t)addr, sizeof (value),
1287 				    S_WRITE, 1);
1288 		}
1289 		if (watchcode &&
1290 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1291 		    lwp->lwp_sysabort)) {
1292 			lwp->lwp_sysabort = 0;
1293 			rv = -1;
1294 			break;
1295 		}
1296 		if (watchcode == 0 || ta != 0)
1297 			break;
1298 	}
1299 
1300 	return (rv);
1301 }
1302 
1303 static int
1304 watch_suword16(void *addr, uint16_t value)
1305 {
1306 	klwp_t *lwp = ttolwp(curthread);
1307 	int watchcode;
1308 	caddr_t vaddr;
1309 	int mapped;
1310 	int rv = 0;
1311 	int ta;
1312 	label_t ljb;
1313 
1314 	for (;;) {
1315 
1316 		vaddr = (caddr_t)addr;
1317 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1318 		    S_WRITE);
1319 		if (watchcode == 0 || ta != 0) {
1320 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1321 			    S_WRITE, 1);
1322 			if (on_fault(&ljb))
1323 				rv = -1;
1324 			else
1325 				suword16_noerr(addr, value);
1326 			no_fault();
1327 			if (mapped)
1328 				pr_unmappage((caddr_t)addr, sizeof (value),
1329 				    S_WRITE, 1);
1330 		}
1331 		if (watchcode &&
1332 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1333 		    lwp->lwp_sysabort)) {
1334 			lwp->lwp_sysabort = 0;
1335 			rv = -1;
1336 			break;
1337 		}
1338 		if (watchcode == 0 || ta != 0)
1339 			break;
1340 	}
1341 
1342 	return (rv);
1343 }
1344 
1345 static int
1346 watch_suword32(void *addr, uint32_t value)
1347 {
1348 	klwp_t *lwp = ttolwp(curthread);
1349 	int watchcode;
1350 	caddr_t vaddr;
1351 	int mapped;
1352 	int rv = 0;
1353 	int ta;
1354 	label_t ljb;
1355 
1356 	for (;;) {
1357 
1358 		vaddr = (caddr_t)addr;
1359 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1360 		    S_WRITE);
1361 		if (watchcode == 0 || ta != 0) {
1362 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1363 			    S_WRITE, 1);
1364 			if (on_fault(&ljb))
1365 				rv = -1;
1366 			else
1367 				suword32_noerr(addr, value);
1368 			no_fault();
1369 			if (mapped)
1370 				pr_unmappage((caddr_t)addr, sizeof (value),
1371 				    S_WRITE, 1);
1372 		}
1373 		if (watchcode &&
1374 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1375 		    lwp->lwp_sysabort)) {
1376 			lwp->lwp_sysabort = 0;
1377 			rv = -1;
1378 			break;
1379 		}
1380 		if (watchcode == 0 || ta != 0)
1381 			break;
1382 	}
1383 
1384 	return (rv);
1385 }
1386 
1387 #ifdef _LP64
1388 static int
1389 watch_suword64(void *addr, uint64_t value)
1390 {
1391 	klwp_t *lwp = ttolwp(curthread);
1392 	int watchcode;
1393 	caddr_t vaddr;
1394 	int mapped;
1395 	int rv = 0;
1396 	int ta;
1397 	label_t ljb;
1398 
1399 	for (;;) {
1400 
1401 		vaddr = (caddr_t)addr;
1402 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1403 		    S_WRITE);
1404 		if (watchcode == 0 || ta != 0) {
1405 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1406 			    S_WRITE, 1);
1407 			if (on_fault(&ljb))
1408 				rv = -1;
1409 			else
1410 				suword64_noerr(addr, value);
1411 			no_fault();
1412 			if (mapped)
1413 				pr_unmappage((caddr_t)addr, sizeof (value),
1414 				    S_WRITE, 1);
1415 		}
1416 		if (watchcode &&
1417 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1418 		    lwp->lwp_sysabort)) {
1419 			lwp->lwp_sysabort = 0;
1420 			rv = -1;
1421 			break;
1422 		}
1423 		if (watchcode == 0 || ta != 0)
1424 			break;
1425 	}
1426 
1427 	return (rv);
1428 }
1429 #endif /* _LP64 */
1430 
1431 /*
1432  * Check for watched addresses in the given address space.
1433  * Return 1 if this is true, otherwise 0.
1434  */
1435 static int
1436 pr_is_watched(caddr_t base, size_t len, int rw)
1437 {
1438 	caddr_t saddr = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
1439 	caddr_t eaddr = base + len;
1440 	caddr_t paddr;
1441 
1442 	for (paddr = saddr; paddr < eaddr; paddr += PAGESIZE) {
1443 		if (pr_is_watchpage(paddr, rw))
1444 			return (1);
1445 	}
1446 
1447 	return (0);
1448 }
1449 
1450 /*
1451  * Wrapper for the physio() function.
1452  * Splits one uio operation with multiple iovecs into uio operations with
1453  * only one iovecs to do the watchpoint handling separately for each iovecs.
1454  */
1455 static int
1456 watch_physio(int (*strat)(struct buf *), struct buf *bp, dev_t dev,
1457     int rw, void (*mincnt)(struct buf *), struct uio *uio)
1458 {
1459 	struct uio auio;
1460 	struct iovec *iov;
1461 	caddr_t  base;
1462 	size_t len;
1463 	int seg_rw;
1464 	int error = 0;
1465 
1466 	if (uio->uio_segflg == UIO_SYSSPACE)
1467 		return (default_physio(strat, bp, dev, rw, mincnt, uio));
1468 
1469 	seg_rw = (rw == B_READ) ? S_WRITE : S_READ;
1470 
1471 	while (uio->uio_iovcnt > 0) {
1472 		if (uio->uio_resid == 0) {
1473 			/*
1474 			 * Make sure to return the uio structure with the
1475 			 * same values as default_physio() does.
1476 			 */
1477 			uio->uio_iov++;
1478 			uio->uio_iovcnt--;
1479 			continue;
1480 		}
1481 
1482 		iov = uio->uio_iov;
1483 		len = MIN(iov->iov_len, uio->uio_resid);
1484 
1485 		auio.uio_iovcnt = 1;
1486 		auio.uio_iov = iov;
1487 		auio.uio_resid = len;
1488 		auio.uio_loffset = uio->uio_loffset;
1489 		auio.uio_llimit = uio->uio_llimit;
1490 		auio.uio_fmode = uio->uio_fmode;
1491 		auio.uio_extflg = uio->uio_extflg;
1492 		auio.uio_segflg = uio->uio_segflg;
1493 
1494 		base = iov->iov_base;
1495 
1496 		if (!pr_is_watched(base, len, seg_rw)) {
1497 			/*
1498 			 * The given memory references don't cover a
1499 			 * watched page.
1500 			 */
1501 			error = default_physio(strat, bp, dev, rw, mincnt,
1502 			    &auio);
1503 
1504 			/* Update uio with values from auio. */
1505 			len -= auio.uio_resid;
1506 			uio->uio_resid -= len;
1507 			uio->uio_loffset += len;
1508 
1509 			/*
1510 			 * Return if an error occurred or not all data
1511 			 * was copied.
1512 			 */
1513 			if (auio.uio_resid || error)
1514 				break;
1515 			uio->uio_iov++;
1516 			uio->uio_iovcnt--;
1517 		} else {
1518 			int mapped, watchcode, ta;
1519 			caddr_t vaddr = base;
1520 			klwp_t *lwp = ttolwp(curthread);
1521 
1522 			watchcode = pr_is_watchpoint(&vaddr, &ta, len,
1523 			    NULL, seg_rw);
1524 
1525 			if (watchcode == 0 || ta != 0) {
1526 				/*
1527 				 * Do the io if the given memory references
1528 				 * don't cover a watched area (watchcode=0)
1529 				 * or if WA_TRAPAFTER was specified.
1530 				 */
1531 				mapped = pr_mappage(base, len, seg_rw, 1);
1532 				error = default_physio(strat, bp, dev, rw,
1533 				    mincnt, &auio);
1534 				if (mapped)
1535 					pr_unmappage(base, len, seg_rw, 1);
1536 
1537 				len -= auio.uio_resid;
1538 				uio->uio_resid -= len;
1539 				uio->uio_loffset += len;
1540 			}
1541 
1542 			/*
1543 			 * If we hit a watched address, do the watchpoint logic.
1544 			 */
1545 			if (watchcode &&
1546 			    (!sys_watchpoint(vaddr, watchcode, ta) ||
1547 			    lwp->lwp_sysabort)) {
1548 				lwp->lwp_sysabort = 0;
1549 				return (EFAULT);
1550 			}
1551 
1552 			/*
1553 			 * Check for errors from default_physio().
1554 			 */
1555 			if (watchcode == 0 || ta != 0) {
1556 				if (auio.uio_resid || error)
1557 					break;
1558 				uio->uio_iov++;
1559 				uio->uio_iovcnt--;
1560 			}
1561 		}
1562 	}
1563 
1564 	return (error);
1565 }
1566 
1567 int
1568 wa_compare(const void *a, const void *b)
1569 {
1570 	const watched_area_t *pa = a;
1571 	const watched_area_t *pb = b;
1572 
1573 	if (pa->wa_vaddr < pb->wa_vaddr)
1574 		return (-1);
1575 	else if (pa->wa_vaddr > pb->wa_vaddr)
1576 		return (1);
1577 	else
1578 		return (0);
1579 }
1580 
1581 int
1582 wp_compare(const void *a, const void *b)
1583 {
1584 	const watched_page_t *pa = a;
1585 	const watched_page_t *pb = b;
1586 
1587 	if (pa->wp_vaddr < pb->wp_vaddr)
1588 		return (-1);
1589 	else if (pa->wp_vaddr > pb->wp_vaddr)
1590 		return (1);
1591 	else
1592 		return (0);
1593 }
1594 
1595 /*
1596  * Given an address range, finds the first watched area which overlaps some or
1597  * all of the range.
1598  */
1599 watched_area_t *
1600 pr_find_watched_area(proc_t *p, watched_area_t *pwa, avl_index_t *where)
1601 {
1602 	caddr_t vaddr = pwa->wa_vaddr;
1603 	caddr_t eaddr = pwa->wa_eaddr;
1604 	watched_area_t *wap;
1605 	avl_index_t real_where;
1606 
1607 	/* First, check if there is an exact match.  */
1608 	wap = avl_find(&p->p_warea, pwa, &real_where);
1609 
1610 
1611 	/* Check to see if we overlap with the previous area.  */
1612 	if (wap == NULL) {
1613 		wap = avl_nearest(&p->p_warea, real_where, AVL_BEFORE);
1614 		if (wap != NULL &&
1615 		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
1616 			wap = NULL;
1617 	}
1618 
1619 	/* Try the next area.  */
1620 	if (wap == NULL) {
1621 		wap = avl_nearest(&p->p_warea, real_where, AVL_AFTER);
1622 		if (wap != NULL &&
1623 		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
1624 			wap = NULL;
1625 	}
1626 
1627 	if (where)
1628 		*where = real_where;
1629 
1630 	return (wap);
1631 }
1632 
1633 void
1634 watch_enable(kthread_id_t t)
1635 {
1636 	t->t_proc_flag |= TP_WATCHPT;
1637 	install_copyops(t, &watch_copyops);
1638 }
1639 
1640 void
1641 watch_disable(kthread_id_t t)
1642 {
1643 	t->t_proc_flag &= ~TP_WATCHPT;
1644 	remove_copyops(t);
1645 }
1646 
1647 int
1648 copyin_nowatch(const void *uaddr, void *kaddr, size_t len)
1649 {
1650 	int watched, ret;
1651 
1652 	watched = watch_disable_addr(uaddr, len, S_READ);
1653 	ret = copyin(uaddr, kaddr, len);
1654 	if (watched)
1655 		watch_enable_addr(uaddr, len, S_READ);
1656 
1657 	return (ret);
1658 }
1659 
1660 int
1661 copyout_nowatch(const void *kaddr, void *uaddr, size_t len)
1662 {
1663 	int watched, ret;
1664 
1665 	watched = watch_disable_addr(uaddr, len, S_WRITE);
1666 	ret = copyout(kaddr, uaddr, len);
1667 	if (watched)
1668 		watch_enable_addr(uaddr, len, S_WRITE);
1669 
1670 	return (ret);
1671 }
1672 
1673 #ifdef _LP64
1674 int
1675 fuword64_nowatch(const void *addr, uint64_t *value)
1676 {
1677 	int watched, ret;
1678 
1679 	watched = watch_disable_addr(addr, sizeof (*value), S_READ);
1680 	ret = fuword64(addr, value);
1681 	if (watched)
1682 		watch_enable_addr(addr, sizeof (*value), S_READ);
1683 
1684 	return (ret);
1685 }
1686 #endif
1687 
1688 int
1689 fuword32_nowatch(const void *addr, uint32_t *value)
1690 {
1691 	int watched, ret;
1692 
1693 	watched = watch_disable_addr(addr, sizeof (*value), S_READ);
1694 	ret = fuword32(addr, value);
1695 	if (watched)
1696 		watch_enable_addr(addr, sizeof (*value), S_READ);
1697 
1698 	return (ret);
1699 }
1700 
1701 #ifdef _LP64
1702 int
1703 suword64_nowatch(void *addr, uint64_t value)
1704 {
1705 	int watched, ret;
1706 
1707 	watched = watch_disable_addr(addr, sizeof (value), S_WRITE);
1708 	ret = suword64(addr, value);
1709 	if (watched)
1710 		watch_enable_addr(addr, sizeof (value), S_WRITE);
1711 
1712 	return (ret);
1713 }
1714 #endif
1715 
1716 int
1717 suword32_nowatch(void *addr, uint32_t value)
1718 {
1719 	int watched, ret;
1720 
1721 	watched = watch_disable_addr(addr, sizeof (value), S_WRITE);
1722 	ret = suword32(addr, value);
1723 	if (watched)
1724 		watch_enable_addr(addr, sizeof (value), S_WRITE);
1725 
1726 	return (ret);
1727 }
1728 
1729 int
1730 watch_disable_addr(const void *addr, size_t len, enum seg_rw rw)
1731 {
1732 	if (pr_watch_active(curproc))
1733 		return (pr_mappage((caddr_t)addr, len, rw, 1));
1734 	return (0);
1735 }
1736 
1737 void
1738 watch_enable_addr(const void *addr, size_t len, enum seg_rw rw)
1739 {
1740 	if (pr_watch_active(curproc))
1741 		pr_unmappage((caddr_t)addr, len, rw, 1);
1742 }
1743