xref: /titanic_52/usr/src/uts/common/os/watchpoint.c (revision 0d5ae8c1274da6a6c74059317942eea625104946)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/t_lock.h>
29 #include <sys/param.h>
30 #include <sys/cred.h>
31 #include <sys/debug.h>
32 #include <sys/inline.h>
33 #include <sys/kmem.h>
34 #include <sys/proc.h>
35 #include <sys/regset.h>
36 #include <sys/sysmacros.h>
37 #include <sys/systm.h>
38 #include <sys/prsystm.h>
39 #include <sys/buf.h>
40 #include <sys/signal.h>
41 #include <sys/user.h>
42 #include <sys/cpuvar.h>
43 
44 #include <sys/fault.h>
45 #include <sys/syscall.h>
46 #include <sys/procfs.h>
47 #include <sys/cmn_err.h>
48 #include <sys/stack.h>
49 #include <sys/watchpoint.h>
50 #include <sys/copyops.h>
51 #include <sys/schedctl.h>
52 
53 #include <sys/mman.h>
54 #include <vm/as.h>
55 #include <vm/seg.h>
56 
57 /*
58  * Copy ops vector for watchpoints.
59  */
60 static int	watch_copyin(const void *, void *, size_t);
61 static int	watch_xcopyin(const void *, void *, size_t);
62 static int	watch_copyout(const void *, void *, size_t);
63 static int	watch_xcopyout(const void *, void *, size_t);
64 static int	watch_copyinstr(const char *, char *, size_t, size_t *);
65 static int	watch_copyoutstr(const char *, char *, size_t, size_t *);
66 static int	watch_fuword8(const void *, uint8_t *);
67 static int	watch_fuword16(const void *, uint16_t *);
68 static int	watch_fuword32(const void *, uint32_t *);
69 static int	watch_suword8(void *, uint8_t);
70 static int	watch_suword16(void *, uint16_t);
71 static int	watch_suword32(void *, uint32_t);
72 static int	watch_physio(int (*)(struct buf *), struct buf *,
73     dev_t, int, void (*)(struct buf *), struct uio *);
74 #ifdef _LP64
75 static int	watch_fuword64(const void *, uint64_t *);
76 static int	watch_suword64(void *, uint64_t);
77 #endif
78 
79 struct copyops watch_copyops = {
80 	watch_copyin,
81 	watch_xcopyin,
82 	watch_copyout,
83 	watch_xcopyout,
84 	watch_copyinstr,
85 	watch_copyoutstr,
86 	watch_fuword8,
87 	watch_fuword16,
88 	watch_fuword32,
89 #ifdef _LP64
90 	watch_fuword64,
91 #else
92 	NULL,
93 #endif
94 	watch_suword8,
95 	watch_suword16,
96 	watch_suword32,
97 #ifdef _LP64
98 	watch_suword64,
99 #else
100 	NULL,
101 #endif
102 	watch_physio
103 };
104 
105 /*
106  * Map the 'rw' argument to a protection flag.
107  */
108 static int
109 rw_to_prot(enum seg_rw rw)
110 {
111 	switch (rw) {
112 	case S_EXEC:
113 		return (PROT_EXEC);
114 	case S_READ:
115 		return (PROT_READ);
116 	case S_WRITE:
117 		return (PROT_WRITE);
118 	default:
119 		return (PROT_NONE);	/* can't happen */
120 	}
121 }
122 
123 /*
124  * Map the 'rw' argument to an index into an array of exec/write/read things.
125  * The index follows the precedence order:  exec .. write .. read
126  */
127 static int
128 rw_to_index(enum seg_rw rw)
129 {
130 	switch (rw) {
131 	default:	/* default case "can't happen" */
132 	case S_EXEC:
133 		return (0);
134 	case S_WRITE:
135 		return (1);
136 	case S_READ:
137 		return (2);
138 	}
139 }
140 
141 /*
142  * Map an index back to a seg_rw.
143  */
144 static enum seg_rw S_rw[4] = {
145 	S_EXEC,
146 	S_WRITE,
147 	S_READ,
148 	S_READ,
149 };
150 
/*
 * Indices into the per-page exec/write/read reference-count arrays
 * (same order as rw_to_index()), and a helper that totals one such array.
 * The argument of sum() is parenthesized so that any pointer-valued
 * expression, not just a plain identifier, expands correctly.
 */
#define	X	0
#define	W	1
#define	R	2
#define	sum(a)	((a)[X] + (a)[W] + (a)[R])
155 
156 /*
157  * Common code for pr_mappage() and pr_unmappage().
158  */
159 static int
160 pr_do_mappage(caddr_t addr, size_t size, int mapin, enum seg_rw rw, int kernel)
161 {
162 	proc_t *p = curproc;
163 	struct as *as = p->p_as;
164 	char *eaddr = addr + size;
165 	int prot_rw = rw_to_prot(rw);
166 	int xrw = rw_to_index(rw);
167 	int rv = 0;
168 	struct watched_page *pwp;
169 	struct watched_page tpw;
170 	avl_index_t where;
171 	uint_t prot;
172 
173 	ASSERT(as != &kas);
174 
175 startover:
176 	ASSERT(rv == 0);
177 	if (avl_numnodes(&as->a_wpage) == 0)
178 		return (0);
179 
180 	/*
181 	 * as->a_wpage can only be changed while the process is totally stopped.
182 	 * Don't grab p_lock here.  Holding p_lock while grabbing the address
183 	 * space lock leads to deadlocks with the clock thread.
184 	 *
185 	 * p_maplock prevents simultaneous execution of this function.  Under
186 	 * normal circumstances, holdwatch() will stop all other threads, so the
187 	 * lock isn't really needed.  But there may be multiple threads within
188 	 * stop() when SWATCHOK is set, so we need to handle multiple threads
189 	 * at once.  See holdwatch() for the details of this dance.
190 	 */
191 
192 	mutex_enter(&p->p_maplock);
193 
194 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
195 	if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
196 		pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
197 
198 	for (; pwp != NULL && pwp->wp_vaddr < eaddr;
199 	    pwp = AVL_NEXT(&as->a_wpage, pwp)) {
200 
201 		/*
202 		 * If the requested protection has not been
203 		 * removed, we need not remap this page.
204 		 */
205 		prot = pwp->wp_prot;
206 		if (kernel || (prot & PROT_USER))
207 			if (prot & prot_rw)
208 				continue;
209 		/*
210 		 * If the requested access does not exist in the page's
211 		 * original protections, we need not remap this page.
212 		 * If the page does not exist yet, we can't test it.
213 		 */
214 		if ((prot = pwp->wp_oprot) != 0) {
215 			if (!(kernel || (prot & PROT_USER)))
216 				continue;
217 			if (!(prot & prot_rw))
218 				continue;
219 		}
220 
221 		if (mapin) {
222 			/*
223 			 * Before mapping the page in, ensure that
224 			 * all other lwps are held in the kernel.
225 			 */
226 			if (p->p_mapcnt == 0) {
227 				mutex_exit(&p->p_maplock);
228 				if (holdwatch() != 0) {
229 					/*
230 					 * We stopped in holdwatch().
231 					 * Start all over again because the
232 					 * watched page list may have changed.
233 					 */
234 					goto startover;
235 				}
236 				mutex_enter(&p->p_maplock);
237 			}
238 			p->p_mapcnt++;
239 		}
240 
241 		addr = pwp->wp_vaddr;
242 		rv++;
243 
244 		prot = pwp->wp_prot;
245 		if (mapin) {
246 			if (kernel)
247 				pwp->wp_kmap[xrw]++;
248 			else
249 				pwp->wp_umap[xrw]++;
250 			pwp->wp_flags |= WP_NOWATCH;
251 			if (pwp->wp_kmap[X] + pwp->wp_umap[X])
252 				/* cannot have exec-only protection */
253 				prot |= PROT_READ|PROT_EXEC;
254 			if (pwp->wp_kmap[R] + pwp->wp_umap[R])
255 				prot |= PROT_READ;
256 			if (pwp->wp_kmap[W] + pwp->wp_umap[W])
257 				/* cannot have write-only protection */
258 				prot |= PROT_READ|PROT_WRITE;
259 #if 0	/* damned broken mmu feature! */
260 			if (sum(pwp->wp_umap) == 0)
261 				prot &= ~PROT_USER;
262 #endif
263 		} else {
264 			ASSERT(pwp->wp_flags & WP_NOWATCH);
265 			if (kernel) {
266 				ASSERT(pwp->wp_kmap[xrw] != 0);
267 				--pwp->wp_kmap[xrw];
268 			} else {
269 				ASSERT(pwp->wp_umap[xrw] != 0);
270 				--pwp->wp_umap[xrw];
271 			}
272 			if (sum(pwp->wp_kmap) + sum(pwp->wp_umap) == 0)
273 				pwp->wp_flags &= ~WP_NOWATCH;
274 			else {
275 				if (pwp->wp_kmap[X] + pwp->wp_umap[X])
276 					/* cannot have exec-only protection */
277 					prot |= PROT_READ|PROT_EXEC;
278 				if (pwp->wp_kmap[R] + pwp->wp_umap[R])
279 					prot |= PROT_READ;
280 				if (pwp->wp_kmap[W] + pwp->wp_umap[W])
281 					/* cannot have write-only protection */
282 					prot |= PROT_READ|PROT_WRITE;
283 #if 0	/* damned broken mmu feature! */
284 				if (sum(pwp->wp_umap) == 0)
285 					prot &= ~PROT_USER;
286 #endif
287 			}
288 		}
289 
290 
291 		if (pwp->wp_oprot != 0) {	/* if page exists */
292 			struct seg *seg;
293 			uint_t oprot;
294 			int err, retrycnt = 0;
295 
296 			AS_LOCK_ENTER(as, RW_WRITER);
297 		retry:
298 			seg = as_segat(as, addr);
299 			ASSERT(seg != NULL);
300 			SEGOP_GETPROT(seg, addr, 0, &oprot);
301 			if (prot != oprot) {
302 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
303 				if (err == IE_RETRY) {
304 					ASSERT(retrycnt == 0);
305 					retrycnt++;
306 					goto retry;
307 				}
308 			}
309 			AS_LOCK_EXIT(as);
310 		}
311 
312 		/*
313 		 * When all pages are mapped back to their normal state,
314 		 * continue the other lwps.
315 		 */
316 		if (!mapin) {
317 			ASSERT(p->p_mapcnt > 0);
318 			p->p_mapcnt--;
319 			if (p->p_mapcnt == 0) {
320 				mutex_exit(&p->p_maplock);
321 				mutex_enter(&p->p_lock);
322 				continuelwps(p);
323 				mutex_exit(&p->p_lock);
324 				mutex_enter(&p->p_maplock);
325 			}
326 		}
327 	}
328 
329 	mutex_exit(&p->p_maplock);
330 
331 	return (rv);
332 }
333 
334 /*
335  * Restore the original page protections on an address range.
336  * If 'kernel' is non-zero, just do it for the kernel.
337  * pr_mappage() returns non-zero if it actually changed anything.
338  *
339  * pr_mappage() and pr_unmappage() must be executed in matched pairs,
340  * but pairs may be nested within other pairs.  The reference counts
341  * sort it all out.  See pr_do_mappage(), above.
342  */
343 static int
344 pr_mappage(const caddr_t addr, size_t size, enum seg_rw rw, int kernel)
345 {
346 	return (pr_do_mappage(addr, size, 1, rw, kernel));
347 }
348 
349 /*
350  * Set the modified page protections on a watched page.
351  * Inverse of pr_mappage().
352  * Needs to be called only if pr_mappage() returned non-zero.
353  */
354 static void
355 pr_unmappage(const caddr_t addr, size_t size, enum seg_rw rw, int kernel)
356 {
357 	(void) pr_do_mappage(addr, size, 0, rw, kernel);
358 }
359 
360 /*
361  * Function called by an lwp after it resumes from stop().
362  */
363 void
364 setallwatch(void)
365 {
366 	proc_t *p = curproc;
367 	struct as *as = curproc->p_as;
368 	struct watched_page *pwp, *next;
369 	struct seg *seg;
370 	caddr_t vaddr;
371 	uint_t prot;
372 	int err, retrycnt;
373 
374 	if (p->p_wprot == NULL)
375 		return;
376 
377 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
378 
379 	AS_LOCK_ENTER(as, RW_WRITER);
380 
381 	pwp = p->p_wprot;
382 	while (pwp != NULL) {
383 
384 		vaddr = pwp->wp_vaddr;
385 		retrycnt = 0;
386 	retry:
387 		ASSERT(pwp->wp_flags & WP_SETPROT);
388 		if ((seg = as_segat(as, vaddr)) != NULL &&
389 		    !(pwp->wp_flags & WP_NOWATCH)) {
390 			prot = pwp->wp_prot;
391 			err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
392 			if (err == IE_RETRY) {
393 				ASSERT(retrycnt == 0);
394 				retrycnt++;
395 				goto retry;
396 			}
397 		}
398 
399 		next = pwp->wp_list;
400 
401 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec == 0) {
402 			/*
403 			 * No watched areas remain in this page.
404 			 * Free the watched_page structure.
405 			 */
406 			avl_remove(&as->a_wpage, pwp);
407 			kmem_free(pwp, sizeof (struct watched_page));
408 		} else {
409 			pwp->wp_flags &= ~WP_SETPROT;
410 		}
411 
412 		pwp = next;
413 	}
414 	p->p_wprot = NULL;
415 
416 	AS_LOCK_EXIT(as);
417 }
418 
419 
420 
421 int
422 pr_is_watchpage_as(caddr_t addr, enum seg_rw rw, struct as *as)
423 {
424 	register struct watched_page *pwp;
425 	struct watched_page tpw;
426 	uint_t prot;
427 	int rv = 0;
428 
429 	switch (rw) {
430 	case S_READ:
431 	case S_WRITE:
432 	case S_EXEC:
433 		break;
434 	default:
435 		return (0);
436 	}
437 
438 	/*
439 	 * as->a_wpage can only be modified while the process is totally
440 	 * stopped.  We need, and should use, no locks here.
441 	 */
442 	if (as != &kas && avl_numnodes(&as->a_wpage) != 0) {
443 		tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
444 		pwp = avl_find(&as->a_wpage, &tpw, NULL);
445 		if (pwp != NULL) {
446 			ASSERT(addr >= pwp->wp_vaddr &&
447 			    addr < pwp->wp_vaddr + PAGESIZE);
448 			if (pwp->wp_oprot != 0) {
449 				prot = pwp->wp_prot;
450 				switch (rw) {
451 				case S_READ:
452 					rv = ((prot & (PROT_USER|PROT_READ))
453 					    != (PROT_USER|PROT_READ));
454 					break;
455 				case S_WRITE:
456 					rv = ((prot & (PROT_USER|PROT_WRITE))
457 					    != (PROT_USER|PROT_WRITE));
458 					break;
459 				case S_EXEC:
460 					rv = ((prot & (PROT_USER|PROT_EXEC))
461 					    != (PROT_USER|PROT_EXEC));
462 					break;
463 				default:
464 					/* can't happen! */
465 					break;
466 				}
467 			}
468 		}
469 	}
470 
471 	return (rv);
472 }
473 
474 
475 /*
476  * trap() calls here to determine if a fault is in a watched page.
477  * We return nonzero if this is true and the load/store would fail.
478  */
479 int
480 pr_is_watchpage(caddr_t addr, enum seg_rw rw)
481 {
482 	struct as *as = curproc->p_as;
483 
484 	if ((as == &kas) || avl_numnodes(&as->a_wpage) == 0)
485 		return (0);
486 
487 	return (pr_is_watchpage_as(addr, rw, as));
488 }
489 
490 
491 
492 /*
493  * trap() calls here to determine if a fault is a watchpoint.
494  */
495 int
496 pr_is_watchpoint(caddr_t *paddr, int *pta, size_t size, size_t *plen,
497 	enum seg_rw rw)
498 {
499 	proc_t *p = curproc;
500 	caddr_t addr = *paddr;
501 	caddr_t eaddr = addr + size;
502 	register struct watched_area *pwa;
503 	struct watched_area twa;
504 	int rv = 0;
505 	int ta = 0;
506 	size_t len = 0;
507 
508 	switch (rw) {
509 	case S_READ:
510 	case S_WRITE:
511 	case S_EXEC:
512 		break;
513 	default:
514 		*pta = 0;
515 		return (0);
516 	}
517 
518 	/*
519 	 * p->p_warea is protected by p->p_lock.
520 	 */
521 	mutex_enter(&p->p_lock);
522 
523 	/* BEGIN CSTYLED */
524 	/*
525 	 * This loop is somewhat complicated because the fault region can span
526 	 * multiple watched areas.  For example:
527 	 *
528 	 *            addr              eaddr
529 	 * 		+-----------------+
530 	 * 		| fault region    |
531 	 * 	+-------+--------+----+---+------------+
532 	 *      | prot not right |    | prot correct   |
533 	 *      +----------------+    +----------------+
534 	 *    wa_vaddr	      wa_eaddr
535 	 *    		      wa_vaddr		wa_eaddr
536 	 *
537 	 * We start at the area greater than or equal to the starting address.
538 	 * As long as some portion of the fault region overlaps the current
539 	 * area, we continue checking permissions until we find an appropriate
540 	 * match.
541 	 */
542 	/* END CSTYLED */
543 	twa.wa_vaddr = addr;
544 	twa.wa_eaddr = eaddr;
545 
546 	for (pwa = pr_find_watched_area(p, &twa, NULL);
547 	    pwa != NULL && eaddr > pwa->wa_vaddr && addr < pwa->wa_eaddr;
548 	    pwa = AVL_NEXT(&p->p_warea, pwa)) {
549 
550 		switch (rw) {
551 		case S_READ:
552 			if (pwa->wa_flags & WA_READ)
553 				rv = TRAP_RWATCH;
554 			break;
555 		case S_WRITE:
556 			if (pwa->wa_flags & WA_WRITE)
557 				rv = TRAP_WWATCH;
558 			break;
559 		case S_EXEC:
560 			if (pwa->wa_flags & WA_EXEC)
561 				rv = TRAP_XWATCH;
562 			break;
563 		default:
564 			/* can't happen */
565 			break;
566 		}
567 
568 		/*
569 		 * If protections didn't match, check the next watched
570 		 * area
571 		 */
572 		if (rv != 0) {
573 			if (addr < pwa->wa_vaddr)
574 				addr = pwa->wa_vaddr;
575 			len = pwa->wa_eaddr - addr;
576 			if (pwa->wa_flags & WA_TRAPAFTER)
577 				ta = 1;
578 			break;
579 		}
580 	}
581 
582 	mutex_exit(&p->p_lock);
583 
584 	*paddr = addr;
585 	*pta = ta;
586 	if (plen != NULL)
587 		*plen = len;
588 	return (rv);
589 }
590 
591 /*
592  * Set up to perform a single-step at user level for the
593  * case of a trapafter watchpoint.  Called from trap().
594  */
595 void
596 do_watch_step(caddr_t vaddr, size_t sz, enum seg_rw rw,
597 	int watchcode, greg_t pc)
598 {
599 	register klwp_t *lwp = ttolwp(curthread);
600 	struct lwp_watch *pw = &lwp->lwp_watch[rw_to_index(rw)];
601 
602 	/*
603 	 * Check to see if we are already performing this special
604 	 * watchpoint single-step.  We must not do pr_mappage() twice.
605 	 */
606 
607 	/* special check for two read traps on the same instruction */
608 	if (rw == S_READ && pw->wpaddr != NULL &&
609 	    !(pw->wpaddr <= vaddr && vaddr < pw->wpaddr + pw->wpsize)) {
610 		ASSERT(lwp->lwp_watchtrap != 0);
611 		pw++;	/* use the extra S_READ struct */
612 	}
613 
614 	if (pw->wpaddr != NULL) {
615 		ASSERT(lwp->lwp_watchtrap != 0);
616 		ASSERT(pw->wpaddr <= vaddr && vaddr < pw->wpaddr + pw->wpsize);
617 		if (pw->wpcode == 0) {
618 			pw->wpcode = watchcode;
619 			pw->wppc = pc;
620 		}
621 	} else {
622 		int mapped = pr_mappage(vaddr, sz, rw, 0);
623 		prstep(lwp, 1);
624 		lwp->lwp_watchtrap = 1;
625 		pw->wpaddr = vaddr;
626 		pw->wpsize = sz;
627 		pw->wpcode = watchcode;
628 		pw->wpmapped = mapped;
629 		pw->wppc = pc;
630 	}
631 }
632 
633 /*
634  * Undo the effects of do_watch_step().
635  * Called from trap() after the single-step is finished.
636  * Also called from issig_forreal() and stop() with a NULL
637  * argument to avoid having these things set more than once.
638  */
639 int
640 undo_watch_step(k_siginfo_t *sip)
641 {
642 	register klwp_t *lwp = ttolwp(curthread);
643 	int fault = 0;
644 
645 	if (lwp->lwp_watchtrap) {
646 		struct lwp_watch *pw = lwp->lwp_watch;
647 		int i;
648 
649 		for (i = 0; i < 4; i++, pw++) {
650 			if (pw->wpaddr == NULL)
651 				continue;
652 			if (pw->wpmapped)
653 				pr_unmappage(pw->wpaddr, pw->wpsize, S_rw[i],
654 				    0);
655 			if (pw->wpcode != 0) {
656 				if (sip != NULL) {
657 					sip->si_signo = SIGTRAP;
658 					sip->si_code = pw->wpcode;
659 					sip->si_addr = pw->wpaddr;
660 					sip->si_trapafter = 1;
661 					sip->si_pc = (caddr_t)pw->wppc;
662 				}
663 				fault = FLTWATCH;
664 				pw->wpcode = 0;
665 			}
666 			pw->wpaddr = NULL;
667 			pw->wpsize = 0;
668 			pw->wpmapped = 0;
669 		}
670 		lwp->lwp_watchtrap = 0;
671 	}
672 
673 	return (fault);
674 }
675 
676 /*
677  * Handle a watchpoint that occurs while doing copyin()
678  * or copyout() in a system call.
679  * Return non-zero if the fault or signal is cleared
680  * by a debugger while the lwp is stopped.
681  */
682 static int
683 sys_watchpoint(caddr_t addr, int watchcode, int ta)
684 {
685 	extern greg_t getuserpc(void);	/* XXX header file */
686 	k_sigset_t smask;
687 	register proc_t *p = ttoproc(curthread);
688 	register klwp_t *lwp = ttolwp(curthread);
689 	register sigqueue_t *sqp;
690 	int rval;
691 
692 	/* assert no locks are held */
693 	/* ASSERT(curthread->t_nlocks == 0); */
694 
695 	sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
696 	sqp->sq_info.si_signo = SIGTRAP;
697 	sqp->sq_info.si_code = watchcode;
698 	sqp->sq_info.si_addr = addr;
699 	sqp->sq_info.si_trapafter = ta;
700 	sqp->sq_info.si_pc = (caddr_t)getuserpc();
701 
702 	mutex_enter(&p->p_lock);
703 
704 	/* this will be tested and cleared by the caller */
705 	lwp->lwp_sysabort = 0;
706 
707 	if (prismember(&p->p_fltmask, FLTWATCH)) {
708 		lwp->lwp_curflt = (uchar_t)FLTWATCH;
709 		lwp->lwp_siginfo = sqp->sq_info;
710 		stop(PR_FAULTED, FLTWATCH);
711 		if (lwp->lwp_curflt == 0) {
712 			mutex_exit(&p->p_lock);
713 			kmem_free(sqp, sizeof (sigqueue_t));
714 			return (1);
715 		}
716 		lwp->lwp_curflt = 0;
717 	}
718 
719 	/*
720 	 * post the SIGTRAP signal.
721 	 * Block all other signals so we only stop showing SIGTRAP.
722 	 */
723 	if (signal_is_blocked(curthread, SIGTRAP) ||
724 	    sigismember(&p->p_ignore, SIGTRAP)) {
725 		/* SIGTRAP is blocked or ignored, forget the rest. */
726 		mutex_exit(&p->p_lock);
727 		kmem_free(sqp, sizeof (sigqueue_t));
728 		return (0);
729 	}
730 	sigdelq(p, curthread, SIGTRAP);
731 	sigaddqa(p, curthread, sqp);
732 	schedctl_finish_sigblock(curthread);
733 	smask = curthread->t_hold;
734 	sigfillset(&curthread->t_hold);
735 	sigdiffset(&curthread->t_hold, &cantmask);
736 	sigdelset(&curthread->t_hold, SIGTRAP);
737 	mutex_exit(&p->p_lock);
738 
739 	rval = ((ISSIG_FAST(curthread, lwp, p, FORREAL))? 0 : 1);
740 
741 	/* restore the original signal mask */
742 	mutex_enter(&p->p_lock);
743 	curthread->t_hold = smask;
744 	mutex_exit(&p->p_lock);
745 
746 	return (rval);
747 }
748 
749 /*
750  * Wrappers for the copyin()/copyout() functions to deal
751  * with watchpoints that fire while in system calls.
752  */
753 
754 static int
755 watch_xcopyin(const void *uaddr, void *kaddr, size_t count)
756 {
757 	klwp_t *lwp = ttolwp(curthread);
758 	caddr_t watch_uaddr = (caddr_t)uaddr;
759 	caddr_t watch_kaddr = (caddr_t)kaddr;
760 	int error = 0;
761 	label_t ljb;
762 	size_t part;
763 	int mapped;
764 
765 	while (count && error == 0) {
766 		int watchcode;
767 		caddr_t vaddr;
768 		size_t len;
769 		int ta;
770 
771 		if ((part = PAGESIZE -
772 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > count)
773 			part = count;
774 
775 		if (!pr_is_watchpage(watch_uaddr, S_READ))
776 			watchcode = 0;
777 		else {
778 			vaddr = watch_uaddr;
779 			watchcode = pr_is_watchpoint(&vaddr, &ta,
780 			    part, &len, S_READ);
781 			if (watchcode && ta == 0)
782 				part = vaddr - watch_uaddr;
783 		}
784 
785 		/*
786 		 * Copy the initial part, up to a watched address, if any.
787 		 */
788 		if (part != 0) {
789 			mapped = pr_mappage(watch_uaddr, part, S_READ, 1);
790 			if (on_fault(&ljb))
791 				error = EFAULT;
792 			else
793 				copyin_noerr(watch_uaddr, watch_kaddr, part);
794 			no_fault();
795 			if (mapped)
796 				pr_unmappage(watch_uaddr, part, S_READ, 1);
797 			watch_uaddr += part;
798 			watch_kaddr += part;
799 			count -= part;
800 		}
801 		/*
802 		 * If trapafter was specified, then copy through the
803 		 * watched area before taking the watchpoint trap.
804 		 */
805 		while (count && watchcode && ta && len > part && error == 0) {
806 			len -= part;
807 			if ((part = PAGESIZE) > count)
808 				part = count;
809 			if (part > len)
810 				part = len;
811 			mapped = pr_mappage(watch_uaddr, part, S_READ, 1);
812 			if (on_fault(&ljb))
813 				error = EFAULT;
814 			else
815 				copyin_noerr(watch_uaddr, watch_kaddr, part);
816 			no_fault();
817 			if (mapped)
818 				pr_unmappage(watch_uaddr, part, S_READ, 1);
819 			watch_uaddr += part;
820 			watch_kaddr += part;
821 			count -= part;
822 		}
823 
824 error:
825 		/* if we hit a watched address, do the watchpoint logic */
826 		if (watchcode &&
827 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
828 		    lwp->lwp_sysabort)) {
829 			lwp->lwp_sysabort = 0;
830 			error = EFAULT;
831 			break;
832 		}
833 	}
834 
835 	return (error);
836 }
837 
838 static int
839 watch_copyin(const void *kaddr, void *uaddr, size_t count)
840 {
841 	return (watch_xcopyin(kaddr, uaddr, count) ? -1 : 0);
842 }
843 
844 
845 static int
846 watch_xcopyout(const void *kaddr, void *uaddr, size_t count)
847 {
848 	klwp_t *lwp = ttolwp(curthread);
849 	caddr_t watch_uaddr = (caddr_t)uaddr;
850 	caddr_t watch_kaddr = (caddr_t)kaddr;
851 	int error = 0;
852 	label_t ljb;
853 
854 	while (count && error == 0) {
855 		int watchcode;
856 		caddr_t vaddr;
857 		size_t part;
858 		size_t len;
859 		int ta;
860 		int mapped;
861 
862 		if ((part = PAGESIZE -
863 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > count)
864 			part = count;
865 
866 		if (!pr_is_watchpage(watch_uaddr, S_WRITE))
867 			watchcode = 0;
868 		else {
869 			vaddr = watch_uaddr;
870 			watchcode = pr_is_watchpoint(&vaddr, &ta,
871 			    part, &len, S_WRITE);
872 			if (watchcode) {
873 				if (ta == 0)
874 					part = vaddr - watch_uaddr;
875 				else {
876 					len += vaddr - watch_uaddr;
877 					if (part > len)
878 						part = len;
879 				}
880 			}
881 		}
882 
883 		/*
884 		 * Copy the initial part, up to a watched address, if any.
885 		 */
886 		if (part != 0) {
887 			mapped = pr_mappage(watch_uaddr, part, S_WRITE, 1);
888 			if (on_fault(&ljb))
889 				error = EFAULT;
890 			else
891 				copyout_noerr(watch_kaddr, watch_uaddr, part);
892 			no_fault();
893 			if (mapped)
894 				pr_unmappage(watch_uaddr, part, S_WRITE, 1);
895 			watch_uaddr += part;
896 			watch_kaddr += part;
897 			count -= part;
898 		}
899 
900 		/*
901 		 * If trapafter was specified, then copy through the
902 		 * watched area before taking the watchpoint trap.
903 		 */
904 		while (count && watchcode && ta && len > part && error == 0) {
905 			len -= part;
906 			if ((part = PAGESIZE) > count)
907 				part = count;
908 			if (part > len)
909 				part = len;
910 			mapped = pr_mappage(watch_uaddr, part, S_WRITE, 1);
911 			if (on_fault(&ljb))
912 				error = EFAULT;
913 			else
914 				copyout_noerr(watch_kaddr, watch_uaddr, part);
915 			no_fault();
916 			if (mapped)
917 				pr_unmappage(watch_uaddr, part, S_WRITE, 1);
918 			watch_uaddr += part;
919 			watch_kaddr += part;
920 			count -= part;
921 		}
922 
923 		/* if we hit a watched address, do the watchpoint logic */
924 		if (watchcode &&
925 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
926 		    lwp->lwp_sysabort)) {
927 			lwp->lwp_sysabort = 0;
928 			error = EFAULT;
929 			break;
930 		}
931 	}
932 
933 	return (error);
934 }
935 
936 static int
937 watch_copyout(const void *kaddr, void *uaddr, size_t count)
938 {
939 	return (watch_xcopyout(kaddr, uaddr, count) ? -1 : 0);
940 }
941 
942 static int
943 watch_copyinstr(
944 	const char *uaddr,
945 	char *kaddr,
946 	size_t maxlength,
947 	size_t *lencopied)
948 {
949 	klwp_t *lwp = ttolwp(curthread);
950 	size_t resid;
951 	int error = 0;
952 	label_t ljb;
953 
954 	if ((resid = maxlength) == 0)
955 		return (ENAMETOOLONG);
956 
957 	while (resid && error == 0) {
958 		int watchcode;
959 		caddr_t vaddr;
960 		size_t part;
961 		size_t len;
962 		size_t size;
963 		int ta;
964 		int mapped;
965 
966 		if ((part = PAGESIZE -
967 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > resid)
968 			part = resid;
969 
970 		if (!pr_is_watchpage((caddr_t)uaddr, S_READ))
971 			watchcode = 0;
972 		else {
973 			vaddr = (caddr_t)uaddr;
974 			watchcode = pr_is_watchpoint(&vaddr, &ta,
975 			    part, &len, S_READ);
976 			if (watchcode) {
977 				if (ta == 0)
978 					part = vaddr - uaddr;
979 				else {
980 					len += vaddr - uaddr;
981 					if (part > len)
982 						part = len;
983 				}
984 			}
985 		}
986 
987 		/*
988 		 * Copy the initial part, up to a watched address, if any.
989 		 */
990 		if (part != 0) {
991 			mapped = pr_mappage((caddr_t)uaddr, part, S_READ, 1);
992 			if (on_fault(&ljb))
993 				error = EFAULT;
994 			else
995 				error = copyinstr_noerr(uaddr, kaddr, part,
996 				    &size);
997 			no_fault();
998 			if (mapped)
999 				pr_unmappage((caddr_t)uaddr, part, S_READ, 1);
1000 			uaddr += size;
1001 			kaddr += size;
1002 			resid -= size;
1003 			if (error == ENAMETOOLONG && resid > 0)
1004 				error = 0;
1005 			if (error != 0 || (watchcode &&
1006 			    (uaddr < vaddr || kaddr[-1] == '\0')))
1007 				break;	/* didn't reach the watched area */
1008 		}
1009 
1010 		/*
1011 		 * If trapafter was specified, then copy through the
1012 		 * watched area before taking the watchpoint trap.
1013 		 */
1014 		while (resid && watchcode && ta && len > part && error == 0 &&
1015 		    size == part && kaddr[-1] != '\0') {
1016 			len -= part;
1017 			if ((part = PAGESIZE) > resid)
1018 				part = resid;
1019 			if (part > len)
1020 				part = len;
1021 			mapped = pr_mappage((caddr_t)uaddr, part, S_READ, 1);
1022 			if (on_fault(&ljb))
1023 				error = EFAULT;
1024 			else
1025 				error = copyinstr_noerr(uaddr, kaddr, part,
1026 				    &size);
1027 			no_fault();
1028 			if (mapped)
1029 				pr_unmappage((caddr_t)uaddr, part, S_READ, 1);
1030 			uaddr += size;
1031 			kaddr += size;
1032 			resid -= size;
1033 			if (error == ENAMETOOLONG && resid > 0)
1034 				error = 0;
1035 		}
1036 
1037 		/* if we hit a watched address, do the watchpoint logic */
1038 		if (watchcode &&
1039 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1040 		    lwp->lwp_sysabort)) {
1041 			lwp->lwp_sysabort = 0;
1042 			error = EFAULT;
1043 			break;
1044 		}
1045 
1046 		if (error == 0 && part != 0 &&
1047 		    (size < part || kaddr[-1] == '\0'))
1048 			break;
1049 	}
1050 
1051 	if (error != EFAULT && lencopied)
1052 		*lencopied = maxlength - resid;
1053 	return (error);
1054 }
1055 
1056 static int
1057 watch_copyoutstr(
1058 	const char *kaddr,
1059 	char *uaddr,
1060 	size_t maxlength,
1061 	size_t *lencopied)
1062 {
1063 	klwp_t *lwp = ttolwp(curthread);
1064 	size_t resid;
1065 	int error = 0;
1066 	label_t ljb;
1067 
1068 	if ((resid = maxlength) == 0)
1069 		return (ENAMETOOLONG);
1070 
1071 	while (resid && error == 0) {
1072 		int watchcode;
1073 		caddr_t vaddr;
1074 		size_t part;
1075 		size_t len;
1076 		size_t size;
1077 		int ta;
1078 		int mapped;
1079 
1080 		if ((part = PAGESIZE -
1081 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > resid)
1082 			part = resid;
1083 
1084 		if (!pr_is_watchpage(uaddr, S_WRITE)) {
1085 			watchcode = 0;
1086 		} else {
1087 			vaddr = uaddr;
1088 			watchcode = pr_is_watchpoint(&vaddr, &ta,
1089 			    part, &len, S_WRITE);
1090 			if (watchcode && ta == 0)
1091 				part = vaddr - uaddr;
1092 		}
1093 
1094 		/*
1095 		 * Copy the initial part, up to a watched address, if any.
1096 		 */
1097 		if (part != 0) {
1098 			mapped = pr_mappage(uaddr, part, S_WRITE, 1);
1099 			if (on_fault(&ljb))
1100 				error = EFAULT;
1101 			else
1102 				error = copyoutstr_noerr(kaddr, uaddr, part,
1103 				    &size);
1104 			no_fault();
1105 			if (mapped)
1106 				pr_unmappage(uaddr, part, S_WRITE, 1);
1107 			uaddr += size;
1108 			kaddr += size;
1109 			resid -= size;
1110 			if (error == ENAMETOOLONG && resid > 0)
1111 				error = 0;
1112 			if (error != 0 || (watchcode &&
1113 			    (uaddr < vaddr || kaddr[-1] == '\0')))
1114 				break;	/* didn't reach the watched area */
1115 		}
1116 
1117 		/*
1118 		 * If trapafter was specified, then copy through the
1119 		 * watched area before taking the watchpoint trap.
1120 		 */
1121 		while (resid && watchcode && ta && len > part && error == 0 &&
1122 		    size == part && kaddr[-1] != '\0') {
1123 			len -= part;
1124 			if ((part = PAGESIZE) > resid)
1125 				part = resid;
1126 			if (part > len)
1127 				part = len;
1128 			mapped = pr_mappage(uaddr, part, S_WRITE, 1);
1129 			if (on_fault(&ljb))
1130 				error = EFAULT;
1131 			else
1132 				error = copyoutstr_noerr(kaddr, uaddr, part,
1133 				    &size);
1134 			no_fault();
1135 			if (mapped)
1136 				pr_unmappage(uaddr, part, S_WRITE, 1);
1137 			uaddr += size;
1138 			kaddr += size;
1139 			resid -= size;
1140 			if (error == ENAMETOOLONG && resid > 0)
1141 				error = 0;
1142 		}
1143 
1144 		/* if we hit a watched address, do the watchpoint logic */
1145 		if (watchcode &&
1146 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1147 		    lwp->lwp_sysabort)) {
1148 			lwp->lwp_sysabort = 0;
1149 			error = EFAULT;
1150 			break;
1151 		}
1152 
1153 		if (error == 0 && part != 0 &&
1154 		    (size < part || kaddr[-1] == '\0'))
1155 			break;
1156 	}
1157 
1158 	if (error != EFAULT && lencopied)
1159 		*lencopied = maxlength - resid;
1160 	return (error);
1161 }
1162 
1163 typedef int (*fuword_func)(const void *, void *);
1164 
1165 /*
1166  * Generic form of watch_fuword8(), watch_fuword16(), etc.
1167  */
1168 static int
1169 watch_fuword(const void *addr, void *dst, fuword_func func, size_t size)
1170 {
1171 	klwp_t *lwp = ttolwp(curthread);
1172 	int watchcode;
1173 	caddr_t vaddr;
1174 	int mapped;
1175 	int rv = 0;
1176 	int ta;
1177 	label_t ljb;
1178 
1179 	for (;;) {
1180 
1181 		vaddr = (caddr_t)addr;
1182 		watchcode = pr_is_watchpoint(&vaddr, &ta, size, NULL, S_READ);
1183 		if (watchcode == 0 || ta != 0) {
1184 			mapped = pr_mappage((caddr_t)addr, size, S_READ, 1);
1185 			if (on_fault(&ljb))
1186 				rv = -1;
1187 			else
1188 				(*func)(addr, dst);
1189 			no_fault();
1190 			if (mapped)
1191 				pr_unmappage((caddr_t)addr, size, S_READ, 1);
1192 		}
1193 		if (watchcode &&
1194 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1195 		    lwp->lwp_sysabort)) {
1196 			lwp->lwp_sysabort = 0;
1197 			rv = -1;
1198 			break;
1199 		}
1200 		if (watchcode == 0 || ta != 0)
1201 			break;
1202 	}
1203 
1204 	return (rv);
1205 }
1206 
1207 static int
1208 watch_fuword8(const void *addr, uint8_t *dst)
1209 {
1210 	return (watch_fuword(addr, dst, (fuword_func)fuword8_noerr,
1211 	    sizeof (*dst)));
1212 }
1213 
1214 static int
1215 watch_fuword16(const void *addr, uint16_t *dst)
1216 {
1217 	return (watch_fuword(addr, dst, (fuword_func)fuword16_noerr,
1218 	    sizeof (*dst)));
1219 }
1220 
1221 static int
1222 watch_fuword32(const void *addr, uint32_t *dst)
1223 {
1224 	return (watch_fuword(addr, dst, (fuword_func)fuword32_noerr,
1225 	    sizeof (*dst)));
1226 }
1227 
1228 #ifdef _LP64
1229 static int
1230 watch_fuword64(const void *addr, uint64_t *dst)
1231 {
1232 	return (watch_fuword(addr, dst, (fuword_func)fuword64_noerr,
1233 	    sizeof (*dst)));
1234 }
1235 #endif
1236 
1237 
1238 static int
1239 watch_suword8(void *addr, uint8_t value)
1240 {
1241 	klwp_t *lwp = ttolwp(curthread);
1242 	int watchcode;
1243 	caddr_t vaddr;
1244 	int mapped;
1245 	int rv = 0;
1246 	int ta;
1247 	label_t ljb;
1248 
1249 	for (;;) {
1250 
1251 		vaddr = (caddr_t)addr;
1252 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1253 		    S_WRITE);
1254 		if (watchcode == 0 || ta != 0) {
1255 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1256 			    S_WRITE, 1);
1257 			if (on_fault(&ljb))
1258 				rv = -1;
1259 			else
1260 				suword8_noerr(addr, value);
1261 			no_fault();
1262 			if (mapped)
1263 				pr_unmappage((caddr_t)addr, sizeof (value),
1264 				    S_WRITE, 1);
1265 		}
1266 		if (watchcode &&
1267 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1268 		    lwp->lwp_sysabort)) {
1269 			lwp->lwp_sysabort = 0;
1270 			rv = -1;
1271 			break;
1272 		}
1273 		if (watchcode == 0 || ta != 0)
1274 			break;
1275 	}
1276 
1277 	return (rv);
1278 }
1279 
1280 static int
1281 watch_suword16(void *addr, uint16_t value)
1282 {
1283 	klwp_t *lwp = ttolwp(curthread);
1284 	int watchcode;
1285 	caddr_t vaddr;
1286 	int mapped;
1287 	int rv = 0;
1288 	int ta;
1289 	label_t ljb;
1290 
1291 	for (;;) {
1292 
1293 		vaddr = (caddr_t)addr;
1294 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1295 		    S_WRITE);
1296 		if (watchcode == 0 || ta != 0) {
1297 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1298 			    S_WRITE, 1);
1299 			if (on_fault(&ljb))
1300 				rv = -1;
1301 			else
1302 				suword16_noerr(addr, value);
1303 			no_fault();
1304 			if (mapped)
1305 				pr_unmappage((caddr_t)addr, sizeof (value),
1306 				    S_WRITE, 1);
1307 		}
1308 		if (watchcode &&
1309 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1310 		    lwp->lwp_sysabort)) {
1311 			lwp->lwp_sysabort = 0;
1312 			rv = -1;
1313 			break;
1314 		}
1315 		if (watchcode == 0 || ta != 0)
1316 			break;
1317 	}
1318 
1319 	return (rv);
1320 }
1321 
1322 static int
1323 watch_suword32(void *addr, uint32_t value)
1324 {
1325 	klwp_t *lwp = ttolwp(curthread);
1326 	int watchcode;
1327 	caddr_t vaddr;
1328 	int mapped;
1329 	int rv = 0;
1330 	int ta;
1331 	label_t ljb;
1332 
1333 	for (;;) {
1334 
1335 		vaddr = (caddr_t)addr;
1336 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1337 		    S_WRITE);
1338 		if (watchcode == 0 || ta != 0) {
1339 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1340 			    S_WRITE, 1);
1341 			if (on_fault(&ljb))
1342 				rv = -1;
1343 			else
1344 				suword32_noerr(addr, value);
1345 			no_fault();
1346 			if (mapped)
1347 				pr_unmappage((caddr_t)addr, sizeof (value),
1348 				    S_WRITE, 1);
1349 		}
1350 		if (watchcode &&
1351 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1352 		    lwp->lwp_sysabort)) {
1353 			lwp->lwp_sysabort = 0;
1354 			rv = -1;
1355 			break;
1356 		}
1357 		if (watchcode == 0 || ta != 0)
1358 			break;
1359 	}
1360 
1361 	return (rv);
1362 }
1363 
1364 #ifdef _LP64
1365 static int
1366 watch_suword64(void *addr, uint64_t value)
1367 {
1368 	klwp_t *lwp = ttolwp(curthread);
1369 	int watchcode;
1370 	caddr_t vaddr;
1371 	int mapped;
1372 	int rv = 0;
1373 	int ta;
1374 	label_t ljb;
1375 
1376 	for (;;) {
1377 
1378 		vaddr = (caddr_t)addr;
1379 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1380 		    S_WRITE);
1381 		if (watchcode == 0 || ta != 0) {
1382 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1383 			    S_WRITE, 1);
1384 			if (on_fault(&ljb))
1385 				rv = -1;
1386 			else
1387 				suword64_noerr(addr, value);
1388 			no_fault();
1389 			if (mapped)
1390 				pr_unmappage((caddr_t)addr, sizeof (value),
1391 				    S_WRITE, 1);
1392 		}
1393 		if (watchcode &&
1394 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1395 		    lwp->lwp_sysabort)) {
1396 			lwp->lwp_sysabort = 0;
1397 			rv = -1;
1398 			break;
1399 		}
1400 		if (watchcode == 0 || ta != 0)
1401 			break;
1402 	}
1403 
1404 	return (rv);
1405 }
1406 #endif /* _LP64 */
1407 
1408 /*
1409  * Check for watched addresses in the given address space.
1410  * Return 1 if this is true, otherwise 0.
1411  */
1412 static int
1413 pr_is_watched(caddr_t base, size_t len, int rw)
1414 {
1415 	caddr_t saddr = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
1416 	caddr_t eaddr = base + len;
1417 	caddr_t paddr;
1418 
1419 	for (paddr = saddr; paddr < eaddr; paddr += PAGESIZE) {
1420 		if (pr_is_watchpage(paddr, rw))
1421 			return (1);
1422 	}
1423 
1424 	return (0);
1425 }
1426 
1427 /*
1428  * Wrapper for the physio() function.
1429  * Splits one uio operation with multiple iovecs into uio operations with
1430  * only one iovecs to do the watchpoint handling separately for each iovecs.
1431  */
1432 static int
1433 watch_physio(int (*strat)(struct buf *), struct buf *bp, dev_t dev,
1434     int rw, void (*mincnt)(struct buf *), struct uio *uio)
1435 {
1436 	struct uio auio;
1437 	struct iovec *iov;
1438 	caddr_t  base;
1439 	size_t len;
1440 	int seg_rw;
1441 	int error = 0;
1442 
1443 	if (uio->uio_segflg == UIO_SYSSPACE)
1444 		return (default_physio(strat, bp, dev, rw, mincnt, uio));
1445 
1446 	seg_rw = (rw == B_READ) ? S_WRITE : S_READ;
1447 
1448 	while (uio->uio_iovcnt > 0) {
1449 		if (uio->uio_resid == 0) {
1450 			/*
1451 			 * Make sure to return the uio structure with the
1452 			 * same values as default_physio() does.
1453 			 */
1454 			uio->uio_iov++;
1455 			uio->uio_iovcnt--;
1456 			continue;
1457 		}
1458 
1459 		iov = uio->uio_iov;
1460 		len = MIN(iov->iov_len, uio->uio_resid);
1461 
1462 		auio.uio_iovcnt = 1;
1463 		auio.uio_iov = iov;
1464 		auio.uio_resid = len;
1465 		auio.uio_loffset = uio->uio_loffset;
1466 		auio.uio_llimit = uio->uio_llimit;
1467 		auio.uio_fmode = uio->uio_fmode;
1468 		auio.uio_extflg = uio->uio_extflg;
1469 		auio.uio_segflg = uio->uio_segflg;
1470 
1471 		base = iov->iov_base;
1472 
1473 		if (!pr_is_watched(base, len, seg_rw)) {
1474 			/*
1475 			 * The given memory references don't cover a
1476 			 * watched page.
1477 			 */
1478 			error = default_physio(strat, bp, dev, rw, mincnt,
1479 			    &auio);
1480 
1481 			/* Update uio with values from auio. */
1482 			len -= auio.uio_resid;
1483 			uio->uio_resid -= len;
1484 			uio->uio_loffset += len;
1485 
1486 			/*
1487 			 * Return if an error occurred or not all data
1488 			 * was copied.
1489 			 */
1490 			if (auio.uio_resid || error)
1491 				break;
1492 			uio->uio_iov++;
1493 			uio->uio_iovcnt--;
1494 		} else {
1495 			int mapped, watchcode, ta;
1496 			caddr_t vaddr = base;
1497 			klwp_t *lwp = ttolwp(curthread);
1498 
1499 			watchcode = pr_is_watchpoint(&vaddr, &ta, len,
1500 			    NULL, seg_rw);
1501 
1502 			if (watchcode == 0 || ta != 0) {
1503 				/*
1504 				 * Do the io if the given memory references
1505 				 * don't cover a watched area (watchcode=0)
1506 				 * or if WA_TRAPAFTER was specified.
1507 				 */
1508 				mapped = pr_mappage(base, len, seg_rw, 1);
1509 				error = default_physio(strat, bp, dev, rw,
1510 				    mincnt, &auio);
1511 				if (mapped)
1512 					pr_unmappage(base, len, seg_rw, 1);
1513 
1514 				len -= auio.uio_resid;
1515 				uio->uio_resid -= len;
1516 				uio->uio_loffset += len;
1517 			}
1518 
1519 			/*
1520 			 * If we hit a watched address, do the watchpoint logic.
1521 			 */
1522 			if (watchcode &&
1523 			    (!sys_watchpoint(vaddr, watchcode, ta) ||
1524 			    lwp->lwp_sysabort)) {
1525 				lwp->lwp_sysabort = 0;
1526 				return (EFAULT);
1527 			}
1528 
1529 			/*
1530 			 * Check for errors from default_physio().
1531 			 */
1532 			if (watchcode == 0 || ta != 0) {
1533 				if (auio.uio_resid || error)
1534 					break;
1535 				uio->uio_iov++;
1536 				uio->uio_iovcnt--;
1537 			}
1538 		}
1539 	}
1540 
1541 	return (error);
1542 }
1543 
1544 int
1545 wa_compare(const void *a, const void *b)
1546 {
1547 	const watched_area_t *pa = a;
1548 	const watched_area_t *pb = b;
1549 
1550 	if (pa->wa_vaddr < pb->wa_vaddr)
1551 		return (-1);
1552 	else if (pa->wa_vaddr > pb->wa_vaddr)
1553 		return (1);
1554 	else
1555 		return (0);
1556 }
1557 
1558 int
1559 wp_compare(const void *a, const void *b)
1560 {
1561 	const watched_page_t *pa = a;
1562 	const watched_page_t *pb = b;
1563 
1564 	if (pa->wp_vaddr < pb->wp_vaddr)
1565 		return (-1);
1566 	else if (pa->wp_vaddr > pb->wp_vaddr)
1567 		return (1);
1568 	else
1569 		return (0);
1570 }
1571 
1572 /*
1573  * Given an address range, finds the first watched area which overlaps some or
1574  * all of the range.
1575  */
1576 watched_area_t *
1577 pr_find_watched_area(proc_t *p, watched_area_t *pwa, avl_index_t *where)
1578 {
1579 	caddr_t vaddr = pwa->wa_vaddr;
1580 	caddr_t eaddr = pwa->wa_eaddr;
1581 	watched_area_t *wap;
1582 	avl_index_t real_where;
1583 
1584 	/* First, check if there is an exact match.  */
1585 	wap = avl_find(&p->p_warea, pwa, &real_where);
1586 
1587 
1588 	/* Check to see if we overlap with the previous area.  */
1589 	if (wap == NULL) {
1590 		wap = avl_nearest(&p->p_warea, real_where, AVL_BEFORE);
1591 		if (wap != NULL &&
1592 		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
1593 			wap = NULL;
1594 	}
1595 
1596 	/* Try the next area.  */
1597 	if (wap == NULL) {
1598 		wap = avl_nearest(&p->p_warea, real_where, AVL_AFTER);
1599 		if (wap != NULL &&
1600 		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
1601 			wap = NULL;
1602 	}
1603 
1604 	if (where)
1605 		*where = real_where;
1606 
1607 	return (wap);
1608 }
1609 
1610 void
1611 watch_enable(kthread_id_t t)
1612 {
1613 	t->t_proc_flag |= TP_WATCHPT;
1614 	install_copyops(t, &watch_copyops);
1615 }
1616 
1617 void
1618 watch_disable(kthread_id_t t)
1619 {
1620 	t->t_proc_flag &= ~TP_WATCHPT;
1621 	remove_copyops(t);
1622 }
1623 
1624 int
1625 copyin_nowatch(const void *uaddr, void *kaddr, size_t len)
1626 {
1627 	int watched, ret;
1628 
1629 	watched = watch_disable_addr(uaddr, len, S_READ);
1630 	ret = copyin(uaddr, kaddr, len);
1631 	if (watched)
1632 		watch_enable_addr(uaddr, len, S_READ);
1633 
1634 	return (ret);
1635 }
1636 
1637 int
1638 copyout_nowatch(const void *kaddr, void *uaddr, size_t len)
1639 {
1640 	int watched, ret;
1641 
1642 	watched = watch_disable_addr(uaddr, len, S_WRITE);
1643 	ret = copyout(kaddr, uaddr, len);
1644 	if (watched)
1645 		watch_enable_addr(uaddr, len, S_WRITE);
1646 
1647 	return (ret);
1648 }
1649 
1650 #ifdef _LP64
1651 int
1652 fuword64_nowatch(const void *addr, uint64_t *value)
1653 {
1654 	int watched, ret;
1655 
1656 	watched = watch_disable_addr(addr, sizeof (*value), S_READ);
1657 	ret = fuword64(addr, value);
1658 	if (watched)
1659 		watch_enable_addr(addr, sizeof (*value), S_READ);
1660 
1661 	return (ret);
1662 }
1663 #endif
1664 
1665 int
1666 fuword32_nowatch(const void *addr, uint32_t *value)
1667 {
1668 	int watched, ret;
1669 
1670 	watched = watch_disable_addr(addr, sizeof (*value), S_READ);
1671 	ret = fuword32(addr, value);
1672 	if (watched)
1673 		watch_enable_addr(addr, sizeof (*value), S_READ);
1674 
1675 	return (ret);
1676 }
1677 
1678 #ifdef _LP64
1679 int
1680 suword64_nowatch(void *addr, uint64_t value)
1681 {
1682 	int watched, ret;
1683 
1684 	watched = watch_disable_addr(addr, sizeof (value), S_WRITE);
1685 	ret = suword64(addr, value);
1686 	if (watched)
1687 		watch_enable_addr(addr, sizeof (value), S_WRITE);
1688 
1689 	return (ret);
1690 }
1691 #endif
1692 
1693 int
1694 suword32_nowatch(void *addr, uint32_t value)
1695 {
1696 	int watched, ret;
1697 
1698 	watched = watch_disable_addr(addr, sizeof (value), S_WRITE);
1699 	ret = suword32(addr, value);
1700 	if (watched)
1701 		watch_enable_addr(addr, sizeof (value), S_WRITE);
1702 
1703 	return (ret);
1704 }
1705 
1706 int
1707 watch_disable_addr(const void *addr, size_t len, enum seg_rw rw)
1708 {
1709 	if (pr_watch_active(curproc))
1710 		return (pr_mappage((caddr_t)addr, len, rw, 1));
1711 	return (0);
1712 }
1713 
1714 void
1715 watch_enable_addr(const void *addr, size_t len, enum seg_rw rw)
1716 {
1717 	if (pr_watch_active(curproc))
1718 		pr_unmappage((caddr_t)addr, len, rw, 1);
1719 }
1720