xref: /illumos-gate/usr/src/uts/common/os/watchpoint.c (revision 20a7641f9918de8574b8b3b47dbe35c4bfc78df1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/t_lock.h>
29 #include <sys/param.h>
30 #include <sys/cred.h>
31 #include <sys/debug.h>
32 #include <sys/inline.h>
33 #include <sys/kmem.h>
34 #include <sys/proc.h>
35 #include <sys/regset.h>
36 #include <sys/sysmacros.h>
37 #include <sys/systm.h>
38 #include <sys/prsystm.h>
39 #include <sys/buf.h>
40 #include <sys/signal.h>
41 #include <sys/user.h>
42 #include <sys/cpuvar.h>
43 
44 #include <sys/fault.h>
45 #include <sys/syscall.h>
46 #include <sys/procfs.h>
47 #include <sys/cmn_err.h>
48 #include <sys/stack.h>
49 #include <sys/watchpoint.h>
50 #include <sys/copyops.h>
51 #include <sys/schedctl.h>
52 
53 #include <sys/mman.h>
54 #include <vm/as.h>
55 #include <vm/seg.h>
56 
57 /*
58  * Copy ops vector for watchpoints.
59  */
60 static int	watch_copyin(const void *, void *, size_t);
61 static int	watch_xcopyin(const void *, void *, size_t);
62 static int	watch_copyout(const void *, void *, size_t);
63 static int	watch_xcopyout(const void *, void *, size_t);
64 static int	watch_copyinstr(const char *, char *, size_t, size_t *);
65 static int	watch_copyoutstr(const char *, char *, size_t, size_t *);
66 static int	watch_fuword8(const void *, uint8_t *);
67 static int	watch_fuword16(const void *, uint16_t *);
68 static int	watch_fuword32(const void *, uint32_t *);
69 static int	watch_suword8(void *, uint8_t);
70 static int	watch_suword16(void *, uint16_t);
71 static int	watch_suword32(void *, uint32_t);
72 static int	watch_physio(int (*)(struct buf *), struct buf *,
73     dev_t, int, void (*)(struct buf *), struct uio *);
74 #ifdef _LP64
75 static int	watch_fuword64(const void *, uint64_t *);
76 static int	watch_suword64(void *, uint64_t);
77 #endif
78 
79 struct copyops watch_copyops = {
80 	watch_copyin,
81 	watch_xcopyin,
82 	watch_copyout,
83 	watch_xcopyout,
84 	watch_copyinstr,
85 	watch_copyoutstr,
86 	watch_fuword8,
87 	watch_fuword16,
88 	watch_fuword32,
89 #ifdef _LP64
90 	watch_fuword64,
91 #else
92 	NULL,
93 #endif
94 	watch_suword8,
95 	watch_suword16,
96 	watch_suword32,
97 #ifdef _LP64
98 	watch_suword64,
99 #else
100 	NULL,
101 #endif
102 	watch_physio
103 };
104 
105 /*
106  * Map the 'rw' argument to a protection flag.
107  */
108 static int
109 rw_to_prot(enum seg_rw rw)
110 {
111 	switch (rw) {
112 	case S_EXEC:
113 		return (PROT_EXEC);
114 	case S_READ:
115 		return (PROT_READ);
116 	case S_WRITE:
117 		return (PROT_WRITE);
118 	default:
119 		return (PROT_NONE);	/* can't happen */
120 	}
121 }
122 
123 /*
124  * Map the 'rw' argument to an index into an array of exec/write/read things.
125  * The index follows the precedence order:  exec .. write .. read
126  */
127 static int
128 rw_to_index(enum seg_rw rw)
129 {
130 	switch (rw) {
131 	default:	/* default case "can't happen" */
132 	case S_EXEC:
133 		return (0);
134 	case S_WRITE:
135 		return (1);
136 	case S_READ:
137 		return (2);
138 	}
139 }
140 
141 /*
142  * Map an index back to a seg_rw.
143  */
144 static enum seg_rw S_rw[4] = {
145 	S_EXEC,
146 	S_WRITE,
147 	S_READ,
148 	S_READ,
149 };
150 
/*
 * Slot numbers for the exec/write/read counter arrays (these match the
 * values returned by rw_to_index()), and a helper that totals the three
 * counters of such an array.
 *
 * The argument of sum() is parenthesized so that any pointer expression
 * (for example "base + n") expands correctly.  Note that the argument is
 * evaluated three times, so it must not have side effects.
 */
#define	X	0
#define	W	1
#define	R	2
#define	sum(a)	((a)[X] + (a)[W] + (a)[R])
155 
156 /*
157  * Common code for pr_mappage() and pr_unmappage().
158  */
159 static int
160 pr_do_mappage(caddr_t addr, size_t size, int mapin, enum seg_rw rw, int kernel)
161 {
162 	proc_t *p = curproc;
163 	struct as *as = p->p_as;
164 	char *eaddr = addr + size;
165 	int prot_rw = rw_to_prot(rw);
166 	int xrw = rw_to_index(rw);
167 	int rv = 0;
168 	struct watched_page *pwp;
169 	struct watched_page tpw;
170 	avl_index_t where;
171 	uint_t prot;
172 
173 	ASSERT(as != &kas);
174 
175 startover:
176 	ASSERT(rv == 0);
177 	if (avl_numnodes(&as->a_wpage) == 0)
178 		return (0);
179 
180 	/*
181 	 * as->a_wpage can only be changed while the process is totally stopped.
182 	 * Don't grab p_lock here.  Holding p_lock while grabbing the address
183 	 * space lock leads to deadlocks with the clock thread.
184 	 *
185 	 * p_maplock prevents simultaneous execution of this function.  Under
186 	 * normal circumstances, holdwatch() will stop all other threads, so the
187 	 * lock isn't really needed.  But there may be multiple threads within
188 	 * stop() when SWATCHOK is set, so we need to handle multiple threads
189 	 * at once.  See holdwatch() for the details of this dance.
190 	 */
191 
192 	mutex_enter(&p->p_maplock);
193 
194 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
195 	if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
196 		pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
197 
198 	for (; pwp != NULL && pwp->wp_vaddr < eaddr;
199 	    pwp = AVL_NEXT(&as->a_wpage, pwp)) {
200 
201 		/*
202 		 * If the requested protection has not been
203 		 * removed, we need not remap this page.
204 		 */
205 		prot = pwp->wp_prot;
206 		if (kernel || (prot & PROT_USER))
207 			if (prot & prot_rw)
208 				continue;
209 		/*
210 		 * If the requested access does not exist in the page's
211 		 * original protections, we need not remap this page.
212 		 * If the page does not exist yet, we can't test it.
213 		 */
214 		if ((prot = pwp->wp_oprot) != 0) {
215 			if (!(kernel || (prot & PROT_USER)))
216 				continue;
217 			if (!(prot & prot_rw))
218 				continue;
219 		}
220 
221 		if (mapin) {
222 			/*
223 			 * Before mapping the page in, ensure that
224 			 * all other lwps are held in the kernel.
225 			 */
226 			if (p->p_mapcnt == 0) {
227 				mutex_exit(&p->p_maplock);
228 				if (holdwatch() != 0) {
229 					/*
230 					 * We stopped in holdwatch().
231 					 * Start all over again because the
232 					 * watched page list may have changed.
233 					 */
234 					goto startover;
235 				}
236 				mutex_enter(&p->p_maplock);
237 			}
238 			p->p_mapcnt++;
239 		}
240 
241 		addr = pwp->wp_vaddr;
242 		rv++;
243 
244 		prot = pwp->wp_prot;
245 		if (mapin) {
246 			if (kernel)
247 				pwp->wp_kmap[xrw]++;
248 			else
249 				pwp->wp_umap[xrw]++;
250 			pwp->wp_flags |= WP_NOWATCH;
251 			if (pwp->wp_kmap[X] + pwp->wp_umap[X])
252 				/* cannot have exec-only protection */
253 				prot |= PROT_READ|PROT_EXEC;
254 			if (pwp->wp_kmap[R] + pwp->wp_umap[R])
255 				prot |= PROT_READ;
256 			if (pwp->wp_kmap[W] + pwp->wp_umap[W])
257 				/* cannot have write-only protection */
258 				prot |= PROT_READ|PROT_WRITE;
259 #if 0	/* damned broken mmu feature! */
260 			if (sum(pwp->wp_umap) == 0)
261 				prot &= ~PROT_USER;
262 #endif
263 		} else {
264 			ASSERT(pwp->wp_flags & WP_NOWATCH);
265 			if (kernel) {
266 				ASSERT(pwp->wp_kmap[xrw] != 0);
267 				--pwp->wp_kmap[xrw];
268 			} else {
269 				ASSERT(pwp->wp_umap[xrw] != 0);
270 				--pwp->wp_umap[xrw];
271 			}
272 			if (sum(pwp->wp_kmap) + sum(pwp->wp_umap) == 0)
273 				pwp->wp_flags &= ~WP_NOWATCH;
274 			else {
275 				if (pwp->wp_kmap[X] + pwp->wp_umap[X])
276 					/* cannot have exec-only protection */
277 					prot |= PROT_READ|PROT_EXEC;
278 				if (pwp->wp_kmap[R] + pwp->wp_umap[R])
279 					prot |= PROT_READ;
280 				if (pwp->wp_kmap[W] + pwp->wp_umap[W])
281 					/* cannot have write-only protection */
282 					prot |= PROT_READ|PROT_WRITE;
283 #if 0	/* damned broken mmu feature! */
284 				if (sum(pwp->wp_umap) == 0)
285 					prot &= ~PROT_USER;
286 #endif
287 			}
288 		}
289 
290 
291 		if (pwp->wp_oprot != 0) {	/* if page exists */
292 			struct seg *seg;
293 			uint_t oprot;
294 			int err, retrycnt = 0;
295 
296 			AS_LOCK_ENTER(as, RW_WRITER);
297 		retry:
298 			seg = as_segat(as, addr);
299 			ASSERT(seg != NULL);
300 			SEGOP_GETPROT(seg, addr, 0, &oprot);
301 			if (prot != oprot) {
302 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
303 				if (err == IE_RETRY) {
304 					ASSERT(retrycnt == 0);
305 					retrycnt++;
306 					goto retry;
307 				}
308 			}
309 			AS_LOCK_EXIT(as);
310 		}
311 
312 		/*
313 		 * When all pages are mapped back to their normal state,
314 		 * continue the other lwps.
315 		 */
316 		if (!mapin) {
317 			ASSERT(p->p_mapcnt > 0);
318 			p->p_mapcnt--;
319 			if (p->p_mapcnt == 0) {
320 				mutex_exit(&p->p_maplock);
321 				mutex_enter(&p->p_lock);
322 				continuelwps(p);
323 				mutex_exit(&p->p_lock);
324 				mutex_enter(&p->p_maplock);
325 			}
326 		}
327 	}
328 
329 	mutex_exit(&p->p_maplock);
330 
331 	return (rv);
332 }
333 
334 /*
335  * Restore the original page protections on an address range.
336  * If 'kernel' is non-zero, just do it for the kernel.
337  * pr_mappage() returns non-zero if it actually changed anything.
338  *
339  * pr_mappage() and pr_unmappage() must be executed in matched pairs,
340  * but pairs may be nested within other pairs.  The reference counts
341  * sort it all out.  See pr_do_mappage(), above.
342  */
343 static int
344 pr_mappage(const caddr_t addr, size_t size, enum seg_rw rw, int kernel)
345 {
346 	return (pr_do_mappage(addr, size, 1, rw, kernel));
347 }
348 
349 /*
350  * Set the modified page protections on a watched page.
351  * Inverse of pr_mappage().
352  * Needs to be called only if pr_mappage() returned non-zero.
353  */
354 static void
355 pr_unmappage(const caddr_t addr, size_t size, enum seg_rw rw, int kernel)
356 {
357 	(void) pr_do_mappage(addr, size, 0, rw, kernel);
358 }
359 
360 /*
361  * Function called by an lwp after it resumes from stop().
362  */
363 void
364 setallwatch(void)
365 {
366 	proc_t *p = curproc;
367 	struct as *as = curproc->p_as;
368 	struct watched_page *pwp, *next;
369 	struct seg *seg;
370 	caddr_t vaddr;
371 	uint_t prot;
372 	int err, retrycnt;
373 
374 	if (p->p_wprot == NULL)
375 		return;
376 
377 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
378 
379 	AS_LOCK_ENTER(as, RW_WRITER);
380 
381 	pwp = p->p_wprot;
382 	while (pwp != NULL) {
383 
384 		vaddr = pwp->wp_vaddr;
385 		retrycnt = 0;
386 	retry:
387 		ASSERT(pwp->wp_flags & WP_SETPROT);
388 		if ((seg = as_segat(as, vaddr)) != NULL &&
389 		    !(pwp->wp_flags & WP_NOWATCH)) {
390 			prot = pwp->wp_prot;
391 			err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
392 			if (err == IE_RETRY) {
393 				ASSERT(retrycnt == 0);
394 				retrycnt++;
395 				goto retry;
396 			}
397 		}
398 
399 		next = pwp->wp_list;
400 
401 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec == 0) {
402 			/*
403 			 * No watched areas remain in this page.
404 			 * Free the watched_page structure.
405 			 */
406 			avl_remove(&as->a_wpage, pwp);
407 			kmem_free(pwp, sizeof (struct watched_page));
408 		} else {
409 			pwp->wp_flags &= ~WP_SETPROT;
410 		}
411 
412 		pwp = next;
413 	}
414 	p->p_wprot = NULL;
415 
416 	AS_LOCK_EXIT(as);
417 }
418 
419 
420 
421 int
422 pr_is_watchpage_as(caddr_t addr, enum seg_rw rw, struct as *as)
423 {
424 	register struct watched_page *pwp;
425 	struct watched_page tpw;
426 	uint_t prot;
427 	int rv = 0;
428 
429 	switch (rw) {
430 	case S_READ:
431 	case S_WRITE:
432 	case S_EXEC:
433 		break;
434 	default:
435 		return (0);
436 	}
437 
438 	/*
439 	 * as->a_wpage can only be modified while the process is totally
440 	 * stopped.  We need, and should use, no locks here.
441 	 */
442 	if (as != &kas && avl_numnodes(&as->a_wpage) != 0) {
443 		tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
444 		pwp = avl_find(&as->a_wpage, &tpw, NULL);
445 		if (pwp != NULL) {
446 			ASSERT(addr >= pwp->wp_vaddr &&
447 			    addr < pwp->wp_vaddr + PAGESIZE);
448 			if (pwp->wp_oprot != 0) {
449 				prot = pwp->wp_prot;
450 				switch (rw) {
451 				case S_READ:
452 					rv = ((prot & (PROT_USER|PROT_READ))
453 					    != (PROT_USER|PROT_READ));
454 					break;
455 				case S_WRITE:
456 					rv = ((prot & (PROT_USER|PROT_WRITE))
457 					    != (PROT_USER|PROT_WRITE));
458 					break;
459 				case S_EXEC:
460 					rv = ((prot & (PROT_USER|PROT_EXEC))
461 					    != (PROT_USER|PROT_EXEC));
462 					break;
463 				default:
464 					/* can't happen! */
465 					break;
466 				}
467 			}
468 		}
469 	}
470 
471 	return (rv);
472 }
473 
474 
475 /*
476  * trap() calls here to determine if a fault is in a watched page.
477  * We return nonzero if this is true and the load/store would fail.
478  */
479 int
480 pr_is_watchpage(caddr_t addr, enum seg_rw rw)
481 {
482 	struct as *as = curproc->p_as;
483 
484 	if ((as == &kas) || avl_numnodes(&as->a_wpage) == 0)
485 		return (0);
486 
487 	return (pr_is_watchpage_as(addr, rw, as));
488 }
489 
490 
491 
492 /*
493  * trap() calls here to determine if a fault is a watchpoint.
494  */
495 int
496 pr_is_watchpoint(caddr_t *paddr, int *pta, size_t size, size_t *plen,
497 	enum seg_rw rw)
498 {
499 	proc_t *p = curproc;
500 	caddr_t addr = *paddr;
501 	caddr_t eaddr = addr + size;
502 	register struct watched_area *pwa;
503 	struct watched_area twa;
504 	int rv = 0;
505 	int ta = 0;
506 	size_t len = 0;
507 
508 	switch (rw) {
509 	case S_READ:
510 	case S_WRITE:
511 	case S_EXEC:
512 		break;
513 	default:
514 		*pta = 0;
515 		return (0);
516 	}
517 
518 	/*
519 	 * p->p_warea is protected by p->p_lock.
520 	 */
521 	mutex_enter(&p->p_lock);
522 
523 	/* BEGIN CSTYLED */
524 	/*
525 	 * This loop is somewhat complicated because the fault region can span
526 	 * multiple watched areas.  For example:
527 	 *
528 	 *            addr              eaddr
529 	 * 		+-----------------+
530 	 * 		| fault region    |
531 	 * 	+-------+--------+----+---+------------+
532 	 *      | prot not right |    | prot correct   |
533 	 *      +----------------+    +----------------+
534 	 *    wa_vaddr	      wa_eaddr
535 	 *    		      wa_vaddr		wa_eaddr
536 	 *
537 	 * We start at the area greater than or equal to the starting address.
538 	 * As long as some portion of the fault region overlaps the current
539 	 * area, we continue checking permissions until we find an appropriate
540 	 * match.
541 	 */
542 	/* END CSTYLED */
543 	twa.wa_vaddr = addr;
544 	twa.wa_eaddr = eaddr;
545 
546 	for (pwa = pr_find_watched_area(p, &twa, NULL);
547 	    pwa != NULL && eaddr > pwa->wa_vaddr && addr < pwa->wa_eaddr;
548 	    pwa = AVL_NEXT(&p->p_warea, pwa)) {
549 
550 		switch (rw) {
551 		case S_READ:
552 			if (pwa->wa_flags & WA_READ)
553 				rv = TRAP_RWATCH;
554 			break;
555 		case S_WRITE:
556 			if (pwa->wa_flags & WA_WRITE)
557 				rv = TRAP_WWATCH;
558 			break;
559 		case S_EXEC:
560 			if (pwa->wa_flags & WA_EXEC)
561 				rv = TRAP_XWATCH;
562 			break;
563 		default:
564 			/* can't happen */
565 			break;
566 		}
567 
568 		/*
569 		 * If protections didn't match, check the next watched
570 		 * area
571 		 */
572 		if (rv != 0) {
573 			if (addr < pwa->wa_vaddr)
574 				addr = pwa->wa_vaddr;
575 			len = pwa->wa_eaddr - addr;
576 			if (pwa->wa_flags & WA_TRAPAFTER)
577 				ta = 1;
578 			break;
579 		}
580 	}
581 
582 	mutex_exit(&p->p_lock);
583 
584 	*paddr = addr;
585 	*pta = ta;
586 	if (plen != NULL)
587 		*plen = len;
588 	return (rv);
589 }
590 
591 /*
592  * Set up to perform a single-step at user level for the
593  * case of a trapafter watchpoint.  Called from trap().
594  */
595 void
596 do_watch_step(caddr_t vaddr, size_t sz, enum seg_rw rw,
597 	int watchcode, greg_t pc)
598 {
599 	register klwp_t *lwp = ttolwp(curthread);
600 	struct lwp_watch *pw = &lwp->lwp_watch[rw_to_index(rw)];
601 
602 	/*
603 	 * Check to see if we are already performing this special
604 	 * watchpoint single-step.  We must not do pr_mappage() twice.
605 	 */
606 
607 	/* special check for two read traps on the same instruction */
608 	if (rw == S_READ && pw->wpaddr != NULL &&
609 	    !(pw->wpaddr <= vaddr && vaddr < pw->wpaddr + pw->wpsize)) {
610 		ASSERT(lwp->lwp_watchtrap != 0);
611 		pw++;	/* use the extra S_READ struct */
612 	}
613 
614 	if (pw->wpaddr != NULL) {
615 		ASSERT(lwp->lwp_watchtrap != 0);
616 		ASSERT(pw->wpaddr <= vaddr && vaddr < pw->wpaddr + pw->wpsize);
617 		if (pw->wpcode == 0) {
618 			pw->wpcode = watchcode;
619 			pw->wppc = pc;
620 		}
621 	} else {
622 		int mapped = pr_mappage(vaddr, sz, rw, 0);
623 		prstep(lwp, 1);
624 		lwp->lwp_watchtrap = 1;
625 		pw->wpaddr = vaddr;
626 		pw->wpsize = sz;
627 		pw->wpcode = watchcode;
628 		pw->wpmapped = mapped;
629 		pw->wppc = pc;
630 	}
631 }
632 
633 /*
634  * Undo the effects of do_watch_step().
635  * Called from trap() after the single-step is finished.
636  * Also called from issig_forreal() and stop() with a NULL
637  * argument to avoid having these things set more than once.
638  */
639 int
640 undo_watch_step(k_siginfo_t *sip)
641 {
642 	register klwp_t *lwp = ttolwp(curthread);
643 	int fault = 0;
644 
645 	if (lwp->lwp_watchtrap) {
646 		struct lwp_watch *pw = lwp->lwp_watch;
647 		int i;
648 
649 		for (i = 0; i < 4; i++, pw++) {
650 			if (pw->wpaddr == NULL)
651 				continue;
652 			if (pw->wpmapped)
653 				pr_unmappage(pw->wpaddr, pw->wpsize, S_rw[i],
654 				    0);
655 			if (pw->wpcode != 0) {
656 				if (sip != NULL) {
657 					sip->si_signo = SIGTRAP;
658 					sip->si_code = pw->wpcode;
659 					sip->si_addr = pw->wpaddr;
660 					sip->si_trapafter = 1;
661 					sip->si_pc = (caddr_t)pw->wppc;
662 				}
663 				fault = FLTWATCH;
664 				pw->wpcode = 0;
665 			}
666 			pw->wpaddr = NULL;
667 			pw->wpsize = 0;
668 			pw->wpmapped = 0;
669 		}
670 		lwp->lwp_watchtrap = 0;
671 	}
672 
673 	return (fault);
674 }
675 
676 /*
677  * Handle a watchpoint that occurs while doing copyin()
678  * or copyout() in a system call.
679  * Return non-zero if the fault or signal is cleared
680  * by a debugger while the lwp is stopped.
681  */
682 static int
683 sys_watchpoint(caddr_t addr, int watchcode, int ta)
684 {
685 	extern greg_t getuserpc(void);	/* XXX header file */
686 	k_sigset_t smask;
687 	register proc_t *p = ttoproc(curthread);
688 	register klwp_t *lwp = ttolwp(curthread);
689 	register sigqueue_t *sqp;
690 	int rval;
691 
692 	/* assert no locks are held */
693 	/* ASSERT(curthread->t_nlocks == 0); */
694 
695 	sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
696 	sqp->sq_info.si_signo = SIGTRAP;
697 	sqp->sq_info.si_code = watchcode;
698 	sqp->sq_info.si_addr = addr;
699 	sqp->sq_info.si_trapafter = ta;
700 	sqp->sq_info.si_pc = (caddr_t)getuserpc();
701 
702 	mutex_enter(&p->p_lock);
703 
704 	/* this will be tested and cleared by the caller */
705 	lwp->lwp_sysabort = 0;
706 
707 	if (prismember(&p->p_fltmask, FLTWATCH)) {
708 		lwp->lwp_curflt = (uchar_t)FLTWATCH;
709 		lwp->lwp_siginfo = sqp->sq_info;
710 		stop(PR_FAULTED, FLTWATCH);
711 		if (lwp->lwp_curflt == 0) {
712 			mutex_exit(&p->p_lock);
713 			kmem_free(sqp, sizeof (sigqueue_t));
714 			return (1);
715 		}
716 		lwp->lwp_curflt = 0;
717 	}
718 
719 	/*
720 	 * post the SIGTRAP signal.
721 	 * Block all other signals so we only stop showing SIGTRAP.
722 	 */
723 	if (signal_is_blocked(curthread, SIGTRAP) ||
724 	    sigismember(&p->p_ignore, SIGTRAP)) {
725 		/* SIGTRAP is blocked or ignored, forget the rest. */
726 		mutex_exit(&p->p_lock);
727 		kmem_free(sqp, sizeof (sigqueue_t));
728 		return (0);
729 	}
730 	sigdelq(p, curthread, SIGTRAP);
731 	sigaddqa(p, curthread, sqp);
732 	schedctl_finish_sigblock(curthread);
733 	smask = curthread->t_hold;
734 	sigfillset(&curthread->t_hold);
735 	sigdiffset(&curthread->t_hold, &cantmask);
736 	sigdelset(&curthread->t_hold, SIGTRAP);
737 	mutex_exit(&p->p_lock);
738 
739 	rval = ((ISSIG_FAST(curthread, lwp, p, FORREAL))? 0 : 1);
740 
741 	/* restore the original signal mask */
742 	mutex_enter(&p->p_lock);
743 	curthread->t_hold = smask;
744 	mutex_exit(&p->p_lock);
745 
746 	return (rval);
747 }
748 
749 /*
750  * Wrappers for the copyin()/copyout() functions to deal
751  * with watchpoints that fire while in system calls.
752  */
753 
754 static int
755 watch_xcopyin(const void *uaddr, void *kaddr, size_t count)
756 {
757 	klwp_t *lwp = ttolwp(curthread);
758 	caddr_t watch_uaddr = (caddr_t)uaddr;
759 	caddr_t watch_kaddr = (caddr_t)kaddr;
760 	int error = 0;
761 	label_t ljb;
762 	size_t part;
763 	int mapped;
764 
765 	while (count && error == 0) {
766 		int watchcode;
767 		caddr_t vaddr;
768 		size_t len;
769 		int ta;
770 
771 		if ((part = PAGESIZE -
772 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > count)
773 			part = count;
774 
775 		if (!pr_is_watchpage(watch_uaddr, S_READ))
776 			watchcode = 0;
777 		else {
778 			vaddr = watch_uaddr;
779 			watchcode = pr_is_watchpoint(&vaddr, &ta,
780 			    part, &len, S_READ);
781 			if (watchcode && ta == 0)
782 				part = vaddr - watch_uaddr;
783 		}
784 
785 		/*
786 		 * Copy the initial part, up to a watched address, if any.
787 		 */
788 		if (part != 0) {
789 			mapped = pr_mappage(watch_uaddr, part, S_READ, 1);
790 			if (on_fault(&ljb))
791 				error = EFAULT;
792 			else
793 				copyin_noerr(watch_uaddr, watch_kaddr, part);
794 			no_fault();
795 			if (mapped)
796 				pr_unmappage(watch_uaddr, part, S_READ, 1);
797 			watch_uaddr += part;
798 			watch_kaddr += part;
799 			count -= part;
800 		}
801 		/*
802 		 * If trapafter was specified, then copy through the
803 		 * watched area before taking the watchpoint trap.
804 		 */
805 		while (count && watchcode && ta && len > part && error == 0) {
806 			len -= part;
807 			if ((part = PAGESIZE) > count)
808 				part = count;
809 			if (part > len)
810 				part = len;
811 			mapped = pr_mappage(watch_uaddr, part, S_READ, 1);
812 			if (on_fault(&ljb))
813 				error = EFAULT;
814 			else
815 				copyin_noerr(watch_uaddr, watch_kaddr, part);
816 			no_fault();
817 			if (mapped)
818 				pr_unmappage(watch_uaddr, part, S_READ, 1);
819 			watch_uaddr += part;
820 			watch_kaddr += part;
821 			count -= part;
822 		}
823 
824 		/* if we hit a watched address, do the watchpoint logic */
825 		if (watchcode &&
826 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
827 		    lwp->lwp_sysabort)) {
828 			lwp->lwp_sysabort = 0;
829 			error = EFAULT;
830 			break;
831 		}
832 	}
833 
834 	return (error);
835 }
836 
/*
 * Watchpoint-aware copyin(): copy 'count' bytes from user address
 * 'uaddr' to kernel address 'kaddr'.  Same as watch_xcopyin(), except
 * that any error is reported as -1 rather than as an errno value.
 */
static int
watch_copyin(const void *uaddr, void *kaddr, size_t count)
{
	return (watch_xcopyin(uaddr, kaddr, count) ? -1 : 0);
}
842 
843 
844 static int
845 watch_xcopyout(const void *kaddr, void *uaddr, size_t count)
846 {
847 	klwp_t *lwp = ttolwp(curthread);
848 	caddr_t watch_uaddr = (caddr_t)uaddr;
849 	caddr_t watch_kaddr = (caddr_t)kaddr;
850 	int error = 0;
851 	label_t ljb;
852 
853 	while (count && error == 0) {
854 		int watchcode;
855 		caddr_t vaddr;
856 		size_t part;
857 		size_t len;
858 		int ta;
859 		int mapped;
860 
861 		if ((part = PAGESIZE -
862 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > count)
863 			part = count;
864 
865 		if (!pr_is_watchpage(watch_uaddr, S_WRITE))
866 			watchcode = 0;
867 		else {
868 			vaddr = watch_uaddr;
869 			watchcode = pr_is_watchpoint(&vaddr, &ta,
870 			    part, &len, S_WRITE);
871 			if (watchcode) {
872 				if (ta == 0)
873 					part = vaddr - watch_uaddr;
874 				else {
875 					len += vaddr - watch_uaddr;
876 					if (part > len)
877 						part = len;
878 				}
879 			}
880 		}
881 
882 		/*
883 		 * Copy the initial part, up to a watched address, if any.
884 		 */
885 		if (part != 0) {
886 			mapped = pr_mappage(watch_uaddr, part, S_WRITE, 1);
887 			if (on_fault(&ljb))
888 				error = EFAULT;
889 			else
890 				copyout_noerr(watch_kaddr, watch_uaddr, part);
891 			no_fault();
892 			if (mapped)
893 				pr_unmappage(watch_uaddr, part, S_WRITE, 1);
894 			watch_uaddr += part;
895 			watch_kaddr += part;
896 			count -= part;
897 		}
898 
899 		/*
900 		 * If trapafter was specified, then copy through the
901 		 * watched area before taking the watchpoint trap.
902 		 */
903 		while (count && watchcode && ta && len > part && error == 0) {
904 			len -= part;
905 			if ((part = PAGESIZE) > count)
906 				part = count;
907 			if (part > len)
908 				part = len;
909 			mapped = pr_mappage(watch_uaddr, part, S_WRITE, 1);
910 			if (on_fault(&ljb))
911 				error = EFAULT;
912 			else
913 				copyout_noerr(watch_kaddr, watch_uaddr, part);
914 			no_fault();
915 			if (mapped)
916 				pr_unmappage(watch_uaddr, part, S_WRITE, 1);
917 			watch_uaddr += part;
918 			watch_kaddr += part;
919 			count -= part;
920 		}
921 
922 		/* if we hit a watched address, do the watchpoint logic */
923 		if (watchcode &&
924 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
925 		    lwp->lwp_sysabort)) {
926 			lwp->lwp_sysabort = 0;
927 			error = EFAULT;
928 			break;
929 		}
930 	}
931 
932 	return (error);
933 }
934 
/*
 * Watchpoint-aware copyout(): same as watch_xcopyout(), except that any
 * error is reported as -1 rather than as an errno value.
 */
static int
watch_copyout(const void *kaddr, void *uaddr, size_t count)
{
	int error = watch_xcopyout(kaddr, uaddr, count);

	return (error != 0 ? -1 : 0);
}
940 
941 static int
942 watch_copyinstr(
943 	const char *uaddr,
944 	char *kaddr,
945 	size_t maxlength,
946 	size_t *lencopied)
947 {
948 	klwp_t *lwp = ttolwp(curthread);
949 	size_t resid;
950 	int error = 0;
951 	label_t ljb;
952 
953 	if ((resid = maxlength) == 0)
954 		return (ENAMETOOLONG);
955 
956 	while (resid && error == 0) {
957 		int watchcode;
958 		caddr_t vaddr;
959 		size_t part;
960 		size_t len;
961 		size_t size;
962 		int ta;
963 		int mapped;
964 
965 		if ((part = PAGESIZE -
966 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > resid)
967 			part = resid;
968 
969 		if (!pr_is_watchpage((caddr_t)uaddr, S_READ))
970 			watchcode = 0;
971 		else {
972 			vaddr = (caddr_t)uaddr;
973 			watchcode = pr_is_watchpoint(&vaddr, &ta,
974 			    part, &len, S_READ);
975 			if (watchcode) {
976 				if (ta == 0)
977 					part = vaddr - uaddr;
978 				else {
979 					len += vaddr - uaddr;
980 					if (part > len)
981 						part = len;
982 				}
983 			}
984 		}
985 
986 		/*
987 		 * Copy the initial part, up to a watched address, if any.
988 		 */
989 		if (part != 0) {
990 			mapped = pr_mappage((caddr_t)uaddr, part, S_READ, 1);
991 			if (on_fault(&ljb))
992 				error = EFAULT;
993 			else
994 				error = copyinstr_noerr(uaddr, kaddr, part,
995 				    &size);
996 			no_fault();
997 			if (mapped)
998 				pr_unmappage((caddr_t)uaddr, part, S_READ, 1);
999 			uaddr += size;
1000 			kaddr += size;
1001 			resid -= size;
1002 			if (error == ENAMETOOLONG && resid > 0)
1003 				error = 0;
1004 			if (error != 0 || (watchcode &&
1005 			    (uaddr < vaddr || kaddr[-1] == '\0')))
1006 				break;	/* didn't reach the watched area */
1007 		}
1008 
1009 		/*
1010 		 * If trapafter was specified, then copy through the
1011 		 * watched area before taking the watchpoint trap.
1012 		 */
1013 		while (resid && watchcode && ta && len > part && error == 0 &&
1014 		    size == part && kaddr[-1] != '\0') {
1015 			len -= part;
1016 			if ((part = PAGESIZE) > resid)
1017 				part = resid;
1018 			if (part > len)
1019 				part = len;
1020 			mapped = pr_mappage((caddr_t)uaddr, part, S_READ, 1);
1021 			if (on_fault(&ljb))
1022 				error = EFAULT;
1023 			else
1024 				error = copyinstr_noerr(uaddr, kaddr, part,
1025 				    &size);
1026 			no_fault();
1027 			if (mapped)
1028 				pr_unmappage((caddr_t)uaddr, part, S_READ, 1);
1029 			uaddr += size;
1030 			kaddr += size;
1031 			resid -= size;
1032 			if (error == ENAMETOOLONG && resid > 0)
1033 				error = 0;
1034 		}
1035 
1036 		/* if we hit a watched address, do the watchpoint logic */
1037 		if (watchcode &&
1038 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1039 		    lwp->lwp_sysabort)) {
1040 			lwp->lwp_sysabort = 0;
1041 			error = EFAULT;
1042 			break;
1043 		}
1044 
1045 		if (error == 0 && part != 0 &&
1046 		    (size < part || kaddr[-1] == '\0'))
1047 			break;
1048 	}
1049 
1050 	if (error != EFAULT && lencopied)
1051 		*lencopied = maxlength - resid;
1052 	return (error);
1053 }
1054 
1055 static int
1056 watch_copyoutstr(
1057 	const char *kaddr,
1058 	char *uaddr,
1059 	size_t maxlength,
1060 	size_t *lencopied)
1061 {
1062 	klwp_t *lwp = ttolwp(curthread);
1063 	size_t resid;
1064 	int error = 0;
1065 	label_t ljb;
1066 
1067 	if ((resid = maxlength) == 0)
1068 		return (ENAMETOOLONG);
1069 
1070 	while (resid && error == 0) {
1071 		int watchcode;
1072 		caddr_t vaddr;
1073 		size_t part;
1074 		size_t len;
1075 		size_t size;
1076 		int ta;
1077 		int mapped;
1078 
1079 		if ((part = PAGESIZE -
1080 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > resid)
1081 			part = resid;
1082 
1083 		if (!pr_is_watchpage(uaddr, S_WRITE)) {
1084 			watchcode = 0;
1085 		} else {
1086 			vaddr = uaddr;
1087 			watchcode = pr_is_watchpoint(&vaddr, &ta,
1088 			    part, &len, S_WRITE);
1089 			if (watchcode && ta == 0)
1090 				part = vaddr - uaddr;
1091 		}
1092 
1093 		/*
1094 		 * Copy the initial part, up to a watched address, if any.
1095 		 */
1096 		if (part != 0) {
1097 			mapped = pr_mappage(uaddr, part, S_WRITE, 1);
1098 			if (on_fault(&ljb))
1099 				error = EFAULT;
1100 			else
1101 				error = copyoutstr_noerr(kaddr, uaddr, part,
1102 				    &size);
1103 			no_fault();
1104 			if (mapped)
1105 				pr_unmappage(uaddr, part, S_WRITE, 1);
1106 			uaddr += size;
1107 			kaddr += size;
1108 			resid -= size;
1109 			if (error == ENAMETOOLONG && resid > 0)
1110 				error = 0;
1111 			if (error != 0 || (watchcode &&
1112 			    (uaddr < vaddr || kaddr[-1] == '\0')))
1113 				break;	/* didn't reach the watched area */
1114 		}
1115 
1116 		/*
1117 		 * If trapafter was specified, then copy through the
1118 		 * watched area before taking the watchpoint trap.
1119 		 */
1120 		while (resid && watchcode && ta && len > part && error == 0 &&
1121 		    size == part && kaddr[-1] != '\0') {
1122 			len -= part;
1123 			if ((part = PAGESIZE) > resid)
1124 				part = resid;
1125 			if (part > len)
1126 				part = len;
1127 			mapped = pr_mappage(uaddr, part, S_WRITE, 1);
1128 			if (on_fault(&ljb))
1129 				error = EFAULT;
1130 			else
1131 				error = copyoutstr_noerr(kaddr, uaddr, part,
1132 				    &size);
1133 			no_fault();
1134 			if (mapped)
1135 				pr_unmappage(uaddr, part, S_WRITE, 1);
1136 			uaddr += size;
1137 			kaddr += size;
1138 			resid -= size;
1139 			if (error == ENAMETOOLONG && resid > 0)
1140 				error = 0;
1141 		}
1142 
1143 		/* if we hit a watched address, do the watchpoint logic */
1144 		if (watchcode &&
1145 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1146 		    lwp->lwp_sysabort)) {
1147 			lwp->lwp_sysabort = 0;
1148 			error = EFAULT;
1149 			break;
1150 		}
1151 
1152 		if (error == 0 && part != 0 &&
1153 		    (size < part || kaddr[-1] == '\0'))
1154 			break;
1155 	}
1156 
1157 	if (error != EFAULT && lencopied)
1158 		*lencopied = maxlength - resid;
1159 	return (error);
1160 }
1161 
1162 typedef void (*fuword_func)(const void *, void *);
1163 
1164 /*
1165  * Generic form of watch_fuword8(), watch_fuword16(), etc.
1166  */
1167 static int
1168 watch_fuword(const void *addr, void *dst, fuword_func func, size_t size)
1169 {
1170 	klwp_t *lwp = ttolwp(curthread);
1171 	int watchcode;
1172 	caddr_t vaddr;
1173 	int mapped;
1174 	int rv = 0;
1175 	int ta;
1176 	label_t ljb;
1177 
1178 	for (;;) {
1179 
1180 		vaddr = (caddr_t)addr;
1181 		watchcode = pr_is_watchpoint(&vaddr, &ta, size, NULL, S_READ);
1182 		if (watchcode == 0 || ta != 0) {
1183 			mapped = pr_mappage((caddr_t)addr, size, S_READ, 1);
1184 			if (on_fault(&ljb))
1185 				rv = -1;
1186 			else
1187 				(*func)(addr, dst);
1188 			no_fault();
1189 			if (mapped)
1190 				pr_unmappage((caddr_t)addr, size, S_READ, 1);
1191 		}
1192 		if (watchcode &&
1193 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1194 		    lwp->lwp_sysabort)) {
1195 			lwp->lwp_sysabort = 0;
1196 			rv = -1;
1197 			break;
1198 		}
1199 		if (watchcode == 0 || ta != 0)
1200 			break;
1201 	}
1202 
1203 	return (rv);
1204 }
1205 
1206 static int
1207 watch_fuword8(const void *addr, uint8_t *dst)
1208 {
1209 	return (watch_fuword(addr, dst, (fuword_func)fuword8_noerr,
1210 	    sizeof (*dst)));
1211 }
1212 
1213 static int
1214 watch_fuword16(const void *addr, uint16_t *dst)
1215 {
1216 	return (watch_fuword(addr, dst, (fuword_func)fuword16_noerr,
1217 	    sizeof (*dst)));
1218 }
1219 
1220 static int
1221 watch_fuword32(const void *addr, uint32_t *dst)
1222 {
1223 	return (watch_fuword(addr, dst, (fuword_func)fuword32_noerr,
1224 	    sizeof (*dst)));
1225 }
1226 
1227 #ifdef _LP64
1228 static int
1229 watch_fuword64(const void *addr, uint64_t *dst)
1230 {
1231 	return (watch_fuword(addr, dst, (fuword_func)fuword64_noerr,
1232 	    sizeof (*dst)));
1233 }
1234 #endif
1235 
1236 
1237 static int
1238 watch_suword8(void *addr, uint8_t value)
1239 {
1240 	klwp_t *lwp = ttolwp(curthread);
1241 	int watchcode;
1242 	caddr_t vaddr;
1243 	int mapped;
1244 	int rv = 0;
1245 	int ta;
1246 	label_t ljb;
1247 
1248 	for (;;) {
1249 
1250 		vaddr = (caddr_t)addr;
1251 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1252 		    S_WRITE);
1253 		if (watchcode == 0 || ta != 0) {
1254 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1255 			    S_WRITE, 1);
1256 			if (on_fault(&ljb))
1257 				rv = -1;
1258 			else
1259 				suword8_noerr(addr, value);
1260 			no_fault();
1261 			if (mapped)
1262 				pr_unmappage((caddr_t)addr, sizeof (value),
1263 				    S_WRITE, 1);
1264 		}
1265 		if (watchcode &&
1266 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1267 		    lwp->lwp_sysabort)) {
1268 			lwp->lwp_sysabort = 0;
1269 			rv = -1;
1270 			break;
1271 		}
1272 		if (watchcode == 0 || ta != 0)
1273 			break;
1274 	}
1275 
1276 	return (rv);
1277 }
1278 
1279 static int
1280 watch_suword16(void *addr, uint16_t value)
1281 {
1282 	klwp_t *lwp = ttolwp(curthread);
1283 	int watchcode;
1284 	caddr_t vaddr;
1285 	int mapped;
1286 	int rv = 0;
1287 	int ta;
1288 	label_t ljb;
1289 
1290 	for (;;) {
1291 
1292 		vaddr = (caddr_t)addr;
1293 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1294 		    S_WRITE);
1295 		if (watchcode == 0 || ta != 0) {
1296 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1297 			    S_WRITE, 1);
1298 			if (on_fault(&ljb))
1299 				rv = -1;
1300 			else
1301 				suword16_noerr(addr, value);
1302 			no_fault();
1303 			if (mapped)
1304 				pr_unmappage((caddr_t)addr, sizeof (value),
1305 				    S_WRITE, 1);
1306 		}
1307 		if (watchcode &&
1308 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1309 		    lwp->lwp_sysabort)) {
1310 			lwp->lwp_sysabort = 0;
1311 			rv = -1;
1312 			break;
1313 		}
1314 		if (watchcode == 0 || ta != 0)
1315 			break;
1316 	}
1317 
1318 	return (rv);
1319 }
1320 
1321 static int
1322 watch_suword32(void *addr, uint32_t value)
1323 {
1324 	klwp_t *lwp = ttolwp(curthread);
1325 	int watchcode;
1326 	caddr_t vaddr;
1327 	int mapped;
1328 	int rv = 0;
1329 	int ta;
1330 	label_t ljb;
1331 
1332 	for (;;) {
1333 
1334 		vaddr = (caddr_t)addr;
1335 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1336 		    S_WRITE);
1337 		if (watchcode == 0 || ta != 0) {
1338 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1339 			    S_WRITE, 1);
1340 			if (on_fault(&ljb))
1341 				rv = -1;
1342 			else
1343 				suword32_noerr(addr, value);
1344 			no_fault();
1345 			if (mapped)
1346 				pr_unmappage((caddr_t)addr, sizeof (value),
1347 				    S_WRITE, 1);
1348 		}
1349 		if (watchcode &&
1350 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1351 		    lwp->lwp_sysabort)) {
1352 			lwp->lwp_sysabort = 0;
1353 			rv = -1;
1354 			break;
1355 		}
1356 		if (watchcode == 0 || ta != 0)
1357 			break;
1358 	}
1359 
1360 	return (rv);
1361 }
1362 
1363 #ifdef _LP64
1364 static int
1365 watch_suword64(void *addr, uint64_t value)
1366 {
1367 	klwp_t *lwp = ttolwp(curthread);
1368 	int watchcode;
1369 	caddr_t vaddr;
1370 	int mapped;
1371 	int rv = 0;
1372 	int ta;
1373 	label_t ljb;
1374 
1375 	for (;;) {
1376 
1377 		vaddr = (caddr_t)addr;
1378 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1379 		    S_WRITE);
1380 		if (watchcode == 0 || ta != 0) {
1381 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1382 			    S_WRITE, 1);
1383 			if (on_fault(&ljb))
1384 				rv = -1;
1385 			else
1386 				suword64_noerr(addr, value);
1387 			no_fault();
1388 			if (mapped)
1389 				pr_unmappage((caddr_t)addr, sizeof (value),
1390 				    S_WRITE, 1);
1391 		}
1392 		if (watchcode &&
1393 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1394 		    lwp->lwp_sysabort)) {
1395 			lwp->lwp_sysabort = 0;
1396 			rv = -1;
1397 			break;
1398 		}
1399 		if (watchcode == 0 || ta != 0)
1400 			break;
1401 	}
1402 
1403 	return (rv);
1404 }
1405 #endif /* _LP64 */
1406 
1407 /*
1408  * Check for watched addresses in the given address space.
1409  * Return 1 if this is true, otherwise 0.
1410  */
1411 static int
1412 pr_is_watched(caddr_t base, size_t len, int rw)
1413 {
1414 	caddr_t saddr = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
1415 	caddr_t eaddr = base + len;
1416 	caddr_t paddr;
1417 
1418 	for (paddr = saddr; paddr < eaddr; paddr += PAGESIZE) {
1419 		if (pr_is_watchpage(paddr, rw))
1420 			return (1);
1421 	}
1422 
1423 	return (0);
1424 }
1425 
1426 /*
1427  * Wrapper for the physio() function.
1428  * Splits one uio operation with multiple iovecs into uio operations with
1429  * only one iovecs to do the watchpoint handling separately for each iovecs.
1430  */
1431 static int
1432 watch_physio(int (*strat)(struct buf *), struct buf *bp, dev_t dev,
1433     int rw, void (*mincnt)(struct buf *), struct uio *uio)
1434 {
1435 	struct uio auio;
1436 	struct iovec *iov;
1437 	caddr_t  base;
1438 	size_t len;
1439 	int seg_rw;
1440 	int error = 0;
1441 
1442 	if (uio->uio_segflg == UIO_SYSSPACE)
1443 		return (default_physio(strat, bp, dev, rw, mincnt, uio));
1444 
1445 	seg_rw = (rw == B_READ) ? S_WRITE : S_READ;
1446 
1447 	while (uio->uio_iovcnt > 0) {
1448 		if (uio->uio_resid == 0) {
1449 			/*
1450 			 * Make sure to return the uio structure with the
1451 			 * same values as default_physio() does.
1452 			 */
1453 			uio->uio_iov++;
1454 			uio->uio_iovcnt--;
1455 			continue;
1456 		}
1457 
1458 		iov = uio->uio_iov;
1459 		len = MIN(iov->iov_len, uio->uio_resid);
1460 
1461 		auio.uio_iovcnt = 1;
1462 		auio.uio_iov = iov;
1463 		auio.uio_resid = len;
1464 		auio.uio_loffset = uio->uio_loffset;
1465 		auio.uio_llimit = uio->uio_llimit;
1466 		auio.uio_fmode = uio->uio_fmode;
1467 		auio.uio_extflg = uio->uio_extflg;
1468 		auio.uio_segflg = uio->uio_segflg;
1469 
1470 		base = iov->iov_base;
1471 
1472 		if (!pr_is_watched(base, len, seg_rw)) {
1473 			/*
1474 			 * The given memory references don't cover a
1475 			 * watched page.
1476 			 */
1477 			error = default_physio(strat, bp, dev, rw, mincnt,
1478 			    &auio);
1479 
1480 			/* Update uio with values from auio. */
1481 			len -= auio.uio_resid;
1482 			uio->uio_resid -= len;
1483 			uio->uio_loffset += len;
1484 
1485 			/*
1486 			 * Return if an error occurred or not all data
1487 			 * was copied.
1488 			 */
1489 			if (auio.uio_resid || error)
1490 				break;
1491 			uio->uio_iov++;
1492 			uio->uio_iovcnt--;
1493 		} else {
1494 			int mapped, watchcode, ta;
1495 			caddr_t vaddr = base;
1496 			klwp_t *lwp = ttolwp(curthread);
1497 
1498 			watchcode = pr_is_watchpoint(&vaddr, &ta, len,
1499 			    NULL, seg_rw);
1500 
1501 			if (watchcode == 0 || ta != 0) {
1502 				/*
1503 				 * Do the io if the given memory references
1504 				 * don't cover a watched area (watchcode=0)
1505 				 * or if WA_TRAPAFTER was specified.
1506 				 */
1507 				mapped = pr_mappage(base, len, seg_rw, 1);
1508 				error = default_physio(strat, bp, dev, rw,
1509 				    mincnt, &auio);
1510 				if (mapped)
1511 					pr_unmappage(base, len, seg_rw, 1);
1512 
1513 				len -= auio.uio_resid;
1514 				uio->uio_resid -= len;
1515 				uio->uio_loffset += len;
1516 			}
1517 
1518 			/*
1519 			 * If we hit a watched address, do the watchpoint logic.
1520 			 */
1521 			if (watchcode &&
1522 			    (!sys_watchpoint(vaddr, watchcode, ta) ||
1523 			    lwp->lwp_sysabort)) {
1524 				lwp->lwp_sysabort = 0;
1525 				return (EFAULT);
1526 			}
1527 
1528 			/*
1529 			 * Check for errors from default_physio().
1530 			 */
1531 			if (watchcode == 0 || ta != 0) {
1532 				if (auio.uio_resid || error)
1533 					break;
1534 				uio->uio_iov++;
1535 				uio->uio_iovcnt--;
1536 			}
1537 		}
1538 	}
1539 
1540 	return (error);
1541 }
1542 
1543 int
1544 wa_compare(const void *a, const void *b)
1545 {
1546 	const watched_area_t *pa = a;
1547 	const watched_area_t *pb = b;
1548 
1549 	if (pa->wa_vaddr < pb->wa_vaddr)
1550 		return (-1);
1551 	else if (pa->wa_vaddr > pb->wa_vaddr)
1552 		return (1);
1553 	else
1554 		return (0);
1555 }
1556 
1557 int
1558 wp_compare(const void *a, const void *b)
1559 {
1560 	const watched_page_t *pa = a;
1561 	const watched_page_t *pb = b;
1562 
1563 	if (pa->wp_vaddr < pb->wp_vaddr)
1564 		return (-1);
1565 	else if (pa->wp_vaddr > pb->wp_vaddr)
1566 		return (1);
1567 	else
1568 		return (0);
1569 }
1570 
1571 /*
1572  * Given an address range, finds the first watched area which overlaps some or
1573  * all of the range.
1574  */
1575 watched_area_t *
1576 pr_find_watched_area(proc_t *p, watched_area_t *pwa, avl_index_t *where)
1577 {
1578 	caddr_t vaddr = pwa->wa_vaddr;
1579 	caddr_t eaddr = pwa->wa_eaddr;
1580 	watched_area_t *wap;
1581 	avl_index_t real_where;
1582 
1583 	/* First, check if there is an exact match.  */
1584 	wap = avl_find(&p->p_warea, pwa, &real_where);
1585 
1586 
1587 	/* Check to see if we overlap with the previous area.  */
1588 	if (wap == NULL) {
1589 		wap = avl_nearest(&p->p_warea, real_where, AVL_BEFORE);
1590 		if (wap != NULL &&
1591 		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
1592 			wap = NULL;
1593 	}
1594 
1595 	/* Try the next area.  */
1596 	if (wap == NULL) {
1597 		wap = avl_nearest(&p->p_warea, real_where, AVL_AFTER);
1598 		if (wap != NULL &&
1599 		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
1600 			wap = NULL;
1601 	}
1602 
1603 	if (where)
1604 		*where = real_where;
1605 
1606 	return (wap);
1607 }
1608 
1609 void
1610 watch_enable(kthread_id_t t)
1611 {
1612 	t->t_proc_flag |= TP_WATCHPT;
1613 	install_copyops(t, &watch_copyops);
1614 }
1615 
1616 void
1617 watch_disable(kthread_id_t t)
1618 {
1619 	t->t_proc_flag &= ~TP_WATCHPT;
1620 	remove_copyops(t);
1621 }
1622 
1623 int
1624 copyin_nowatch(const void *uaddr, void *kaddr, size_t len)
1625 {
1626 	int watched, ret;
1627 
1628 	watched = watch_disable_addr(uaddr, len, S_READ);
1629 	ret = copyin(uaddr, kaddr, len);
1630 	if (watched)
1631 		watch_enable_addr(uaddr, len, S_READ);
1632 
1633 	return (ret);
1634 }
1635 
1636 int
1637 copyout_nowatch(const void *kaddr, void *uaddr, size_t len)
1638 {
1639 	int watched, ret;
1640 
1641 	watched = watch_disable_addr(uaddr, len, S_WRITE);
1642 	ret = copyout(kaddr, uaddr, len);
1643 	if (watched)
1644 		watch_enable_addr(uaddr, len, S_WRITE);
1645 
1646 	return (ret);
1647 }
1648 
1649 #ifdef _LP64
1650 int
1651 fuword64_nowatch(const void *addr, uint64_t *value)
1652 {
1653 	int watched, ret;
1654 
1655 	watched = watch_disable_addr(addr, sizeof (*value), S_READ);
1656 	ret = fuword64(addr, value);
1657 	if (watched)
1658 		watch_enable_addr(addr, sizeof (*value), S_READ);
1659 
1660 	return (ret);
1661 }
1662 #endif
1663 
1664 int
1665 fuword32_nowatch(const void *addr, uint32_t *value)
1666 {
1667 	int watched, ret;
1668 
1669 	watched = watch_disable_addr(addr, sizeof (*value), S_READ);
1670 	ret = fuword32(addr, value);
1671 	if (watched)
1672 		watch_enable_addr(addr, sizeof (*value), S_READ);
1673 
1674 	return (ret);
1675 }
1676 
1677 #ifdef _LP64
1678 int
1679 suword64_nowatch(void *addr, uint64_t value)
1680 {
1681 	int watched, ret;
1682 
1683 	watched = watch_disable_addr(addr, sizeof (value), S_WRITE);
1684 	ret = suword64(addr, value);
1685 	if (watched)
1686 		watch_enable_addr(addr, sizeof (value), S_WRITE);
1687 
1688 	return (ret);
1689 }
1690 #endif
1691 
1692 int
1693 suword32_nowatch(void *addr, uint32_t value)
1694 {
1695 	int watched, ret;
1696 
1697 	watched = watch_disable_addr(addr, sizeof (value), S_WRITE);
1698 	ret = suword32(addr, value);
1699 	if (watched)
1700 		watch_enable_addr(addr, sizeof (value), S_WRITE);
1701 
1702 	return (ret);
1703 }
1704 
1705 int
1706 watch_disable_addr(const void *addr, size_t len, enum seg_rw rw)
1707 {
1708 	if (pr_watch_active(curproc))
1709 		return (pr_mappage((caddr_t)addr, len, rw, 1));
1710 	return (0);
1711 }
1712 
1713 void
1714 watch_enable_addr(const void *addr, size_t len, enum seg_rw rw)
1715 {
1716 	if (pr_watch_active(curproc))
1717 		pr_unmappage((caddr_t)addr, len, rw, 1);
1718 }
1719