xref: /titanic_52/usr/src/uts/common/os/watchpoint.c (revision 9512fe850e98fdd448c638ca63fdd92a8a510255)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/t_lock.h>
31 #include <sys/param.h>
32 #include <sys/cred.h>
33 #include <sys/debug.h>
34 #include <sys/inline.h>
35 #include <sys/kmem.h>
36 #include <sys/proc.h>
37 #include <sys/regset.h>
38 #include <sys/sysmacros.h>
39 #include <sys/systm.h>
40 #include <sys/prsystm.h>
41 #include <sys/buf.h>
42 #include <sys/signal.h>
43 #include <sys/user.h>
44 #include <sys/cpuvar.h>
45 
46 #include <sys/fault.h>
47 #include <sys/syscall.h>
48 #include <sys/procfs.h>
49 #include <sys/cmn_err.h>
50 #include <sys/stack.h>
51 #include <sys/watchpoint.h>
52 #include <sys/copyops.h>
53 #include <sys/schedctl.h>
54 
55 #include <sys/mman.h>
56 #include <vm/as.h>
57 #include <vm/seg.h>
58 
59 /*
60  * Copy ops vector for watchpoints.
61  */
/*
 * Forward declarations of the watchpoint-aware copy routines.
 * Each of these is installed in the watch_copyops vector.
 */
static int	watch_copyin(const void *, void *, size_t);
static int	watch_xcopyin(const void *, void *, size_t);
static int	watch_copyout(const void *, void *, size_t);
static int	watch_xcopyout(const void *, void *, size_t);
static int	watch_copyinstr(const char *, char *, size_t, size_t *);
static int	watch_copyoutstr(const char *, char *, size_t, size_t *);
static int	watch_fuword8(const void *, uint8_t *);
static int	watch_fuword16(const void *, uint16_t *);
static int	watch_fuword32(const void *, uint32_t *);
static int	watch_suword8(void *, uint8_t);
static int	watch_suword16(void *, uint16_t);
static int	watch_suword32(void *, uint32_t);
static int	watch_physio(int (*)(struct buf *), struct buf *,
    dev_t, int, void (*)(struct buf *), struct uio *);
#ifdef _LP64
/* The 64-bit word accessors are only declared when _LP64 is defined. */
static int	watch_fuword64(const void *, uint64_t *);
static int	watch_suword64(void *, uint64_t);
#endif
80 
/*
 * The initializers below are positional, so their order must match the
 * member order of struct copyops (declared outside this file).
 */
struct copyops watch_copyops = {
	watch_copyin,
	watch_xcopyin,
	watch_copyout,
	watch_xcopyout,
	watch_copyinstr,
	watch_copyoutstr,
	watch_fuword8,
	watch_fuword16,
	watch_fuword32,
#ifdef _LP64
	watch_fuword64,
#else
	NULL,		/* no 64-bit fetch routine without _LP64 */
#endif
	watch_suword8,
	watch_suword16,
	watch_suword32,
#ifdef _LP64
	watch_suword64,
#else
	NULL,		/* no 64-bit store routine without _LP64 */
#endif
	watch_physio
};
106 
107 /*
108  * Map the 'rw' argument to a protection flag.
109  */
110 static int
111 rw_to_prot(enum seg_rw rw)
112 {
113 	switch (rw) {
114 	case S_EXEC:
115 		return (PROT_EXEC);
116 	case S_READ:
117 		return (PROT_READ);
118 	case S_WRITE:
119 		return (PROT_WRITE);
120 	default:
121 		return (PROT_NONE);	/* can't happen */
122 	}
123 }
124 
125 /*
126  * Map the 'rw' argument to an index into an array of exec/write/read things.
127  * The index follows the precedence order:  exec .. write .. read
128  */
129 static int
130 rw_to_index(enum seg_rw rw)
131 {
132 	switch (rw) {
133 	default:	/* default case "can't happen" */
134 	case S_EXEC:
135 		return (0);
136 	case S_WRITE:
137 		return (1);
138 	case S_READ:
139 		return (2);
140 	}
141 }
142 
/*
 * Map an index back to a seg_rw.
 *
 * Entries 0..2 are the inverse of rw_to_index().  Entry 3 is a second
 * S_READ slot: do_watch_step() steps to the slot after the S_READ one
 * when a second read trap hits a different address, and
 * undo_watch_step() walks all four slots using this table.
 */
static enum seg_rw S_rw[4] = {
	S_EXEC,
	S_WRITE,
	S_READ,
	S_READ,
};
152 
/*
 * Slot numbers for the three-element exec/write/read counter arrays
 * (the same numbering rw_to_index() produces).
 */
#define	X	0
#define	W	1
#define	R	2
/*
 * Total of the three counters in 'a'.  The argument is parenthesized so
 * that any pointer expression (e.g. "base + 1") expands correctly.
 */
#define	sum(a)	((a)[X] + (a)[W] + (a)[R])
157 
/*
 * Common code for pr_mappage() and pr_unmappage().
 *
 * Walks every watched page of the current process that overlaps
 * [addr, addr + size) and adjusts its temporary-mapping counts.
 *
 *	addr, size	the address range of interest
 *	mapin		non-zero: take a reference (pr_mappage());
 *			zero: drop a reference (pr_unmappage())
 *	rw		the access type being granted or withdrawn
 *	kernel		non-zero: account in wp_kmap[]; zero: in wp_umap[]
 *
 * Returns the number of watched pages whose counts were adjusted
 * (zero if the address space has no watched pages at all).
 */
static int
pr_do_mappage(caddr_t addr, size_t size, int mapin, enum seg_rw rw, int kernel)
{
	proc_t *p = curproc;
	struct as *as = p->p_as;
	char *eaddr = addr + size;
	int prot_rw = rw_to_prot(rw);
	int xrw = rw_to_index(rw);
	int rv = 0;
	struct watched_page *pwp;
	struct watched_page tpw;
	avl_index_t where;
	uint_t prot;

	ASSERT(as != &kas);

startover:
	ASSERT(rv == 0);
	if (avl_numnodes(&as->a_wpage) == 0)
		return (0);

	/*
	 * as->a_wpage can only be changed while the process is totally stopped.
	 * Don't grab p_lock here.  Holding p_lock while grabbing the address
	 * space lock leads to deadlocks with the clock thread.  Note that if an
	 * as_fault() is servicing a fault to a watched page on behalf of an
	 * XHAT provider, watchpoint will be temporarily cleared (and wp_prot
	 * will be set to wp_oprot).  Since this is done while holding as writer
	 * lock, we need to grab as lock (reader lock is good enough).
	 *
	 * p_maplock prevents simultaneous execution of this function.  Under
	 * normal circumstances, holdwatch() will stop all other threads, so the
	 * lock isn't really needed.  But there may be multiple threads within
	 * stop() when SWATCHOK is set, so we need to handle multiple threads
	 * at once.  See holdwatch() for the details of this dance.
	 */

	mutex_enter(&p->p_maplock);
	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);

	/*
	 * Start with the watched page containing addr; if that page is not
	 * watched, start with the nearest watched page after it.
	 */
	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
		pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);

	for (; pwp != NULL && pwp->wp_vaddr < eaddr;
		pwp = AVL_NEXT(&as->a_wpage, pwp)) {

		/*
		 * If the requested protection has not been
		 * removed, we need not remap this page.
		 */
		prot = pwp->wp_prot;
		if (kernel || (prot & PROT_USER))
			if (prot & prot_rw)
				continue;
		/*
		 * If the requested access does not exist in the page's
		 * original protections, we need not remap this page.
		 * If the page does not exist yet, we can't test it.
		 */
		if ((prot = pwp->wp_oprot) != 0) {
			if (!(kernel || (prot & PROT_USER)))
				continue;
			if (!(prot & prot_rw))
				continue;
		}

		if (mapin) {
			/*
			 * Before mapping the page in, ensure that
			 * all other lwps are held in the kernel.
			 */
			if (p->p_mapcnt == 0) {
				/*
				 * Release as lock while in holdwatch()
				 * in case other threads need to grab it.
				 */
				AS_LOCK_EXIT(as, &as->a_lock);
				mutex_exit(&p->p_maplock);
				if (holdwatch() != 0) {
					/*
					 * We stopped in holdwatch().
					 * Start all over again because the
					 * watched page list may have changed.
					 */
					goto startover;
				}
				mutex_enter(&p->p_maplock);
				AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			}
			p->p_mapcnt++;
		}

		/* From here on this page counts toward the return value. */
		addr = pwp->wp_vaddr;
		rv++;

		/*
		 * Start from the watched protections and add back whatever
		 * access the outstanding map counts still require.
		 */
		prot = pwp->wp_prot;
		if (mapin) {
			if (kernel)
				pwp->wp_kmap[xrw]++;
			else
				pwp->wp_umap[xrw]++;
			pwp->wp_flags |= WP_NOWATCH;
			if (pwp->wp_kmap[X] + pwp->wp_umap[X])
				/* cannot have exec-only protection */
				prot |= PROT_READ|PROT_EXEC;
			if (pwp->wp_kmap[R] + pwp->wp_umap[R])
				prot |= PROT_READ;
			if (pwp->wp_kmap[W] + pwp->wp_umap[W])
				/* cannot have write-only protection */
				prot |= PROT_READ|PROT_WRITE;
#if 0	/* damned broken mmu feature! */
			if (sum(pwp->wp_umap) == 0)
				prot &= ~PROT_USER;
#endif
		} else {
			ASSERT(pwp->wp_flags & WP_NOWATCH);
			if (kernel) {
				ASSERT(pwp->wp_kmap[xrw] != 0);
				--pwp->wp_kmap[xrw];
			} else {
				ASSERT(pwp->wp_umap[xrw] != 0);
				--pwp->wp_umap[xrw];
			}
			/*
			 * Once no references of any kind remain the page is
			 * watched again and prot stays at wp_prot.
			 */
			if (sum(pwp->wp_kmap) + sum(pwp->wp_umap) == 0)
				pwp->wp_flags &= ~WP_NOWATCH;
			else {
				if (pwp->wp_kmap[X] + pwp->wp_umap[X])
					/* cannot have exec-only protection */
					prot |= PROT_READ|PROT_EXEC;
				if (pwp->wp_kmap[R] + pwp->wp_umap[R])
					prot |= PROT_READ;
				if (pwp->wp_kmap[W] + pwp->wp_umap[W])
					/* cannot have write-only protection */
					prot |= PROT_READ|PROT_WRITE;
#if 0	/* damned broken mmu feature! */
				if (sum(pwp->wp_umap) == 0)
					prot &= ~PROT_USER;
#endif
			}
		}


		if (pwp->wp_oprot != 0) {	/* if page exists */
			struct seg *seg;
			uint_t oprot;
			int err, retrycnt = 0;

			/*
			 * Trade the reader lock for the writer lock before
			 * changing the segment's protections.
			 */
			AS_LOCK_EXIT(as, &as->a_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
		retry:
			seg = as_segat(as, addr);
			ASSERT(seg != NULL);
			SEGOP_GETPROT(seg, addr, 0, &oprot);
			if (prot != oprot) {
				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
				/*
				 * IE_RETRY asks us to look the segment up
				 * again; the ASSERT documents that this is
				 * expected to happen at most once.
				 */
				if (err == IE_RETRY) {
					ASSERT(retrycnt == 0);
					retrycnt++;
					goto retry;
				}
			}
			AS_LOCK_EXIT(as, &as->a_lock);
		} else
			AS_LOCK_EXIT(as, &as->a_lock);

		/*
		 * When all pages are mapped back to their normal state,
		 * continue the other lwps.
		 */
		if (!mapin) {
			ASSERT(p->p_mapcnt > 0);
			p->p_mapcnt--;
			if (p->p_mapcnt == 0) {
				/* p_maplock is dropped while p_lock is held */
				mutex_exit(&p->p_maplock);
				mutex_enter(&p->p_lock);
				continuelwps(p);
				mutex_exit(&p->p_lock);
				mutex_enter(&p->p_maplock);
			}
		}

		/* Re-take the reader lock before advancing to the next page. */
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	}

	AS_LOCK_EXIT(as, &as->a_lock);
	mutex_exit(&p->p_maplock);

	return (rv);
}
351 
352 /*
353  * Restore the original page protections on an address range.
354  * If 'kernel' is non-zero, just do it for the kernel.
355  * pr_mappage() returns non-zero if it actually changed anything.
356  *
357  * pr_mappage() and pr_unmappage() must be executed in matched pairs,
358  * but pairs may be nested within other pairs.  The reference counts
359  * sort it all out.  See pr_do_mappage(), above.
360  */
361 static int
362 pr_mappage(const caddr_t addr, size_t size, enum seg_rw rw, int kernel)
363 {
364 	return (pr_do_mappage(addr, size, 1, rw, kernel));
365 }
366 
367 /*
368  * Set the modified page protections on a watched page.
369  * Inverse of pr_mappage().
370  * Needs to be called only if pr_mappage() returned non-zero.
371  */
372 static void
373 pr_unmappage(const caddr_t addr, size_t size, enum seg_rw rw, int kernel)
374 {
375 	(void) pr_do_mappage(addr, size, 0, rw, kernel);
376 }
377 
378 /*
379  * Function called by an lwp after it resumes from stop().
380  */
381 void
382 setallwatch(void)
383 {
384 	proc_t *p = curproc;
385 	struct as *as = curproc->p_as;
386 	struct watched_page *pwp, *next;
387 	struct seg *seg;
388 	caddr_t vaddr;
389 	uint_t prot;
390 	int err, retrycnt;
391 
392 	if (p->p_wprot == NULL)
393 		return;
394 
395 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
396 
397 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
398 
399 	pwp = p->p_wprot;
400 	while (pwp != NULL) {
401 
402 		vaddr = pwp->wp_vaddr;
403 		retrycnt = 0;
404 	retry:
405 		ASSERT(pwp->wp_flags & WP_SETPROT);
406 		if ((seg = as_segat(as, vaddr)) != NULL &&
407 		    !(pwp->wp_flags & WP_NOWATCH)) {
408 			prot = pwp->wp_prot;
409 			err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
410 			if (err == IE_RETRY) {
411 				ASSERT(retrycnt == 0);
412 				retrycnt++;
413 				goto retry;
414 			}
415 		}
416 
417 		next = pwp->wp_list;
418 
419 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec == 0) {
420 			/*
421 			 * No watched areas remain in this page.
422 			 * Free the watched_page structure.
423 			 */
424 			avl_remove(&as->a_wpage, pwp);
425 			kmem_free(pwp, sizeof (struct watched_page));
426 		} else {
427 			pwp->wp_flags &= ~WP_SETPROT;
428 		}
429 
430 		pwp = next;
431 	}
432 	p->p_wprot = NULL;
433 
434 	AS_LOCK_EXIT(as, &as->a_lock);
435 }
436 
437 
438 
439 /* Must be called with as lock held */
440 int
441 pr_is_watchpage_as(caddr_t addr, enum seg_rw rw, struct as *as)
442 {
443 	register struct watched_page *pwp;
444 	struct watched_page tpw;
445 	uint_t prot;
446 	int rv = 0;
447 
448 	switch (rw) {
449 	case S_READ:
450 	case S_WRITE:
451 	case S_EXEC:
452 		break;
453 	default:
454 		return (0);
455 	}
456 
457 	/*
458 	 * as->a_wpage can only be modified while the process is totally
459 	 * stopped.  We need, and should use, no locks here.
460 	 */
461 	if (as != &kas && avl_numnodes(&as->a_wpage) != 0) {
462 		tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
463 		pwp = avl_find(&as->a_wpage, &tpw, NULL);
464 		if (pwp != NULL) {
465 			ASSERT(addr >= pwp->wp_vaddr &&
466 			    addr < pwp->wp_vaddr + PAGESIZE);
467 			if (pwp->wp_oprot != 0) {
468 				prot = pwp->wp_prot;
469 				switch (rw) {
470 				case S_READ:
471 					rv = ((prot & (PROT_USER|PROT_READ))
472 						!= (PROT_USER|PROT_READ));
473 					break;
474 				case S_WRITE:
475 					rv = ((prot & (PROT_USER|PROT_WRITE))
476 						!= (PROT_USER|PROT_WRITE));
477 					break;
478 				case S_EXEC:
479 					rv = ((prot & (PROT_USER|PROT_EXEC))
480 						!= (PROT_USER|PROT_EXEC));
481 					break;
482 				default:
483 					/* can't happen! */
484 					break;
485 				}
486 			}
487 		}
488 	}
489 
490 	return (rv);
491 }
492 
493 
494 /*
495  * trap() calls here to determine if a fault is in a watched page.
496  * We return nonzero if this is true and the load/store would fail.
497  */
498 int
499 pr_is_watchpage(caddr_t addr, enum seg_rw rw)
500 {
501 	struct as *as = curproc->p_as;
502 	int rv;
503 
504 	if ((as == &kas) || avl_numnodes(&as->a_wpage) == 0)
505 		return (0);
506 
507 	/* Grab the lock because of XHAT (see comment in pr_mappage()) */
508 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
509 	rv = pr_is_watchpage_as(addr, rw, as);
510 	AS_LOCK_EXIT(as, &as->a_lock);
511 
512 	return (rv);
513 }
514 
515 
516 
517 /*
518  * trap() calls here to determine if a fault is a watchpoint.
519  */
520 int
521 pr_is_watchpoint(caddr_t *paddr, int *pta, size_t size, size_t *plen,
522 	enum seg_rw rw)
523 {
524 	proc_t *p = curproc;
525 	caddr_t addr = *paddr;
526 	caddr_t eaddr = addr + size;
527 	register struct watched_area *pwa;
528 	struct watched_area twa;
529 	int rv = 0;
530 	int ta = 0;
531 	size_t len = 0;
532 
533 	switch (rw) {
534 	case S_READ:
535 	case S_WRITE:
536 	case S_EXEC:
537 		break;
538 	default:
539 		*pta = 0;
540 		return (0);
541 	}
542 
543 	/*
544 	 * p->p_warea is protected by p->p_lock.
545 	 */
546 	mutex_enter(&p->p_lock);
547 
548 	/* BEGIN CSTYLED */
549 	/*
550 	 * This loop is somewhat complicated because the fault region can span
551 	 * multiple watched areas.  For example:
552 	 *
553 	 *            addr              eaddr
554 	 * 		+-----------------+
555 	 * 		| fault region    |
556 	 * 	+-------+--------+----+---+------------+
557 	 *      | prot not right |    | prot correct   |
558 	 *      +----------------+    +----------------+
559 	 *    wa_vaddr	      wa_eaddr
560 	 *    		      wa_vaddr		wa_eaddr
561 	 *
562 	 * We start at the area greater than or equal to the starting address.
563 	 * As long as some portion of the fault region overlaps the current
564 	 * area, we continue checking permissions until we find an appropriate
565 	 * match.
566 	 */
567 	/* END CSTYLED */
568 	twa.wa_vaddr = addr;
569 	twa.wa_eaddr = eaddr;
570 
571 	for (pwa = pr_find_watched_area(p, &twa, NULL);
572 	    pwa != NULL && eaddr > pwa->wa_vaddr && addr < pwa->wa_eaddr;
573 	    pwa = AVL_NEXT(&p->p_warea, pwa)) {
574 
575 		switch (rw) {
576 		case S_READ:
577 			if (pwa->wa_flags & WA_READ)
578 				rv = TRAP_RWATCH;
579 			break;
580 		case S_WRITE:
581 			if (pwa->wa_flags & WA_WRITE)
582 				rv = TRAP_WWATCH;
583 			break;
584 		case S_EXEC:
585 			if (pwa->wa_flags & WA_EXEC)
586 				rv = TRAP_XWATCH;
587 			break;
588 		default:
589 			/* can't happen */
590 			break;
591 		}
592 
593 		/*
594 		 * If protections didn't match, check the next watched
595 		 * area
596 		 */
597 		if (rv != 0) {
598 			if (addr < pwa->wa_vaddr)
599 				addr = pwa->wa_vaddr;
600 			len = pwa->wa_eaddr - addr;
601 			if (pwa->wa_flags & WA_TRAPAFTER)
602 				ta = 1;
603 			break;
604 		}
605 	}
606 
607 	mutex_exit(&p->p_lock);
608 
609 	*paddr = addr;
610 	*pta = ta;
611 	if (plen != NULL)
612 		*plen = len;
613 	return (rv);
614 }
615 
616 /*
617  * Set up to perform a single-step at user level for the
618  * case of a trapafter watchpoint.  Called from trap().
619  */
620 void
621 do_watch_step(caddr_t vaddr, size_t sz, enum seg_rw rw,
622 	int watchcode, greg_t pc)
623 {
624 	register klwp_t *lwp = ttolwp(curthread);
625 	struct lwp_watch *pw = &lwp->lwp_watch[rw_to_index(rw)];
626 
627 	/*
628 	 * Check to see if we are already performing this special
629 	 * watchpoint single-step.  We must not do pr_mappage() twice.
630 	 */
631 
632 	/* special check for two read traps on the same instruction */
633 	if (rw == S_READ && pw->wpaddr != NULL &&
634 	    !(pw->wpaddr <= vaddr && vaddr < pw->wpaddr + pw->wpsize)) {
635 		ASSERT(lwp->lwp_watchtrap != 0);
636 		pw++;	/* use the extra S_READ struct */
637 	}
638 
639 	if (pw->wpaddr != NULL) {
640 		ASSERT(lwp->lwp_watchtrap != 0);
641 		ASSERT(pw->wpaddr <= vaddr && vaddr < pw->wpaddr + pw->wpsize);
642 		if (pw->wpcode == 0) {
643 			pw->wpcode = watchcode;
644 			pw->wppc = pc;
645 		}
646 	} else {
647 		int mapped = pr_mappage(vaddr, sz, rw, 0);
648 		prstep(lwp, 1);
649 		lwp->lwp_watchtrap = 1;
650 		pw->wpaddr = vaddr;
651 		pw->wpsize = sz;
652 		pw->wpcode = watchcode;
653 		pw->wpmapped = mapped;
654 		pw->wppc = pc;
655 	}
656 }
657 
658 /*
659  * Undo the effects of do_watch_step().
660  * Called from trap() after the single-step is finished.
661  * Also called from issig_forreal() and stop() with a NULL
662  * argument to avoid having these things set more than once.
663  */
664 int
665 undo_watch_step(k_siginfo_t *sip)
666 {
667 	register klwp_t *lwp = ttolwp(curthread);
668 	int fault = 0;
669 
670 	if (lwp->lwp_watchtrap) {
671 		struct lwp_watch *pw = lwp->lwp_watch;
672 		int i;
673 
674 		for (i = 0; i < 4; i++, pw++) {
675 			if (pw->wpaddr == NULL)
676 				continue;
677 			if (pw->wpmapped)
678 				pr_unmappage(pw->wpaddr, pw->wpsize, S_rw[i],
679 				    0);
680 			if (pw->wpcode != 0) {
681 				if (sip != NULL) {
682 					sip->si_signo = SIGTRAP;
683 					sip->si_code = pw->wpcode;
684 					sip->si_addr = pw->wpaddr;
685 					sip->si_trapafter = 1;
686 					sip->si_pc = (caddr_t)pw->wppc;
687 				}
688 				fault = FLTWATCH;
689 				pw->wpcode = 0;
690 			}
691 			pw->wpaddr = NULL;
692 			pw->wpsize = 0;
693 			pw->wpmapped = 0;
694 		}
695 		lwp->lwp_watchtrap = 0;
696 	}
697 
698 	return (fault);
699 }
700 
701 /*
702  * Handle a watchpoint that occurs while doing copyin()
703  * or copyout() in a system call.
704  * Return non-zero if the fault or signal is cleared
705  * by a debugger while the lwp is stopped.
706  */
707 static int
708 sys_watchpoint(caddr_t addr, int watchcode, int ta)
709 {
710 	extern greg_t getuserpc(void);	/* XXX header file */
711 	k_sigset_t smask;
712 	register proc_t *p = ttoproc(curthread);
713 	register klwp_t *lwp = ttolwp(curthread);
714 	register sigqueue_t *sqp;
715 	int rval;
716 
717 	/* assert no locks are held */
718 	/* ASSERT(curthread->t_nlocks == 0); */
719 
720 	sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
721 	sqp->sq_info.si_signo = SIGTRAP;
722 	sqp->sq_info.si_code = watchcode;
723 	sqp->sq_info.si_addr = addr;
724 	sqp->sq_info.si_trapafter = ta;
725 	sqp->sq_info.si_pc = (caddr_t)getuserpc();
726 
727 	mutex_enter(&p->p_lock);
728 
729 	/* this will be tested and cleared by the caller */
730 	lwp->lwp_sysabort = 0;
731 
732 	if (prismember(&p->p_fltmask, FLTWATCH)) {
733 		lwp->lwp_curflt = (uchar_t)FLTWATCH;
734 		lwp->lwp_siginfo = sqp->sq_info;
735 		stop(PR_FAULTED, FLTWATCH);
736 		if (lwp->lwp_curflt == 0) {
737 			mutex_exit(&p->p_lock);
738 			kmem_free(sqp, sizeof (sigqueue_t));
739 			return (1);
740 		}
741 		lwp->lwp_curflt = 0;
742 	}
743 
744 	/*
745 	 * post the SIGTRAP signal.
746 	 * Block all other signals so we only stop showing SIGTRAP.
747 	 */
748 	if (signal_is_blocked(curthread, SIGTRAP) ||
749 	    sigismember(&p->p_ignore, SIGTRAP)) {
750 		/* SIGTRAP is blocked or ignored, forget the rest. */
751 		mutex_exit(&p->p_lock);
752 		kmem_free(sqp, sizeof (sigqueue_t));
753 		return (0);
754 	}
755 	sigdelq(p, curthread, SIGTRAP);
756 	sigaddqa(p, curthread, sqp);
757 	schedctl_finish_sigblock(curthread);
758 	smask = curthread->t_hold;
759 	sigfillset(&curthread->t_hold);
760 	sigdiffset(&curthread->t_hold, &cantmask);
761 	sigdelset(&curthread->t_hold, SIGTRAP);
762 	mutex_exit(&p->p_lock);
763 
764 	rval = ((ISSIG_FAST(curthread, lwp, p, FORREAL))? 0 : 1);
765 
766 	/* restore the original signal mask */
767 	mutex_enter(&p->p_lock);
768 	curthread->t_hold = smask;
769 	mutex_exit(&p->p_lock);
770 
771 	return (rval);
772 }
773 
774 /*
775  * Wrappers for the copyin()/copyout() functions to deal
776  * with watchpoints that fire while in system calls.
777  */
778 
779 static int
780 watch_xcopyin(const void *uaddr, void *kaddr, size_t count)
781 {
782 	klwp_t *lwp = ttolwp(curthread);
783 	caddr_t watch_uaddr = (caddr_t)uaddr;
784 	caddr_t watch_kaddr = (caddr_t)kaddr;
785 	int error = 0;
786 	label_t ljb;
787 	size_t part;
788 	int mapped;
789 
790 	while (count && error == 0) {
791 		int watchcode;
792 		caddr_t vaddr;
793 		size_t len;
794 		int ta;
795 
796 		if ((part = PAGESIZE -
797 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > count)
798 			part = count;
799 
800 		if (!pr_is_watchpage(watch_uaddr, S_READ))
801 			watchcode = 0;
802 		else {
803 			vaddr = watch_uaddr;
804 			watchcode = pr_is_watchpoint(&vaddr, &ta,
805 			    part, &len, S_READ);
806 			if (watchcode && ta == 0)
807 				part = vaddr - watch_uaddr;
808 		}
809 
810 		/*
811 		 * Copy the initial part, up to a watched address, if any.
812 		 */
813 		if (part != 0) {
814 			mapped = pr_mappage(watch_uaddr, part, S_READ, 1);
815 			if (on_fault(&ljb))
816 				error = EFAULT;
817 			else
818 				copyin_noerr(watch_uaddr, watch_kaddr, part);
819 			no_fault();
820 			if (mapped)
821 				pr_unmappage(watch_uaddr, part, S_READ, 1);
822 			watch_uaddr += part;
823 			watch_kaddr += part;
824 			count -= part;
825 		}
826 		/*
827 		 * If trapafter was specified, then copy through the
828 		 * watched area before taking the watchpoint trap.
829 		 */
830 		while (count && watchcode && ta && len > part && error == 0) {
831 			len -= part;
832 			if ((part = PAGESIZE) > count)
833 				part = count;
834 			if (part > len)
835 				part = len;
836 			mapped = pr_mappage(watch_uaddr, part, S_READ, 1);
837 			if (on_fault(&ljb))
838 				error = EFAULT;
839 			else
840 				copyin_noerr(watch_uaddr, watch_kaddr, part);
841 			no_fault();
842 			if (mapped)
843 				pr_unmappage(watch_uaddr, part, S_READ, 1);
844 			watch_uaddr += part;
845 			watch_kaddr += part;
846 			count -= part;
847 		}
848 
849 error:
850 		/* if we hit a watched address, do the watchpoint logic */
851 		if (watchcode &&
852 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
853 		    lwp->lwp_sysabort)) {
854 			lwp->lwp_sysabort = 0;
855 			error = EFAULT;
856 			break;
857 		}
858 	}
859 
860 	return (error);
861 }
862 
863 static int
864 watch_copyin(const void *kaddr, void *uaddr, size_t count)
865 {
866 	return (watch_xcopyin(kaddr, uaddr, count) ? -1 : 0);
867 }
868 
869 
870 static int
871 watch_xcopyout(const void *kaddr, void *uaddr, size_t count)
872 {
873 	klwp_t *lwp = ttolwp(curthread);
874 	caddr_t watch_uaddr = (caddr_t)uaddr;
875 	caddr_t watch_kaddr = (caddr_t)kaddr;
876 	int error = 0;
877 	label_t ljb;
878 
879 	while (count && error == 0) {
880 		int watchcode;
881 		caddr_t vaddr;
882 		size_t part;
883 		size_t len;
884 		int ta;
885 		int mapped;
886 
887 		if ((part = PAGESIZE -
888 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > count)
889 			part = count;
890 
891 		if (!pr_is_watchpage(watch_uaddr, S_WRITE))
892 			watchcode = 0;
893 		else {
894 			vaddr = watch_uaddr;
895 			watchcode = pr_is_watchpoint(&vaddr, &ta,
896 			    part, &len, S_WRITE);
897 			if (watchcode) {
898 				if (ta == 0)
899 					part = vaddr - watch_uaddr;
900 				else {
901 					len += vaddr - watch_uaddr;
902 					if (part > len)
903 						part = len;
904 				}
905 			}
906 		}
907 
908 		/*
909 		 * Copy the initial part, up to a watched address, if any.
910 		 */
911 		if (part != 0) {
912 			mapped = pr_mappage(watch_uaddr, part, S_WRITE, 1);
913 			if (on_fault(&ljb))
914 				error = EFAULT;
915 			else
916 				copyout_noerr(watch_kaddr, watch_uaddr, part);
917 			no_fault();
918 			if (mapped)
919 				pr_unmappage(watch_uaddr, part, S_WRITE, 1);
920 			watch_uaddr += part;
921 			watch_kaddr += part;
922 			count -= part;
923 		}
924 
925 		/*
926 		 * If trapafter was specified, then copy through the
927 		 * watched area before taking the watchpoint trap.
928 		 */
929 		while (count && watchcode && ta && len > part && error == 0) {
930 			len -= part;
931 			if ((part = PAGESIZE) > count)
932 				part = count;
933 			if (part > len)
934 				part = len;
935 			mapped = pr_mappage(watch_uaddr, part, S_WRITE, 1);
936 			if (on_fault(&ljb))
937 				error = EFAULT;
938 			else
939 				copyout_noerr(watch_kaddr, watch_uaddr, part);
940 			no_fault();
941 			if (mapped)
942 				pr_unmappage(watch_uaddr, part, S_WRITE, 1);
943 			watch_uaddr += part;
944 			watch_kaddr += part;
945 			count -= part;
946 		}
947 
948 		/* if we hit a watched address, do the watchpoint logic */
949 		if (watchcode &&
950 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
951 		    lwp->lwp_sysabort)) {
952 			lwp->lwp_sysabort = 0;
953 			error = EFAULT;
954 			break;
955 		}
956 	}
957 
958 	return (error);
959 }
960 
961 static int
962 watch_copyout(const void *kaddr, void *uaddr, size_t count)
963 {
964 	return (watch_xcopyout(kaddr, uaddr, count) ? -1 : 0);
965 }
966 
967 static int
968 watch_copyinstr(
969 	const char *uaddr,
970 	char *kaddr,
971 	size_t maxlength,
972 	size_t *lencopied)
973 {
974 	klwp_t *lwp = ttolwp(curthread);
975 	size_t resid;
976 	int error = 0;
977 	label_t ljb;
978 
979 	if ((resid = maxlength) == 0)
980 		return (ENAMETOOLONG);
981 
982 	while (resid && error == 0) {
983 		int watchcode;
984 		caddr_t vaddr;
985 		size_t part;
986 		size_t len;
987 		size_t size;
988 		int ta;
989 		int mapped;
990 
991 		if ((part = PAGESIZE -
992 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > resid)
993 			part = resid;
994 
995 		if (!pr_is_watchpage((caddr_t)uaddr, S_READ))
996 			watchcode = 0;
997 		else {
998 			vaddr = (caddr_t)uaddr;
999 			watchcode = pr_is_watchpoint(&vaddr, &ta,
1000 			    part, &len, S_READ);
1001 			if (watchcode) {
1002 				if (ta == 0)
1003 					part = vaddr - uaddr;
1004 				else {
1005 					len += vaddr - uaddr;
1006 					if (part > len)
1007 						part = len;
1008 				}
1009 			}
1010 		}
1011 
1012 		/*
1013 		 * Copy the initial part, up to a watched address, if any.
1014 		 */
1015 		if (part != 0) {
1016 			mapped = pr_mappage((caddr_t)uaddr, part, S_READ, 1);
1017 			if (on_fault(&ljb))
1018 				error = EFAULT;
1019 			else
1020 				error = copyinstr_noerr(uaddr, kaddr, part,
1021 				    &size);
1022 			no_fault();
1023 			if (mapped)
1024 				pr_unmappage((caddr_t)uaddr, part, S_READ, 1);
1025 			uaddr += size;
1026 			kaddr += size;
1027 			resid -= size;
1028 			if (error == ENAMETOOLONG && resid > 0)
1029 			    error = 0;
1030 			if (error != 0 || (watchcode &&
1031 			    (uaddr < vaddr || kaddr[-1] == '\0')))
1032 				break;	/* didn't reach the watched area */
1033 		}
1034 
1035 		/*
1036 		 * If trapafter was specified, then copy through the
1037 		 * watched area before taking the watchpoint trap.
1038 		 */
1039 		while (resid && watchcode && ta && len > part && error == 0 &&
1040 		    size == part && kaddr[-1] != '\0') {
1041 			len -= part;
1042 			if ((part = PAGESIZE) > resid)
1043 				part = resid;
1044 			if (part > len)
1045 				part = len;
1046 			mapped = pr_mappage((caddr_t)uaddr, part, S_READ, 1);
1047 			if (on_fault(&ljb))
1048 				error = EFAULT;
1049 			else
1050 				error = copyinstr_noerr(uaddr, kaddr, part,
1051 				    &size);
1052 			no_fault();
1053 			if (mapped)
1054 				pr_unmappage((caddr_t)uaddr, part, S_READ, 1);
1055 			uaddr += size;
1056 			kaddr += size;
1057 			resid -= size;
1058 			if (error == ENAMETOOLONG && resid > 0)
1059 			    error = 0;
1060 		}
1061 
1062 		/* if we hit a watched address, do the watchpoint logic */
1063 		if (watchcode &&
1064 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1065 		    lwp->lwp_sysabort)) {
1066 			lwp->lwp_sysabort = 0;
1067 			error = EFAULT;
1068 			break;
1069 		}
1070 
1071 		if (error == 0 && part != 0 &&
1072 		    (size < part || kaddr[-1] == '\0'))
1073 			break;
1074 	}
1075 
1076 	if (error != EFAULT && lencopied)
1077 		*lencopied = maxlength - resid;
1078 	return (error);
1079 }
1080 
1081 static int
1082 watch_copyoutstr(
1083 	const char *kaddr,
1084 	char *uaddr,
1085 	size_t maxlength,
1086 	size_t *lencopied)
1087 {
1088 	klwp_t *lwp = ttolwp(curthread);
1089 	size_t resid;
1090 	int error = 0;
1091 	label_t ljb;
1092 
1093 	if ((resid = maxlength) == 0)
1094 		return (ENAMETOOLONG);
1095 
1096 	while (resid && error == 0) {
1097 		int watchcode;
1098 		caddr_t vaddr;
1099 		size_t part;
1100 		size_t len;
1101 		size_t size;
1102 		int ta;
1103 		int mapped;
1104 
1105 		if ((part = PAGESIZE -
1106 		    (((uintptr_t)uaddr) & PAGEOFFSET)) > resid)
1107 			part = resid;
1108 
1109 		if (!pr_is_watchpage(uaddr, S_WRITE)) {
1110 			watchcode = 0;
1111 		} else {
1112 			vaddr = uaddr;
1113 			watchcode = pr_is_watchpoint(&vaddr, &ta,
1114 			    part, &len, S_WRITE);
1115 			if (watchcode && ta == 0)
1116 				part = vaddr - uaddr;
1117 		}
1118 
1119 		/*
1120 		 * Copy the initial part, up to a watched address, if any.
1121 		 */
1122 		if (part != 0) {
1123 			mapped = pr_mappage(uaddr, part, S_WRITE, 1);
1124 			if (on_fault(&ljb))
1125 				error = EFAULT;
1126 			else
1127 				error = copyoutstr_noerr(kaddr, uaddr, part,
1128 				    &size);
1129 			no_fault();
1130 			if (mapped)
1131 				pr_unmappage(uaddr, part, S_WRITE, 1);
1132 			uaddr += size;
1133 			kaddr += size;
1134 			resid -= size;
1135 			if (error == ENAMETOOLONG && resid > 0)
1136 			    error = 0;
1137 			if (error != 0 || (watchcode &&
1138 			    (uaddr < vaddr || kaddr[-1] == '\0')))
1139 				break;	/* didn't reach the watched area */
1140 		}
1141 
1142 		/*
1143 		 * If trapafter was specified, then copy through the
1144 		 * watched area before taking the watchpoint trap.
1145 		 */
1146 		while (resid && watchcode && ta && len > part && error == 0 &&
1147 		    size == part && kaddr[-1] != '\0') {
1148 			len -= part;
1149 			if ((part = PAGESIZE) > resid)
1150 				part = resid;
1151 			if (part > len)
1152 				part = len;
1153 			mapped = pr_mappage(uaddr, part, S_WRITE, 1);
1154 			if (on_fault(&ljb))
1155 				error = EFAULT;
1156 			else
1157 				error = copyoutstr_noerr(kaddr, uaddr, part,
1158 				    &size);
1159 			no_fault();
1160 			if (mapped)
1161 				pr_unmappage(uaddr, part, S_WRITE, 1);
1162 			uaddr += size;
1163 			kaddr += size;
1164 			resid -= size;
1165 			if (error == ENAMETOOLONG && resid > 0)
1166 			    error = 0;
1167 		}
1168 
1169 		/* if we hit a watched address, do the watchpoint logic */
1170 		if (watchcode &&
1171 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1172 		    lwp->lwp_sysabort)) {
1173 			lwp->lwp_sysabort = 0;
1174 			error = EFAULT;
1175 			break;
1176 		}
1177 
1178 		if (error == 0 && part != 0 &&
1179 		    (size < part || kaddr[-1] == '\0'))
1180 			break;
1181 	}
1182 
1183 	if (error != EFAULT && lencopied)
1184 		*lencopied = maxlength - resid;
1185 	return (error);
1186 }
1187 
1188 typedef int (*fuword_func)(const void *, void *);
1189 
1190 /*
1191  * Generic form of watch_fuword8(), watch_fuword16(), etc.
1192  */
1193 static int
1194 watch_fuword(const void *addr, void *dst, fuword_func func, size_t size)
1195 {
1196 	klwp_t *lwp = ttolwp(curthread);
1197 	int watchcode;
1198 	caddr_t vaddr;
1199 	int mapped;
1200 	int rv = 0;
1201 	int ta;
1202 	label_t ljb;
1203 
1204 	for (;;) {
1205 
1206 		vaddr = (caddr_t)addr;
1207 		watchcode = pr_is_watchpoint(&vaddr, &ta, size, NULL, S_READ);
1208 		if (watchcode == 0 || ta != 0) {
1209 			mapped = pr_mappage((caddr_t)addr, size, S_READ, 1);
1210 			if (on_fault(&ljb))
1211 				rv = -1;
1212 			else
1213 				(*func)(addr, dst);
1214 			no_fault();
1215 			if (mapped)
1216 				pr_unmappage((caddr_t)addr, size, S_READ, 1);
1217 		}
1218 		if (watchcode &&
1219 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1220 		    lwp->lwp_sysabort)) {
1221 			lwp->lwp_sysabort = 0;
1222 			rv = -1;
1223 			break;
1224 		}
1225 		if (watchcode == 0 || ta != 0)
1226 			break;
1227 	}
1228 
1229 	return (rv);
1230 }
1231 
1232 static int
1233 watch_fuword8(const void *addr, uint8_t *dst)
1234 {
1235 	return (watch_fuword(addr, dst, (fuword_func)fuword8_noerr,
1236 	    sizeof (*dst)));
1237 }
1238 
1239 static int
1240 watch_fuword16(const void *addr, uint16_t *dst)
1241 {
1242 	return (watch_fuword(addr, dst, (fuword_func)fuword16_noerr,
1243 	    sizeof (*dst)));
1244 }
1245 
1246 static int
1247 watch_fuword32(const void *addr, uint32_t *dst)
1248 {
1249 	return (watch_fuword(addr, dst, (fuword_func)fuword32_noerr,
1250 	    sizeof (*dst)));
1251 }
1252 
1253 #ifdef _LP64
1254 static int
1255 watch_fuword64(const void *addr, uint64_t *dst)
1256 {
1257 	return (watch_fuword(addr, dst, (fuword_func)fuword64_noerr,
1258 	    sizeof (*dst)));
1259 }
1260 #endif
1261 
1262 
1263 static int
1264 watch_suword8(void *addr, uint8_t value)
1265 {
1266 	klwp_t *lwp = ttolwp(curthread);
1267 	int watchcode;
1268 	caddr_t vaddr;
1269 	int mapped;
1270 	int rv = 0;
1271 	int ta;
1272 	label_t ljb;
1273 
1274 	for (;;) {
1275 
1276 		vaddr = (caddr_t)addr;
1277 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1278 		    S_WRITE);
1279 		if (watchcode == 0 || ta != 0) {
1280 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1281 			    S_WRITE, 1);
1282 			if (on_fault(&ljb))
1283 				rv = -1;
1284 			else
1285 				suword8_noerr(addr, value);
1286 			no_fault();
1287 			if (mapped)
1288 				pr_unmappage((caddr_t)addr, sizeof (value),
1289 				    S_WRITE, 1);
1290 		}
1291 		if (watchcode &&
1292 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1293 		    lwp->lwp_sysabort)) {
1294 			lwp->lwp_sysabort = 0;
1295 			rv = -1;
1296 			break;
1297 		}
1298 		if (watchcode == 0 || ta != 0)
1299 			break;
1300 	}
1301 
1302 	return (rv);
1303 }
1304 
1305 static int
1306 watch_suword16(void *addr, uint16_t value)
1307 {
1308 	klwp_t *lwp = ttolwp(curthread);
1309 	int watchcode;
1310 	caddr_t vaddr;
1311 	int mapped;
1312 	int rv = 0;
1313 	int ta;
1314 	label_t ljb;
1315 
1316 	for (;;) {
1317 
1318 		vaddr = (caddr_t)addr;
1319 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1320 		    S_WRITE);
1321 		if (watchcode == 0 || ta != 0) {
1322 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1323 			    S_WRITE, 1);
1324 			if (on_fault(&ljb))
1325 				rv = -1;
1326 			else
1327 				suword16_noerr(addr, value);
1328 			no_fault();
1329 			if (mapped)
1330 				pr_unmappage((caddr_t)addr, sizeof (value),
1331 				    S_WRITE, 1);
1332 		}
1333 		if (watchcode &&
1334 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1335 		    lwp->lwp_sysabort)) {
1336 			lwp->lwp_sysabort = 0;
1337 			rv = -1;
1338 			break;
1339 		}
1340 		if (watchcode == 0 || ta != 0)
1341 			break;
1342 	}
1343 
1344 	return (rv);
1345 }
1346 
1347 static int
1348 watch_suword32(void *addr, uint32_t value)
1349 {
1350 	klwp_t *lwp = ttolwp(curthread);
1351 	int watchcode;
1352 	caddr_t vaddr;
1353 	int mapped;
1354 	int rv = 0;
1355 	int ta;
1356 	label_t ljb;
1357 
1358 	for (;;) {
1359 
1360 		vaddr = (caddr_t)addr;
1361 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1362 		    S_WRITE);
1363 		if (watchcode == 0 || ta != 0) {
1364 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1365 			    S_WRITE, 1);
1366 			if (on_fault(&ljb))
1367 				rv = -1;
1368 			else
1369 				suword32_noerr(addr, value);
1370 			no_fault();
1371 			if (mapped)
1372 				pr_unmappage((caddr_t)addr, sizeof (value),
1373 				    S_WRITE, 1);
1374 		}
1375 		if (watchcode &&
1376 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1377 		    lwp->lwp_sysabort)) {
1378 			lwp->lwp_sysabort = 0;
1379 			rv = -1;
1380 			break;
1381 		}
1382 		if (watchcode == 0 || ta != 0)
1383 			break;
1384 	}
1385 
1386 	return (rv);
1387 }
1388 
1389 #ifdef _LP64
1390 static int
1391 watch_suword64(void *addr, uint64_t value)
1392 {
1393 	klwp_t *lwp = ttolwp(curthread);
1394 	int watchcode;
1395 	caddr_t vaddr;
1396 	int mapped;
1397 	int rv = 0;
1398 	int ta;
1399 	label_t ljb;
1400 
1401 	for (;;) {
1402 
1403 		vaddr = (caddr_t)addr;
1404 		watchcode = pr_is_watchpoint(&vaddr, &ta, sizeof (value), NULL,
1405 		    S_WRITE);
1406 		if (watchcode == 0 || ta != 0) {
1407 			mapped = pr_mappage((caddr_t)addr, sizeof (value),
1408 			    S_WRITE, 1);
1409 			if (on_fault(&ljb))
1410 				rv = -1;
1411 			else
1412 				suword64_noerr(addr, value);
1413 			no_fault();
1414 			if (mapped)
1415 				pr_unmappage((caddr_t)addr, sizeof (value),
1416 				    S_WRITE, 1);
1417 		}
1418 		if (watchcode &&
1419 		    (!sys_watchpoint(vaddr, watchcode, ta) ||
1420 		    lwp->lwp_sysabort)) {
1421 			lwp->lwp_sysabort = 0;
1422 			rv = -1;
1423 			break;
1424 		}
1425 		if (watchcode == 0 || ta != 0)
1426 			break;
1427 	}
1428 
1429 	return (rv);
1430 }
1431 #endif /* _LP64 */
1432 
1433 /*
1434  * Check for watched addresses in the given address space.
1435  * Return 1 if this is true, otherwise 0.
1436  */
1437 static int
1438 pr_is_watched(caddr_t base, size_t len, int rw)
1439 {
1440 	caddr_t saddr = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
1441 	caddr_t eaddr = base + len;
1442 	caddr_t paddr;
1443 
1444 	for (paddr = saddr; paddr < eaddr; paddr += PAGESIZE) {
1445 		if (pr_is_watchpage(paddr, rw))
1446 			return (1);
1447 	}
1448 
1449 	return (0);
1450 }
1451 
1452 /*
1453  * Wrapper for the physio() function.
1454  * Splits one uio operation with multiple iovecs into uio operations with
1455  * only one iovecs to do the watchpoint handling separately for each iovecs.
1456  */
1457 static int
1458 watch_physio(int (*strat)(struct buf *), struct buf *bp, dev_t dev,
1459     int rw, void (*mincnt)(struct buf *), struct uio *uio)
1460 {
1461 	struct uio auio;
1462 	struct iovec *iov;
1463 	caddr_t  base;
1464 	size_t len;
1465 	int seg_rw;
1466 	int error = 0;
1467 
1468 	if (uio->uio_segflg == UIO_SYSSPACE)
1469 		return (default_physio(strat, bp, dev, rw, mincnt, uio));
1470 
1471 	seg_rw = (rw == B_READ) ? S_WRITE : S_READ;
1472 
1473 	while (uio->uio_iovcnt > 0) {
1474 		if (uio->uio_resid == 0) {
1475 			/*
1476 			 * Make sure to return the uio structure with the
1477 			 * same values as default_physio() does.
1478 			 */
1479 			uio->uio_iov++;
1480 			uio->uio_iovcnt--;
1481 			continue;
1482 		}
1483 
1484 		iov = uio->uio_iov;
1485 		len = MIN(iov->iov_len, uio->uio_resid);
1486 
1487 		auio.uio_iovcnt = 1;
1488 		auio.uio_iov = iov;
1489 		auio.uio_resid = len;
1490 		auio.uio_loffset = uio->uio_loffset;
1491 		auio.uio_llimit = uio->uio_llimit;
1492 		auio.uio_fmode = uio->uio_fmode;
1493 		auio.uio_extflg = uio->uio_extflg;
1494 		auio.uio_segflg = uio->uio_segflg;
1495 
1496 		base = iov->iov_base;
1497 
1498 		if (!pr_is_watched(base, len, seg_rw)) {
1499 			/*
1500 			 * The given memory references don't cover a
1501 			 * watched page.
1502 			 */
1503 			error = default_physio(strat, bp, dev, rw, mincnt,
1504 			    &auio);
1505 
1506 			/* Update uio with values from auio. */
1507 			len -= auio.uio_resid;
1508 			uio->uio_resid -= len;
1509 			uio->uio_loffset += len;
1510 
1511 			/*
1512 			 * Return if an error occurred or not all data
1513 			 * was copied.
1514 			 */
1515 			if (auio.uio_resid || error)
1516 				break;
1517 			uio->uio_iov++;
1518 			uio->uio_iovcnt--;
1519 		} else {
1520 			int mapped, watchcode, ta;
1521 			caddr_t vaddr = base;
1522 			klwp_t *lwp = ttolwp(curthread);
1523 
1524 			watchcode = pr_is_watchpoint(&vaddr, &ta, len,
1525 			    NULL, seg_rw);
1526 
1527 			if (watchcode == 0 || ta != 0) {
1528 				/*
1529 				 * Do the io if the given memory references
1530 				 * don't cover a watched area (watchcode=0)
1531 				 * or if WA_TRAPAFTER was specified.
1532 				 */
1533 				mapped = pr_mappage(base, len, seg_rw, 1);
1534 				error = default_physio(strat, bp, dev, rw,
1535 				    mincnt, &auio);
1536 				if (mapped)
1537 					pr_unmappage(base, len, seg_rw, 1);
1538 
1539 				len -= auio.uio_resid;
1540 				uio->uio_resid -= len;
1541 				uio->uio_loffset += len;
1542 			}
1543 
1544 			/*
1545 			 * If we hit a watched address, do the watchpoint logic.
1546 			 */
1547 			if (watchcode &&
1548 			    (!sys_watchpoint(vaddr, watchcode, ta) ||
1549 			    lwp->lwp_sysabort)) {
1550 				lwp->lwp_sysabort = 0;
1551 				return (EFAULT);
1552 			}
1553 
1554 			/*
1555 			 * Check for errors from default_physio().
1556 			 */
1557 			if (watchcode == 0 || ta != 0) {
1558 				if (auio.uio_resid || error)
1559 					break;
1560 				uio->uio_iov++;
1561 				uio->uio_iovcnt--;
1562 			}
1563 		}
1564 	}
1565 
1566 	return (error);
1567 }
1568 
1569 int
1570 wa_compare(const void *a, const void *b)
1571 {
1572 	const watched_area_t *pa = a;
1573 	const watched_area_t *pb = b;
1574 
1575 	if (pa->wa_vaddr < pb->wa_vaddr)
1576 		return (-1);
1577 	else if (pa->wa_vaddr > pb->wa_vaddr)
1578 		return (1);
1579 	else
1580 		return (0);
1581 }
1582 
1583 int
1584 wp_compare(const void *a, const void *b)
1585 {
1586 	const watched_page_t *pa = a;
1587 	const watched_page_t *pb = b;
1588 
1589 	if (pa->wp_vaddr < pb->wp_vaddr)
1590 		return (-1);
1591 	else if (pa->wp_vaddr > pb->wp_vaddr)
1592 		return (1);
1593 	else
1594 		return (0);
1595 }
1596 
1597 /*
1598  * Given an address range, finds the first watched area which overlaps some or
1599  * all of the range.
1600  */
1601 watched_area_t *
1602 pr_find_watched_area(proc_t *p, watched_area_t *pwa, avl_index_t *where)
1603 {
1604 	caddr_t vaddr = pwa->wa_vaddr;
1605 	caddr_t eaddr = pwa->wa_eaddr;
1606 	watched_area_t *wap;
1607 	avl_index_t real_where;
1608 
1609 	/* First, check if there is an exact match.  */
1610 	wap = avl_find(&p->p_warea, pwa, &real_where);
1611 
1612 
1613 	/* Check to see if we overlap with the previous area.  */
1614 	if (wap == NULL) {
1615 		wap = avl_nearest(&p->p_warea, real_where, AVL_BEFORE);
1616 		if (wap != NULL &&
1617 		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
1618 			wap = NULL;
1619 	}
1620 
1621 	/* Try the next area.  */
1622 	if (wap == NULL) {
1623 		wap = avl_nearest(&p->p_warea, real_where, AVL_AFTER);
1624 		if (wap != NULL &&
1625 		    (vaddr >= wap->wa_eaddr || eaddr <= wap->wa_vaddr))
1626 			wap = NULL;
1627 	}
1628 
1629 	if (where)
1630 		*where = real_where;
1631 
1632 	return (wap);
1633 }
1634 
1635 void
1636 watch_enable(kthread_id_t t)
1637 {
1638 	t->t_proc_flag |= TP_WATCHPT;
1639 	install_copyops(t, &watch_copyops);
1640 }
1641 
1642 void
1643 watch_disable(kthread_id_t t)
1644 {
1645 	t->t_proc_flag &= ~TP_WATCHPT;
1646 	remove_copyops(t);
1647 }
1648 
1649 int
1650 copyin_nowatch(const void *uaddr, void *kaddr, size_t len)
1651 {
1652 	int watched, ret;
1653 
1654 	watched = watch_disable_addr(uaddr, len, S_READ);
1655 	ret = copyin(uaddr, kaddr, len);
1656 	if (watched)
1657 		watch_enable_addr(uaddr, len, S_READ);
1658 
1659 	return (ret);
1660 }
1661 
1662 int
1663 copyout_nowatch(const void *kaddr, void *uaddr, size_t len)
1664 {
1665 	int watched, ret;
1666 
1667 	watched = watch_disable_addr(uaddr, len, S_WRITE);
1668 	ret = copyout(kaddr, uaddr, len);
1669 	if (watched)
1670 		watch_enable_addr(uaddr, len, S_WRITE);
1671 
1672 	return (ret);
1673 }
1674 
1675 #ifdef _LP64
1676 int
1677 fuword64_nowatch(const void *addr, uint64_t *value)
1678 {
1679 	int watched, ret;
1680 
1681 	watched = watch_disable_addr(addr, sizeof (*value), S_READ);
1682 	ret = fuword64(addr, value);
1683 	if (watched)
1684 		watch_enable_addr(addr, sizeof (*value), S_READ);
1685 
1686 	return (ret);
1687 }
1688 #endif
1689 
1690 int
1691 fuword32_nowatch(const void *addr, uint32_t *value)
1692 {
1693 	int watched, ret;
1694 
1695 	watched = watch_disable_addr(addr, sizeof (*value), S_READ);
1696 	ret = fuword32(addr, value);
1697 	if (watched)
1698 		watch_enable_addr(addr, sizeof (*value), S_READ);
1699 
1700 	return (ret);
1701 }
1702 
1703 #ifdef _LP64
1704 int
1705 suword64_nowatch(void *addr, uint64_t value)
1706 {
1707 	int watched, ret;
1708 
1709 	watched = watch_disable_addr(addr, sizeof (value), S_WRITE);
1710 	ret = suword64(addr, value);
1711 	if (watched)
1712 		watch_enable_addr(addr, sizeof (value), S_WRITE);
1713 
1714 	return (ret);
1715 }
1716 #endif
1717 
1718 int
1719 suword32_nowatch(void *addr, uint32_t value)
1720 {
1721 	int watched, ret;
1722 
1723 	watched = watch_disable_addr(addr, sizeof (value), S_WRITE);
1724 	ret = suword32(addr, value);
1725 	if (watched)
1726 		watch_enable_addr(addr, sizeof (value), S_WRITE);
1727 
1728 	return (ret);
1729 }
1730 
1731 int
1732 watch_disable_addr(const void *addr, size_t len, enum seg_rw rw)
1733 {
1734 	if (pr_watch_active(curproc))
1735 		return (pr_mappage((caddr_t)addr, len, rw, 1));
1736 	return (0);
1737 }
1738 
1739 void
1740 watch_enable_addr(const void *addr, size_t len, enum seg_rw rw)
1741 {
1742 	if (pr_watch_active(curproc))
1743 		pr_unmappage((caddr_t)addr, len, rw, 1);
1744 }
1745