/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * VM - address spaces.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/sysmacros.h>
#include <sys/cpuvar.h>
#include <sys/sysinfo.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/tnf_probe.h>
#include <sys/vtrace.h>

#include <vm/hat.h>
#include <vm/xhat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>
#include <vm/seg_spt.h>
#include <vm/page.h>

clock_t deadlk_wait = 1;	/* number of ticks to wait before retrying */

static struct kmem_cache *as_cache;

static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
static void as_clearwatchprot(struct as *, caddr_t, size_t);
int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);


/*
 * Verifying the segment lists is very time-consuming; it may not be
 * desirable always to define VERIFY_SEGLIST when DEBUG is set.
 */
#ifdef DEBUG
#define	VERIFY_SEGLIST
int do_as_verify = 0;
#endif

/*
 * Allocate a new callback data structure entry and fill in the events of
 * interest, the address range of interest, and the callback argument.
 * Link the entry on the as->a_callbacks list.  A callback entry for the
 * entire address space may be specified with vaddr = 0 and size = -1.
 *
 * CALLERS RESPONSIBILITY: If not calling from within the process context for
 * the specified as, the caller must guarantee persistence of the specified as
 * for the duration of this function (eg. pages being locked within the as
 * will guarantee persistence).
 */
int
as_add_callback(struct as *as, void (*cb_func)(), void *arg, uint_t events,
    caddr_t vaddr, size_t size, int sleepflag)
{
	struct as_callback *current_head, *cb;
	caddr_t saddr;
	size_t rsize;

	/* callback function and an event are mandatory */
	if ((cb_func == NULL) || ((events & AS_ALL_EVENT) == 0))
		return (EINVAL);

	/* Adding a callback after as_free has been called is not allowed */
	if (as == &kas)
		return (ENOMEM);

	/*
	 * vaddr = 0 and size = -1 is used to indicate that the callback range
	 * is the entire address space so no rounding is done in that case.
	 */
	if (size != -1) {
		saddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
		rsize = (((size_t)(vaddr + size) + PAGEOFFSET) & PAGEMASK) -
		    (size_t)saddr;
		/* check for wraparound */
		if (saddr + rsize < saddr)
			return (ENOMEM);
	} else {
		if (vaddr != 0)
			return (EINVAL);
		saddr = vaddr;
		rsize = size;
	}

	/* Allocate and initialize a callback entry */
	cb = kmem_zalloc(sizeof (struct as_callback), sleepflag);
	if (cb == NULL)
		return (EAGAIN);

	cb->ascb_func = cb_func;
	cb->ascb_arg = arg;
	cb->ascb_events = events;
	cb->ascb_saddr = saddr;
	cb->ascb_len = rsize;

	/* Add the entry to the list */
	mutex_enter(&as->a_contents);
	current_head = as->a_callbacks;
	as->a_callbacks = cb;
	cb->ascb_next = current_head;

	/*
	 * The call to this function may lose in a race with
	 * a pertinent event - eg. a thread does long term memory locking
	 * but before the callback is added another thread executes as_unmap.
	 * A broadcast here resolves that.
	 */
	if ((cb->ascb_events & AS_UNMAPWAIT_EVENT) && AS_ISUNMAPWAIT(as)) {
		AS_CLRUNMAPWAIT(as);
		cv_broadcast(&as->a_cv);
	}

	mutex_exit(&as->a_contents);
	return (0);
}
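
/*
 * Illustrative sketch (not part of the original file): how a driver that
 * holds pages locked for a long time might register an unmap callback with
 * as_add_callback() and later remove it with as_delete_callback().  The
 * xx_unmap_cb() and xx_softstate names are hypothetical; the as_* calls and
 * event flags are the ones defined in this file.
 */
#if 0
static void
xx_unmap_cb(struct as *as, void *arg, uint_t events)
{
	/* unlock the pages tracked by the (hypothetical) soft state ... */

	/* the callback must delete its own entry before letting us continue */
	(void) as_delete_callback(as, arg);
}

static int
xx_watch_range(struct as *as, caddr_t addr, size_t len, void *xx_softstate)
{
	/* register for unmap/unmapwait events on [addr, addr + len) */
	return (as_add_callback(as, xx_unmap_cb, xx_softstate,
	    AS_UNMAP_EVENT | AS_UNMAPWAIT_EVENT, addr, len, KM_SLEEP));
}
#endif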

/*
 * Search the callback list for an entry which pertains to arg.
 *
 * This is called from within the client upon completion of the callback.
 * RETURN VALUES:
 *	AS_CALLBACK_DELETED  (callback entry found and deleted)
 *	AS_CALLBACK_NOTFOUND (no callback entry found - this is ok)
 *	AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this
 *			entry will be made in as_do_callbacks)
 *
 * If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED
 * set, it indicates that as_do_callbacks is processing this entry.  The
 * AS_ALL_EVENT events are cleared in the entry, and a broadcast is made
 * to unblock as_do_callbacks, in case it is blocked.
 *
 * CALLERS RESPONSIBILITY: If not calling from within the process context for
 * the specified as, the caller must guarantee persistence of the specified as
 * for the duration of this function (eg. pages being locked within the as
 * will guarantee persistence).
 */
uint_t
as_delete_callback(struct as *as, void *arg)
{
	struct as_callback **prevcb = &as->a_callbacks;
	struct as_callback *cb;
	uint_t rc = AS_CALLBACK_NOTFOUND;

	mutex_enter(&as->a_contents);
	for (cb = as->a_callbacks; cb; prevcb = &cb->ascb_next, cb = *prevcb) {
		if (cb->ascb_arg != arg)
			continue;

		/*
		 * If the events indicate AS_CALLBACK_CALLED, just clear
		 * AS_ALL_EVENT in the events field and wakeup the thread
		 * that may be waiting in as_do_callbacks.  as_do_callbacks
		 * will take care of removing this entry from the list.  In
		 * that case, return AS_CALLBACK_DELETE_DEFERRED.  Otherwise
		 * (AS_CALLBACK_CALLED not set), just remove it from the
		 * list, return the memory and return AS_CALLBACK_DELETED.
		 */
		if ((cb->ascb_events & AS_CALLBACK_CALLED) != 0) {
			/* leave AS_CALLBACK_CALLED */
			cb->ascb_events &= ~AS_ALL_EVENT;
			rc = AS_CALLBACK_DELETE_DEFERRED;
			cv_broadcast(&as->a_cv);
		} else {
			*prevcb = cb->ascb_next;
			kmem_free(cb, sizeof (struct as_callback));
			rc = AS_CALLBACK_DELETED;
		}
		break;
	}
	mutex_exit(&as->a_contents);
	return (rc);
}

/*
 * Searches the as callback list for a matching entry.
 * Returns a pointer to the first matching callback, or NULL if
 * nothing is found.
 * This function never sleeps so it is ok to call it with more
 * locks held than the (required) a_contents mutex.
 *
 * See also comment on as_do_callbacks below.
 */
static struct as_callback *
as_find_callback(struct as *as, uint_t events, caddr_t event_addr,
    size_t event_len)
{
	struct as_callback *cb;

	ASSERT(MUTEX_HELD(&as->a_contents));
	for (cb = as->a_callbacks; cb != NULL; cb = cb->ascb_next) {
		/*
		 * If the callback has not already been called, then
		 * check if events or address range pertains.  An event_len
		 * of zero means do an unconditional callback.
		 */
		if (((cb->ascb_events & AS_CALLBACK_CALLED) != 0) ||
		    ((event_len != 0) && (((cb->ascb_events & events) == 0) ||
		    (event_addr + event_len < cb->ascb_saddr) ||
		    (event_addr > (cb->ascb_saddr + cb->ascb_len))))) {
			continue;
		}
		break;
	}
	return (cb);
}

/*
 * Executes a given callback and removes it from the callback list for
 * this address space.
 * This function may sleep so the caller must drop all locks except
 * a_contents before calling this func.
 *
 * See also comments on as_do_callbacks below.
 */
static void
as_execute_callback(struct as *as, struct as_callback *cb,
    uint_t events)
{
	struct as_callback **prevcb;
	void *cb_arg;

	ASSERT(MUTEX_HELD(&as->a_contents) && (cb->ascb_events & events));
	cb->ascb_events |= AS_CALLBACK_CALLED;
	mutex_exit(&as->a_contents);
	(*cb->ascb_func)(as, cb->ascb_arg, events);
	mutex_enter(&as->a_contents);
	/*
	 * the callback function is required to delete the callback
	 * when the callback function determines it is OK for
	 * this thread to continue. as_delete_callback will clear
	 * the AS_ALL_EVENT in the events field when it is deleted.
	 * If the callback function called as_delete_callback,
	 * events will already be cleared and there will be no blocking.
	 */
	while ((cb->ascb_events & events) != 0) {
		cv_wait(&as->a_cv, &as->a_contents);
	}
	/*
	 * This entry needs to be taken off the list. Normally, the
	 * callback func itself does that, but unfortunately the list
	 * may have changed while the callback was running because the
	 * a_contents mutex was dropped and someone else other than the
	 * callback func itself could have called as_delete_callback,
	 * so we have to search to find this entry again.  The entry
	 * must have AS_CALLBACK_CALLED, and have the same 'arg'.
	 */
	cb_arg = cb->ascb_arg;
	prevcb = &as->a_callbacks;
	for (cb = as->a_callbacks; cb != NULL;
	    prevcb = &cb->ascb_next, cb = *prevcb) {
		if (((cb->ascb_events & AS_CALLBACK_CALLED) == 0) ||
		    (cb_arg != cb->ascb_arg)) {
			continue;
		}
		*prevcb = cb->ascb_next;
		kmem_free(cb, sizeof (struct as_callback));
		break;
	}
}

/*
 * Check the callback list for a matching event and intersection of
 * address range. If there is a match invoke the callback.  Skip an entry if:
 *    - a callback is already in progress for this entry (AS_CALLBACK_CALLED)
 *    - not an event of interest
 *    - not an address range of interest
 *
 * An event_len of zero indicates a request for an unconditional callback
 * (regardless of event), only the AS_CALLBACK_CALLED is checked.  The
 * a_contents lock must be dropped before a callback, so only one callback
 * can be done before returning. Return -1 (true) if a callback was
 * executed and removed from the list, else return 0 (false).
 *
 * The logically separate parts, i.e. finding a matching callback and
 * executing a given callback have been separated into two functions
 * so that they can be called with different sets of locks held beyond
 * the always-required a_contents. as_find_callback does not sleep so
 * it is ok to call it if more locks than a_contents (i.e. the a_lock
 * rwlock) are held. as_execute_callback on the other hand may sleep
 * so all locks beyond a_contents must be dropped by the caller if one
 * does not want to end up comatose.
 */
static int
as_do_callbacks(struct as *as, uint_t events, caddr_t event_addr,
    size_t event_len)
{
	struct as_callback *cb;

	if ((cb = as_find_callback(as, events, event_addr, event_len))) {
		as_execute_callback(as, cb, events);
		return (-1);
	}
	return (0);
}

/*
 * Search for the segment containing addr. If a segment containing addr
 * exists, that segment is returned.  If no such segment exists, and
 * the list spans addresses greater than addr, then the first segment
 * whose base is greater than addr is returned; otherwise, NULL is
 * returned unless tail is true, in which case the last element of the
 * list is returned.
 *
 * a_seglast is used to cache the last found segment for repeated
 * searches to the same addr (which happens frequently).
 */
struct seg *
as_findseg(struct as *as, caddr_t addr, int tail)
{
	struct seg *seg = as->a_seglast;
	avl_index_t where;

	ASSERT(AS_LOCK_HELD(as, &as->a_lock));

	if (seg != NULL &&
	    seg->s_base <= addr &&
	    addr < seg->s_base + seg->s_size)
		return (seg);

	seg = avl_find(&as->a_segtree, &addr, &where);
	if (seg != NULL)
		return (as->a_seglast = seg);

	seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
	if (seg == NULL && tail)
		seg = avl_last(&as->a_segtree);
	return (as->a_seglast = seg);
}

#ifdef VERIFY_SEGLIST
/*
 * verify that the linked list is coherent
 */
static void
as_verify(struct as *as)
{
	struct seg *seg, *seglast, *p, *n;
	uint_t nsegs = 0;

	if (do_as_verify == 0)
		return;

	seglast = as->a_seglast;

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		ASSERT(seg->s_as == as);
		p = AS_SEGPREV(as, seg);
		n = AS_SEGNEXT(as, seg);
		ASSERT(p == NULL || p->s_as == as);
		ASSERT(p == NULL || p->s_base < seg->s_base);
		ASSERT(n == NULL || n->s_base > seg->s_base);
		ASSERT(n != NULL || seg == avl_last(&as->a_segtree));
		if (seg == seglast)
			seglast = NULL;
		nsegs++;
	}
	ASSERT(seglast == NULL);
	ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
}
#endif /* VERIFY_SEGLIST */

/*
 * Add a new segment to the address space. The avl_find()
 * may be expensive so we attempt to use last segment accessed
 * in as_gap() as an insertion point.
 */
int
as_addseg(struct as *as, struct seg *newseg)
{
	struct seg *seg;
	caddr_t addr;
	caddr_t eaddr;
	avl_index_t where;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (as->a_lastgaphl != NULL) {
		struct seg *hseg = NULL;
		struct seg *lseg = NULL;

		if (as->a_lastgaphl->s_base > newseg->s_base) {
			hseg = as->a_lastgaphl;
			lseg = AVL_PREV(&as->a_segtree, hseg);
		} else {
			lseg = as->a_lastgaphl;
			hseg = AVL_NEXT(&as->a_segtree, lseg);
		}

		if (hseg && lseg && lseg->s_base < newseg->s_base &&
		    hseg->s_base > newseg->s_base) {
			avl_insert_here(&as->a_segtree, newseg, lseg,
			    AVL_AFTER);
			as->a_lastgaphl = NULL;
			as->a_seglast = newseg;
			return (0);
		}
		as->a_lastgaphl = NULL;
	}

	addr = newseg->s_base;
	eaddr = addr + newseg->s_size;
again:

	seg = avl_find(&as->a_segtree, &addr, &where);

	if (seg == NULL)
		seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);

	if (seg == NULL)
		seg = avl_last(&as->a_segtree);

	if (seg != NULL) {
		caddr_t base = seg->s_base;

		/*
		 * If top of seg is below the requested address, then
		 * the insertion point is at the end of the linked list,
		 * and seg points to the tail of the list.  Otherwise,
		 * the insertion point is immediately before seg.
		 */
		if (base + seg->s_size > addr) {
			if (addr >= base || eaddr > base) {
#ifdef __sparc
				extern struct seg_ops segnf_ops;

				/*
				 * no-fault segs must disappear if overlaid.
				 * XXX need new segment type so
				 * we don't have to check s_ops
				 */
				if (seg->s_ops == &segnf_ops) {
					seg_unmap(seg);
					goto again;
				}
#endif
				return (-1);	/* overlapping segment */
			}
		}
	}
	as->a_seglast = newseg;
	avl_insert(&as->a_segtree, newseg, where);

#ifdef VERIFY_SEGLIST
	as_verify(as);
#endif
	return (0);
}

struct seg *
as_removeseg(struct as *as, struct seg *seg)
{
	avl_tree_t *t;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (seg == NULL)
		return (NULL);

	t = &as->a_segtree;
	if (as->a_seglast == seg)
		as->a_seglast = NULL;
	as->a_lastgaphl = NULL;

	/*
	 * if this segment is at an address higher than
	 * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
	 */
	if (as->a_lastgap &&
	    (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
		as->a_lastgap = AVL_NEXT(t, seg);

	/*
	 * remove the segment from the seg tree
	 */
	avl_remove(t, seg);

#ifdef VERIFY_SEGLIST
	as_verify(as);
#endif
	return (seg);
}

/*
 * Find a segment containing addr.
 */
struct seg *
as_segat(struct as *as, caddr_t addr)
{
	struct seg *seg = as->a_seglast;

	ASSERT(AS_LOCK_HELD(as, &as->a_lock));

	if (seg != NULL && seg->s_base <= addr &&
	    addr < seg->s_base + seg->s_size)
		return (seg);

	seg = avl_find(&as->a_segtree, &addr, NULL);
	return (seg);
}

/*
 * Serialize all searches for holes in an address space to
 * prevent two or more threads from allocating the same virtual
 * address range.  The address space must not be "read/write"
 * locked by the caller since we may block.
 */
void
as_rangelock(struct as *as)
{
	mutex_enter(&as->a_contents);
	while (AS_ISCLAIMGAP(as))
		cv_wait(&as->a_cv, &as->a_contents);
	AS_SETCLAIMGAP(as);
	mutex_exit(&as->a_contents);
}

/*
 * Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads.
 */
void
as_rangeunlock(struct as *as)
{
	mutex_enter(&as->a_contents);
	AS_CLRCLAIMGAP(as);
	cv_signal(&as->a_cv);
	mutex_exit(&as->a_contents);
}

/*
 * compare segments (or just an address) by segment address range
 */
static int
as_segcompar(const void *x, const void *y)
{
	struct seg *a = (struct seg *)x;
	struct seg *b = (struct seg *)y;

	if (a->s_base < b->s_base)
		return (-1);
	if (a->s_base >= b->s_base + b->s_size)
		return (1);
	return (0);
}


void
as_avlinit(struct as *as)
{
	avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg),
	    offsetof(struct seg, s_tree));
	avl_create(&as->a_wpage, wp_compare, sizeof (struct watched_page),
	    offsetof(struct watched_page, wp_link));
}

/*ARGSUSED*/
static int
as_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct as *as = buf;

	mutex_init(&as->a_contents, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&as->a_cv, NULL, CV_DEFAULT, NULL);
	rw_init(&as->a_lock, NULL, RW_DEFAULT, NULL);
	as_avlinit(as);
	return (0);
}

/*ARGSUSED1*/
static void
as_destructor(void *buf, void *cdrarg)
{
	struct as *as = buf;

	avl_destroy(&as->a_segtree);
	mutex_destroy(&as->a_contents);
	cv_destroy(&as->a_cv);
	rw_destroy(&as->a_lock);
}

void
as_init(void)
{
	as_cache = kmem_cache_create("as_cache", sizeof (struct as), 0,
	    as_constructor, as_destructor, NULL, NULL, NULL, 0);
}

/*
 * Allocate and initialize an address space data structure.
 * We call hat_alloc to allow any machine dependent
 * information in the hat structure to be initialized.
 */
struct as *
as_alloc(void)
{
	struct as *as;

	as = kmem_cache_alloc(as_cache, KM_SLEEP);

	as->a_flags = 0;
	as->a_vbits = 0;
	as->a_hrm = NULL;
	as->a_seglast = NULL;
	as->a_size = 0;
	as->a_updatedir = 0;
	gethrestime(&as->a_updatetime);
	as->a_objectdir = NULL;
	as->a_sizedir = 0;
	as->a_userlimit = (caddr_t)USERLIMIT;
	as->a_lastgap = NULL;
	as->a_lastgaphl = NULL;
	as->a_callbacks = NULL;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	as->a_hat = hat_alloc(as);	/* create hat for default system mmu */
	AS_LOCK_EXIT(as, &as->a_lock);

	as->a_xhat = NULL;

	return (as);
}

/*
 * Free an address space data structure.
 * Need to free the hat first and then
 * all the segments on this as and finally
 * the space for the as struct itself.
 */
void
as_free(struct as *as)
{
	struct hat *hat = as->a_hat;
	struct seg *seg, *next;
	int called = 0;

top:
	/*
	 * Invoke ALL callbacks. as_do_callbacks will do one callback
	 * per call, and not return (-1) until the callback has completed.
	 * When as_do_callbacks returns zero, all callbacks have completed.
	 */
	mutex_enter(&as->a_contents);
	while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
		;

	/* This will prevent new XHATs from attaching to as */
	if (!called)
		AS_SETBUSY(as);
	mutex_exit(&as->a_contents);
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if (!called) {
		called = 1;
		hat_free_start(hat);
		if (as->a_xhat != NULL)
			xhat_free_start_all(as);
	}
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
		int err;

		next = AS_SEGNEXT(as, seg);
		err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
		if (err == EAGAIN) {
			mutex_enter(&as->a_contents);
			if (as->a_callbacks) {
				AS_LOCK_EXIT(as, &as->a_lock);
			} else {
				/*
				 * Memory is currently locked. Wait for a
				 * cv_signal that it has been unlocked, then
				 * try the operation again.
				 */
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else {
			/*
			 * We do not expect any other error return at this
			 * time. This is similar to an ASSERT in seg_unmap()
			 */
			ASSERT(err == 0);
		}
	}
	hat_free_end(hat);
	if (as->a_xhat != NULL)
		xhat_free_end_all(as);
	AS_LOCK_EXIT(as, &as->a_lock);

	/* /proc stuff */
	ASSERT(avl_numnodes(&as->a_wpage) == 0);
	if (as->a_objectdir) {
		kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
		as->a_objectdir = NULL;
		as->a_sizedir = 0;
	}

	/*
	 * Free the struct as back to kmem.  Assert it has no segments.
	 */
	ASSERT(avl_numnodes(&as->a_segtree) == 0);
	kmem_cache_free(as_cache, as);
}

int
as_dup(struct as *as, struct as **outas)
{
	struct as *newas;
	struct seg *seg, *newseg;
	int error;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	as_clearwatch(as);
	newas = as_alloc();
	newas->a_userlimit = as->a_userlimit;
	AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);

	/* This will prevent new XHATs from attaching */
	mutex_enter(&as->a_contents);
	AS_SETBUSY(as);
	mutex_exit(&as->a_contents);
	mutex_enter(&newas->a_contents);
	AS_SETBUSY(newas);
	mutex_exit(&newas->a_contents);


	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {

		if (seg->s_flags & S_PURGE)
			continue;

		newseg = seg_alloc(newas, seg->s_base, seg->s_size);
		if (newseg == NULL) {
			AS_LOCK_EXIT(newas, &newas->a_lock);
			as_setwatch(as);
			mutex_enter(&as->a_contents);
			AS_CLRBUSY(as);
			mutex_exit(&as->a_contents);
			AS_LOCK_EXIT(as, &as->a_lock);
			as_free(newas);
			return (-1);
		}
		if ((error = SEGOP_DUP(seg, newseg)) != 0) {
			/*
			 * We call seg_free() on the new seg
			 * because the segment is not set up
			 * completely; i.e. it has no ops.
			 */
			as_setwatch(as);
			mutex_enter(&as->a_contents);
			AS_CLRBUSY(as);
			mutex_exit(&as->a_contents);
			AS_LOCK_EXIT(as, &as->a_lock);
			seg_free(newseg);
			AS_LOCK_EXIT(newas, &newas->a_lock);
			as_free(newas);
			return (error);
		}
		newas->a_size += seg->s_size;
	}

	error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
	if (as->a_xhat != NULL)
		error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL);

	mutex_enter(&newas->a_contents);
	AS_CLRBUSY(newas);
	mutex_exit(&newas->a_contents);
	AS_LOCK_EXIT(newas, &newas->a_lock);

	as_setwatch(as);
	mutex_enter(&as->a_contents);
	AS_CLRBUSY(as);
	mutex_exit(&as->a_contents);
	AS_LOCK_EXIT(as, &as->a_lock);
	if (error != 0) {
		as_free(newas);
		return (error);
	}
	*outas = newas;
	return (0);
}
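
/*
 * Illustrative sketch (not part of the original file): the fork-style use of
 * as_dup(), which hands back a fully constructed copy of the parent's address
 * space.  The xx_fork_as() wrapper and its proc arguments are hypothetical;
 * the real caller is the process-duplication path.
 */
#if 0
static int
xx_fork_as(struct proc *parent, struct proc *child)
{
	int error;

	/* duplicate every non-S_PURGE segment and the hat mappings */
	error = as_dup(parent->p_as, &child->p_as);
	if (error != 0)
		return (error);		/* parent's as is left untouched */
	return (0);
}
#endif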

/*
 * Handle a ``fault'' at addr for size bytes.
 */
faultcode_t
as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
    enum fault_type type, enum seg_rw rw)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	size_t ssize;
	faultcode_t res = 0;
	caddr_t addrsav;
	struct seg *segsav;
	int as_lock_held;
	klwp_t *lwp = ttolwp(curthread);
	int is_xhat = 0;
	int holding_wpage = 0;
	extern struct seg_ops segdev_ops;


	if (as->a_hat != hat) {
		/* This must be an XHAT then */
		is_xhat = 1;

		if ((type != F_INVAL) || (as == &kas))
			return (FC_NOSUPPORT);
	}

retry:
	if (!is_xhat) {
		/*
		 * Indicate that the lwp is not to be stopped while waiting
		 * for a pagefault.  This is to avoid deadlock while debugging
		 * a process via /proc over NFS (in particular).
		 */
		if (lwp != NULL) {
			lwp->lwp_nostop++;
			lwp->lwp_nostop_r++;
		}

		/*
		 * same length must be used when we softlock and softunlock.
		 * We don't support softunlocking lengths less than
		 * the original length when there is largepage support.
		 * See seg_dev.c for more comments.
		 */
		switch (type) {

		case F_SOFTLOCK:
			CPU_STATS_ADD_K(vm, softlock, 1);
			break;

		case F_SOFTUNLOCK:
			break;

		case F_PROT:
			CPU_STATS_ADD_K(vm, prot_fault, 1);
			break;

		case F_INVAL:
			CPU_STATS_ENTER_K();
			CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
			if (as == &kas)
				CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
			CPU_STATS_EXIT_K();
			break;
		}
	}

	/* Kernel probe */
	TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
	    tnf_opaque,	address,	addr,
	    tnf_fault_type,	fault_type,	type,
	    tnf_seg_access,	access,		rw);

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	/*
	 * XXX -- Don't grab the as lock for segkmap. We should grab it for
	 * correctness, but then we could be stuck holding this lock for
	 * a LONG time if the fault needs to be resolved on a slow
	 * filesystem, and then no-one will be able to exec new commands,
	 * as exec'ing requires the write lock on the as.
	 */
	if (as == &kas && segkmap && segkmap->s_base <= raddr &&
	    raddr + size < segkmap->s_base + segkmap->s_size) {
		/*
		 * if (as==&kas), this can't be XHAT: we've already returned
		 * FC_NOSUPPORT.
		 */
		seg = segkmap;
		as_lock_held = 0;
	} else {
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
		if (is_xhat && avl_numnodes(&as->a_wpage) != 0) {
			/*
			 * Grab and hold the writers' lock on the as
			 * if the fault is to a watched page.
			 * This will keep CPUs from "peeking" at the
			 * address range while we're temporarily boosting
			 * the permissions for the XHAT device to
			 * resolve the fault in the segment layer.
			 *
			 * We could check whether faulted address
			 * is within a watched page and only then grab
			 * the writer lock, but this is simpler.
			 */
			AS_LOCK_EXIT(as, &as->a_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
		}

		seg = as_segat(as, raddr);
		if (seg == NULL) {
			AS_LOCK_EXIT(as, &as->a_lock);
			if ((lwp != NULL) && (!is_xhat)) {
				lwp->lwp_nostop--;
				lwp->lwp_nostop_r--;
			}
			return (FC_NOMAP);
		}

		as_lock_held = 1;
	}

	addrsav = raddr;
	segsav = seg;

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		if (raddr + rsize > seg->s_base + seg->s_size)
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		if (!is_xhat || (seg->s_ops != &segdev_ops)) {

			if (is_xhat && avl_numnodes(&as->a_wpage) != 0 &&
			    pr_is_watchpage_as(raddr, rw, as)) {
				/*
				 * Handle watch pages.  If we're faulting on a
				 * watched page from an X-hat, we have to
				 * restore the original permissions while we
				 * handle the fault.
				 */
				as_clearwatch(as);
				holding_wpage = 1;
			}

			res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);

			/* Restore watchpoints */
			if (holding_wpage) {
				as_setwatch(as);
				holding_wpage = 0;
			}

			if (res != 0)
				break;
		} else {
			/* XHAT does not support seg_dev */
			res = FC_NOSUPPORT;
			break;
		}
	}

	/*
	 * If we were SOFTLOCKing and encountered a failure,
	 * we must SOFTUNLOCK the range we already did. (Maybe we
	 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
	 * right here...)
	 */
	if (res != 0 && type == F_SOFTLOCK) {
		for (seg = segsav; addrsav < raddr; addrsav += ssize) {
			if (addrsav >= seg->s_base + seg->s_size)
				seg = AS_SEGNEXT(as, seg);
			ASSERT(seg != NULL);
			/*
			 * Now call the fault routine again to perform the
			 * unlock using S_OTHER instead of the rw variable
			 * since we never got a chance to touch the pages.
			 */
			if (raddr > seg->s_base + seg->s_size)
				ssize = seg->s_base + seg->s_size - addrsav;
			else
				ssize = raddr - addrsav;
			(void) SEGOP_FAULT(hat, seg, addrsav, ssize,
			    F_SOFTUNLOCK, S_OTHER);
		}
	}
	if (as_lock_held)
		AS_LOCK_EXIT(as, &as->a_lock);
	if ((lwp != NULL) && (!is_xhat)) {
		lwp->lwp_nostop--;
		lwp->lwp_nostop_r--;
	}
	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * it means that we should retry the fault.  Let's wait
	 * a bit also to let the deadlock causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}


/*
 * Asynchronous ``fault'' at addr for size bytes.
 */
faultcode_t
as_faulta(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	faultcode_t res = 0;
	klwp_t *lwp = ttolwp(curthread);

retry:
	/*
	 * Indicate that the lwp is not to be stopped while waiting
	 * for a pagefault.  This is to avoid deadlock while debugging
	 * a process via /proc over NFS (in particular).
	 */
	if (lwp != NULL) {
		lwp->lwp_nostop++;
		lwp->lwp_nostop_r++;
	}

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		if (lwp != NULL) {
			lwp->lwp_nostop--;
			lwp->lwp_nostop_r--;
		}
		return (FC_NOMAP);
	}

	for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		res = SEGOP_FAULTA(seg, raddr);
		if (res != 0)
			break;
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	if (lwp != NULL) {
		lwp->lwp_nostop--;
		lwp->lwp_nostop_r--;
	}
	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * it means that we should retry the fault.  Let's wait
	 * a bit also to let the deadlock causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}
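
/*
 * Illustrative sketch (not part of the original file): how a caller might use
 * as_fault() to softlock a user buffer before doing I/O into it, and release
 * it again afterwards.  xx_lock_user_buf()/xx_unlock_user_buf() are
 * hypothetical wrappers; as_fault() and the fault types are the ones defined
 * here.  Note the comment in as_fault(): the same length must be used for the
 * matching F_SOFTUNLOCK.
 */
#if 0
static faultcode_t
xx_lock_user_buf(struct as *as, caddr_t buf, size_t len)
{
	/* fault the pages in and lock them for writing */
	return (as_fault(as->a_hat, as, buf, len, F_SOFTLOCK, S_WRITE));
}

static void
xx_unlock_user_buf(struct as *as, caddr_t buf, size_t len)
{
	/* undo the softlock; S_OTHER since we never touched the pages */
	(void) as_fault(as->a_hat, as, buf, len, F_SOFTUNLOCK, S_OTHER);
}
#endif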

/*
 * Set the virtual mapping for the interval from [addr : addr + size)
 * in address space `as' to have the specified protection.
 * It is ok for the range to cross over several segments,
 * as long as they are contiguous.
 */
int
as_setprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	struct as_callback *cb;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0, writer = 0;
	caddr_t saveraddr;
	size_t saversize;

setprot_top:
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	saveraddr = raddr;
	saversize = rsize;

	/*
	 * Normally we only lock the as as a reader. But
	 * if due to setprot the segment driver needs to split
	 * a segment it will return IE_RETRY. Therefore we re-acquire
	 * the as lock as a writer so the segment driver can change
	 * the seg list. Also the segment driver will return IE_RETRY
	 * after it has changed the segment list so we therefore keep
	 * locking as a writer. Since these operations should be rare
	 * we want to only lock as a writer when necessary.
	 */
	if (writer || avl_numnodes(&as->a_wpage) != 0) {
		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	} else {
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	}

	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;
		error = SEGOP_SETPROT(seg, raddr, ssize, prot);

		if (error == IE_NOMEM) {
			error = EAGAIN;
			break;
		}

		if (error == IE_RETRY) {
			AS_LOCK_EXIT(as, &as->a_lock);
			writer = 1;
			goto setprot_top;
		}

		if (error == EAGAIN) {
			/*
			 * Make sure we have a_lock as writer.
			 */
			if (writer == 0) {
				AS_LOCK_EXIT(as, &as->a_lock);
				writer = 1;
				goto setprot_top;
			}

			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range pertains
			 *	the callback is invoked (unless it already is in
			 *	progress). The a_contents lock must be dropped
			 *	before the callback, so only one callback can
			 *	be done at a time. Go to the top and do more
			 *	until zero is returned. If zero is returned,
			 *	either there were no callbacks for this event
			 *	or they were already in progress.
			 */
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_SETPROT_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as, &as->a_lock);
				as_execute_callback(as, cb, AS_SETPROT_EVENT);
			} else {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			}
			mutex_exit(&as->a_contents);
			goto setprot_top;
		} else if (error != 0)
			break;
	}
	if (error != 0) {
		as_setwatch(as);
	} else {
		as_setwatchprot(as, saveraddr, saversize, prot);
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	return (error);
}

/*
 * Check to make sure that the interval [addr, addr + size)
 * in address space `as' has at least the specified protection.
 * It is ok for the range to cross over several segments, as long
 * as they are contiguous.
 */
int
as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	/*
	 * This is ugly as sin...
	 * Normally, we only acquire the address space readers lock.
	 * However, if the address space has watchpoints present,
	 * we must acquire the writer lock on the address space for
	 * the benefit of as_clearwatchprot() and as_setwatchprot().
	 */
	if (avl_numnodes(&as->a_wpage) != 0)
		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	else
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
		if (error != 0)
			break;
	}
	as_setwatch(as);
	AS_LOCK_EXIT(as, &as->a_lock);
	return (error);
}
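
/*
 * Illustrative sketch (not part of the original file): an mprotect-style
 * caller of as_checkprot()/as_setprot().  xx_make_readonly() is hypothetical;
 * note that as_setprot() does its own page rounding of the range.
 */
#if 0
static int
xx_make_readonly(struct as *as, caddr_t addr, size_t len)
{
	int error;

	/* verify the range is mapped and readable before downgrading it */
	error = as_checkprot(as, addr, len, PROT_READ);
	if (error != 0)
		return (error);
	return (as_setprot(as, addr, len, PROT_READ));
}
#endif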

int
as_unmap(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg, *seg_next;
	struct as_callback *cb;
	caddr_t raddr, eaddr;
	size_t ssize;
	int err;

top:
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
	    (uintptr_t)PAGEMASK);

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	/*
	 * Use as_findseg to find the first segment in the range, then
	 * step through the segments in order, following s_next.
	 */
	as_clearwatchprot(as, raddr, eaddr - raddr);

	for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
		if (eaddr <= seg->s_base)
			break;		/* eaddr was in a gap; all done */

		/* this is implied by the test above */
		ASSERT(raddr < eaddr);

		if (raddr < seg->s_base)
			raddr = seg->s_base; 	/* raddr was in a gap */

		if (eaddr > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = eaddr - raddr;

		/*
		 * Save next segment pointer since seg can be
		 * destroyed during the segment unmap operation.
		 */
		seg_next = AS_SEGNEXT(as, seg);

		err = SEGOP_UNMAP(seg, raddr, ssize);
		if (err == EAGAIN) {
			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range pertains
			 *	the callback is invoked (unless it already is in
			 *	progress). The a_contents lock must be dropped
			 *	before the callback, so only one callback can
			 *	be done at a time. Go to the top and do more
			 *	until zero is returned. If zero is returned,
			 *	either there were no callbacks for this event
			 *	or they were already in progress.
			 */
			as_setwatch(as);
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_UNMAP_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as, &as->a_lock);
				as_execute_callback(as, cb, AS_UNMAP_EVENT);
			} else {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else if (err == IE_RETRY) {
			as_setwatch(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			goto top;
		} else if (err) {
			as_setwatch(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			return (-1);
		}

		as->a_size -= ssize;
		raddr += ssize;
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	return (0);
}

static int
as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t szc;
	uint_t nszc;
	int error;
	caddr_t a;
	caddr_t eaddr;
	size_t segsize;
	struct seg *seg;
	size_t pgsz;
	int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
	uint_t save_szcvec;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
	if (!do_off) {
		vn_a->offset = 0;
	}

	if (szcvec <= 1) {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			return (ENOMEM);
		}
		vn_a->szc = 0;
		error = (*crfp)(seg, vn_a);
		if (error != 0) {
			seg_free(seg);
		}
		return (error);
	}

	eaddr = addr + size;
	save_szcvec = szcvec;
	szcvec >>= 1;
	szc = 0;
	nszc = 0;
	while (szcvec) {
		if ((szcvec & 0x1) == 0) {
			nszc++;
			szcvec >>= 1;
			continue;
		}
		nszc++;
		pgsz = page_get_pagesize(nszc);
		a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		if (a != addr) {
			ASSERT(a < eaddr);
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			*segcreated = 1;
			if (do_off) {
				vn_a->offset += segsize;
			}
			addr = a;
		}
		szc = nszc;
		szcvec >>= 1;
	}

	ASSERT(addr < eaddr);
	szcvec = save_szcvec | 1; /* add 8K pages */
	while (szcvec) {
		a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		ASSERT(a >= addr);
		if (a != addr) {
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			*segcreated = 1;
			if (do_off) {
				vn_a->offset += segsize;
			}
			addr = a;
		}
		szcvec &= ~(1 << szc);
		if (szcvec) {
			szc = highbit(szcvec) - 1;
			pgsz = page_get_pagesize(szc);
		}
	}
	ASSERT(addr == eaddr);

	return (0);
}

static int
as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	int text = vn_a->flags & MAP_TEXT;
	uint_t szcvec = map_execseg_pgszcvec(text, addr, size);
	int error;
	struct seg *seg;
	struct vattr va;
	u_offset_t eoff;
	size_t save_size = 0;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp != NULL);
	ASSERT(vn_a->amp == NULL);

again:
	if (szcvec <= 1) {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			return (ENOMEM);
		}
		vn_a->szc = 0;
		error = (*crfp)(seg, vn_a);
		if (error != 0) {
			seg_free(seg);
		}
		return (error);
	}

	va.va_mask = AT_SIZE;
	if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred) != 0) {
		szcvec = 0;
		goto again;
	}
	eoff = vn_a->offset & PAGEMASK;
	if (eoff >= va.va_size) {
		szcvec = 0;
		goto again;
	}
	eoff += size;
	if (btopr(va.va_size) < btopr(eoff)) {
		save_size = size;
		size = va.va_size - (vn_a->offset & PAGEMASK);
		size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
		szcvec = map_execseg_pgszcvec(text, addr, size);
		if (szcvec <= 1) {
			size = save_size;
			goto again;
		}
	}

	error = as_map_segvn_segs(as, addr, size, szcvec, crfp, vn_a,
	    segcreated);
	if (error != 0) {
		return (error);
	}
	if (save_size) {
		addr += size;
		size = save_size - size;
		szcvec = 0;
		goto again;
	}
	return (0);
}

static int
as_map_sham(struct as *as, caddr_t addr, size_t size,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t szcvec = map_shm_pgszcvec(addr, size,
	    vn_a->amp == NULL ? (uintptr_t)addr :
	    (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE));

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp == NULL);

	return (as_map_segvn_segs(as, addr, size, szcvec,
	    crfp, vn_a, segcreated));
}

int
as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
{
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	return (as_map_locked(as, addr, size, crfp, argsp));
}

int
as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
    void *argsp)
{
	struct seg *seg = NULL;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error;
	struct proc *p = curproc;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	/*
	 * check for wrap around
	 */
	if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
		AS_LOCK_EXIT(as, &as->a_lock);

		(void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	if (AS_MAP_VNSEGS_USELPGS(crfp, argsp) || AS_MAP_SHAMP(crfp, argsp)) {
		int unmap = 0;
		if (AS_MAP_SHAMP(crfp, argsp)) {
			error = as_map_sham(as, raddr, rsize, crfp,
			    (struct segvn_crargs *)argsp, &unmap);
		} else {
			error = as_map_vnsegs(as, raddr, rsize, crfp,
			    (struct segvn_crargs *)argsp, &unmap);
		}
		if (error != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			if (unmap) {
				(void) as_unmap(as, addr, size);
			}
			return (error);
		}
	} else {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (ENOMEM);
		}

		error = (*crfp)(seg, argsp);
		if (error != 0) {
			seg_free(seg);
			AS_LOCK_EXIT(as, &as->a_lock);
			return (error);
		}
	}

	/*
	 * Add size now so as_unmap will work if as_ctl fails.
	 */
	as->a_size += rsize;

	as_setwatch(as);

	/*
	 * If the address space is locked,
	 * establish memory locks for the new segment.
	 */
	mutex_enter(&as->a_contents);
	if (AS_ISPGLCK(as)) {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as, &as->a_lock);
		error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
		if (error != 0)
			(void) as_unmap(as, addr, size);
	} else {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as, &as->a_lock);
	}
	return (error);
}
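
/*
 * Illustrative sketch (not part of the original file): mapping and unmapping
 * a region through as_map()/as_unmap(), the way mmap-style callers do.
 * xx_map_zfod() is hypothetical; zfod_argsp is assumed to be the usual
 * zero-fill-on-demand segvn_create() argument pointer used by other callers.
 * as_map() rounds addr/len to page boundaries itself.
 */
#if 0
static int
xx_map_zfod(struct as *as, caddr_t addr, size_t len)
{
	int error;

	error = as_map(as, addr, len, segvn_create, zfod_argsp);
	if (error != 0)
		return (error);

	/* ... use the mapping, then tear it down again ... */
	(void) as_unmap(as, addr, len);
	return (0);
}
#endif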

/*
 * Delete all segments in the address space marked with S_PURGE.
 * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
 * These segments are deleted as a first step before calls to as_gap(), so
 * that they don't affect mmap() or shmat().
 */
void
as_purge(struct as *as)
{
	struct seg *seg;
	struct seg *next_seg;

	/*
	 * the setting of NEEDSPURGE is protected by as_rangelock(), so
	 * no need to grab a_contents mutex for this check
	 */
	if ((as->a_flags & AS_NEEDSPURGE) == 0)
		return;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	next_seg = NULL;
	seg = AS_SEGFIRST(as);
	while (seg != NULL) {
		next_seg = AS_SEGNEXT(as, seg);
		if (seg->s_flags & S_PURGE)
			SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
		seg = next_seg;
	}
	AS_LOCK_EXIT(as, &as->a_lock);

	mutex_enter(&as->a_contents);
	as->a_flags &= ~AS_NEEDSPURGE;
	mutex_exit(&as->a_contents);
}

/*
 * Find a hole of at least size minlen within [base, base + len).
 *
 * If flags specifies AH_HI, the hole will have the highest possible address
 * in the range.  We use the as->a_lastgap field to figure out where to
 * start looking for a gap.
 *
 * Otherwise, the gap will have the lowest possible address.
 *
 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
 *
 * If an adequate hole is found, base and len are set to reflect the part of
 * the hole that is within range, and 0 is returned, otherwise,
 * -1 is returned.
 *
 * NOTE: This routine is not correct when base+len overflows caddr_t.
 */
int
as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
    caddr_t addr)
{
	caddr_t lobound = *basep;
	caddr_t hibound = lobound + *lenp;
	struct seg *lseg, *hseg;
	caddr_t lo, hi;
	int forward;
	caddr_t save_base;
	size_t save_len;

	save_base = *basep;
	save_len = *lenp;
	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	if (AS_SEGFIRST(as) == NULL) {
		if (valid_va_range(basep, lenp, minlen, flags & AH_DIR)) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (0);
		} else {
			AS_LOCK_EXIT(as, &as->a_lock);
			*basep = save_base;
			*lenp = save_len;
			return (-1);
		}
	}

	/*
	 * Set up to iterate over all the inter-segment holes in the given
	 * direction.  lseg is NULL for the lowest-addressed hole and hseg is
	 * NULL for the highest-addressed hole.  If moving backwards, we reset
	 * hseg to denote the highest-addressed segment.
	 */
	forward = (flags & AH_DIR) == AH_LO;
	if (forward) {
		hseg = as_findseg(as, lobound, 1);
		lseg = AS_SEGPREV(as, hseg);
	} else {

		/*
		 * If allocating at least as much as the last allocation,
		 * use a_lastgap's base as a better estimate of hibound.
		 */
		if (as->a_lastgap &&
		    minlen >= as->a_lastgap->s_size &&
		    hibound >= as->a_lastgap->s_base)
			hibound = as->a_lastgap->s_base;

		hseg = as_findseg(as, hibound, 1);
		if (hseg->s_base + hseg->s_size < hibound) {
			lseg = hseg;
			hseg = NULL;
		} else {
			lseg = AS_SEGPREV(as, hseg);
		}
	}

	for (;;) {
		/*
		 * Set lo and hi to the hole's boundaries.  (We should really
		 * use MAXADDR in place of hibound in the expression below,
		 * but can't express it easily; using hibound in its place is
		 * harmless.)
		 */
		lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
		hi = (hseg == NULL) ? hibound : hseg->s_base;
		/*
		 * If the iteration has moved past the interval from lobound
		 * to hibound it's pointless to continue.
		 */
		if ((forward && lo > hibound) || (!forward && hi < lobound))
			break;
		else if (lo > hibound || hi < lobound)
			goto cont;
		/*
		 * Candidate hole lies at least partially within the allowable
		 * range.  Restrict it to fall completely within that range,
		 * i.e., to [max(lo, lobound), min(hi, hibound)].
		 */
		if (lo < lobound)
			lo = lobound;
		if (hi > hibound)
			hi = hibound;
		/*
		 * Verify that the candidate hole is big enough and meets
		 * hardware constraints.
		 */
		*basep = lo;
		*lenp = hi - lo;
		if (valid_va_range(basep, lenp, minlen,
		    forward ? AH_LO : AH_HI) &&
		    ((flags & AH_CONTAIN) == 0 ||
		    (*basep <= addr && *basep + *lenp > addr))) {
			if (!forward)
				as->a_lastgap = hseg;
			if (hseg != NULL)
				as->a_lastgaphl = hseg;
			else
				as->a_lastgaphl = lseg;
			AS_LOCK_EXIT(as, &as->a_lock);
			return (0);
		}
	cont:
		/*
		 * Move to the next hole.
		 */
		if (forward) {
			lseg = hseg;
			if (lseg == NULL)
				break;
			hseg = AS_SEGNEXT(as, hseg);
		} else {
			hseg = lseg;
			if (hseg == NULL)
				break;
			lseg = AS_SEGPREV(as, lseg);
		}
	}
	*basep = save_base;
	*lenp = save_len;
	AS_LOCK_EXIT(as, &as->a_lock);
	return (-1);
}
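
/*
 * Illustrative sketch (not part of the original file): the usual pattern for
 * picking an unmapped address - hold as_rangelock() so no other thread can
 * claim the same hole, purge S_PURGE segments, ask as_gap() for a hole, and
 * keep the range lock until the mapping has been entered with as_map().
 * xx_choose_addr() is hypothetical; the "place at the high end of the hole"
 * arithmetic mirrors what mmap-style address pickers do.
 */
#if 0
static int
xx_choose_addr(struct as *as, size_t len, caddr_t *addrp)
{
	caddr_t base = NULL;
	size_t slen = (size_t)as->a_userlimit;	/* search [0, a_userlimit) */
	int error = 0;

	as_rangelock(as);
	as_purge(as);			/* drop any S_PURGE segments first */
	if (as_gap(as, len, &base, &slen, AH_HI, NULL) == 0) {
		/* place the mapping at the high end of the hole found */
		*addrp = base + slen - len;
		/* ... as_map() the range here, before dropping the lock ... */
	} else {
		error = ENOMEM;
	}
	as_rangeunlock(as);
	return (error);
}
#endif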

/*
 * Return the next range within [base, base + len) that is backed
 * with "real memory".  Skip holes and non-seg_vn segments.
 * We're lazy and only return one segment at a time.
 */
int
as_memory(struct as *as, caddr_t *basep, size_t *lenp)
{
	extern struct seg_ops segspt_shmops;	/* needs a header file */
	struct seg *seg;
	caddr_t addr, eaddr;
	caddr_t segend;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);

	addr = *basep;
	eaddr = addr + *lenp;

	seg = as_findseg(as, addr, 0);
	if (seg != NULL)
		addr = MAX(seg->s_base, addr);

	for (;;) {
		if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (EINVAL);
		}

		if (seg->s_ops == &segvn_ops) {
			segend = seg->s_base + seg->s_size;
			break;
		}

		/*
		 * We do ISM by looking into the private data
		 * to determine the real size of the segment.
		 */
		if (seg->s_ops == &segspt_shmops) {
			segend = seg->s_base + spt_realsize(seg);
			if (addr < segend)
				break;
		}

		seg = AS_SEGNEXT(as, seg);

		if (seg != NULL)
			addr = seg->s_base;
	}

	*basep = addr;

	if (segend > eaddr)
		*lenp = eaddr - addr;
	else
		*lenp = segend - addr;

	AS_LOCK_EXIT(as, &as->a_lock);
	return (0);
}

/*
 * Swap the pages associated with the address space as out to
 * secondary storage, returning the number of bytes actually
 * swapped.
 *
 * The value returned is intended to correlate well with the process's
 * memory requirements.  Its usefulness for this purpose depends on
 * how well the segment-level routines do at returning accurate
 * information.
 */
size_t
as_swapout(struct as *as)
{
	struct seg *seg;
	size_t swpcnt = 0;

	/*
	 * Kernel-only processes have given up their address
	 * spaces.  Of course, we shouldn't be attempting to
	 * swap out such processes in the first place...
	 */
	if (as == NULL)
		return (0);

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);

	/* Prevent XHATs from attaching */
	mutex_enter(&as->a_contents);
	AS_SETBUSY(as);
	mutex_exit(&as->a_contents);


	/*
	 * Free all mapping resources associated with the address
	 * space.  The segment-level swapout routines capitalize
	 * on this unmapping by scavenging pages that have become
	 * unmapped here.
	 */
	hat_swapout(as->a_hat);
	if (as->a_xhat != NULL)
		xhat_swapout_all(as);

	mutex_enter(&as->a_contents);
	AS_CLRBUSY(as);
	mutex_exit(&as->a_contents);

	/*
	 * Call the swapout routines of all segments in the address
	 * space to do the actual work, accumulating the amount of
	 * space reclaimed.
	 */
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		struct seg_ops *ov = seg->s_ops;

		/*
		 * We have to check to see if the seg has
		 * an ops vector because the seg may have
		 * been in the middle of being set up when
		 * the process was picked for swapout.
		 */
		if ((ov != NULL) && (ov->swapout != NULL))
			swpcnt += SEGOP_SWAPOUT(seg);
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	return (swpcnt);
}

/*
 * Determine whether data from the mappings in interval [addr, addr + size)
 * are in the primary memory (core) cache.
2061 */ 2062 int 2063 as_incore(struct as *as, caddr_t addr, 2064 size_t size, char *vec, size_t *sizep) 2065 { 2066 struct seg *seg; 2067 size_t ssize; 2068 caddr_t raddr; /* rounded down addr */ 2069 size_t rsize; /* rounded up size */ 2070 size_t isize; /* iteration size */ 2071 int error = 0; /* result, assume success */ 2072 2073 *sizep = 0; 2074 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2075 rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) - 2076 (size_t)raddr; 2077 2078 if (raddr + rsize < raddr) /* check for wraparound */ 2079 return (ENOMEM); 2080 2081 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2082 seg = as_segat(as, raddr); 2083 if (seg == NULL) { 2084 AS_LOCK_EXIT(as, &as->a_lock); 2085 return (-1); 2086 } 2087 2088 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2089 if (raddr >= seg->s_base + seg->s_size) { 2090 seg = AS_SEGNEXT(as, seg); 2091 if (seg == NULL || raddr != seg->s_base) { 2092 error = -1; 2093 break; 2094 } 2095 } 2096 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2097 ssize = seg->s_base + seg->s_size - raddr; 2098 else 2099 ssize = rsize; 2100 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec); 2101 if (isize != ssize) { 2102 error = -1; 2103 break; 2104 } 2105 vec += btopr(ssize); 2106 } 2107 AS_LOCK_EXIT(as, &as->a_lock); 2108 return (error); 2109 } 2110 2111 static void 2112 as_segunlock(struct seg *seg, caddr_t addr, int attr, 2113 ulong_t *bitmap, size_t position, size_t npages) 2114 { 2115 caddr_t range_start; 2116 size_t pos1 = position; 2117 size_t pos2; 2118 size_t size; 2119 size_t end_pos = npages + position; 2120 2121 while (bt_range(bitmap, &pos1, &pos2, end_pos)) { 2122 size = ptob((pos2 - pos1)); 2123 range_start = (caddr_t)((uintptr_t)addr + 2124 ptob(pos1 - position)); 2125 2126 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK, 2127 (ulong_t *)NULL, (size_t)NULL); 2128 pos1 = pos2; 2129 } 2130 } 2131 2132 static void 2133 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map, 2134 caddr_t raddr, size_t rsize) 2135 { 2136 struct seg *seg = as_segat(as, raddr); 2137 size_t ssize; 2138 2139 while (rsize != 0) { 2140 if (raddr >= seg->s_base + seg->s_size) 2141 seg = AS_SEGNEXT(as, seg); 2142 2143 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2144 ssize = seg->s_base + seg->s_size - raddr; 2145 else 2146 ssize = rsize; 2147 2148 as_segunlock(seg, raddr, attr, mlock_map, 0, btopr(ssize)); 2149 2150 rsize -= ssize; 2151 raddr += ssize; 2152 } 2153 } 2154 2155 /* 2156 * Cache control operations over the interval [addr, addr + size) in 2157 * address space "as". 2158 */ 2159 /*ARGSUSED*/ 2160 int 2161 as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr, 2162 uintptr_t arg, ulong_t *lock_map, size_t pos) 2163 { 2164 struct seg *seg; /* working segment */ 2165 caddr_t raddr; /* rounded down addr */ 2166 caddr_t initraddr; /* saved initial rounded down addr */ 2167 size_t rsize; /* rounded up size */ 2168 size_t initrsize; /* saved initial rounded up size */ 2169 size_t ssize; /* size of seg */ 2170 int error = 0; /* result */ 2171 size_t mlock_size; /* size of bitmap */ 2172 ulong_t *mlock_map; /* pointer to bitmap used */ 2173 /* to represent the locked */ 2174 /* pages. */ 2175 retry: 2176 if (error == IE_RETRY) 2177 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 2178 else 2179 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2180 2181 /* 2182 * If these are address space lock/unlock operations, loop over 2183 * all segments in the address space, as appropriate. 
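	 *
	 * (Note on the MC_LOCKAS path below: a bitmap with one bit per
	 * page is allocated, where rlen is the sum of the page-rounded
	 * segment sizes and mlock_size = BT_BITOUL(btopr(rlen)) is the
	 * bitmap length in ulongs, so that a partial failure can be
	 * rolled back by unlocking exactly the pages whose bits were
	 * set.)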
2184 */ 2185 if (func == MC_LOCKAS) { 2186 size_t npages, idx; 2187 size_t rlen = 0; /* rounded as length */ 2188 2189 idx = pos; 2190 2191 if (arg & MCL_FUTURE) { 2192 mutex_enter(&as->a_contents); 2193 AS_SETPGLCK(as); 2194 mutex_exit(&as->a_contents); 2195 } 2196 if ((arg & MCL_CURRENT) == 0) { 2197 AS_LOCK_EXIT(as, &as->a_lock); 2198 return (0); 2199 } 2200 2201 seg = AS_SEGFIRST(as); 2202 if (seg == NULL) { 2203 AS_LOCK_EXIT(as, &as->a_lock); 2204 return (0); 2205 } 2206 2207 do { 2208 raddr = (caddr_t)((uintptr_t)seg->s_base & 2209 (uintptr_t)PAGEMASK); 2210 rlen += (((uintptr_t)(seg->s_base + seg->s_size) + 2211 PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr; 2212 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2213 2214 mlock_size = BT_BITOUL(btopr(rlen)); 2215 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size * 2216 sizeof (ulong_t), KM_NOSLEEP)) == NULL) { 2217 AS_LOCK_EXIT(as, &as->a_lock); 2218 return (EAGAIN); 2219 } 2220 2221 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 2222 error = SEGOP_LOCKOP(seg, seg->s_base, 2223 seg->s_size, attr, MC_LOCK, mlock_map, pos); 2224 if (error != 0) 2225 break; 2226 pos += seg_pages(seg); 2227 } 2228 2229 if (error) { 2230 for (seg = AS_SEGFIRST(as); seg != NULL; 2231 seg = AS_SEGNEXT(as, seg)) { 2232 2233 raddr = (caddr_t)((uintptr_t)seg->s_base & 2234 (uintptr_t)PAGEMASK); 2235 npages = seg_pages(seg); 2236 as_segunlock(seg, raddr, attr, mlock_map, 2237 idx, npages); 2238 idx += npages; 2239 } 2240 } 2241 2242 kmem_free(mlock_map, mlock_size * sizeof (ulong_t)); 2243 AS_LOCK_EXIT(as, &as->a_lock); 2244 goto lockerr; 2245 } else if (func == MC_UNLOCKAS) { 2246 mutex_enter(&as->a_contents); 2247 AS_CLRPGLCK(as); 2248 mutex_exit(&as->a_contents); 2249 2250 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 2251 error = SEGOP_LOCKOP(seg, seg->s_base, 2252 seg->s_size, attr, MC_UNLOCK, NULL, 0); 2253 if (error != 0) 2254 break; 2255 } 2256 2257 AS_LOCK_EXIT(as, &as->a_lock); 2258 goto lockerr; 2259 } 2260 2261 /* 2262 * Normalize addresses and sizes. 2263 */ 2264 initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2265 initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2266 (size_t)raddr; 2267 2268 if (raddr + rsize < raddr) { /* check for wraparound */ 2269 AS_LOCK_EXIT(as, &as->a_lock); 2270 return (ENOMEM); 2271 } 2272 2273 /* 2274 * Get initial segment. 2275 */ 2276 if ((seg = as_segat(as, raddr)) == NULL) { 2277 AS_LOCK_EXIT(as, &as->a_lock); 2278 return (ENOMEM); 2279 } 2280 2281 if (func == MC_LOCK) { 2282 mlock_size = BT_BITOUL(btopr(rsize)); 2283 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size * 2284 sizeof (ulong_t), KM_NOSLEEP)) == NULL) { 2285 AS_LOCK_EXIT(as, &as->a_lock); 2286 return (EAGAIN); 2287 } 2288 } 2289 2290 /* 2291 * Loop over all segments. If a hole in the address range is 2292 * discovered, then fail. For each segment, perform the appropriate 2293 * control operation. 2294 */ 2295 while (rsize != 0) { 2296 2297 /* 2298 * Make sure there's no hole, calculate the portion 2299 * of the next segment to be operated over. 
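		 *
		 * Equivalently, the clipping below computes
		 *	ssize = MIN(rsize, seg->s_base + seg->s_size - raddr),
		 * i.e., the portion of the remaining request that lies
		 * within the current segment.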
2300 */ 2301 if (raddr >= seg->s_base + seg->s_size) { 2302 seg = AS_SEGNEXT(as, seg); 2303 if (seg == NULL || raddr != seg->s_base) { 2304 if (func == MC_LOCK) { 2305 as_unlockerr(as, attr, mlock_map, 2306 initraddr, initrsize - rsize); 2307 kmem_free(mlock_map, 2308 mlock_size * sizeof (ulong_t)); 2309 } 2310 AS_LOCK_EXIT(as, &as->a_lock); 2311 return (ENOMEM); 2312 } 2313 } 2314 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2315 ssize = seg->s_base + seg->s_size - raddr; 2316 else 2317 ssize = rsize; 2318 2319 /* 2320 * Dispatch on specific function. 2321 */ 2322 switch (func) { 2323 2324 /* 2325 * Synchronize cached data from mappings with backing 2326 * objects. 2327 */ 2328 case MC_SYNC: 2329 if (error = SEGOP_SYNC(seg, raddr, ssize, 2330 attr, (uint_t)arg)) { 2331 AS_LOCK_EXIT(as, &as->a_lock); 2332 return (error); 2333 } 2334 break; 2335 2336 /* 2337 * Lock pages in memory. 2338 */ 2339 case MC_LOCK: 2340 if (error = SEGOP_LOCKOP(seg, raddr, ssize, 2341 attr, func, mlock_map, pos)) { 2342 as_unlockerr(as, attr, mlock_map, initraddr, 2343 initrsize - rsize + ssize); 2344 kmem_free(mlock_map, mlock_size * 2345 sizeof (ulong_t)); 2346 AS_LOCK_EXIT(as, &as->a_lock); 2347 goto lockerr; 2348 } 2349 break; 2350 2351 /* 2352 * Unlock mapped pages. 2353 */ 2354 case MC_UNLOCK: 2355 (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func, 2356 (ulong_t *)NULL, (size_t)NULL); 2357 break; 2358 2359 /* 2360 * Store VM advise for mapped pages in segment layer. 2361 */ 2362 case MC_ADVISE: 2363 error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg); 2364 2365 /* 2366 * Check for regular errors and special retry error 2367 */ 2368 if (error) { 2369 if (error == IE_RETRY) { 2370 /* 2371 * Need to acquire writers lock, so 2372 * have to drop readers lock and start 2373 * all over again 2374 */ 2375 AS_LOCK_EXIT(as, &as->a_lock); 2376 goto retry; 2377 } else if (error == IE_REATTACH) { 2378 /* 2379 * Find segment for current address 2380 * because current segment just got 2381 * split or concatenated 2382 */ 2383 seg = as_segat(as, raddr); 2384 if (seg == NULL) { 2385 AS_LOCK_EXIT(as, &as->a_lock); 2386 return (ENOMEM); 2387 } 2388 } else { 2389 /* 2390 * Regular error 2391 */ 2392 AS_LOCK_EXIT(as, &as->a_lock); 2393 return (error); 2394 } 2395 } 2396 break; 2397 2398 /* 2399 * Can't happen. 2400 */ 2401 default: 2402 panic("as_ctl: bad operation %d", func); 2403 /*NOTREACHED*/ 2404 } 2405 2406 rsize -= ssize; 2407 raddr += ssize; 2408 } 2409 2410 if (func == MC_LOCK) 2411 kmem_free(mlock_map, mlock_size * sizeof (ulong_t)); 2412 AS_LOCK_EXIT(as, &as->a_lock); 2413 return (0); 2414 lockerr: 2415 2416 /* 2417 * If the lower levels returned EDEADLK for a segment lockop, 2418 * it means that we should retry the operation. Let's wait 2419 * a bit also to let the deadlock causing condition clear. 2420 * This is part of a gross hack to work around a design flaw 2421 * in the ufs/sds logging code and should go away when the 2422 * logging code is re-designed to fix the problem. See bug 2423 * 4125102 for details of the problem. 2424 */ 2425 if (error == EDEADLK) { 2426 delay(deadlk_wait); 2427 error = 0; 2428 goto retry; 2429 } 2430 return (error); 2431 } 2432 2433 /* 2434 * Special code for exec to move the stack segment from its interim 2435 * place in the old address to the right place in the new address space. 
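 *
 * In outline (see the code below): the stack segment is looked up and
 * unlinked from the old address space, its s_as and s_base fields are
 * rewritten for the new address space, and it is re-inserted with
 * as_addseg() while both address spaces are held as writers.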
2436 */ 2437 /*ARGSUSED*/ 2438 int 2439 as_exec(struct as *oas, caddr_t ostka, size_t stksz, 2440 struct as *nas, caddr_t nstka, uint_t hatflag) 2441 { 2442 struct seg *stkseg; 2443 2444 AS_LOCK_ENTER(oas, &oas->a_lock, RW_WRITER); 2445 stkseg = as_segat(oas, ostka); 2446 stkseg = as_removeseg(oas, stkseg); 2447 ASSERT(stkseg != NULL); 2448 ASSERT(stkseg->s_base == ostka && stkseg->s_size == stksz); 2449 stkseg->s_as = nas; 2450 stkseg->s_base = nstka; 2451 2452 /* 2453 * It's ok to lock the address space we are about to exec to. 2454 */ 2455 AS_LOCK_ENTER(nas, &nas->a_lock, RW_WRITER); 2456 ASSERT(avl_numnodes(&nas->a_wpage) == 0); 2457 nas->a_size += stkseg->s_size; 2458 oas->a_size -= stkseg->s_size; 2459 (void) as_addseg(nas, stkseg); 2460 AS_LOCK_EXIT(nas, &nas->a_lock); 2461 AS_LOCK_EXIT(oas, &oas->a_lock); 2462 return (0); 2463 } 2464 2465 static int 2466 f_decode(faultcode_t fault_err) 2467 { 2468 int error = 0; 2469 2470 switch (FC_CODE(fault_err)) { 2471 case FC_OBJERR: 2472 error = FC_ERRNO(fault_err); 2473 break; 2474 case FC_PROT: 2475 error = EACCES; 2476 break; 2477 default: 2478 error = EFAULT; 2479 break; 2480 } 2481 return (error); 2482 } 2483 2484 /* 2485 * lock pages in a given address space. Return shadow list. If 2486 * the list is NULL, the MMU mapping is also locked. 2487 */ 2488 int 2489 as_pagelock(struct as *as, struct page ***ppp, caddr_t addr, 2490 size_t size, enum seg_rw rw) 2491 { 2492 size_t rsize; 2493 caddr_t base; 2494 caddr_t raddr; 2495 faultcode_t fault_err; 2496 struct seg *seg; 2497 int res; 2498 int prefaulted = 0; 2499 2500 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START, 2501 "as_pagelock_start: addr %p size %ld", addr, size); 2502 2503 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2504 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2505 (size_t)raddr; 2506 top: 2507 /* 2508 * if the request crosses two segments let 2509 * as_fault handle it. 2510 */ 2511 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2512 seg = as_findseg(as, addr, 0); 2513 if ((seg == NULL) || ((base = seg->s_base) > addr) || 2514 (addr + size) > base + seg->s_size) { 2515 AS_LOCK_EXIT(as, &as->a_lock); 2516 goto slow; 2517 } 2518 2519 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START, 2520 "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize); 2521 2522 /* 2523 * try to lock pages and pass back shadow list 2524 */ 2525 res = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw); 2526 2527 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end"); 2528 AS_LOCK_EXIT(as, &as->a_lock); 2529 if (res == 0) { 2530 return (0); 2531 } else if (res == ENOTSUP || prefaulted) { 2532 /* 2533 * (1) segment driver doesn't support PAGELOCK fastpath, or 2534 * (2) we've already tried fast path unsuccessfully after 2535 * faulting in the addr range below; system might be 2536 * thrashing or there may not be enough availrmem. 2537 */ 2538 goto slow; 2539 } 2540 2541 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_FAULT_START, 2542 "as_fault_start: addr %p size %ld", addr, size); 2543 2544 /* 2545 * we might get here because of some COW fault or non 2546 * existing page. Let as_fault deal with it. Just load 2547 * the page, don't lock the MMU mapping. 2548 */ 2549 fault_err = as_fault(as->a_hat, as, addr, size, F_INVAL, rw); 2550 if (fault_err != 0) { 2551 return (f_decode(fault_err)); 2552 } 2553 2554 prefaulted = 1; 2555 2556 /* 2557 * try fast path again; since we've dropped a_lock, 2558 * we need to try the dance from the start to see if 2559 * the addr range is still valid. 
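	 *
	 * (The "prefaulted" flag bounds this to a single retry: if the
	 * second SEGOP_PAGELOCK attempt fails as well, control falls
	 * through to the slow path below instead of faulting the range
	 * in again.)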
2560 */ 2561 goto top; 2562 slow: 2563 /* 2564 * load the page and lock the MMU mapping. 2565 */ 2566 fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw); 2567 if (fault_err != 0) { 2568 return (f_decode(fault_err)); 2569 } 2570 *ppp = NULL; 2571 2572 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end"); 2573 return (0); 2574 } 2575 2576 /* 2577 * unlock pages in a given address range 2578 */ 2579 void 2580 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size, 2581 enum seg_rw rw) 2582 { 2583 struct seg *seg; 2584 size_t rsize; 2585 caddr_t raddr; 2586 2587 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START, 2588 "as_pageunlock_start: addr %p size %ld", addr, size); 2589 2590 /* 2591 * if the shadow list is NULL, as_pagelock was 2592 * falling back to as_fault 2593 */ 2594 if (pp == NULL) { 2595 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw); 2596 return; 2597 } 2598 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2599 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2600 (size_t)raddr; 2601 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2602 seg = as_findseg(as, addr, 0); 2603 ASSERT(seg); 2604 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START, 2605 "seg_unlock_start: raddr %p rsize %ld", raddr, rsize); 2606 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw); 2607 AS_LOCK_EXIT(as, &as->a_lock); 2608 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end"); 2609 } 2610 2611 /* 2612 * reclaim cached pages in a given address range 2613 */ 2614 void 2615 as_pagereclaim(struct as *as, struct page **pp, caddr_t addr, 2616 size_t size, enum seg_rw rw) 2617 { 2618 struct seg *seg; 2619 size_t rsize; 2620 caddr_t raddr; 2621 2622 ASSERT(AS_READ_HELD(as, &as->a_lock)); 2623 ASSERT(pp != NULL); 2624 2625 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2626 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2627 (size_t)raddr; 2628 seg = as_findseg(as, addr, 0); 2629 ASSERT(seg); 2630 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGERECLAIM, rw); 2631 } 2632 2633 #define MAXPAGEFLIP 4 2634 #define MAXPAGEFLIPSIZ MAXPAGEFLIP*PAGESIZE 2635 2636 int 2637 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc, 2638 boolean_t wait) 2639 { 2640 struct seg *seg; 2641 size_t ssize; 2642 caddr_t raddr; /* rounded down addr */ 2643 size_t rsize; /* rounded up size */ 2644 int error = 0; 2645 size_t pgsz = page_get_pagesize(szc); 2646 2647 setpgsz_top: 2648 if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) { 2649 return (EINVAL); 2650 } 2651 2652 raddr = addr; 2653 rsize = size; 2654 2655 if (raddr + rsize < raddr) /* check for wraparound */ 2656 return (ENOMEM); 2657 2658 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 2659 as_clearwatchprot(as, raddr, rsize); 2660 seg = as_segat(as, raddr); 2661 if (seg == NULL) { 2662 as_setwatch(as); 2663 AS_LOCK_EXIT(as, &as->a_lock); 2664 return (ENOMEM); 2665 } 2666 2667 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2668 if (raddr >= seg->s_base + seg->s_size) { 2669 seg = AS_SEGNEXT(as, seg); 2670 if (seg == NULL || raddr != seg->s_base) { 2671 error = ENOMEM; 2672 break; 2673 } 2674 } 2675 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 2676 ssize = seg->s_base + seg->s_size - raddr; 2677 } else { 2678 ssize = rsize; 2679 } 2680 2681 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc); 2682 2683 if (error == IE_NOMEM) { 2684 error = EAGAIN; 2685 break; 2686 } 2687 2688 if (error == IE_RETRY) { 2689 AS_LOCK_EXIT(as, &as->a_lock); 2690 
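			/*
			 * IE_RETRY from the segment driver means the
			 * operation must be retried; a_lock has been
			 * dropped above, so restart from setpgsz_top.
			 */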
goto setpgsz_top; 2691 } 2692 2693 if (error == ENOTSUP) { 2694 error = EINVAL; 2695 break; 2696 } 2697 2698 if (wait && (error == EAGAIN)) { 2699 /* 2700 * Memory is currently locked. It must be unlocked 2701 * before this operation can succeed through a retry. 2702 * The possible reasons for locked memory and 2703 * corresponding strategies for unlocking are: 2704 * (1) Normal I/O 2705 * wait for a signal that the I/O operation 2706 * has completed and the memory is unlocked. 2707 * (2) Asynchronous I/O 2708 * The aio subsystem does not unlock pages when 2709 * the I/O is completed. Those pages are unlocked 2710 * when the application calls aiowait/aioerror. 2711 * So, to prevent blocking forever, cv_broadcast() 2712 * is done to wake up aio_cleanup_thread. 2713 * Subsequently, segvn_reclaim will be called, and 2714 * that will do AS_CLRUNMAPWAIT() and wake us up. 2715 * (3) Long term page locking: 2716 * This is not relevant for as_setpagesize() 2717 * because we cannot change the page size for 2718 * driver memory. The attempt to do so will 2719 * fail with a different error than EAGAIN so 2720 * there's no need to trigger as callbacks like 2721 * as_unmap, as_setprot or as_free would do. 2722 */ 2723 mutex_enter(&as->a_contents); 2724 if (AS_ISUNMAPWAIT(as) == 0) { 2725 cv_broadcast(&as->a_cv); 2726 } 2727 AS_SETUNMAPWAIT(as); 2728 AS_LOCK_EXIT(as, &as->a_lock); 2729 while (AS_ISUNMAPWAIT(as)) { 2730 cv_wait(&as->a_cv, &as->a_contents); 2731 } 2732 mutex_exit(&as->a_contents); 2733 goto setpgsz_top; 2734 } else if (error != 0) { 2735 break; 2736 } 2737 } 2738 as_setwatch(as); 2739 AS_LOCK_EXIT(as, &as->a_lock); 2740 return (error); 2741 } 2742 2743 /* 2744 * Setup all of the uninitialized watched pages that we can. 2745 */ 2746 void 2747 as_setwatch(struct as *as) 2748 { 2749 struct watched_page *pwp; 2750 struct seg *seg; 2751 caddr_t vaddr; 2752 uint_t prot; 2753 int err, retrycnt; 2754 2755 if (avl_numnodes(&as->a_wpage) == 0) 2756 return; 2757 2758 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 2759 2760 for (pwp = avl_first(&as->a_wpage); pwp != NULL; 2761 pwp = AVL_NEXT(&as->a_wpage, pwp)) { 2762 retrycnt = 0; 2763 retry: 2764 vaddr = pwp->wp_vaddr; 2765 if (pwp->wp_oprot != 0 || /* already set up */ 2766 (seg = as_segat(as, vaddr)) == NULL || 2767 SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0) 2768 continue; 2769 2770 pwp->wp_oprot = prot; 2771 if (pwp->wp_read) 2772 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 2773 if (pwp->wp_write) 2774 prot &= ~PROT_WRITE; 2775 if (pwp->wp_exec) 2776 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 2777 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) { 2778 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot); 2779 if (err == IE_RETRY) { 2780 pwp->wp_oprot = 0; 2781 ASSERT(retrycnt == 0); 2782 retrycnt++; 2783 goto retry; 2784 } 2785 } 2786 pwp->wp_prot = prot; 2787 } 2788 } 2789 2790 /* 2791 * Clear all of the watched pages in the address space. 
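 *
 * This is the inverse of as_setwatch(): for each watched page that was
 * set up, the saved original protections (wp_oprot) are restored via
 * SEGOP_SETPROT() if they were changed, and the wp_oprot/wp_prot
 * bookkeeping fields are reset to zero.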
2792 */ 2793 void 2794 as_clearwatch(struct as *as) 2795 { 2796 struct watched_page *pwp; 2797 struct seg *seg; 2798 caddr_t vaddr; 2799 uint_t prot; 2800 int err, retrycnt; 2801 2802 if (avl_numnodes(&as->a_wpage) == 0) 2803 return; 2804 2805 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 2806 2807 for (pwp = avl_first(&as->a_wpage); pwp != NULL; 2808 pwp = AVL_NEXT(&as->a_wpage, pwp)) { 2809 retrycnt = 0; 2810 retry: 2811 vaddr = pwp->wp_vaddr; 2812 if (pwp->wp_oprot == 0 || /* not set up */ 2813 (seg = as_segat(as, vaddr)) == NULL) 2814 continue; 2815 2816 if ((prot = pwp->wp_oprot) != pwp->wp_prot) { 2817 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot); 2818 if (err == IE_RETRY) { 2819 ASSERT(retrycnt == 0); 2820 retrycnt++; 2821 goto retry; 2822 } 2823 } 2824 pwp->wp_oprot = 0; 2825 pwp->wp_prot = 0; 2826 } 2827 } 2828 2829 /* 2830 * Force a new setup for all the watched pages in the range. 2831 */ 2832 static void 2833 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot) 2834 { 2835 struct watched_page *pwp; 2836 struct watched_page tpw; 2837 caddr_t eaddr = addr + size; 2838 caddr_t vaddr; 2839 struct seg *seg; 2840 int err, retrycnt; 2841 uint_t wprot; 2842 avl_index_t where; 2843 2844 if (avl_numnodes(&as->a_wpage) == 0) 2845 return; 2846 2847 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 2848 2849 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2850 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL) 2851 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER); 2852 2853 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 2854 retrycnt = 0; 2855 vaddr = pwp->wp_vaddr; 2856 2857 wprot = prot; 2858 if (pwp->wp_read) 2859 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 2860 if (pwp->wp_write) 2861 wprot &= ~PROT_WRITE; 2862 if (pwp->wp_exec) 2863 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 2864 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) { 2865 retry: 2866 seg = as_segat(as, vaddr); 2867 if (seg == NULL) { 2868 panic("as_setwatchprot: no seg"); 2869 /*NOTREACHED*/ 2870 } 2871 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot); 2872 if (err == IE_RETRY) { 2873 ASSERT(retrycnt == 0); 2874 retrycnt++; 2875 goto retry; 2876 } 2877 } 2878 pwp->wp_oprot = prot; 2879 pwp->wp_prot = wprot; 2880 2881 pwp = AVL_NEXT(&as->a_wpage, pwp); 2882 } 2883 } 2884 2885 /* 2886 * Clear all of the watched pages in the range. 
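 *
 * Like as_clearwatch(), but restricted to watched pages that fall in
 * [addr, addr + size): the a_wpage AVL tree is walked from the first
 * entry at or after the page-aligned addr, restoring the original
 * protections for each entry until eaddr is reached.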
2887 */ 2888 static void 2889 as_clearwatchprot(struct as *as, caddr_t addr, size_t size) 2890 { 2891 caddr_t eaddr = addr + size; 2892 struct watched_page *pwp; 2893 struct watched_page tpw; 2894 uint_t prot; 2895 struct seg *seg; 2896 int err, retrycnt; 2897 avl_index_t where; 2898 2899 if (avl_numnodes(&as->a_wpage) == 0) 2900 return; 2901 2902 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2903 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL) 2904 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER); 2905 2906 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 2907 2908 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 2909 ASSERT(addr >= pwp->wp_vaddr); 2910 2911 if ((prot = pwp->wp_oprot) != 0) { 2912 retrycnt = 0; 2913 2914 if (prot != pwp->wp_prot) { 2915 retry: 2916 seg = as_segat(as, pwp->wp_vaddr); 2917 if (seg == NULL) 2918 continue; 2919 err = SEGOP_SETPROT(seg, pwp->wp_vaddr, 2920 PAGESIZE, prot); 2921 if (err == IE_RETRY) { 2922 ASSERT(retrycnt == 0); 2923 retrycnt++; 2924 goto retry; 2925 2926 } 2927 } 2928 pwp->wp_oprot = 0; 2929 pwp->wp_prot = 0; 2930 } 2931 2932 pwp = AVL_NEXT(&as->a_wpage, pwp); 2933 } 2934 } 2935 2936 void 2937 as_signal_proc(struct as *as, k_siginfo_t *siginfo) 2938 { 2939 struct proc *p; 2940 2941 mutex_enter(&pidlock); 2942 for (p = practive; p; p = p->p_next) { 2943 if (p->p_as == as) { 2944 mutex_enter(&p->p_lock); 2945 if (p->p_as == as) 2946 sigaddq(p, NULL, siginfo, KM_NOSLEEP); 2947 mutex_exit(&p->p_lock); 2948 } 2949 } 2950 mutex_exit(&pidlock); 2951 } 2952 2953 /* 2954 * return memory object ID 2955 */ 2956 int 2957 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp) 2958 { 2959 struct seg *seg; 2960 int sts; 2961 2962 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2963 seg = as_segat(as, addr); 2964 if (seg == NULL) { 2965 AS_LOCK_EXIT(as, &as->a_lock); 2966 return (EFAULT); 2967 } 2968 /* 2969 * catch old drivers which may not support getmemid 2970 */ 2971 if (seg->s_ops->getmemid == NULL) { 2972 AS_LOCK_EXIT(as, &as->a_lock); 2973 return (ENODEV); 2974 } 2975 2976 sts = SEGOP_GETMEMID(seg, addr, memidp); 2977 2978 AS_LOCK_EXIT(as, &as->a_lock); 2979 return (sts); 2980 } 2981
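
/*
 * Illustrative sketch only (not part of the original source): a caller
 * that needs a stable identity for the memory object backing a user
 * address might use as_getmemid() roughly as follows; "uaddr" is a
 * hypothetical user address and the error handling is schematic.
 *
 *	memid_t memid;
 *	int err;
 *
 *	err = as_getmemid(curproc->p_as, uaddr, &memid);
 *	if (err != 0)
 *		return (err);	(EFAULT: no segment; ENODEV: no getmemid op)
 */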