/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015, Joyent, Inc. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

/*
 * VM - address spaces.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/sysmacros.h>
#include <sys/cpuvar.h>
#include <sys/sysinfo.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/tnf_probe.h>
#include <sys/vtrace.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>
#include <vm/seg_spt.h>
#include <vm/page.h>

clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */

static struct kmem_cache *as_cache;

static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
static void as_clearwatchprot(struct as *, caddr_t, size_t);
int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);


/*
 * Verifying the segment lists is very time-consuming; it may not be
 * desirable always to define VERIFY_SEGLIST when DEBUG is set.
 */
#ifdef DEBUG
#define	VERIFY_SEGLIST
int do_as_verify = 0;
#endif

/*
 * Allocate a new callback data structure entry and fill in the events of
 * interest, the address range of interest, and the callback argument.
 * Link the entry on the as->a_callbacks list. A callback entry for the
 * entire address space may be specified with vaddr = 0 and size = -1.
 *
 * CALLERS RESPONSIBILITY: If not calling from within the process context for
 * the specified as, the caller must guarantee persistence of the specified as
 * for the duration of this function (eg. pages being locked within the as
 * will guarantee persistence).
 */
int
as_add_callback(struct as *as, void (*cb_func)(), void *arg, uint_t events,
    caddr_t vaddr, size_t size, int sleepflag)
{
	struct as_callback *current_head, *cb;
	caddr_t saddr;
	size_t rsize;

	/* callback function and an event are mandatory */
	if ((cb_func == NULL) || ((events & AS_ALL_EVENT) == 0))
		return (EINVAL);

	/* Adding a callback after as_free has been called is not allowed */
	if (as == &kas)
		return (ENOMEM);

	/*
	 * vaddr = 0 and size = -1 is used to indicate that the callback range
	 * is the entire address space so no rounding is done in that case.
	 */
	if (size != -1) {
		saddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
		rsize = (((size_t)(vaddr + size) + PAGEOFFSET) & PAGEMASK) -
		    (size_t)saddr;
		/* check for wraparound */
		if (saddr + rsize < saddr)
			return (ENOMEM);
	} else {
		if (vaddr != 0)
			return (EINVAL);
		saddr = vaddr;
		rsize = size;
	}

	/* Allocate and initialize a callback entry */
	cb = kmem_zalloc(sizeof (struct as_callback), sleepflag);
	if (cb == NULL)
		return (EAGAIN);

	cb->ascb_func = cb_func;
	cb->ascb_arg = arg;
	cb->ascb_events = events;
	cb->ascb_saddr = saddr;
	cb->ascb_len = rsize;

	/* Add the entry to the list */
	mutex_enter(&as->a_contents);
	current_head = as->a_callbacks;
	as->a_callbacks = cb;
	cb->ascb_next = current_head;

	/*
	 * The call to this function may lose in a race with
	 * a pertinent event - eg. a thread does long term memory locking
	 * but before the callback is added another thread executes as_unmap.
	 * A broadcast here resolves that.
	 */
	if ((cb->ascb_events & AS_UNMAPWAIT_EVENT) && AS_ISUNMAPWAIT(as)) {
		AS_CLRUNMAPWAIT(as);
		cv_broadcast(&as->a_cv);
	}

	mutex_exit(&as->a_contents);
	return (0);
}
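
/*
 * Illustrative sketch (not part of the original file): a driver that keeps
 * pages locked long term would typically pair as_add_callback() with
 * as_delete_callback() along these lines.  The callback function, the
 * xx_state structure and xx_unlock_pages() are hypothetical; only the as_*
 * interfaces are real.
 *
 *	static void
 *	xx_unload_callback(struct as *as, void *arg, uint_t events)
 *	{
 *		struct xx_state *sp = arg;
 *
 *		xx_unlock_pages(sp);
 *		(void) as_delete_callback(as, arg);
 *	}
 *
 * Registration, done in the process context of "as":
 *
 *	error = as_add_callback(as, xx_unload_callback, sp,
 *	    AS_UNMAP_EVENT | AS_SETPROT_EVENT, sp->xx_vaddr, sp->xx_len,
 *	    KM_SLEEP);
 *
 * When as_unmap()/as_setprot() later hit the locked range they invoke the
 * callback and block until it calls as_delete_callback(), as described in
 * as_execute_callback() below.
 */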

/*
 * Search the callback list for an entry which pertains to arg.
 *
 * This is called from within the client upon completion of the callback.
 * RETURN VALUES:
 *	AS_CALLBACK_DELETED  (callback entry found and deleted)
 *	AS_CALLBACK_NOTFOUND (no callback entry found - this is ok)
 *	AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this
 *			entry will be made in as_do_callbacks)
 *
 * If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED
 * set, it indicates that as_do_callbacks is processing this entry.  The
 * AS_ALL_EVENT events are cleared in the entry, and a broadcast is made
 * to unblock as_do_callbacks, in case it is blocked.
 *
 * CALLERS RESPONSIBILITY: If not calling from within the process context for
 * the specified as, the caller must guarantee persistence of the specified as
 * for the duration of this function (eg. pages being locked within the as
 * will guarantee persistence).
 */
uint_t
as_delete_callback(struct as *as, void *arg)
{
	struct as_callback **prevcb = &as->a_callbacks;
	struct as_callback *cb;
	uint_t rc = AS_CALLBACK_NOTFOUND;

	mutex_enter(&as->a_contents);
	for (cb = as->a_callbacks; cb; prevcb = &cb->ascb_next, cb = *prevcb) {
		if (cb->ascb_arg != arg)
			continue;

		/*
		 * If the events indicate AS_CALLBACK_CALLED, just clear
		 * AS_ALL_EVENT in the events field and wakeup the thread
		 * that may be waiting in as_do_callbacks.  as_do_callbacks
		 * will take care of removing this entry from the list.  In
		 * that case, return AS_CALLBACK_DELETE_DEFERRED.  Otherwise
		 * (AS_CALLBACK_CALLED not set), just remove it from the
		 * list, return the memory and return AS_CALLBACK_DELETED.
		 */
		if ((cb->ascb_events & AS_CALLBACK_CALLED) != 0) {
			/* leave AS_CALLBACK_CALLED */
			cb->ascb_events &= ~AS_ALL_EVENT;
			rc = AS_CALLBACK_DELETE_DEFERRED;
			cv_broadcast(&as->a_cv);
		} else {
			*prevcb = cb->ascb_next;
			kmem_free(cb, sizeof (struct as_callback));
			rc = AS_CALLBACK_DELETED;
		}
		break;
	}
	mutex_exit(&as->a_contents);
	return (rc);
}

/*
 * Searches the as callback list for a matching entry.
 * Returns a pointer to the first matching callback, or NULL if
 * nothing is found.
 * This function never sleeps so it is ok to call it with more
 * locks held than the (required) a_contents mutex.
 *
 * See also comment on as_do_callbacks below.
 */
static struct as_callback *
as_find_callback(struct as *as, uint_t events, caddr_t event_addr,
    size_t event_len)
{
	struct as_callback *cb;

	ASSERT(MUTEX_HELD(&as->a_contents));
	for (cb = as->a_callbacks; cb != NULL; cb = cb->ascb_next) {
		/*
		 * If the callback has not already been called, then
		 * check if events or address range pertains.  An event_len
		 * of zero means do an unconditional callback.
		 */
		if (((cb->ascb_events & AS_CALLBACK_CALLED) != 0) ||
		    ((event_len != 0) && (((cb->ascb_events & events) == 0) ||
		    (event_addr + event_len < cb->ascb_saddr) ||
		    (event_addr > (cb->ascb_saddr + cb->ascb_len))))) {
			continue;
		}
		break;
	}
	return (cb);
}

/*
 * Executes a given callback and removes it from the callback list for
 * this address space.
 * This function may sleep so the caller must drop all locks except
 * a_contents before calling this func.
 *
 * See also comments on as_do_callbacks below.
 */
static void
as_execute_callback(struct as *as, struct as_callback *cb,
    uint_t events)
{
	struct as_callback **prevcb;
	void *cb_arg;

	ASSERT(MUTEX_HELD(&as->a_contents) && (cb->ascb_events & events));
	cb->ascb_events |= AS_CALLBACK_CALLED;
	mutex_exit(&as->a_contents);
	(*cb->ascb_func)(as, cb->ascb_arg, events);
	mutex_enter(&as->a_contents);
	/*
	 * the callback function is required to delete the callback
	 * when the callback function determines it is OK for
	 * this thread to continue. as_delete_callback will clear
	 * the AS_ALL_EVENT in the events field when it is deleted.
	 * If the callback function called as_delete_callback,
	 * events will already be cleared and there will be no blocking.
	 */
	while ((cb->ascb_events & events) != 0) {
		cv_wait(&as->a_cv, &as->a_contents);
	}
	/*
	 * This entry needs to be taken off the list. Normally, the
	 * callback func itself does that, but unfortunately the list
	 * may have changed while the callback was running because the
	 * a_contents mutex was dropped and someone else other than the
	 * callback func itself could have called as_delete_callback,
	 * so we have to search to find this entry again.  The entry
	 * must have AS_CALLBACK_CALLED, and have the same 'arg'.
	 */
	cb_arg = cb->ascb_arg;
	prevcb = &as->a_callbacks;
	for (cb = as->a_callbacks; cb != NULL;
	    prevcb = &cb->ascb_next, cb = *prevcb) {
		if (((cb->ascb_events & AS_CALLBACK_CALLED) == 0) ||
		    (cb_arg != cb->ascb_arg)) {
			continue;
		}
		*prevcb = cb->ascb_next;
		kmem_free(cb, sizeof (struct as_callback));
		break;
	}
}

/*
 * Check the callback list for a matching event and intersection of
 * address range. If there is a match invoke the callback. Skip an entry if:
 *    - a callback is already in progress for this entry (AS_CALLBACK_CALLED)
 *    - not an event of interest
 *    - not an address range of interest
 *
 * An event_len of zero indicates a request for an unconditional callback
 * (regardless of event), only the AS_CALLBACK_CALLED is checked. The
 * a_contents lock must be dropped before a callback, so only one callback
 * can be done before returning. Return -1 (true) if a callback was
 * executed and removed from the list, else return 0 (false).
 *
 * The logically separate parts, i.e. finding a matching callback and
 * executing a given callback have been separated into two functions
 * so that they can be called with different sets of locks held beyond
 * the always-required a_contents. as_find_callback does not sleep so
 * it is ok to call it if more locks than a_contents (i.e. the a_lock
 * rwlock) are held. as_execute_callback on the other hand may sleep
 * so all locks beyond a_contents must be dropped by the caller if one
 * does not want to end up comatose.
 */
static int
as_do_callbacks(struct as *as, uint_t events, caddr_t event_addr,
    size_t event_len)
{
	struct as_callback *cb;

	if ((cb = as_find_callback(as, events, event_addr, event_len))) {
		as_execute_callback(as, cb, events);
		return (-1);
	}
	return (0);
}

/*
 * Search for the segment containing addr. If a segment containing addr
 * exists, that segment is returned.  If no such segment exists, and
 * the list spans addresses greater than addr, then the first segment
 * whose base is greater than addr is returned; otherwise, NULL is
 * returned unless tail is true, in which case the last element of the
 * list is returned.
 *
 * a_seglast is used to cache the last found segment for repeated
 * searches to the same addr (which happens frequently).
 */
struct seg *
as_findseg(struct as *as, caddr_t addr, int tail)
{
	struct seg *seg = as->a_seglast;
	avl_index_t where;

	ASSERT(AS_LOCK_HELD(as));

	if (seg != NULL &&
	    seg->s_base <= addr &&
	    addr < seg->s_base + seg->s_size)
		return (seg);

	seg = avl_find(&as->a_segtree, &addr, &where);
	if (seg != NULL)
		return (as->a_seglast = seg);

	seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
	if (seg == NULL && tail)
		seg = avl_last(&as->a_segtree);
	return (as->a_seglast = seg);
}
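
/*
 * Illustrative sketch (not part of the original file): given the semantics
 * above, a caller holding the address space lock can visit every segment
 * overlapping [addr, addr + len) as follows (the loop body is left to the
 * caller):
 *
 *	struct seg *seg;
 *	caddr_t eaddr = addr + len;
 *
 *	AS_LOCK_ENTER(as, RW_READER);
 *	for (seg = as_findseg(as, addr, 0);
 *	    seg != NULL && seg->s_base < eaddr;
 *	    seg = AS_SEGNEXT(as, seg)) {
 *		...
 *	}
 *	AS_LOCK_EXIT(as);
 *
 * This is essentially the walk that as_unmap() performs below.
 */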

#ifdef VERIFY_SEGLIST
/*
 * verify that the linked list is coherent
 */
static void
as_verify(struct as *as)
{
	struct seg *seg, *seglast, *p, *n;
	uint_t nsegs = 0;

	if (do_as_verify == 0)
		return;

	seglast = as->a_seglast;

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		ASSERT(seg->s_as == as);
		p = AS_SEGPREV(as, seg);
		n = AS_SEGNEXT(as, seg);
		ASSERT(p == NULL || p->s_as == as);
		ASSERT(p == NULL || p->s_base < seg->s_base);
		ASSERT(n == NULL || n->s_base > seg->s_base);
		ASSERT(n != NULL || seg == avl_last(&as->a_segtree));
		if (seg == seglast)
			seglast = NULL;
		nsegs++;
	}
	ASSERT(seglast == NULL);
	ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
}
#endif /* VERIFY_SEGLIST */

/*
 * Add a new segment to the address space. The avl_find()
 * may be expensive so we attempt to use last segment accessed
 * in as_gap() as an insertion point.
 */
int
as_addseg(struct as *as, struct seg *newseg)
{
	struct seg *seg;
	caddr_t addr;
	caddr_t eaddr;
	avl_index_t where;

	ASSERT(AS_WRITE_HELD(as));

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (as->a_lastgaphl != NULL) {
		struct seg *hseg = NULL;
		struct seg *lseg = NULL;

		if (as->a_lastgaphl->s_base > newseg->s_base) {
			hseg = as->a_lastgaphl;
			lseg = AVL_PREV(&as->a_segtree, hseg);
		} else {
			lseg = as->a_lastgaphl;
			hseg = AVL_NEXT(&as->a_segtree, lseg);
		}

		if (hseg && lseg && lseg->s_base < newseg->s_base &&
		    hseg->s_base > newseg->s_base) {
			avl_insert_here(&as->a_segtree, newseg, lseg,
			    AVL_AFTER);
			as->a_lastgaphl = NULL;
			as->a_seglast = newseg;
			return (0);
		}
		as->a_lastgaphl = NULL;
	}

	addr = newseg->s_base;
	eaddr = addr + newseg->s_size;
again:

	seg = avl_find(&as->a_segtree, &addr, &where);

	if (seg == NULL)
		seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);

	if (seg == NULL)
		seg = avl_last(&as->a_segtree);

	if (seg != NULL) {
		caddr_t base = seg->s_base;

		/*
		 * If top of seg is below the requested address, then
		 * the insertion point is at the end of the linked list,
		 * and seg points to the tail of the list.  Otherwise,
		 * the insertion point is immediately before seg.
		 */
		if (base + seg->s_size > addr) {
			if (addr >= base || eaddr > base) {
#ifdef __sparc
				extern struct seg_ops segnf_ops;

				/*
				 * no-fault segs must disappear if overlaid.
				 * XXX need new segment type so
				 * we don't have to check s_ops
				 */
				if (seg->s_ops == &segnf_ops) {
					seg_unmap(seg);
					goto again;
				}
#endif
				return (-1);	/* overlapping segment */
			}
		}
	}
	as->a_seglast = newseg;
	avl_insert(&as->a_segtree, newseg, where);

#ifdef VERIFY_SEGLIST
	as_verify(as);
#endif
	return (0);
}

struct seg *
as_removeseg(struct as *as, struct seg *seg)
{
	avl_tree_t *t;

	ASSERT(AS_WRITE_HELD(as));

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (seg == NULL)
		return (NULL);

	t = &as->a_segtree;
	if (as->a_seglast == seg)
		as->a_seglast = NULL;
	as->a_lastgaphl = NULL;

	/*
	 * if this segment is at an address higher than
	 * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
	 */
	if (as->a_lastgap &&
	    (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
		as->a_lastgap = AVL_NEXT(t, seg);

	/*
	 * remove the segment from the seg tree
	 */
	avl_remove(t, seg);

#ifdef VERIFY_SEGLIST
	as_verify(as);
#endif
	return (seg);
}

/*
 * Find a segment containing addr.
 */
struct seg *
as_segat(struct as *as, caddr_t addr)
{
	struct seg *seg = as->a_seglast;

	ASSERT(AS_LOCK_HELD(as));

	if (seg != NULL && seg->s_base <= addr &&
	    addr < seg->s_base + seg->s_size)
		return (seg);

	seg = avl_find(&as->a_segtree, &addr, NULL);
	return (seg);
}

/*
 * Serialize all searches for holes in an address space to
 * prevent two or more threads from allocating the same virtual
 * address range.  The address space must not be "read/write"
 * locked by the caller since we may block.
 */
void
as_rangelock(struct as *as)
{
	mutex_enter(&as->a_contents);
	while (AS_ISCLAIMGAP(as))
		cv_wait(&as->a_cv, &as->a_contents);
	AS_SETCLAIMGAP(as);
	mutex_exit(&as->a_contents);
}

/*
 * Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads.
 */
void
as_rangeunlock(struct as *as)
{
	mutex_enter(&as->a_contents);
	AS_CLRCLAIMGAP(as);
	cv_signal(&as->a_cv);
	mutex_exit(&as->a_contents);
}
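
/*
 * Illustrative sketch (not part of the original file): the usual pattern for
 * a caller that must first find an unused range and then map it, without
 * racing another thread for the same hole, is to bracket the pair with the
 * range lock.  as_map() takes the address space lock itself, so it is called
 * with only the range lock held; base/blen (the in/out search bounds), len
 * and crargs are the caller's variables, and segvn_create is used here as a
 * typical segment creation routine:
 *
 *	as_rangelock(as);
 *	if (as_gap(as, len, &base, &blen, AH_LO, NULL) != 0) {
 *		as_rangeunlock(as);
 *		return (ENOMEM);
 *	}
 *	error = as_map(as, base, len, segvn_create, &crargs);
 *	as_rangeunlock(as);
 */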

/*
 * compare segments (or just an address) by segment address range
 */
static int
as_segcompar(const void *x, const void *y)
{
	struct seg *a = (struct seg *)x;
	struct seg *b = (struct seg *)y;

	if (a->s_base < b->s_base)
		return (-1);
	if (a->s_base >= b->s_base + b->s_size)
		return (1);
	return (0);
}


void
as_avlinit(struct as *as)
{
	avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg),
	    offsetof(struct seg, s_tree));
	avl_create(&as->a_wpage, wp_compare, sizeof (struct watched_page),
	    offsetof(struct watched_page, wp_link));
}

/*ARGSUSED*/
static int
as_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct as *as = buf;

	mutex_init(&as->a_contents, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&as->a_cv, NULL, CV_DEFAULT, NULL);
	rw_init(&as->a_lock, NULL, RW_DEFAULT, NULL);
	as_avlinit(as);
	return (0);
}

/*ARGSUSED1*/
static void
as_destructor(void *buf, void *cdrarg)
{
	struct as *as = buf;

	avl_destroy(&as->a_segtree);
	mutex_destroy(&as->a_contents);
	cv_destroy(&as->a_cv);
	rw_destroy(&as->a_lock);
}

void
as_init(void)
{
	as_cache = kmem_cache_create("as_cache", sizeof (struct as), 0,
	    as_constructor, as_destructor, NULL, NULL, NULL, 0);
}

/*
 * Allocate and initialize an address space data structure.
 * We call hat_alloc to allow any machine dependent
 * information in the hat structure to be initialized.
 */
struct as *
as_alloc(void)
{
	struct as *as;

	as = kmem_cache_alloc(as_cache, KM_SLEEP);

	as->a_flags = 0;
	as->a_vbits = 0;
	as->a_hrm = NULL;
	as->a_seglast = NULL;
	as->a_size = 0;
	as->a_resvsize = 0;
	as->a_updatedir = 0;
	gethrestime(&as->a_updatetime);
	as->a_objectdir = NULL;
	as->a_sizedir = 0;
	as->a_userlimit = (caddr_t)USERLIMIT;
	as->a_lastgap = NULL;
	as->a_lastgaphl = NULL;
	as->a_callbacks = NULL;

	AS_LOCK_ENTER(as, RW_WRITER);
	as->a_hat = hat_alloc(as);	/* create hat for default system mmu */
	AS_LOCK_EXIT(as);

	return (as);
}

/*
 * Free an address space data structure.
 * Need to free the hat first and then
 * all the segments on this as and finally
 * the space for the as struct itself.
 */
void
as_free(struct as *as)
{
	struct hat *hat = as->a_hat;
	struct seg *seg, *next;
	boolean_t free_started = B_FALSE;

top:
	/*
	 * Invoke ALL callbacks. as_do_callbacks will do one callback
	 * per call, and not return (-1) until the callback has completed.
	 * When as_do_callbacks returns zero, all callbacks have completed.
	 */
	mutex_enter(&as->a_contents);
	while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
		;

	mutex_exit(&as->a_contents);
	AS_LOCK_ENTER(as, RW_WRITER);

	if (!free_started) {
		free_started = B_TRUE;
		hat_free_start(hat);
	}
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
		int err;

		next = AS_SEGNEXT(as, seg);
retry:
		err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
		if (err == EAGAIN) {
			mutex_enter(&as->a_contents);
			if (as->a_callbacks) {
				AS_LOCK_EXIT(as);
			} else if (!AS_ISNOUNMAPWAIT(as)) {
				/*
				 * Memory is currently locked. Wait for a
				 * cv_signal that it has been unlocked, then
				 * try the operation again.
				 */
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			} else {
				/*
				 * We may have raced with
				 * segvn_reclaim()/segspt_reclaim(). In this
				 * case clean nounmapwait flag and retry since
				 * softlockcnt in this segment may be already
				 * 0.  We don't drop as writer lock so our
				 * number of retries without sleeping should
				 * be very small. See segvn_reclaim() for
				 * more comments.
				 */
				AS_CLRNOUNMAPWAIT(as);
				mutex_exit(&as->a_contents);
				goto retry;
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else {
			/*
			 * We do not expect any other error return at this
			 * time. This is similar to an ASSERT in seg_unmap()
			 */
			ASSERT(err == 0);
		}
	}
	hat_free_end(hat);
	AS_LOCK_EXIT(as);

	/* /proc stuff */
	ASSERT(avl_numnodes(&as->a_wpage) == 0);
	if (as->a_objectdir) {
		kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
		as->a_objectdir = NULL;
		as->a_sizedir = 0;
	}

	/*
	 * Free the struct as back to kmem.  Assert it has no segments.
	 */
	ASSERT(avl_numnodes(&as->a_segtree) == 0);
	kmem_cache_free(as_cache, as);
}

int
as_dup(struct as *as, struct proc *forkedproc)
{
	struct as *newas;
	struct seg *seg, *newseg;
	size_t purgesize = 0;
	int error;

	AS_LOCK_ENTER(as, RW_WRITER);
	as_clearwatch(as);
	newas = as_alloc();
	newas->a_userlimit = as->a_userlimit;
	newas->a_proc = forkedproc;

	AS_LOCK_ENTER(newas, RW_WRITER);

	(void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {

		if (seg->s_flags & S_PURGE) {
			purgesize += seg->s_size;
			continue;
		}

		newseg = seg_alloc(newas, seg->s_base, seg->s_size);
		if (newseg == NULL) {
			AS_LOCK_EXIT(newas);
			as_setwatch(as);
			AS_LOCK_EXIT(as);
			as_free(newas);
			return (-1);
		}
		if ((error = SEGOP_DUP(seg, newseg)) != 0) {
			/*
			 * We call seg_free() on the new seg
			 * because the segment is not set up
			 * completely; i.e. it has no ops.
			 */
			as_setwatch(as);
			AS_LOCK_EXIT(as);
			seg_free(newseg);
			AS_LOCK_EXIT(newas);
			as_free(newas);
			return (error);
		}
		newas->a_size += seg->s_size;
	}
	newas->a_resvsize = as->a_resvsize - purgesize;

	error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);

	AS_LOCK_EXIT(newas);

	as_setwatch(as);
	AS_LOCK_EXIT(as);
	if (error != 0) {
		as_free(newas);
		return (error);
	}
	forkedproc->p_as = newas;
	return (0);
}

/*
 * Handle a ``fault'' at addr for size bytes.
 */
faultcode_t
as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
    enum fault_type type, enum seg_rw rw)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	size_t ssize;
	faultcode_t res = 0;
	caddr_t addrsav;
	struct seg *segsav;
	int as_lock_held;
	klwp_t *lwp = ttolwp(curthread);


retry:
	/*
	 * Indicate that the lwp is not to be stopped while waiting for a
	 * pagefault.  This is to avoid deadlock while debugging a process
	 * via /proc over NFS (in particular).
	 */
	if (lwp != NULL)
		lwp->lwp_nostop++;

	/*
	 * same length must be used when we softlock and softunlock.  We
	 * don't support softunlocking lengths less than the original length
	 * when there is largepage support.  See seg_dev.c for more
	 * comments.
	 */
	switch (type) {

	case F_SOFTLOCK:
		CPU_STATS_ADD_K(vm, softlock, 1);
		break;

	case F_SOFTUNLOCK:
		break;

	case F_PROT:
		CPU_STATS_ADD_K(vm, prot_fault, 1);
		break;

	case F_INVAL:
		CPU_STATS_ENTER_K();
		CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
		if (as == &kas)
			CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
		CPU_STATS_EXIT_K();
		break;
	}

	/* Kernel probe */
	TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
	    tnf_opaque, address, addr,
	    tnf_fault_type, fault_type, type,
	    tnf_seg_access, access, rw);

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	/*
	 * XXX -- Don't grab the as lock for segkmap. We should grab it for
	 * correctness, but then we could be stuck holding this lock for
	 * a LONG time if the fault needs to be resolved on a slow
	 * filesystem, and then no-one will be able to exec new commands,
	 * as exec'ing requires the write lock on the as.
	 */
	if (as == &kas && segkmap && segkmap->s_base <= raddr &&
	    raddr + size < segkmap->s_base + segkmap->s_size) {
		seg = segkmap;
		as_lock_held = 0;
	} else {
		AS_LOCK_ENTER(as, RW_READER);

		seg = as_segat(as, raddr);
		if (seg == NULL) {
			AS_LOCK_EXIT(as);
			if (lwp != NULL)
				lwp->lwp_nostop--;
			return (FC_NOMAP);
		}

		as_lock_held = 1;
	}

	addrsav = raddr;
	segsav = seg;

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		if (raddr + rsize > seg->s_base + seg->s_size)
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);
		if (res != 0)
			break;
	}

	/*
	 * If we were SOFTLOCKing and encountered a failure,
	 * we must SOFTUNLOCK the range we already did. (Maybe we
	 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
	 * right here...)
	 */
	if (res != 0 && type == F_SOFTLOCK) {
		for (seg = segsav; addrsav < raddr; addrsav += ssize) {
			if (addrsav >= seg->s_base + seg->s_size)
				seg = AS_SEGNEXT(as, seg);
			ASSERT(seg != NULL);
			/*
			 * Now call the fault routine again to perform the
			 * unlock using S_OTHER instead of the rw variable
			 * since we never got a chance to touch the pages.
			 */
			if (raddr > seg->s_base + seg->s_size)
				ssize = seg->s_base + seg->s_size - addrsav;
			else
				ssize = raddr - addrsav;
			(void) SEGOP_FAULT(hat, seg, addrsav, ssize,
			    F_SOFTUNLOCK, S_OTHER);
		}
	}
	if (as_lock_held)
		AS_LOCK_EXIT(as);
	if (lwp != NULL)
		lwp->lwp_nostop--;

	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * it means that we should retry the fault.  Let's wait
	 * a bit also to let the deadlock causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}
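
/*
 * Illustrative note (not part of the original file): the raddr/rsize
 * computation used above (and in most routines in this file) just expands
 * the request outward to page boundaries.  For example, assuming an 8K
 * PAGESIZE, addr = 0x10100 and size = 0x2100:
 *
 *	raddr = addr & PAGEMASK
 *	      = 0x10000
 *	rsize = ((addr + size + PAGEOFFSET) & PAGEMASK) - raddr
 *	      = ((0x12200 + 0x1fff) & ~0x1fff) - 0x10000
 *	      = 0x14000 - 0x10000 = 0x4000		(two 8K pages)
 */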


/*
 * Asynchronous ``fault'' at addr for size bytes.
 */
faultcode_t
as_faulta(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	faultcode_t res = 0;
	klwp_t *lwp = ttolwp(curthread);

retry:
	/*
	 * Indicate that the lwp is not to be stopped while waiting
	 * for a pagefault.  This is to avoid deadlock while debugging
	 * a process via /proc over NFS (in particular).
	 */
	if (lwp != NULL)
		lwp->lwp_nostop++;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	AS_LOCK_ENTER(as, RW_READER);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		AS_LOCK_EXIT(as);
		if (lwp != NULL)
			lwp->lwp_nostop--;
		return (FC_NOMAP);
	}

	for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		res = SEGOP_FAULTA(seg, raddr);
		if (res != 0)
			break;
	}
	AS_LOCK_EXIT(as);
	if (lwp != NULL)
		lwp->lwp_nostop--;
	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * it means that we should retry the fault.  Let's wait
	 * a bit also to let the deadlock causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}

/*
 * Set the virtual mapping for the interval from [addr : addr + size)
 * in address space `as' to have the specified protection.
 * It is ok for the range to cross over several segments,
 * as long as they are contiguous.
 */
int
as_setprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	struct as_callback *cb;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0, writer = 0;
	caddr_t saveraddr;
	size_t saversize;

setprot_top:
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	saveraddr = raddr;
	saversize = rsize;

	/*
	 * Normally we only lock the as as a reader. But
	 * if due to setprot the segment driver needs to split
	 * a segment it will return IE_RETRY. Therefore we re-acquire
	 * the as lock as a writer so the segment driver can change
	 * the seg list. Also the segment driver will return IE_RETRY
	 * after it has changed the segment list so we therefore keep
	 * locking as a writer. Since these operations should be rare
	 * we want to only lock as a writer when necessary.
	 */
	if (writer || avl_numnodes(&as->a_wpage) != 0) {
		AS_LOCK_ENTER(as, RW_WRITER);
	} else {
		AS_LOCK_ENTER(as, RW_READER);
	}

	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as);
		return (ENOMEM);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;
retry:
		error = SEGOP_SETPROT(seg, raddr, ssize, prot);

		if (error == IE_NOMEM) {
			error = EAGAIN;
			break;
		}

		if (error == IE_RETRY) {
			AS_LOCK_EXIT(as);
			writer = 1;
			goto setprot_top;
		}

		if (error == EAGAIN) {
			/*
			 * Make sure we have a_lock as writer.
			 */
			if (writer == 0) {
				AS_LOCK_EXIT(as);
				writer = 1;
				goto setprot_top;
			}

			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range pertains
			 *	the callback is invoked (unless it already is in
			 *	progress). The a_contents lock must be dropped
			 *	before the callback, so only one callback can
			 *	be done at a time. Go to the top and do more
			 *	until zero is returned. If zero is returned,
			 *	either there were no callbacks for this event
			 *	or they were already in progress.
			 */
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_SETPROT_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as);
				as_execute_callback(as, cb, AS_SETPROT_EVENT);
			} else if (!AS_ISNOUNMAPWAIT(as)) {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			} else {
				/*
				 * We may have raced with
				 * segvn_reclaim()/segspt_reclaim(). In this
				 * case clean nounmapwait flag and retry since
				 * softlockcnt in this segment may be already
				 * 0.  We don't drop as writer lock so our
				 * number of retries without sleeping should
				 * be very small. See segvn_reclaim() for
				 * more comments.
				 */
				AS_CLRNOUNMAPWAIT(as);
				mutex_exit(&as->a_contents);
				goto retry;
			}
			mutex_exit(&as->a_contents);
			goto setprot_top;
		} else if (error != 0)
			break;
	}
	if (error != 0) {
		as_setwatch(as);
	} else {
		as_setwatchprot(as, saveraddr, saversize, prot);
	}
	AS_LOCK_EXIT(as);
	return (error);
}

/*
 * Check to make sure that the interval [addr, addr + size)
 * in address space `as' has at least the specified protection.
 * It is ok for the range to cross over several segments, as long
 * as they are contiguous.
 */
int
as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	/*
	 * This is ugly as sin...
	 * Normally, we only acquire the address space readers lock.
	 * However, if the address space has watchpoints present,
	 * we must acquire the writer lock on the address space for
	 * the benefit of as_clearwatchprot() and as_setwatchprot().
	 */
	if (avl_numnodes(&as->a_wpage) != 0)
		AS_LOCK_ENTER(as, RW_WRITER);
	else
		AS_LOCK_ENTER(as, RW_READER);
	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as);
		return (ENOMEM);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
		if (error != 0)
			break;
	}
	as_setwatch(as);
	AS_LOCK_EXIT(as);
	return (error);
}

int
as_unmap(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg, *seg_next;
	struct as_callback *cb;
	caddr_t raddr, eaddr;
	size_t ssize, rsize = 0;
	int err;

top:
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
	    (uintptr_t)PAGEMASK);

	AS_LOCK_ENTER(as, RW_WRITER);

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	/*
	 * Use as_findseg to find the first segment in the range, then
	 * step through the segments in order, following s_next.
	 */
	as_clearwatchprot(as, raddr, eaddr - raddr);

	for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
		if (eaddr <= seg->s_base)
			break;		/* eaddr was in a gap; all done */

		/* this is implied by the test above */
		ASSERT(raddr < eaddr);

		if (raddr < seg->s_base)
			raddr = seg->s_base;	/* raddr was in a gap */

		if (eaddr > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = eaddr - raddr;

		/*
		 * Save next segment pointer since seg can be
		 * destroyed during the segment unmap operation.
		 */
		seg_next = AS_SEGNEXT(as, seg);

		/*
		 * We didn't count /dev/null mappings, so ignore them here.
		 * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
		 * we have to do this check here while we have seg.)
		 */
		rsize = 0;
		if (!SEG_IS_DEVNULL_MAPPING(seg) &&
		    !SEG_IS_PARTIAL_RESV(seg))
			rsize = ssize;

retry:
		err = SEGOP_UNMAP(seg, raddr, ssize);
		if (err == EAGAIN) {
			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range pertains
			 *	the callback is invoked (unless it already is in
			 *	progress). The a_contents lock must be dropped
			 *	before the callback, so only one callback can
			 *	be done at a time. Go to the top and do more
			 *	until zero is returned. If zero is returned,
			 *	either there were no callbacks for this event
			 *	or they were already in progress.
			 */
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_UNMAP_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as);
				as_execute_callback(as, cb, AS_UNMAP_EVENT);
			} else if (!AS_ISNOUNMAPWAIT(as)) {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			} else {
				/*
				 * We may have raced with
				 * segvn_reclaim()/segspt_reclaim(). In this
				 * case clean nounmapwait flag and retry since
				 * softlockcnt in this segment may be already
				 * 0.  We don't drop as writer lock so our
				 * number of retries without sleeping should
				 * be very small. See segvn_reclaim() for
				 * more comments.
				 */
				AS_CLRNOUNMAPWAIT(as);
				mutex_exit(&as->a_contents);
				goto retry;
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else if (err == IE_RETRY) {
			AS_LOCK_EXIT(as);
			goto top;
		} else if (err) {
			as_setwatch(as);
			AS_LOCK_EXIT(as);
			return (-1);
		}

		as->a_size -= ssize;
		if (rsize)
			as->a_resvsize -= rsize;
		raddr += ssize;
	}
	AS_LOCK_EXIT(as);
	return (0);
}

static int
as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t szc;
	uint_t nszc;
	int error;
	caddr_t a;
	caddr_t eaddr;
	size_t segsize;
	struct seg *seg;
	size_t pgsz;
	int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
	uint_t save_szcvec;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
	if (!do_off) {
		vn_a->offset = 0;
	}

	if (szcvec <= 1) {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			return (ENOMEM);
		}
		vn_a->szc = 0;
		error = (*crfp)(seg, vn_a);
		if (error != 0) {
			seg_free(seg);
		} else {
			as->a_size += size;
			as->a_resvsize += size;
		}
		return (error);
	}

	eaddr = addr + size;
	save_szcvec = szcvec;
	szcvec >>= 1;
	szc = 0;
	nszc = 0;
	while (szcvec) {
		if ((szcvec & 0x1) == 0) {
			nszc++;
			szcvec >>= 1;
			continue;
		}
		nszc++;
		pgsz = page_get_pagesize(nszc);
		a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		if (a != addr) {
			ASSERT(a < eaddr);
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			as->a_size += segsize;
			as->a_resvsize += segsize;
			*segcreated = 1;
			if (do_off) {
				vn_a->offset += segsize;
			}
			addr = a;
		}
		szc = nszc;
		szcvec >>= 1;
	}

	ASSERT(addr < eaddr);
	szcvec = save_szcvec | 1; /* add 8K pages */
	while (szcvec) {
		a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		ASSERT(a >= addr);
		if (a != addr) {
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			as->a_size += segsize;
			as->a_resvsize += segsize;
			*segcreated = 1;
			if (do_off) {
				vn_a->offset += segsize;
			}
			addr = a;
		}
		szcvec &= ~(1 << szc);
		if (szcvec) {
			szc = highbit(szcvec) - 1;
			pgsz = page_get_pagesize(szc);
		}
	}
	ASSERT(addr == eaddr);

	return (0);
}
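
/*
 * Illustrative example (not part of the original file): szcvec is a bit
 * vector of usable page size codes, bit n meaning page size code n, with
 * bit 0 the base page size.  Assuming, purely for illustration, szc 0 = 8K,
 * szc 1 = 64K and szc 3 = 4M (the actual codes and sizes are platform
 * dependent), a call with szcvec = 0x0b (bits 0, 1 and 3) and an 8K-aligned
 * addr is carved up by the two loops above as:
 *
 *	[addr, first 64K boundary)		szc 0 segment
 *	[64K boundary, first 4M boundary)	szc 1 segment
 *	[4M boundary, last 4M boundary < eaddr)	szc 3 segment
 *	trailing 64K- and 8K-sized remainders	szc 1, then szc 0 segments
 *
 * i.e. the bulk of the mapping gets the largest usable page size and only
 * the unaligned head and tail fall back to the smaller sizes.
 */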

static int
as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA);
	int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
	uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
	    type, 0);
	int error;
	struct seg *seg;
	struct vattr va;
	u_offset_t eoff;
	size_t save_size = 0;
	extern size_t textrepl_size_thresh;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp != NULL);
	ASSERT(vn_a->amp == NULL);

again:
	if (szcvec <= 1) {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			return (ENOMEM);
		}
		vn_a->szc = 0;
		error = (*crfp)(seg, vn_a);
		if (error != 0) {
			seg_free(seg);
		} else {
			as->a_size += size;
			as->a_resvsize += size;
		}
		return (error);
	}

	va.va_mask = AT_SIZE;
	if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred, NULL) != 0) {
		szcvec = 0;
		goto again;
	}
	eoff = vn_a->offset & PAGEMASK;
	if (eoff >= va.va_size) {
		szcvec = 0;
		goto again;
	}
	eoff += size;
	if (btopr(va.va_size) < btopr(eoff)) {
		save_size = size;
		size = va.va_size - (vn_a->offset & PAGEMASK);
		size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
		szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
		    type, 0);
		if (szcvec <= 1) {
			size = save_size;
			goto again;
		}
	}

	if (size > textrepl_size_thresh) {
		vn_a->flags |= _MAP_TEXTREPL;
	}
	error = as_map_segvn_segs(as, addr, size, szcvec, crfp, vn_a,
	    segcreated);
	if (error != 0) {
		return (error);
	}
	if (save_size) {
		addr += size;
		size = save_size - size;
		szcvec = 0;
		goto again;
	}
	return (0);
}

/*
 * as_map_ansegs: shared or private anonymous memory.  Note that the flags
 * passed to map_pgszvec cannot be MAP_INITDATA, for anon.
 */
static int
as_map_ansegs(struct as *as, caddr_t addr, size_t size,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t szcvec;
	uchar_t type;

	ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE);
	if (vn_a->type == MAP_SHARED) {
		type = MAPPGSZC_SHM;
	} else if (vn_a->type == MAP_PRIVATE) {
		if (vn_a->szc == AS_MAP_HEAP) {
			type = MAPPGSZC_HEAP;
		} else if (vn_a->szc == AS_MAP_STACK) {
			type = MAPPGSZC_STACK;
		} else {
			type = MAPPGSZC_PRIVM;
		}
	}
	szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ?
	    (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE),
	    (vn_a->flags & MAP_TEXT), type, 0);
	ASSERT(AS_WRITE_HELD(as));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp == NULL);

	return (as_map_segvn_segs(as, addr, size, szcvec,
	    crfp, vn_a, segcreated));
}

int
as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
{
	AS_LOCK_ENTER(as, RW_WRITER);
	return (as_map_locked(as, addr, size, crfp, argsp));
}

int
as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
    void *argsp)
{
	struct seg *seg = NULL;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error;
	int unmap = 0;
	struct proc *p = curproc;
	struct segvn_crargs crargs;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	/*
	 * check for wrap around
	 */
	if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
		AS_LOCK_EXIT(as);
		return (ENOMEM);
	}

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
		AS_LOCK_EXIT(as);

		(void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
		crargs = *(struct segvn_crargs *)argsp;
		error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
		if (error != 0) {
			AS_LOCK_EXIT(as);
			if (unmap) {
				(void) as_unmap(as, addr, size);
			}
			return (error);
		}
	} else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
		crargs = *(struct segvn_crargs *)argsp;
		error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
		if (error != 0) {
			AS_LOCK_EXIT(as);
			if (unmap) {
				(void) as_unmap(as, addr, size);
			}
			return (error);
		}
	} else {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			AS_LOCK_EXIT(as);
			return (ENOMEM);
		}

		error = (*crfp)(seg, argsp);
		if (error != 0) {
			seg_free(seg);
			AS_LOCK_EXIT(as);
			return (error);
		}
		/*
		 * Add size now so as_unmap will work if as_ctl fails.
		 */
		as->a_size += rsize;
		as->a_resvsize += rsize;
	}

	as_setwatch(as);

	/*
	 * If the address space is locked,
	 * establish memory locks for the new segment.
	 */
	mutex_enter(&as->a_contents);
	if (AS_ISPGLCK(as)) {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as);
		error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
		if (error != 0)
			(void) as_unmap(as, addr, size);
	} else {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as);
	}
	return (error);
}


/*
 * Delete all segments in the address space marked with S_PURGE.
 * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
 * These segments are deleted as a first step before calls to as_gap(), so
 * that they don't affect mmap() or shmat().
 */
void
as_purge(struct as *as)
{
	struct seg *seg;
	struct seg *next_seg;

	/*
	 * the setting of NEEDSPURGE is protected by as_rangelock(), so
	 * no need to grab a_contents mutex for this check
	 */
	if ((as->a_flags & AS_NEEDSPURGE) == 0)
		return;

	AS_LOCK_ENTER(as, RW_WRITER);
	next_seg = NULL;
	seg = AS_SEGFIRST(as);
	while (seg != NULL) {
		next_seg = AS_SEGNEXT(as, seg);
		if (seg->s_flags & S_PURGE)
			SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
		seg = next_seg;
	}
	AS_LOCK_EXIT(as);

	mutex_enter(&as->a_contents);
	as->a_flags &= ~AS_NEEDSPURGE;
	mutex_exit(&as->a_contents);
}

/*
 * Find a hole within [*basep, *basep + *lenp), which contains a mappable
 * range of addresses at least "minlen" long, where the base of the range is
 * at "off" phase from an "align" boundary and there is space for a
 * "redzone"-sized redzone on either side of the range.  Thus,
 * if align was 4M and off was 16k, the user wants a hole which will start
 * 16k into a 4M page.
 *
 * If flags specifies AH_HI, the hole will have the highest possible address
 * in the range.  We use the as->a_lastgap field to figure out where to
 * start looking for a gap.
 *
 * Otherwise, the gap will have the lowest possible address.
 *
 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
 *
 * If an adequate hole is found, *basep and *lenp are set to reflect the part of
 * the hole that is within range, and 0 is returned.  On failure, -1 is returned.
 *
 * NOTE: This routine is not correct when base+len overflows caddr_t.
 */
int
as_gap_aligned(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp,
    uint_t flags, caddr_t addr, size_t align, size_t redzone, size_t off)
{
	caddr_t lobound = *basep;
	caddr_t hibound = lobound + *lenp;
	struct seg *lseg, *hseg;
	caddr_t lo, hi;
	int forward;
	caddr_t save_base;
	size_t save_len;
	size_t save_minlen;
	size_t save_redzone;
	int fast_path = 1;

	save_base = *basep;
	save_len = *lenp;
	save_minlen = minlen;
	save_redzone = redzone;

	/*
	 * For the first pass/fast_path, just add align and redzone into
	 * minlen since if we get an allocation, we can guarantee that it
	 * will fit the alignment and redzone requested.
	 * This increases the chance that hibound will be adjusted to
	 * a_lastgap->s_base which will likely allow us to find an
	 * acceptable hole in the address space quicker.
	 * If we can't find a hole with this fast_path, then we look for
	 * smaller holes in which the alignment and offset may allow
	 * the allocation to fit.
	 */
	minlen += align;
	minlen += 2 * redzone;
	redzone = 0;

	AS_LOCK_ENTER(as, RW_READER);
	if (AS_SEGFIRST(as) == NULL) {
		if (valid_va_range_aligned(basep, lenp, minlen, flags & AH_DIR,
		    align, redzone, off)) {
			AS_LOCK_EXIT(as);
			return (0);
		} else {
			AS_LOCK_EXIT(as);
			*basep = save_base;
			*lenp = save_len;
			return (-1);
		}
	}

retry:
	/*
	 * Set up to iterate over all the inter-segment holes in the given
	 * direction.  lseg is NULL for the lowest-addressed hole and hseg is
	 * NULL for the highest-addressed hole.  If moving backwards, we reset
	 * hseg to denote the highest-addressed segment.
	 */
	forward = (flags & AH_DIR) == AH_LO;
	if (forward) {
		hseg = as_findseg(as, lobound, 1);
		lseg = AS_SEGPREV(as, hseg);
	} else {

		/*
		 * If allocating at least as much as the last allocation,
		 * use a_lastgap's base as a better estimate of hibound.
		 */
		if (as->a_lastgap &&
		    minlen >= as->a_lastgap->s_size &&
		    hibound >= as->a_lastgap->s_base)
			hibound = as->a_lastgap->s_base;

		hseg = as_findseg(as, hibound, 1);
		if (hseg->s_base + hseg->s_size < hibound) {
			lseg = hseg;
			hseg = NULL;
		} else {
			lseg = AS_SEGPREV(as, hseg);
		}
	}

	for (;;) {
		/*
		 * Set lo and hi to the hole's boundaries.  (We should really
		 * use MAXADDR in place of hibound in the expression below,
		 * but can't express it easily; using hibound in its place is
		 * harmless.)
		 */
		lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
		hi = (hseg == NULL) ? hibound : hseg->s_base;
		/*
		 * If the iteration has moved past the interval from lobound
		 * to hibound it's pointless to continue.
		 */
		if ((forward && lo > hibound) || (!forward && hi < lobound))
			break;
		else if (lo > hibound || hi < lobound)
			goto cont;
		/*
		 * Candidate hole lies at least partially within the allowable
		 * range.  Restrict it to fall completely within that range,
		 * i.e., to [max(lo, lobound), min(hi, hibound)].
		 */
		if (lo < lobound)
			lo = lobound;
		if (hi > hibound)
			hi = hibound;
		/*
		 * Verify that the candidate hole is big enough and meets
		 * hardware constraints.  If the hole is too small, no need
		 * to do the further checks since they will fail.
		 */
		*basep = lo;
		*lenp = hi - lo;
		if (*lenp >= minlen && valid_va_range_aligned(basep, lenp,
		    minlen, forward ? AH_LO : AH_HI, align, redzone, off) &&
		    ((flags & AH_CONTAIN) == 0 ||
		    (*basep <= addr && *basep + *lenp > addr))) {
			if (!forward)
				as->a_lastgap = hseg;
			if (hseg != NULL)
				as->a_lastgaphl = hseg;
			else
				as->a_lastgaphl = lseg;
			AS_LOCK_EXIT(as);
			return (0);
		}
	cont:
		/*
		 * Move to the next hole.
		 */
		if (forward) {
			lseg = hseg;
			if (lseg == NULL)
				break;
			hseg = AS_SEGNEXT(as, hseg);
		} else {
			hseg = lseg;
			if (hseg == NULL)
				break;
			lseg = AS_SEGPREV(as, lseg);
		}
	}
	if (fast_path && (align != 0 || save_redzone != 0)) {
		fast_path = 0;
		minlen = save_minlen;
		redzone = save_redzone;
		goto retry;
	}
	*basep = save_base;
	*lenp = save_len;
	AS_LOCK_EXIT(as);
	return (-1);
}

/*
 * Find a hole of at least size minlen within [*basep, *basep + *lenp).
 *
 * If flags specifies AH_HI, the hole will have the highest possible address
 * in the range.  We use the as->a_lastgap field to figure out where to
 * start looking for a gap.
 *
 * Otherwise, the gap will have the lowest possible address.
 *
 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
 *
 * If an adequate hole is found, base and len are set to reflect the part of
 * the hole that is within range, and 0 is returned, otherwise,
 * -1 is returned.
 *
 * NOTE: This routine is not correct when base+len overflows caddr_t.
 */
int
as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
    caddr_t addr)
{

	return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0));
}
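
/*
 * Illustrative example (not part of the original file): continuing the
 * 4M/16K case from the block comment above as_gap_aligned(), a caller that
 * wants at least 1M placed 16K past a 4M boundary, with an 8K redzone on
 * each side, would ask for
 *
 *	as_gap_aligned(as, 1024 * 1024, &base, &len, AH_LO, NULL,
 *	    4 * 1024 * 1024, 8 * 1024, 16 * 1024);
 *
 * where base/len are the caller's in/out search bounds.  On the fast path
 * this is treated as an ordinary request for minlen + align + 2 * redzone
 * bytes; only if that fails are smaller holes re-examined against the exact
 * alignment, offset and redzone constraints.
 */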
1969 * 1970 * If flags specifies AH_HI, the hole will have the highest possible address 1971 * in the range. We use the as->a_lastgap field to figure out where to 1972 * start looking for a gap. 1973 * 1974 * Otherwise, the gap will have the lowest possible address. 1975 * 1976 * If flags specifies AH_CONTAIN, the hole will contain the address addr. 1977 * 1978 * If an adequate hole is found, base and len are set to reflect the part of 1979 * the hole that is within range, and 0 is returned; otherwise, 1980 * -1 is returned. 1981 * 1982 * NOTE: This routine is not correct when base+len overflows caddr_t. 1983 */ 1984 int 1985 as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags, 1986 caddr_t addr) 1987 { 1988 1989 return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0)); 1990 } 1991 1992 /* 1993 * Return the next range within [base, base + len) that is backed 1994 * with "real memory". Skip holes and non-seg_vn segments. 1995 * We're lazy and only return one segment at a time. 1996 */ 1997 int 1998 as_memory(struct as *as, caddr_t *basep, size_t *lenp) 1999 { 2000 extern struct seg_ops segspt_shmops; /* needs a header file */ 2001 struct seg *seg; 2002 caddr_t addr, eaddr; 2003 caddr_t segend; 2004 2005 AS_LOCK_ENTER(as, RW_READER); 2006 2007 addr = *basep; 2008 eaddr = addr + *lenp; 2009 2010 seg = as_findseg(as, addr, 0); 2011 if (seg != NULL) 2012 addr = MAX(seg->s_base, addr); 2013 2014 for (;;) { 2015 if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) { 2016 AS_LOCK_EXIT(as); 2017 return (EINVAL); 2018 } 2019 2020 if (seg->s_ops == &segvn_ops) { 2021 segend = seg->s_base + seg->s_size; 2022 break; 2023 } 2024 2025 /* 2026 * We do ISM by looking into the private data 2027 * to determine the real size of the segment. 2028 */ 2029 if (seg->s_ops == &segspt_shmops) { 2030 segend = seg->s_base + spt_realsize(seg); 2031 if (addr < segend) 2032 break; 2033 } 2034 2035 seg = AS_SEGNEXT(as, seg); 2036 2037 if (seg != NULL) 2038 addr = seg->s_base; 2039 } 2040 2041 *basep = addr; 2042 2043 if (segend > eaddr) 2044 *lenp = eaddr - addr; 2045 else 2046 *lenp = segend - addr; 2047 2048 AS_LOCK_EXIT(as); 2049 return (0); 2050 } 2051 2052 /* 2053 * Swap the pages associated with the address space as out to 2054 * secondary storage, returning the number of bytes actually 2055 * swapped. 2056 * 2057 * The value returned is intended to correlate well with the process's 2058 * memory requirements. Its usefulness for this purpose depends on 2059 * how well the segment-level routines do at returning accurate 2060 * information. 2061 */ 2062 size_t 2063 as_swapout(struct as *as) 2064 { 2065 struct seg *seg; 2066 size_t swpcnt = 0; 2067 2068 /* 2069 * Kernel-only processes have given up their address 2070 * spaces. Of course, we shouldn't be attempting to 2071 * swap out such processes in the first place... 2072 */ 2073 if (as == NULL) 2074 return (0); 2075 2076 AS_LOCK_ENTER(as, RW_READER); 2077 2078 /* 2079 * Free all mapping resources associated with the address 2080 * space. The segment-level swapout routines capitalize 2081 * on this unmapping by scavenging pages that have become 2082 * unmapped here. 2083 */ 2084 hat_swapout(as->a_hat); 2085 2086 /* 2087 * Call the swapout routines of all segments in the address 2088 * space to do the actual work, accumulating the amount of 2089 * space reclaimed.
2090 */ 2091 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 2092 struct seg_ops *ov = seg->s_ops; 2093 2094 /* 2095 * We have to check to see if the seg has 2096 * an ops vector because the seg may have 2097 * been in the middle of being set up when 2098 * the process was picked for swapout. 2099 */ 2100 if ((ov != NULL) && (ov->swapout != NULL)) 2101 swpcnt += SEGOP_SWAPOUT(seg); 2102 } 2103 AS_LOCK_EXIT(as); 2104 return (swpcnt); 2105 } 2106 2107 /* 2108 * Determine whether data from the mappings in interval [addr, addr + size) 2109 * are in the primary memory (core) cache. 2110 */ 2111 int 2112 as_incore(struct as *as, caddr_t addr, 2113 size_t size, char *vec, size_t *sizep) 2114 { 2115 struct seg *seg; 2116 size_t ssize; 2117 caddr_t raddr; /* rounded down addr */ 2118 size_t rsize; /* rounded up size */ 2119 size_t isize; /* iteration size */ 2120 int error = 0; /* result, assume success */ 2121 2122 *sizep = 0; 2123 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2124 rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) - 2125 (size_t)raddr; 2126 2127 if (raddr + rsize < raddr) /* check for wraparound */ 2128 return (ENOMEM); 2129 2130 AS_LOCK_ENTER(as, RW_READER); 2131 seg = as_segat(as, raddr); 2132 if (seg == NULL) { 2133 AS_LOCK_EXIT(as); 2134 return (-1); 2135 } 2136 2137 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2138 if (raddr >= seg->s_base + seg->s_size) { 2139 seg = AS_SEGNEXT(as, seg); 2140 if (seg == NULL || raddr != seg->s_base) { 2141 error = -1; 2142 break; 2143 } 2144 } 2145 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2146 ssize = seg->s_base + seg->s_size - raddr; 2147 else 2148 ssize = rsize; 2149 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec); 2150 if (isize != ssize) { 2151 error = -1; 2152 break; 2153 } 2154 vec += btopr(ssize); 2155 } 2156 AS_LOCK_EXIT(as); 2157 return (error); 2158 } 2159 2160 static void 2161 as_segunlock(struct seg *seg, caddr_t addr, int attr, 2162 ulong_t *bitmap, size_t position, size_t npages) 2163 { 2164 caddr_t range_start; 2165 size_t pos1 = position; 2166 size_t pos2; 2167 size_t size; 2168 size_t end_pos = npages + position; 2169 2170 while (bt_range(bitmap, &pos1, &pos2, end_pos)) { 2171 size = ptob((pos2 - pos1)); 2172 range_start = (caddr_t)((uintptr_t)addr + 2173 ptob(pos1 - position)); 2174 2175 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK, 2176 (ulong_t *)NULL, (size_t)NULL); 2177 pos1 = pos2; 2178 } 2179 } 2180 2181 static void 2182 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map, 2183 caddr_t raddr, size_t rsize) 2184 { 2185 struct seg *seg = as_segat(as, raddr); 2186 size_t ssize; 2187 2188 while (rsize != 0) { 2189 if (raddr >= seg->s_base + seg->s_size) 2190 seg = AS_SEGNEXT(as, seg); 2191 2192 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2193 ssize = seg->s_base + seg->s_size - raddr; 2194 else 2195 ssize = rsize; 2196 2197 as_segunlock(seg, raddr, attr, mlock_map, 0, btopr(ssize)); 2198 2199 rsize -= ssize; 2200 raddr += ssize; 2201 } 2202 } 2203 2204 /* 2205 * Cache control operations over the interval [addr, addr + size) in 2206 * address space "as". 
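* The func argument carries a memcntl(2)-style subcommand (e.g. MC_SYNC for msync(), MC_LOCK/MC_UNLOCK for mlock()/munlock(), MC_LOCKAS/MC_UNLOCKAS for mlockall()/munlockall(), MC_ADVISE for madvise()); each case is dispatched below.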
2207 */ 2208 /*ARGSUSED*/ 2209 int 2210 as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr, 2211 uintptr_t arg, ulong_t *lock_map, size_t pos) 2212 { 2213 struct seg *seg; /* working segment */ 2214 caddr_t raddr; /* rounded down addr */ 2215 caddr_t initraddr; /* saved initial rounded down addr */ 2216 size_t rsize; /* rounded up size */ 2217 size_t initrsize; /* saved initial rounded up size */ 2218 size_t ssize; /* size of seg */ 2219 int error = 0; /* result */ 2220 size_t mlock_size; /* size of bitmap */ 2221 ulong_t *mlock_map; /* pointer to bitmap used */ 2222 /* to represent the locked */ 2223 /* pages. */ 2224 retry: 2225 if (error == IE_RETRY) 2226 AS_LOCK_ENTER(as, RW_WRITER); 2227 else 2228 AS_LOCK_ENTER(as, RW_READER); 2229 2230 /* 2231 * If these are address space lock/unlock operations, loop over 2232 * all segments in the address space, as appropriate. 2233 */ 2234 if (func == MC_LOCKAS) { 2235 size_t npages, idx; 2236 size_t rlen = 0; /* rounded as length */ 2237 2238 idx = pos; 2239 2240 if (arg & MCL_FUTURE) { 2241 mutex_enter(&as->a_contents); 2242 AS_SETPGLCK(as); 2243 mutex_exit(&as->a_contents); 2244 } 2245 if ((arg & MCL_CURRENT) == 0) { 2246 AS_LOCK_EXIT(as); 2247 return (0); 2248 } 2249 2250 seg = AS_SEGFIRST(as); 2251 if (seg == NULL) { 2252 AS_LOCK_EXIT(as); 2253 return (0); 2254 } 2255 2256 do { 2257 raddr = (caddr_t)((uintptr_t)seg->s_base & 2258 (uintptr_t)PAGEMASK); 2259 rlen += (((uintptr_t)(seg->s_base + seg->s_size) + 2260 PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr; 2261 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2262 2263 mlock_size = BT_BITOUL(btopr(rlen)); 2264 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size * 2265 sizeof (ulong_t), KM_NOSLEEP)) == NULL) { 2266 AS_LOCK_EXIT(as); 2267 return (EAGAIN); 2268 } 2269 2270 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 2271 error = SEGOP_LOCKOP(seg, seg->s_base, 2272 seg->s_size, attr, MC_LOCK, mlock_map, pos); 2273 if (error != 0) 2274 break; 2275 pos += seg_pages(seg); 2276 } 2277 2278 if (error) { 2279 for (seg = AS_SEGFIRST(as); seg != NULL; 2280 seg = AS_SEGNEXT(as, seg)) { 2281 2282 raddr = (caddr_t)((uintptr_t)seg->s_base & 2283 (uintptr_t)PAGEMASK); 2284 npages = seg_pages(seg); 2285 as_segunlock(seg, raddr, attr, mlock_map, 2286 idx, npages); 2287 idx += npages; 2288 } 2289 } 2290 2291 kmem_free(mlock_map, mlock_size * sizeof (ulong_t)); 2292 AS_LOCK_EXIT(as); 2293 goto lockerr; 2294 } else if (func == MC_UNLOCKAS) { 2295 mutex_enter(&as->a_contents); 2296 AS_CLRPGLCK(as); 2297 mutex_exit(&as->a_contents); 2298 2299 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 2300 error = SEGOP_LOCKOP(seg, seg->s_base, 2301 seg->s_size, attr, MC_UNLOCK, NULL, 0); 2302 if (error != 0) 2303 break; 2304 } 2305 2306 AS_LOCK_EXIT(as); 2307 goto lockerr; 2308 } 2309 2310 /* 2311 * Normalize addresses and sizes. 2312 */ 2313 initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2314 initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2315 (size_t)raddr; 2316 2317 if (raddr + rsize < raddr) { /* check for wraparound */ 2318 AS_LOCK_EXIT(as); 2319 return (ENOMEM); 2320 } 2321 2322 /* 2323 * Get initial segment. 
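* A NULL return from as_segat() means raddr lies in a hole of the address space, so the request fails with ENOMEM.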
2324 */ 2325 if ((seg = as_segat(as, raddr)) == NULL) { 2326 AS_LOCK_EXIT(as); 2327 return (ENOMEM); 2328 } 2329 2330 if (func == MC_LOCK) { 2331 mlock_size = BT_BITOUL(btopr(rsize)); 2332 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size * 2333 sizeof (ulong_t), KM_NOSLEEP)) == NULL) { 2334 AS_LOCK_EXIT(as); 2335 return (EAGAIN); 2336 } 2337 } 2338 2339 /* 2340 * Loop over all segments. If a hole in the address range is 2341 * discovered, then fail. For each segment, perform the appropriate 2342 * control operation. 2343 */ 2344 while (rsize != 0) { 2345 2346 /* 2347 * Make sure there's no hole, calculate the portion 2348 * of the next segment to be operated over. 2349 */ 2350 if (raddr >= seg->s_base + seg->s_size) { 2351 seg = AS_SEGNEXT(as, seg); 2352 if (seg == NULL || raddr != seg->s_base) { 2353 if (func == MC_LOCK) { 2354 as_unlockerr(as, attr, mlock_map, 2355 initraddr, initrsize - rsize); 2356 kmem_free(mlock_map, 2357 mlock_size * sizeof (ulong_t)); 2358 } 2359 AS_LOCK_EXIT(as); 2360 return (ENOMEM); 2361 } 2362 } 2363 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2364 ssize = seg->s_base + seg->s_size - raddr; 2365 else 2366 ssize = rsize; 2367 2368 /* 2369 * Dispatch on specific function. 2370 */ 2371 switch (func) { 2372 2373 /* 2374 * Synchronize cached data from mappings with backing 2375 * objects. 2376 */ 2377 case MC_SYNC: 2378 if (error = SEGOP_SYNC(seg, raddr, ssize, 2379 attr, (uint_t)arg)) { 2380 AS_LOCK_EXIT(as); 2381 return (error); 2382 } 2383 break; 2384 2385 /* 2386 * Lock pages in memory. 2387 */ 2388 case MC_LOCK: 2389 if (error = SEGOP_LOCKOP(seg, raddr, ssize, 2390 attr, func, mlock_map, pos)) { 2391 as_unlockerr(as, attr, mlock_map, initraddr, 2392 initrsize - rsize + ssize); 2393 kmem_free(mlock_map, mlock_size * 2394 sizeof (ulong_t)); 2395 AS_LOCK_EXIT(as); 2396 goto lockerr; 2397 } 2398 break; 2399 2400 /* 2401 * Unlock mapped pages. 2402 */ 2403 case MC_UNLOCK: 2404 (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func, 2405 (ulong_t *)NULL, (size_t)NULL); 2406 break; 2407 2408 /* 2409 * Store VM advise for mapped pages in segment layer. 2410 */ 2411 case MC_ADVISE: 2412 error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg); 2413 2414 /* 2415 * Check for regular errors and special retry error 2416 */ 2417 if (error) { 2418 if (error == IE_RETRY) { 2419 /* 2420 * Need to acquire writers lock, so 2421 * have to drop readers lock and start 2422 * all over again 2423 */ 2424 AS_LOCK_EXIT(as); 2425 goto retry; 2426 } else if (error == IE_REATTACH) { 2427 /* 2428 * Find segment for current address 2429 * because current segment just got 2430 * split or concatenated 2431 */ 2432 seg = as_segat(as, raddr); 2433 if (seg == NULL) { 2434 AS_LOCK_EXIT(as); 2435 return (ENOMEM); 2436 } 2437 } else { 2438 /* 2439 * Regular error 2440 */ 2441 AS_LOCK_EXIT(as); 2442 return (error); 2443 } 2444 } 2445 break; 2446 2447 case MC_INHERIT_ZERO: 2448 if (seg->s_ops->inherit == NULL) { 2449 error = ENOTSUP; 2450 } else { 2451 error = SEGOP_INHERIT(seg, raddr, ssize, 2452 SEGP_INH_ZERO); 2453 } 2454 if (error != 0) { 2455 AS_LOCK_EXIT(as); 2456 return (error); 2457 } 2458 break; 2459 2460 /* 2461 * Can't happen. 
2462 */ 2463 default: 2464 panic("as_ctl: bad operation %d", func); 2465 /*NOTREACHED*/ 2466 } 2467 2468 rsize -= ssize; 2469 raddr += ssize; 2470 } 2471 2472 if (func == MC_LOCK) 2473 kmem_free(mlock_map, mlock_size * sizeof (ulong_t)); 2474 AS_LOCK_EXIT(as); 2475 return (0); 2476 lockerr: 2477 2478 /* 2479 * If the lower levels returned EDEADLK for a segment lockop, 2480 * it means that we should retry the operation. Let's wait 2481 * a bit also to let the deadlock causing condition clear. 2482 * This is part of a gross hack to work around a design flaw 2483 * in the ufs/sds logging code and should go away when the 2484 * logging code is re-designed to fix the problem. See bug 2485 * 4125102 for details of the problem. 2486 */ 2487 if (error == EDEADLK) { 2488 delay(deadlk_wait); 2489 error = 0; 2490 goto retry; 2491 } 2492 return (error); 2493 } 2494 2495 int 2496 fc_decode(faultcode_t fault_err) 2497 { 2498 int error = 0; 2499 2500 switch (FC_CODE(fault_err)) { 2501 case FC_OBJERR: 2502 error = FC_ERRNO(fault_err); 2503 break; 2504 case FC_PROT: 2505 error = EACCES; 2506 break; 2507 default: 2508 error = EFAULT; 2509 break; 2510 } 2511 return (error); 2512 } 2513 2514 /* 2515 * Pagelock pages from a range that spans more than 1 segment. Obtain shadow 2516 * lists from each segment and copy them to one contiguous shadow list (plist) 2517 * as expected by the caller. Save pointers to per segment shadow lists at 2518 * the tail of plist so that they can be used during as_pageunlock(). 2519 */ 2520 static int 2521 as_pagelock_segs(struct as *as, struct seg *seg, struct page ***ppp, 2522 caddr_t addr, size_t size, enum seg_rw rw) 2523 { 2524 caddr_t sv_addr = addr; 2525 size_t sv_size = size; 2526 struct seg *sv_seg = seg; 2527 ulong_t segcnt = 1; 2528 ulong_t cnt; 2529 size_t ssize; 2530 pgcnt_t npages = btop(size); 2531 page_t **plist; 2532 page_t **pl; 2533 int error; 2534 caddr_t eaddr; 2535 faultcode_t fault_err = 0; 2536 pgcnt_t pl_off; 2537 extern struct seg_ops segspt_shmops; 2538 2539 ASSERT(AS_LOCK_HELD(as)); 2540 ASSERT(seg != NULL); 2541 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size); 2542 ASSERT(addr + size > seg->s_base + seg->s_size); 2543 ASSERT(IS_P2ALIGNED(size, PAGESIZE)); 2544 ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); 2545 2546 /* 2547 * Count the number of segments covered by the range we are about to 2548 * lock. The segment count is used to size the shadow list we return 2549 * back to the caller. 2550 */ 2551 for (; size != 0; size -= ssize, addr += ssize) { 2552 if (addr >= seg->s_base + seg->s_size) { 2553 2554 seg = AS_SEGNEXT(as, seg); 2555 if (seg == NULL || addr != seg->s_base) { 2556 AS_LOCK_EXIT(as); 2557 return (EFAULT); 2558 } 2559 /* 2560 * Do a quick check if subsequent segments 2561 * will most likely support pagelock. 
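* Only anonymous segvn segments (no backing vnode) and SPT/ISM segments are expected to support it; anything else sends the whole request down the slow F_SOFTLOCK path.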
2562 */ 2563 if (seg->s_ops == &segvn_ops) { 2564 vnode_t *vp; 2565 2566 if (SEGOP_GETVP(seg, addr, &vp) != 0 || 2567 vp != NULL) { 2568 AS_LOCK_EXIT(as); 2569 goto slow; 2570 } 2571 } else if (seg->s_ops != &segspt_shmops) { 2572 AS_LOCK_EXIT(as); 2573 goto slow; 2574 } 2575 segcnt++; 2576 } 2577 if (addr + size > seg->s_base + seg->s_size) { 2578 ssize = seg->s_base + seg->s_size - addr; 2579 } else { 2580 ssize = size; 2581 } 2582 } 2583 ASSERT(segcnt > 1); 2584 2585 plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP); 2586 2587 addr = sv_addr; 2588 size = sv_size; 2589 seg = sv_seg; 2590 2591 for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) { 2592 if (addr >= seg->s_base + seg->s_size) { 2593 seg = AS_SEGNEXT(as, seg); 2594 ASSERT(seg != NULL && addr == seg->s_base); 2595 cnt++; 2596 ASSERT(cnt < segcnt); 2597 } 2598 if (addr + size > seg->s_base + seg->s_size) { 2599 ssize = seg->s_base + seg->s_size - addr; 2600 } else { 2601 ssize = size; 2602 } 2603 pl = &plist[npages + cnt]; 2604 error = SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl, 2605 L_PAGELOCK, rw); 2606 if (error) { 2607 break; 2608 } 2609 ASSERT(plist[npages + cnt] != NULL); 2610 ASSERT(pl_off + btop(ssize) <= npages); 2611 bcopy(plist[npages + cnt], &plist[pl_off], 2612 btop(ssize) * sizeof (page_t *)); 2613 pl_off += btop(ssize); 2614 } 2615 2616 if (size == 0) { 2617 AS_LOCK_EXIT(as); 2618 ASSERT(cnt == segcnt - 1); 2619 *ppp = plist; 2620 return (0); 2621 } 2622 2623 /* 2624 * One of the pagelock calls failed. The error type is in the error 2625 * variable. Unlock what we've locked so far and retry with F_SOFTLOCK 2626 * if the error type is either EFAULT or ENOTSUP. Otherwise just return 2627 * the error back to the caller. 2628 */ 2629 2630 eaddr = addr; 2631 seg = sv_seg; 2632 2633 for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) { 2634 if (addr >= seg->s_base + seg->s_size) { 2635 seg = AS_SEGNEXT(as, seg); 2636 ASSERT(seg != NULL && addr == seg->s_base); 2637 cnt++; 2638 ASSERT(cnt < segcnt); 2639 } 2640 if (eaddr > seg->s_base + seg->s_size) { 2641 ssize = seg->s_base + seg->s_size - addr; 2642 } else { 2643 ssize = eaddr - addr; 2644 } 2645 pl = &plist[npages + cnt]; 2646 ASSERT(*pl != NULL); 2647 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl, 2648 L_PAGEUNLOCK, rw); 2649 } 2650 2651 AS_LOCK_EXIT(as); 2652 2653 kmem_free(plist, (npages + segcnt) * sizeof (page_t *)); 2654 2655 if (error != ENOTSUP && error != EFAULT) { 2656 return (error); 2657 } 2658 2659 slow: 2660 /* 2661 * If we are here because pagelock failed due to the need to cow-fault 2662 * in the pages we want to lock, F_SOFTLOCK will do this job, and in 2663 * the next as_pagelock() call for this address range pagelock will 2664 * hopefully succeed. 2665 */ 2666 fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw); 2667 if (fault_err != 0) { 2668 return (fc_decode(fault_err)); 2669 } 2670 *ppp = NULL; 2671 2672 return (0); 2673 } 2674 2675 /* 2676 * lock pages in a given address space. Return shadow list. If 2677 * the list is NULL, the MMU mapping is also locked.
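* Illustrative usage sketch (not part of the original comment): a caller typically brackets a transfer with this pair, e.g. if (as_pagelock(as, &pplist, uaddr, len, S_WRITE) == 0) { ... do the I/O ... ; as_pageunlock(as, pplist, uaddr, len, S_WRITE); } where pplist is a page_t **; if pplist comes back NULL the pages were soft-locked via as_fault() instead, and as_pageunlock() soft-unlocks them accordingly.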
2678 */ 2679 int 2680 as_pagelock(struct as *as, struct page ***ppp, caddr_t addr, 2681 size_t size, enum seg_rw rw) 2682 { 2683 size_t rsize; 2684 caddr_t raddr; 2685 faultcode_t fault_err; 2686 struct seg *seg; 2687 int err; 2688 2689 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START, 2690 "as_pagelock_start: addr %p size %ld", addr, size); 2691 2692 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2693 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2694 (size_t)raddr; 2695 2696 /* 2697 * if the request crosses more than one segment, let 2698 * as_pagelock_segs() handle it. 2699 */ 2700 AS_LOCK_ENTER(as, RW_READER); 2701 2702 seg = as_segat(as, raddr); 2703 if (seg == NULL) { 2704 AS_LOCK_EXIT(as); 2705 return (EFAULT); 2706 } 2707 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size); 2708 if (raddr + rsize > seg->s_base + seg->s_size) { 2709 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw)); 2710 } 2711 if (raddr + rsize <= raddr) { 2712 AS_LOCK_EXIT(as); 2713 return (EFAULT); 2714 } 2715 2716 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START, 2717 "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize); 2718 2719 /* 2720 * try to lock pages and pass back shadow list 2721 */ 2722 err = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw); 2723 2724 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end"); 2725 2726 AS_LOCK_EXIT(as); 2727 2728 if (err == 0 || (err != ENOTSUP && err != EFAULT)) { 2729 return (err); 2730 } 2731 2732 /* 2733 * Use F_SOFTLOCK to lock the pages because pagelock failed either due 2734 * to no pagelock support for this segment or because pages need to be 2735 * cow-faulted in. If a fault is needed, F_SOFTLOCK will do this job for 2736 * this as_pagelock() call, and in the next as_pagelock() call for the 2737 * same address range the pagelock call will hopefully succeed. 2738 */ 2739 fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw); 2740 if (fault_err != 0) { 2741 return (fc_decode(fault_err)); 2742 } 2743 *ppp = NULL; 2744 2745 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end"); 2746 return (0); 2747 } 2748 2749 /* 2750 * unlock pages locked by as_pagelock_segs(). Retrieve per segment shadow 2751 * lists from the end of plist and call pageunlock interface for each segment. 2752 * Drop as lock and free plist.
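* (Layout reminder, not in the original comment: plist[0 .. npages - 1] is the flattened shadow list that was handed back to the caller, while plist[npages .. npages + segcnt - 1] holds the per-segment shadow list pointers that are consumed here before the whole array is freed.)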
2753 */ 2754 static void 2755 as_pageunlock_segs(struct as *as, struct seg *seg, caddr_t addr, size_t size, 2756 struct page **plist, enum seg_rw rw) 2757 { 2758 ulong_t cnt; 2759 caddr_t eaddr = addr + size; 2760 pgcnt_t npages = btop(size); 2761 size_t ssize; 2762 page_t **pl; 2763 2764 ASSERT(AS_LOCK_HELD(as)); 2765 ASSERT(seg != NULL); 2766 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size); 2767 ASSERT(addr + size > seg->s_base + seg->s_size); 2768 ASSERT(IS_P2ALIGNED(size, PAGESIZE)); 2769 ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); 2770 ASSERT(plist != NULL); 2771 2772 for (cnt = 0; addr < eaddr; addr += ssize) { 2773 if (addr >= seg->s_base + seg->s_size) { 2774 seg = AS_SEGNEXT(as, seg); 2775 ASSERT(seg != NULL && addr == seg->s_base); 2776 cnt++; 2777 } 2778 if (eaddr > seg->s_base + seg->s_size) { 2779 ssize = seg->s_base + seg->s_size - addr; 2780 } else { 2781 ssize = eaddr - addr; 2782 } 2783 pl = &plist[npages + cnt]; 2784 ASSERT(*pl != NULL); 2785 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl, 2786 L_PAGEUNLOCK, rw); 2787 } 2788 ASSERT(cnt > 0); 2789 AS_LOCK_EXIT(as); 2790 2791 cnt++; 2792 kmem_free(plist, (npages + cnt) * sizeof (page_t *)); 2793 } 2794 2795 /* 2796 * unlock pages in a given address range 2797 */ 2798 void 2799 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size, 2800 enum seg_rw rw) 2801 { 2802 struct seg *seg; 2803 size_t rsize; 2804 caddr_t raddr; 2805 2806 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START, 2807 "as_pageunlock_start: addr %p size %ld", addr, size); 2808 2809 /* 2810 * if the shadow list is NULL, as_pagelock was 2811 * falling back to as_fault 2812 */ 2813 if (pp == NULL) { 2814 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw); 2815 return; 2816 } 2817 2818 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2819 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2820 (size_t)raddr; 2821 2822 AS_LOCK_ENTER(as, RW_READER); 2823 seg = as_segat(as, raddr); 2824 ASSERT(seg != NULL); 2825 2826 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START, 2827 "seg_unlock_start: raddr %p rsize %ld", raddr, rsize); 2828 2829 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size); 2830 if (raddr + rsize <= seg->s_base + seg->s_size) { 2831 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw); 2832 } else { 2833 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw); 2834 return; 2835 } 2836 AS_LOCK_EXIT(as); 2837 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end"); 2838 } 2839 2840 int 2841 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc, 2842 boolean_t wait) 2843 { 2844 struct seg *seg; 2845 size_t ssize; 2846 caddr_t raddr; /* rounded down addr */ 2847 size_t rsize; /* rounded up size */ 2848 int error = 0; 2849 size_t pgsz = page_get_pagesize(szc); 2850 2851 setpgsz_top: 2852 if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) { 2853 return (EINVAL); 2854 } 2855 2856 raddr = addr; 2857 rsize = size; 2858 2859 if (raddr + rsize < raddr) /* check for wraparound */ 2860 return (ENOMEM); 2861 2862 AS_LOCK_ENTER(as, RW_WRITER); 2863 as_clearwatchprot(as, raddr, rsize); 2864 seg = as_segat(as, raddr); 2865 if (seg == NULL) { 2866 as_setwatch(as); 2867 AS_LOCK_EXIT(as); 2868 return (ENOMEM); 2869 } 2870 2871 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2872 if (raddr >= seg->s_base + seg->s_size) { 2873 seg = AS_SEGNEXT(as, seg); 2874 if (seg == NULL || raddr != seg->s_base) { 2875 error = ENOMEM; 2876 break; 2877 } 2878 } 2879 if 
((raddr + rsize) > (seg->s_base + seg->s_size)) { 2880 ssize = seg->s_base + seg->s_size - raddr; 2881 } else { 2882 ssize = rsize; 2883 } 2884 2885 retry: 2886 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc); 2887 2888 if (error == IE_NOMEM) { 2889 error = EAGAIN; 2890 break; 2891 } 2892 2893 if (error == IE_RETRY) { 2894 AS_LOCK_EXIT(as); 2895 goto setpgsz_top; 2896 } 2897 2898 if (error == ENOTSUP) { 2899 error = EINVAL; 2900 break; 2901 } 2902 2903 if (wait && (error == EAGAIN)) { 2904 /* 2905 * Memory is currently locked. It must be unlocked 2906 * before this operation can succeed through a retry. 2907 * The possible reasons for locked memory and 2908 * corresponding strategies for unlocking are: 2909 * (1) Normal I/O 2910 * wait for a signal that the I/O operation 2911 * has completed and the memory is unlocked. 2912 * (2) Asynchronous I/O 2913 * The aio subsystem does not unlock pages when 2914 * the I/O is completed. Those pages are unlocked 2915 * when the application calls aiowait/aioerror. 2916 * So, to prevent blocking forever, cv_broadcast() 2917 * is done to wake up aio_cleanup_thread. 2918 * Subsequently, segvn_reclaim will be called, and 2919 * that will do AS_CLRUNMAPWAIT() and wake us up. 2920 * (3) Long term page locking: 2921 * This is not relevant for as_setpagesize() 2922 * because we cannot change the page size for 2923 * driver memory. The attempt to do so will 2924 * fail with a different error than EAGAIN so 2925 * there's no need to trigger as callbacks like 2926 * as_unmap, as_setprot or as_free would do. 2927 */ 2928 mutex_enter(&as->a_contents); 2929 if (!AS_ISNOUNMAPWAIT(as)) { 2930 if (AS_ISUNMAPWAIT(as) == 0) { 2931 cv_broadcast(&as->a_cv); 2932 } 2933 AS_SETUNMAPWAIT(as); 2934 AS_LOCK_EXIT(as); 2935 while (AS_ISUNMAPWAIT(as)) { 2936 cv_wait(&as->a_cv, &as->a_contents); 2937 } 2938 } else { 2939 /* 2940 * We may have raced with 2941 * segvn_reclaim()/segspt_reclaim(). In this 2942 * case clean nounmapwait flag and retry since 2943 * softlockcnt in this segment may be already 2944 * 0. We don't drop as writer lock so our 2945 * number of retries without sleeping should 2946 * be very small. See segvn_reclaim() for 2947 * more comments. 2948 */ 2949 AS_CLRNOUNMAPWAIT(as); 2950 mutex_exit(&as->a_contents); 2951 goto retry; 2952 } 2953 mutex_exit(&as->a_contents); 2954 goto setpgsz_top; 2955 } else if (error != 0) { 2956 break; 2957 } 2958 } 2959 as_setwatch(as); 2960 AS_LOCK_EXIT(as); 2961 return (error); 2962 } 2963 2964 /* 2965 * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments 2966 * in its chunk where s_szc is less than the szc we want to set. 2967 */ 2968 static int 2969 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, 2970 int *retry) 2971 { 2972 struct seg *seg; 2973 size_t ssize; 2974 int error; 2975 2976 ASSERT(AS_WRITE_HELD(as)); 2977 2978 seg = as_segat(as, raddr); 2979 if (seg == NULL) { 2980 panic("as_iset3_default_lpsize: no seg"); 2981 } 2982 2983 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2984 if (raddr >= seg->s_base + seg->s_size) { 2985 seg = AS_SEGNEXT(as, seg); 2986 if (seg == NULL || raddr != seg->s_base) { 2987 panic("as_iset3_default_lpsize: as changed"); 2988 } 2989 } 2990 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 2991 ssize = seg->s_base + seg->s_size - raddr; 2992 } else { 2993 ssize = rsize; 2994 } 2995 2996 if (szc > seg->s_szc) { 2997 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc); 2998 /* Only retry on EINVAL segments that have no vnode. 
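(In practice that means anonymous MAP_SHARED segments, for which a smaller size from the szc bitmap may still succeed.)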
*/ 2999 if (error == EINVAL) { 3000 vnode_t *vp = NULL; 3001 if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) && 3002 (SEGOP_GETVP(seg, raddr, &vp) != 0 || 3003 vp == NULL)) { 3004 *retry = 1; 3005 } else { 3006 *retry = 0; 3007 } 3008 } 3009 if (error) { 3010 return (error); 3011 } 3012 } 3013 } 3014 return (0); 3015 } 3016 3017 /* 3018 * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the 3019 * pagesize on each segment in its range, but if any fails with EINVAL, 3020 * then it reduces the pagesizes to the next size in the bitmap and 3021 * retries as_iset3_default_lpsize(). The reason why the code retries 3022 * smaller allowed sizes on EINVAL is because (a) the anon offset may not 3023 * match the bigger sizes, and (b) it's hard to get this offset (to begin 3024 * with) to pass to map_pgszcvec(). 3025 */ 3026 static int 3027 as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc, 3028 uint_t szcvec) 3029 { 3030 int error; 3031 int retry; 3032 3033 ASSERT(AS_WRITE_HELD(as)); 3034 3035 for (;;) { 3036 error = as_iset3_default_lpsize(as, addr, size, szc, &retry); 3037 if (error == EINVAL && retry) { 3038 szcvec &= ~(1 << szc); 3039 if (szcvec <= 1) { 3040 return (EINVAL); 3041 } 3042 szc = highbit(szcvec) - 1; 3043 } else { 3044 return (error); 3045 } 3046 } 3047 } 3048 3049 /* 3050 * as_iset1_default_lpsize() breaks its chunk into areas where existing 3051 * segments have a smaller szc than we want to set. For each such area, 3052 * it calls as_iset2_default_lpsize() 3053 */ 3054 static int 3055 as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, 3056 uint_t szcvec) 3057 { 3058 struct seg *seg; 3059 size_t ssize; 3060 caddr_t setaddr = raddr; 3061 size_t setsize = 0; 3062 int set; 3063 int error; 3064 3065 ASSERT(AS_WRITE_HELD(as)); 3066 3067 seg = as_segat(as, raddr); 3068 if (seg == NULL) { 3069 panic("as_iset1_default_lpsize: no seg"); 3070 } 3071 if (seg->s_szc < szc) { 3072 set = 1; 3073 } else { 3074 set = 0; 3075 } 3076 3077 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) { 3078 if (raddr >= seg->s_base + seg->s_size) { 3079 seg = AS_SEGNEXT(as, seg); 3080 if (seg == NULL || raddr != seg->s_base) { 3081 panic("as_iset1_default_lpsize: as changed"); 3082 } 3083 if (seg->s_szc >= szc && set) { 3084 ASSERT(setsize != 0); 3085 error = as_iset2_default_lpsize(as, 3086 setaddr, setsize, szc, szcvec); 3087 if (error) { 3088 return (error); 3089 } 3090 set = 0; 3091 } else if (seg->s_szc < szc && !set) { 3092 setaddr = raddr; 3093 setsize = 0; 3094 set = 1; 3095 } 3096 } 3097 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 3098 ssize = seg->s_base + seg->s_size - raddr; 3099 } else { 3100 ssize = rsize; 3101 } 3102 } 3103 error = 0; 3104 if (set) { 3105 ASSERT(setsize != 0); 3106 error = as_iset2_default_lpsize(as, setaddr, setsize, 3107 szc, szcvec); 3108 } 3109 return (error); 3110 } 3111 3112 /* 3113 * as_iset_default_lpsize() breaks its chunk according to the size code bitmap 3114 * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each 3115 * chunk to as_iset1_default_lpsize(). 3116 */ 3117 static int 3118 as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags, 3119 int type) 3120 { 3121 int rtype = (type & MAP_SHARED) ? 
MAPPGSZC_SHM : MAPPGSZC_PRIVM; 3122 uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, 3123 flags, rtype, 1); 3124 uint_t szc; 3125 uint_t nszc; 3126 int error; 3127 caddr_t a; 3128 caddr_t eaddr; 3129 size_t segsize; 3130 size_t pgsz; 3131 uint_t save_szcvec; 3132 3133 ASSERT(AS_WRITE_HELD(as)); 3134 ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); 3135 ASSERT(IS_P2ALIGNED(size, PAGESIZE)); 3136 3137 szcvec &= ~1; 3138 if (szcvec <= 1) { /* skip if base page size */ 3139 return (0); 3140 } 3141 3142 /* Get the pagesize of the first larger page size. */ 3143 szc = lowbit(szcvec) - 1; 3144 pgsz = page_get_pagesize(szc); 3145 eaddr = addr + size; 3146 addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); 3147 eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); 3148 3149 save_szcvec = szcvec; 3150 szcvec >>= (szc + 1); 3151 nszc = szc; 3152 while (szcvec) { 3153 if ((szcvec & 0x1) == 0) { 3154 nszc++; 3155 szcvec >>= 1; 3156 continue; 3157 } 3158 nszc++; 3159 pgsz = page_get_pagesize(nszc); 3160 a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); 3161 if (a != addr) { 3162 ASSERT(szc > 0); 3163 ASSERT(a < eaddr); 3164 segsize = a - addr; 3165 error = as_iset1_default_lpsize(as, addr, segsize, szc, 3166 save_szcvec); 3167 if (error) { 3168 return (error); 3169 } 3170 addr = a; 3171 } 3172 szc = nszc; 3173 szcvec >>= 1; 3174 } 3175 3176 ASSERT(addr < eaddr); 3177 szcvec = save_szcvec; 3178 while (szcvec) { 3179 a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); 3180 ASSERT(a >= addr); 3181 if (a != addr) { 3182 ASSERT(szc > 0); 3183 segsize = a - addr; 3184 error = as_iset1_default_lpsize(as, addr, segsize, szc, 3185 save_szcvec); 3186 if (error) { 3187 return (error); 3188 } 3189 addr = a; 3190 } 3191 szcvec &= ~(1 << szc); 3192 if (szcvec) { 3193 szc = highbit(szcvec) - 1; 3194 pgsz = page_get_pagesize(szc); 3195 } 3196 } 3197 ASSERT(addr == eaddr); 3198 3199 return (0); 3200 } 3201 3202 /* 3203 * Set the default large page size for the range. Called via memcntl with 3204 * page size set to 0. as_set_default_lpsize breaks the range down into 3205 * chunks with the same type/flags, ignores non-segvn segments, and passes 3206 * each chunk to as_iset_default_lpsize().
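* For orientation, the call chain is as_set_default_lpsize() -> as_iset_default_lpsize() -> as_iset1_default_lpsize() -> as_iset2_default_lpsize() -> as_iset3_default_lpsize(): the levels successively split the range by segment type/flags, by the page size codes from map_pgszcvec(), and by the existing segment s_szc, falling back to smaller sizes on EINVAL, before SEGOP_SETPAGESIZE() is finally applied.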
3207 */ 3208 int 3209 as_set_default_lpsize(struct as *as, caddr_t addr, size_t size) 3210 { 3211 struct seg *seg; 3212 caddr_t raddr; 3213 size_t rsize; 3214 size_t ssize; 3215 int rtype, rflags; 3216 int stype, sflags; 3217 int error; 3218 caddr_t setaddr; 3219 size_t setsize; 3220 int segvn; 3221 3222 if (size == 0) 3223 return (0); 3224 3225 AS_LOCK_ENTER(as, RW_WRITER); 3226 again: 3227 error = 0; 3228 3229 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3230 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 3231 (size_t)raddr; 3232 3233 if (raddr + rsize < raddr) { /* check for wraparound */ 3234 AS_LOCK_EXIT(as); 3235 return (ENOMEM); 3236 } 3237 as_clearwatchprot(as, raddr, rsize); 3238 seg = as_segat(as, raddr); 3239 if (seg == NULL) { 3240 as_setwatch(as); 3241 AS_LOCK_EXIT(as); 3242 return (ENOMEM); 3243 } 3244 if (seg->s_ops == &segvn_ops) { 3245 rtype = SEGOP_GETTYPE(seg, addr); 3246 rflags = rtype & (MAP_TEXT | MAP_INITDATA); 3247 rtype = rtype & (MAP_SHARED | MAP_PRIVATE); 3248 segvn = 1; 3249 } else { 3250 segvn = 0; 3251 } 3252 setaddr = raddr; 3253 setsize = 0; 3254 3255 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) { 3256 if (raddr >= (seg->s_base + seg->s_size)) { 3257 seg = AS_SEGNEXT(as, seg); 3258 if (seg == NULL || raddr != seg->s_base) { 3259 error = ENOMEM; 3260 break; 3261 } 3262 if (seg->s_ops == &segvn_ops) { 3263 stype = SEGOP_GETTYPE(seg, raddr); 3264 sflags = stype & (MAP_TEXT | MAP_INITDATA); 3265 stype &= (MAP_SHARED | MAP_PRIVATE); 3266 if (segvn && (rflags != sflags || 3267 rtype != stype)) { 3268 /* 3269 * The next segment is also segvn but 3270 * has different flags and/or type. 3271 */ 3272 ASSERT(setsize != 0); 3273 error = as_iset_default_lpsize(as, 3274 setaddr, setsize, rflags, rtype); 3275 if (error) { 3276 break; 3277 } 3278 rflags = sflags; 3279 rtype = stype; 3280 setaddr = raddr; 3281 setsize = 0; 3282 } else if (!segvn) { 3283 rflags = sflags; 3284 rtype = stype; 3285 setaddr = raddr; 3286 setsize = 0; 3287 segvn = 1; 3288 } 3289 } else if (segvn) { 3290 /* The next segment is not segvn. */ 3291 ASSERT(setsize != 0); 3292 error = as_iset_default_lpsize(as, 3293 setaddr, setsize, rflags, rtype); 3294 if (error) { 3295 break; 3296 } 3297 segvn = 0; 3298 } 3299 } 3300 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 3301 ssize = seg->s_base + seg->s_size - raddr; 3302 } else { 3303 ssize = rsize; 3304 } 3305 } 3306 if (error == 0 && segvn) { 3307 /* The last chunk when rsize == 0. */ 3308 ASSERT(setsize != 0); 3309 error = as_iset_default_lpsize(as, setaddr, setsize, 3310 rflags, rtype); 3311 } 3312 3313 if (error == IE_RETRY) { 3314 goto again; 3315 } else if (error == IE_NOMEM) { 3316 error = EAGAIN; 3317 } else if (error == ENOTSUP) { 3318 error = EINVAL; 3319 } else if (error == EAGAIN) { 3320 mutex_enter(&as->a_contents); 3321 if (!AS_ISNOUNMAPWAIT(as)) { 3322 if (AS_ISUNMAPWAIT(as) == 0) { 3323 cv_broadcast(&as->a_cv); 3324 } 3325 AS_SETUNMAPWAIT(as); 3326 AS_LOCK_EXIT(as); 3327 while (AS_ISUNMAPWAIT(as)) { 3328 cv_wait(&as->a_cv, &as->a_contents); 3329 } 3330 mutex_exit(&as->a_contents); 3331 AS_LOCK_ENTER(as, RW_WRITER); 3332 } else { 3333 /* 3334 * We may have raced with 3335 * segvn_reclaim()/segspt_reclaim(). In this case 3336 * clean nounmapwait flag and retry since softlockcnt 3337 * in this segment may be already 0. We don't drop as 3338 * writer lock so our number of retries without 3339 * sleeping should be very small. See segvn_reclaim() 3340 * for more comments. 
3341 */ 3342 AS_CLRNOUNMAPWAIT(as); 3343 mutex_exit(&as->a_contents); 3344 } 3345 goto again; 3346 } 3347 3348 as_setwatch(as); 3349 AS_LOCK_EXIT(as); 3350 return (error); 3351 } 3352 3353 /* 3354 * Setup all of the uninitialized watched pages that we can. 3355 */ 3356 void 3357 as_setwatch(struct as *as) 3358 { 3359 struct watched_page *pwp; 3360 struct seg *seg; 3361 caddr_t vaddr; 3362 uint_t prot; 3363 int err, retrycnt; 3364 3365 if (avl_numnodes(&as->a_wpage) == 0) 3366 return; 3367 3368 ASSERT(AS_WRITE_HELD(as)); 3369 3370 for (pwp = avl_first(&as->a_wpage); pwp != NULL; 3371 pwp = AVL_NEXT(&as->a_wpage, pwp)) { 3372 retrycnt = 0; 3373 retry: 3374 vaddr = pwp->wp_vaddr; 3375 if (pwp->wp_oprot != 0 || /* already set up */ 3376 (seg = as_segat(as, vaddr)) == NULL || 3377 SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0) 3378 continue; 3379 3380 pwp->wp_oprot = prot; 3381 if (pwp->wp_read) 3382 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3383 if (pwp->wp_write) 3384 prot &= ~PROT_WRITE; 3385 if (pwp->wp_exec) 3386 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3387 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) { 3388 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot); 3389 if (err == IE_RETRY) { 3390 pwp->wp_oprot = 0; 3391 ASSERT(retrycnt == 0); 3392 retrycnt++; 3393 goto retry; 3394 } 3395 } 3396 pwp->wp_prot = prot; 3397 } 3398 } 3399 3400 /* 3401 * Clear all of the watched pages in the address space. 3402 */ 3403 void 3404 as_clearwatch(struct as *as) 3405 { 3406 struct watched_page *pwp; 3407 struct seg *seg; 3408 caddr_t vaddr; 3409 uint_t prot; 3410 int err, retrycnt; 3411 3412 if (avl_numnodes(&as->a_wpage) == 0) 3413 return; 3414 3415 ASSERT(AS_WRITE_HELD(as)); 3416 3417 for (pwp = avl_first(&as->a_wpage); pwp != NULL; 3418 pwp = AVL_NEXT(&as->a_wpage, pwp)) { 3419 retrycnt = 0; 3420 retry: 3421 vaddr = pwp->wp_vaddr; 3422 if (pwp->wp_oprot == 0 || /* not set up */ 3423 (seg = as_segat(as, vaddr)) == NULL) 3424 continue; 3425 3426 if ((prot = pwp->wp_oprot) != pwp->wp_prot) { 3427 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot); 3428 if (err == IE_RETRY) { 3429 ASSERT(retrycnt == 0); 3430 retrycnt++; 3431 goto retry; 3432 } 3433 } 3434 pwp->wp_oprot = 0; 3435 pwp->wp_prot = 0; 3436 } 3437 } 3438 3439 /* 3440 * Force a new setup for all the watched pages in the range. 
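* The effective protection (wp_prot) of each watched page in the range is re-derived from the new underlying protection "prot" while keeping the read/write/exec watch restrictions in place.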
3441 */ 3442 static void 3443 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot) 3444 { 3445 struct watched_page *pwp; 3446 struct watched_page tpw; 3447 caddr_t eaddr = addr + size; 3448 caddr_t vaddr; 3449 struct seg *seg; 3450 int err, retrycnt; 3451 uint_t wprot; 3452 avl_index_t where; 3453 3454 if (avl_numnodes(&as->a_wpage) == 0) 3455 return; 3456 3457 ASSERT(AS_WRITE_HELD(as)); 3458 3459 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3460 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL) 3461 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER); 3462 3463 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3464 retrycnt = 0; 3465 vaddr = pwp->wp_vaddr; 3466 3467 wprot = prot; 3468 if (pwp->wp_read) 3469 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3470 if (pwp->wp_write) 3471 wprot &= ~PROT_WRITE; 3472 if (pwp->wp_exec) 3473 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3474 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) { 3475 retry: 3476 seg = as_segat(as, vaddr); 3477 if (seg == NULL) { 3478 panic("as_setwatchprot: no seg"); 3479 /*NOTREACHED*/ 3480 } 3481 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot); 3482 if (err == IE_RETRY) { 3483 ASSERT(retrycnt == 0); 3484 retrycnt++; 3485 goto retry; 3486 } 3487 } 3488 pwp->wp_oprot = prot; 3489 pwp->wp_prot = wprot; 3490 3491 pwp = AVL_NEXT(&as->a_wpage, pwp); 3492 } 3493 } 3494 3495 /* 3496 * Clear all of the watched pages in the range. 3497 */ 3498 static void 3499 as_clearwatchprot(struct as *as, caddr_t addr, size_t size) 3500 { 3501 caddr_t eaddr = addr + size; 3502 struct watched_page *pwp; 3503 struct watched_page tpw; 3504 uint_t prot; 3505 struct seg *seg; 3506 int err, retrycnt; 3507 avl_index_t where; 3508 3509 if (avl_numnodes(&as->a_wpage) == 0) 3510 return; 3511 3512 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3513 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL) 3514 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER); 3515 3516 ASSERT(AS_WRITE_HELD(as)); 3517 3518 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3519 3520 if ((prot = pwp->wp_oprot) != 0) { 3521 retrycnt = 0; 3522 3523 if (prot != pwp->wp_prot) { 3524 retry: 3525 seg = as_segat(as, pwp->wp_vaddr); 3526 if (seg == NULL) 3527 continue; 3528 err = SEGOP_SETPROT(seg, pwp->wp_vaddr, 3529 PAGESIZE, prot); 3530 if (err == IE_RETRY) { 3531 ASSERT(retrycnt == 0); 3532 retrycnt++; 3533 goto retry; 3534 3535 } 3536 } 3537 pwp->wp_oprot = 0; 3538 pwp->wp_prot = 0; 3539 } 3540 3541 pwp = AVL_NEXT(&as->a_wpage, pwp); 3542 } 3543 } 3544 3545 void 3546 as_signal_proc(struct as *as, k_siginfo_t *siginfo) 3547 { 3548 struct proc *p; 3549 3550 mutex_enter(&pidlock); 3551 for (p = practive; p; p = p->p_next) { 3552 if (p->p_as == as) { 3553 mutex_enter(&p->p_lock); 3554 if (p->p_as == as) 3555 sigaddq(p, NULL, siginfo, KM_NOSLEEP); 3556 mutex_exit(&p->p_lock); 3557 } 3558 } 3559 mutex_exit(&pidlock); 3560 } 3561 3562 /* 3563 * return memory object ID 3564 */ 3565 int 3566 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp) 3567 { 3568 struct seg *seg; 3569 int sts; 3570 3571 AS_LOCK_ENTER(as, RW_READER); 3572 seg = as_segat(as, addr); 3573 if (seg == NULL) { 3574 AS_LOCK_EXIT(as); 3575 return (EFAULT); 3576 } 3577 /* 3578 * catch old drivers which may not support getmemid 3579 */ 3580 if (seg->s_ops->getmemid == NULL) { 3581 AS_LOCK_EXIT(as); 3582 return (ENODEV); 3583 } 3584 3585 sts = SEGOP_GETMEMID(seg, addr, memidp); 3586 3587 AS_LOCK_EXIT(as); 3588 return (sts); 3589 } 3590
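/*
 * Illustrative sketch, not part of the original source: a minimal example of
 * soft-locking a user range with as_fault() and turning the resulting
 * faultcode_t into an errno with fc_decode().  The AS_EXAMPLES guard is a
 * hypothetical symbol used only to keep this example out of real builds.
 */
#ifdef AS_EXAMPLES
static int
as_example_softlock(struct as *as, caddr_t addr, size_t len)
{
	faultcode_t fc;

	/* Fault the pages in and hold them locked for short-term access. */
	fc = as_fault(as->a_hat, as, addr, len, F_SOFTLOCK, S_WRITE);
	if (fc != 0)
		return (fc_decode(fc));	/* EACCES, EFAULT or the object errno */

	/* ... access the pages here ... */

	/* Release the soft locks. */
	(void) as_fault(as->a_hat, as, addr, len, F_SOFTUNLOCK, S_WRITE);
	return (0);
}
#endif	/* AS_EXAMPLES */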