/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - address spaces.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/sysmacros.h>
#include <sys/cpuvar.h>
#include <sys/sysinfo.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/tnf_probe.h>
#include <sys/vtrace.h>

#include <vm/hat.h>
#include <vm/xhat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>
#include <vm/seg_spt.h>
#include <vm/page.h>

clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */

static struct kmem_cache *as_cache;

static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
static void as_clearwatchprot(struct as *, caddr_t, size_t);
int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);


/*
 * Verifying the segment lists is very time-consuming; it may not be
 * desirable always to define VERIFY_SEGLIST when DEBUG is set.
 */
#ifdef DEBUG
#define	VERIFY_SEGLIST
int do_as_verify = 0;
#endif
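
/*
 * Note on the page-rounding convention used throughout this file: callers
 * pass arbitrary [addr, addr + size) intervals, which are rounded out to
 * page boundaries before the segment walk:
 *
 *	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
 *	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
 *	    (size_t)raddr;
 *
 * Worked example (assuming an 8K PAGESIZE, i.e. PAGEOFFSET == 0x1fff):
 * addr == 0x10234 and size == 0x100 yield raddr == 0x10000 and
 * rsize == 0x2000, i.e. the single page containing the request.
 */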

/*
 * Allocate a new callback data structure entry and fill in the events of
 * interest, the address range of interest, and the callback argument.
 * Link the entry on the as->a_callbacks list.  A callback entry for the
 * entire address space may be specified with vaddr = 0 and size = -1.
 *
 * CALLER'S RESPONSIBILITY: If not calling from within the process context for
 * the specified as, the caller must guarantee persistence of the specified as
 * for the duration of this function (e.g., pages being locked within the as
 * will guarantee persistence).
 */
int
as_add_callback(struct as *as, void (*cb_func)(), void *arg, uint_t events,
    caddr_t vaddr, size_t size, int sleepflag)
{
	struct as_callback *current_head, *cb;
	caddr_t saddr;
	size_t rsize;

	/* callback function and an event are mandatory */
	if ((cb_func == NULL) || ((events & AS_ALL_EVENT) == 0))
		return (EINVAL);

	/* Adding a callback after as_free has been called is not allowed */
	if (as == &kas)
		return (ENOMEM);

	/*
	 * vaddr = 0 and size = -1 is used to indicate that the callback range
	 * is the entire address space so no rounding is done in that case.
	 */
	if (size != -1) {
		saddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
		rsize = (((size_t)(vaddr + size) + PAGEOFFSET) & PAGEMASK) -
		    (size_t)saddr;
		/* check for wraparound */
		if (saddr + rsize < saddr)
			return (ENOMEM);
	} else {
		if (vaddr != 0)
			return (EINVAL);
		saddr = vaddr;
		rsize = size;
	}

	/* Allocate and initialize a callback entry */
	cb = kmem_zalloc(sizeof (struct as_callback), sleepflag);
	if (cb == NULL)
		return (EAGAIN);

	cb->ascb_func = cb_func;
	cb->ascb_arg = arg;
	cb->ascb_events = events;
	cb->ascb_saddr = saddr;
	cb->ascb_len = rsize;

	/* Add the entry to the list */
	mutex_enter(&as->a_contents);
	current_head = as->a_callbacks;
	as->a_callbacks = cb;
	cb->ascb_next = current_head;

	/*
	 * The call to this function may lose in a race with
	 * a pertinent event - e.g., a thread does long term memory locking
	 * but before the callback is added another thread executes as_unmap.
	 * A broadcast here resolves that.
	 */
	if ((cb->ascb_events & AS_UNMAPWAIT_EVENT) && AS_ISUNMAPWAIT(as)) {
		AS_CLRUNMAPWAIT(as);
		cv_broadcast(&as->a_cv);
	}

	mutex_exit(&as->a_contents);
	return (0);
}
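
/*
 * Illustrative sketch (hypothetical driver code, names are made up):
 * a driver that keeps pages locked long term can register for unmap
 * events so that as_unmap()/as_free() are able to make progress, and
 * release the pages from its callback:
 *
 *	static void
 *	xx_as_callback(struct as *as, void *arg, uint_t events)
 *	{
 *		xx_state_t *xsp = arg;		(hypothetical soft state)
 *
 *		xx_unlock_pages(xsp);		(hypothetical unlock routine)
 *		(void) as_delete_callback(as, arg);
 *	}
 *
 *	error = as_add_callback(as, xx_as_callback, xsp,
 *	    AS_UNMAP_EVENT | AS_UNMAPWAIT_EVENT, vaddr, size, KM_SLEEP);
 */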

/*
 * Search the callback list for an entry which pertains to arg.
 *
 * This is called from within the client upon completion of the callback.
 * RETURN VALUES:
 *	AS_CALLBACK_DELETED  (callback entry found and deleted)
 *	AS_CALLBACK_NOTFOUND (no callback entry found - this is ok)
 *	AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this
 *			entry will be made in as_do_callbacks)
 *
 * If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED
 * set, it indicates that as_do_callbacks is processing this entry.  The
 * AS_ALL_EVENT events are cleared in the entry, and a broadcast is made
 * to unblock as_do_callbacks, in case it is blocked.
 *
 * CALLER'S RESPONSIBILITY: If not calling from within the process context for
 * the specified as, the caller must guarantee persistence of the specified as
 * for the duration of this function (e.g., pages being locked within the as
 * will guarantee persistence).
 */
uint_t
as_delete_callback(struct as *as, void *arg)
{
	struct as_callback **prevcb = &as->a_callbacks;
	struct as_callback *cb;
	uint_t rc = AS_CALLBACK_NOTFOUND;

	mutex_enter(&as->a_contents);
	for (cb = as->a_callbacks; cb; prevcb = &cb->ascb_next, cb = *prevcb) {
		if (cb->ascb_arg != arg)
			continue;

		/*
		 * If the events indicate AS_CALLBACK_CALLED, just clear
		 * AS_ALL_EVENT in the events field and wakeup the thread
		 * that may be waiting in as_do_callbacks.  as_do_callbacks
		 * will take care of removing this entry from the list.  In
		 * that case, return AS_CALLBACK_DELETE_DEFERRED.  Otherwise
		 * (AS_CALLBACK_CALLED not set), just remove it from the
		 * list, return the memory and return AS_CALLBACK_DELETED.
		 */
		if ((cb->ascb_events & AS_CALLBACK_CALLED) != 0) {
			/* leave AS_CALLBACK_CALLED */
			cb->ascb_events &= ~AS_ALL_EVENT;
			rc = AS_CALLBACK_DELETE_DEFERRED;
			cv_broadcast(&as->a_cv);
		} else {
			*prevcb = cb->ascb_next;
			kmem_free(cb, sizeof (struct as_callback));
			rc = AS_CALLBACK_DELETED;
		}
		break;
	}
	mutex_exit(&as->a_contents);
	return (rc);
}
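
/*
 * Illustrative sketch (hypothetical teardown code): a client must not free
 * the callback argument while as_do_callbacks() may still be using the
 * entry, so AS_CALLBACK_DELETE_DEFERRED has to be waited out:
 *
 *	while (as_delete_callback(as, xsp) == AS_CALLBACK_DELETE_DEFERRED)
 *		delay(1);		(entry still owned by as_do_callbacks)
 *	kmem_free(xsp, sizeof (*xsp));	(hypothetical callback argument)
 */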

/*
 * Searches the as callback list for a matching entry.
 * Returns a pointer to the first matching callback, or NULL if
 * nothing is found.
 * This function never sleeps, so it is OK to call it with locks held
 * in addition to the (required) a_contents mutex.
 *
 * See also comment on as_do_callbacks below.
 */
static struct as_callback *
as_find_callback(struct as *as, uint_t events, caddr_t event_addr,
    size_t event_len)
{
	struct as_callback *cb;

	ASSERT(MUTEX_HELD(&as->a_contents));
	for (cb = as->a_callbacks; cb != NULL; cb = cb->ascb_next) {
		/*
		 * If the callback has not already been called, then
		 * check if events or address range pertains.  An event_len
		 * of zero means do an unconditional callback.
		 */
		if (((cb->ascb_events & AS_CALLBACK_CALLED) != 0) ||
		    ((event_len != 0) && (((cb->ascb_events & events) == 0) ||
		    (event_addr + event_len < cb->ascb_saddr) ||
		    (event_addr > (cb->ascb_saddr + cb->ascb_len))))) {
			continue;
		}
		break;
	}
	return (cb);
}

/*
 * Executes a given callback and removes it from the callback list for
 * this address space.
 * This function may sleep so the caller must drop all locks except
 * a_contents before calling this func.
 *
 * See also comments on as_do_callbacks below.
 */
static void
as_execute_callback(struct as *as, struct as_callback *cb,
    uint_t events)
{
	struct as_callback **prevcb;
	void *cb_arg;

	ASSERT(MUTEX_HELD(&as->a_contents) && (cb->ascb_events & events));
	cb->ascb_events |= AS_CALLBACK_CALLED;
	mutex_exit(&as->a_contents);
	(*cb->ascb_func)(as, cb->ascb_arg, events);
	mutex_enter(&as->a_contents);
	/*
	 * The callback function is required to delete the callback
	 * when the callback function determines it is OK for
	 * this thread to continue.  as_delete_callback will clear
	 * the AS_ALL_EVENT in the events field when it is deleted.
	 * If the callback function called as_delete_callback,
	 * events will already be cleared and there will be no blocking.
	 */
	while ((cb->ascb_events & events) != 0) {
		cv_wait(&as->a_cv, &as->a_contents);
	}
	/*
	 * This entry needs to be taken off the list.  Normally, the
	 * callback func itself does that, but unfortunately the list
	 * may have changed while the callback was running because the
	 * a_contents mutex was dropped and someone else other than the
	 * callback func itself could have called as_delete_callback,
	 * so we have to search to find this entry again.  The entry
	 * must have AS_CALLBACK_CALLED, and have the same 'arg'.
	 */
	cb_arg = cb->ascb_arg;
	prevcb = &as->a_callbacks;
	for (cb = as->a_callbacks; cb != NULL;
	    prevcb = &cb->ascb_next, cb = *prevcb) {
		if (((cb->ascb_events & AS_CALLBACK_CALLED) == 0) ||
		    (cb_arg != cb->ascb_arg)) {
			continue;
		}
		*prevcb = cb->ascb_next;
		kmem_free(cb, sizeof (struct as_callback));
		break;
	}
}

/*
 * Check the callback list for a matching event and intersection of
 * address range.  If there is a match invoke the callback.  Skip an entry if:
 *    - a callback is already in progress for this entry (AS_CALLBACK_CALLED)
 *    - the event is not of interest
 *    - the address range is not of interest
 *
 * An event_len of zero indicates a request for an unconditional callback
 * (regardless of event), only the AS_CALLBACK_CALLED is checked.  The
 * a_contents lock must be dropped before a callback, so only one callback
 * can be done before returning.  Return -1 (true) if a callback was
 * executed and removed from the list, else return 0 (false).
 *
 * The logically separate parts, i.e. finding a matching callback and
 * executing a given callback have been separated into two functions
 * so that they can be called with different sets of locks held beyond
 * the always-required a_contents.  as_find_callback does not sleep so
 * it is ok to call it if more locks than a_contents (i.e. the a_lock
 * rwlock) are held.  as_execute_callback on the other hand may sleep
 * so all locks beyond a_contents must be dropped by the caller if one
 * does not want to end up blocked indefinitely.
 */
static int
as_do_callbacks(struct as *as, uint_t events, caddr_t event_addr,
    size_t event_len)
{
	struct as_callback *cb;

	if ((cb = as_find_callback(as, events, event_addr, event_len))) {
		as_execute_callback(as, cb, events);
		return (-1);
	}
	return (0);
}

/*
 * Search for the segment containing addr. If a segment containing addr
 * exists, that segment is returned.  If no such segment exists, and
 * the list spans addresses greater than addr, then the first segment
 * whose base is greater than addr is returned; otherwise, NULL is
 * returned unless tail is true, in which case the last element of the
 * list is returned.
 *
 * a_seglast is used to cache the last found segment for repeated
 * searches to the same addr (which happens frequently).
 */
struct seg *
as_findseg(struct as *as, caddr_t addr, int tail)
{
	struct seg *seg = as->a_seglast;
	avl_index_t where;

	ASSERT(AS_LOCK_HELD(as, &as->a_lock));

	if (seg != NULL &&
	    seg->s_base <= addr &&
	    addr < seg->s_base + seg->s_size)
		return (seg);

	seg = avl_find(&as->a_segtree, &addr, &where);
	if (seg != NULL)
		return (as->a_seglast = seg);

	seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
	if (seg == NULL && tail)
		seg = avl_last(&as->a_segtree);
	return (as->a_seglast = seg);
}

#ifdef VERIFY_SEGLIST
/*
 * verify that the linked list is coherent
 */
static void
as_verify(struct as *as)
{
	struct seg *seg, *seglast, *p, *n;
	uint_t nsegs = 0;

	if (do_as_verify == 0)
		return;

	seglast = as->a_seglast;

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		ASSERT(seg->s_as == as);
		p = AS_SEGPREV(as, seg);
		n = AS_SEGNEXT(as, seg);
		ASSERT(p == NULL || p->s_as == as);
		ASSERT(p == NULL || p->s_base < seg->s_base);
		ASSERT(n == NULL || n->s_base > seg->s_base);
		ASSERT(n != NULL || seg == avl_last(&as->a_segtree));
		if (seg == seglast)
			seglast = NULL;
		nsegs++;
	}
	ASSERT(seglast == NULL);
	ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
}
#endif /* VERIFY_SEGLIST */

/*
 * Add a new segment to the address space. The avl_find()
 * may be expensive so we attempt to use last segment accessed
 * in as_gap() as an insertion point.
 */
int
as_addseg(struct as *as, struct seg *newseg)
{
	struct seg *seg;
	caddr_t addr;
	caddr_t eaddr;
	avl_index_t where;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (as->a_lastgaphl != NULL) {
		struct seg *hseg = NULL;
		struct seg *lseg = NULL;

		if (as->a_lastgaphl->s_base > newseg->s_base) {
			hseg = as->a_lastgaphl;
			lseg = AVL_PREV(&as->a_segtree, hseg);
		} else {
			lseg = as->a_lastgaphl;
			hseg = AVL_NEXT(&as->a_segtree, lseg);
		}

		if (hseg && lseg && lseg->s_base < newseg->s_base &&
		    hseg->s_base > newseg->s_base) {
			avl_insert_here(&as->a_segtree, newseg, lseg,
			    AVL_AFTER);
			as->a_lastgaphl = NULL;
			as->a_seglast = newseg;
			return (0);
		}
		as->a_lastgaphl = NULL;
	}

	addr = newseg->s_base;
	eaddr = addr + newseg->s_size;
again:

	seg = avl_find(&as->a_segtree, &addr, &where);

	if (seg == NULL)
		seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);

	if (seg == NULL)
		seg = avl_last(&as->a_segtree);

	if (seg != NULL) {
		caddr_t base = seg->s_base;

		/*
		 * If top of seg is below the requested address, then
		 * the insertion point is at the end of the linked list,
		 * and seg points to the tail of the list.  Otherwise,
		 * the insertion point is immediately before seg.
		 */
		if (base + seg->s_size > addr) {
			if (addr >= base || eaddr > base) {
#ifdef __sparc
				extern struct seg_ops segnf_ops;

				/*
				 * no-fault segs must disappear if overlaid.
				 * XXX need new segment type so
				 * we don't have to check s_ops
				 */
				if (seg->s_ops == &segnf_ops) {
					seg_unmap(seg);
					goto again;
				}
#endif
				return (-1);	/* overlapping segment */
			}
		}
	}
	as->a_seglast = newseg;
	avl_insert(&as->a_segtree, newseg, where);

#ifdef VERIFY_SEGLIST
	as_verify(as);
#endif
	return (0);
}

struct seg *
as_removeseg(struct as *as, struct seg *seg)
{
	avl_tree_t *t;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (seg == NULL)
		return (NULL);

	t = &as->a_segtree;
	if (as->a_seglast == seg)
		as->a_seglast = NULL;
	as->a_lastgaphl = NULL;

	/*
	 * if this segment is at an address higher than
	 * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
	 */
	if (as->a_lastgap &&
	    (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
		as->a_lastgap = AVL_NEXT(t, seg);

	/*
	 * remove the segment from the seg tree
	 */
	avl_remove(t, seg);

#ifdef VERIFY_SEGLIST
	as_verify(as);
#endif
	return (seg);
}

/*
 * Find a segment containing addr.
 */
struct seg *
as_segat(struct as *as, caddr_t addr)
{
	struct seg *seg = as->a_seglast;

	ASSERT(AS_LOCK_HELD(as, &as->a_lock));

	if (seg != NULL && seg->s_base <= addr &&
	    addr < seg->s_base + seg->s_size)
		return (seg);

	seg = avl_find(&as->a_segtree, &addr, NULL);
	return (seg);
}

/*
 * Serialize all searches for holes in an address space to
 * prevent two or more threads from allocating the same virtual
 * address range.  The address space must not be "read/write"
 * locked by the caller since we may block.
 */
void
as_rangelock(struct as *as)
{
	mutex_enter(&as->a_contents);
	while (AS_ISCLAIMGAP(as))
		cv_wait(&as->a_cv, &as->a_contents);
	AS_SETCLAIMGAP(as);
	mutex_exit(&as->a_contents);
}

/*
 * Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads.
 */
void
as_rangeunlock(struct as *as)
{
	mutex_enter(&as->a_contents);
	AS_CLRCLAIMGAP(as);
	cv_signal(&as->a_cv);
	mutex_exit(&as->a_contents);
}

/*
 * Compare segments (or just an address) by segment address range.
 */
static int
as_segcompar(const void *x, const void *y)
{
	struct seg *a = (struct seg *)x;
	struct seg *b = (struct seg *)y;

	if (a->s_base < b->s_base)
		return (-1);
	if (a->s_base >= b->s_base + b->s_size)
		return (1);
	return (0);
}


void
as_avlinit(struct as *as)
{
	avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg),
	    offsetof(struct seg, s_tree));
	avl_create(&as->a_wpage, wp_compare, sizeof (struct watched_page),
	    offsetof(struct watched_page, wp_link));
}

/*ARGSUSED*/
static int
as_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct as *as = buf;

	mutex_init(&as->a_contents, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&as->a_cv, NULL, CV_DEFAULT, NULL);
	rw_init(&as->a_lock, NULL, RW_DEFAULT, NULL);
	as_avlinit(as);
	return (0);
}

/*ARGSUSED1*/
static void
as_destructor(void *buf, void *cdrarg)
{
	struct as *as = buf;

	avl_destroy(&as->a_segtree);
	mutex_destroy(&as->a_contents);
	cv_destroy(&as->a_cv);
	rw_destroy(&as->a_lock);
}

void
as_init(void)
{
	as_cache = kmem_cache_create("as_cache", sizeof (struct as), 0,
	    as_constructor, as_destructor, NULL, NULL, NULL, 0);
}

/*
 * Allocate and initialize an address space data structure.
 * We call hat_alloc to allow any machine dependent
 * information in the hat structure to be initialized.
 */
struct as *
as_alloc(void)
{
	struct as *as;

	as = kmem_cache_alloc(as_cache, KM_SLEEP);

	as->a_flags = 0;
	as->a_vbits = 0;
	as->a_hrm = NULL;
	as->a_seglast = NULL;
	as->a_size = 0;
	as->a_updatedir = 0;
	gethrestime(&as->a_updatetime);
	as->a_objectdir = NULL;
	as->a_sizedir = 0;
	as->a_userlimit = (caddr_t)USERLIMIT;
	as->a_lastgap = NULL;
	as->a_lastgaphl = NULL;
	as->a_callbacks = NULL;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	as->a_hat = hat_alloc(as);	/* create hat for default system mmu */
	AS_LOCK_EXIT(as, &as->a_lock);

	as->a_xhat = NULL;

	return (as);
}
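
/*
 * Illustrative sketch (hypothetical mapping path): choosing an address and
 * creating the mapping are separate steps, so callers bracket them with the
 * range lock to keep another thread from claiming the same hole:
 *
 *	as_rangelock(as);
 *	base = ...lowest acceptable address...;
 *	len = as->a_userlimit - base;		(search range, hypothetical)
 *	if (as_gap(as, minlen, &base, &len, AH_LO, NULL) == 0)
 *		error = as_map(as, base, minlen, segvn_create, &crargs);
 *	else
 *		error = ENOMEM;
 *	as_rangeunlock(as);
 */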

/*
 * Free an address space data structure.
 * Need to free the hat first and then
 * all the segments on this as and finally
 * the space for the as struct itself.
 */
void
as_free(struct as *as)
{
	struct hat *hat = as->a_hat;
	struct seg *seg, *next;
	int called = 0;

top:
	/*
	 * Invoke ALL callbacks. as_do_callbacks will do one callback
	 * per call, and not return (-1) until the callback has completed.
	 * When as_do_callbacks returns zero, all callbacks have completed.
	 */
	mutex_enter(&as->a_contents);
	while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
		;

	/* This will prevent new XHATs from attaching to as */
	if (!called)
		AS_SETBUSY(as);
	mutex_exit(&as->a_contents);
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if (!called) {
		called = 1;
		hat_free_start(hat);
		if (as->a_xhat != NULL)
			xhat_free_start_all(as);
	}
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
		int err;

		next = AS_SEGNEXT(as, seg);
		err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
		if (err == EAGAIN) {
			mutex_enter(&as->a_contents);
			if (as->a_callbacks) {
				AS_LOCK_EXIT(as, &as->a_lock);
			} else {
				/*
				 * Memory is currently locked. Wait for a
				 * cv_signal that it has been unlocked, then
				 * try the operation again.
				 */
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else {
			/*
			 * We do not expect any other error return at this
			 * time. This is similar to an ASSERT in seg_unmap()
			 */
			ASSERT(err == 0);
		}
	}
	hat_free_end(hat);
	if (as->a_xhat != NULL)
		xhat_free_end_all(as);
	AS_LOCK_EXIT(as, &as->a_lock);

	/* /proc stuff */
	ASSERT(avl_numnodes(&as->a_wpage) == 0);
	if (as->a_objectdir) {
		kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
		as->a_objectdir = NULL;
		as->a_sizedir = 0;
	}

	/*
	 * Free the struct as back to kmem.  Assert it has no segments.
	 */
	ASSERT(avl_numnodes(&as->a_segtree) == 0);
	kmem_cache_free(as_cache, as);
}

int
as_dup(struct as *as, struct as **outas)
{
	struct as *newas;
	struct seg *seg, *newseg;
	int error;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	as_clearwatch(as);
	newas = as_alloc();
	newas->a_userlimit = as->a_userlimit;
	AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);

	/* This will prevent new XHATs from attaching */
	mutex_enter(&as->a_contents);
	AS_SETBUSY(as);
	mutex_exit(&as->a_contents);
	mutex_enter(&newas->a_contents);
	AS_SETBUSY(newas);
	mutex_exit(&newas->a_contents);

	(void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {

		if (seg->s_flags & S_PURGE)
			continue;

		newseg = seg_alloc(newas, seg->s_base, seg->s_size);
		if (newseg == NULL) {
			AS_LOCK_EXIT(newas, &newas->a_lock);
			as_setwatch(as);
			mutex_enter(&as->a_contents);
			AS_CLRBUSY(as);
			mutex_exit(&as->a_contents);
			AS_LOCK_EXIT(as, &as->a_lock);
			as_free(newas);
			return (-1);
		}
		if ((error = SEGOP_DUP(seg, newseg)) != 0) {
			/*
			 * We call seg_free() on the new seg
			 * because the segment is not set up
			 * completely; i.e. it has no ops.
			 */
			as_setwatch(as);
			mutex_enter(&as->a_contents);
			AS_CLRBUSY(as);
			mutex_exit(&as->a_contents);
			AS_LOCK_EXIT(as, &as->a_lock);
			seg_free(newseg);
			AS_LOCK_EXIT(newas, &newas->a_lock);
			as_free(newas);
			return (error);
		}
		newas->a_size += seg->s_size;
	}

	error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
	if (as->a_xhat != NULL)
		error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL);

	mutex_enter(&newas->a_contents);
	AS_CLRBUSY(newas);
	mutex_exit(&newas->a_contents);
	AS_LOCK_EXIT(newas, &newas->a_lock);

	as_setwatch(as);
	mutex_enter(&as->a_contents);
	AS_CLRBUSY(as);
	mutex_exit(&as->a_contents);
	AS_LOCK_EXIT(as, &as->a_lock);
	if (error != 0) {
		as_free(newas);
		return (error);
	}
	*outas = newas;
	return (0);
}

/*
 * Handle a ``fault'' at addr for size bytes.
 */
faultcode_t
as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
    enum fault_type type, enum seg_rw rw)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	size_t ssize;
	faultcode_t res = 0;
	caddr_t addrsav;
	struct seg *segsav;
	int as_lock_held;
	klwp_t *lwp = ttolwp(curthread);
	int is_xhat = 0;
	int holding_wpage = 0;
	extern struct seg_ops segdev_ops;


	if (as->a_hat != hat) {
		/* This must be an XHAT then */
		is_xhat = 1;

		if ((type != F_INVAL) || (as == &kas))
			return (FC_NOSUPPORT);
	}

retry:
	if (!is_xhat) {
		/*
		 * Indicate that the lwp is not to be stopped while waiting
		 * for a pagefault.  This is to avoid deadlock while debugging
		 * a process via /proc over NFS (in particular).
		 */
		if (lwp != NULL)
			lwp->lwp_nostop++;

		/*
		 * The same length must be used when we softlock and
		 * softunlock.  We don't support softunlocking lengths less
		 * than the original length when there is largepage support.
		 * See seg_dev.c for more comments.
		 */
		switch (type) {

		case F_SOFTLOCK:
			CPU_STATS_ADD_K(vm, softlock, 1);
			break;

		case F_SOFTUNLOCK:
			break;

		case F_PROT:
			CPU_STATS_ADD_K(vm, prot_fault, 1);
			break;

		case F_INVAL:
			CPU_STATS_ENTER_K();
			CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
			if (as == &kas)
				CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
			CPU_STATS_EXIT_K();
			break;
		}
	}

	/* Kernel probe */
	TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
	    tnf_opaque, address, addr,
	    tnf_fault_type, fault_type, type,
	    tnf_seg_access, access, rw);

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	/*
	 * XXX -- Don't grab the as lock for segkmap. We should grab it for
	 * correctness, but then we could be stuck holding this lock for
	 * a LONG time if the fault needs to be resolved on a slow
	 * filesystem, and then no-one will be able to exec new commands,
	 * as exec'ing requires the write lock on the as.
	 */
	if (as == &kas && segkmap && segkmap->s_base <= raddr &&
	    raddr + size < segkmap->s_base + segkmap->s_size) {
		/*
		 * if (as==&kas), this can't be XHAT: we've already returned
		 * FC_NOSUPPORT.
		 */
		seg = segkmap;
		as_lock_held = 0;
	} else {
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
		if (is_xhat && avl_numnodes(&as->a_wpage) != 0) {
			/*
			 * Grab and hold the writers' lock on the as
			 * if the fault is to a watched page.
			 * This will keep CPUs from "peeking" at the
			 * address range while we're temporarily boosting
			 * the permissions for the XHAT device to
			 * resolve the fault in the segment layer.
			 *
			 * We could check whether faulted address
			 * is within a watched page and only then grab
			 * the writer lock, but this is simpler.
			 */
			AS_LOCK_EXIT(as, &as->a_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
		}

		seg = as_segat(as, raddr);
		if (seg == NULL) {
			AS_LOCK_EXIT(as, &as->a_lock);
			if ((lwp != NULL) && (!is_xhat))
				lwp->lwp_nostop--;
			return (FC_NOMAP);
		}

		as_lock_held = 1;
	}

	addrsav = raddr;
	segsav = seg;

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		if (raddr + rsize > seg->s_base + seg->s_size)
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		if (!is_xhat || (seg->s_ops != &segdev_ops)) {

			if (is_xhat && avl_numnodes(&as->a_wpage) != 0 &&
			    pr_is_watchpage_as(raddr, rw, as)) {
				/*
				 * Handle watch pages.  If we're faulting on a
				 * watched page from an X-hat, we have to
				 * restore the original permissions while we
				 * handle the fault.
				 */
				as_clearwatch(as);
				holding_wpage = 1;
			}

			res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);

			/* Restore watchpoints */
			if (holding_wpage) {
				as_setwatch(as);
				holding_wpage = 0;
			}

			if (res != 0)
				break;
		} else {
			/* XHAT does not support seg_dev */
			res = FC_NOSUPPORT;
			break;
		}
	}

	/*
	 * If we were SOFTLOCKing and encountered a failure,
	 * we must SOFTUNLOCK the range we already did. (Maybe we
	 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
	 * right here...)
	 */
	if (res != 0 && type == F_SOFTLOCK) {
		for (seg = segsav; addrsav < raddr; addrsav += ssize) {
			if (addrsav >= seg->s_base + seg->s_size)
				seg = AS_SEGNEXT(as, seg);
			ASSERT(seg != NULL);
			/*
			 * Now call the fault routine again to perform the
			 * unlock using S_OTHER instead of the rw variable
			 * since we never got a chance to touch the pages.
			 */
			if (raddr > seg->s_base + seg->s_size)
				ssize = seg->s_base + seg->s_size - addrsav;
			else
				ssize = raddr - addrsav;
			(void) SEGOP_FAULT(hat, seg, addrsav, ssize,
			    F_SOFTUNLOCK, S_OTHER);
		}
	}
	if (as_lock_held)
		AS_LOCK_EXIT(as, &as->a_lock);
	if ((lwp != NULL) && (!is_xhat))
		lwp->lwp_nostop--;

	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * it means that we should retry the fault.  Also wait
	 * a bit to let the deadlock-causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}



/*
 * Asynchronous ``fault'' at addr for size bytes.
 */
faultcode_t
as_faulta(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	faultcode_t res = 0;
	klwp_t *lwp = ttolwp(curthread);

retry:
	/*
	 * Indicate that the lwp is not to be stopped while waiting
	 * for a pagefault.  This is to avoid deadlock while debugging
	 * a process via /proc over NFS (in particular).
	 */
	if (lwp != NULL)
		lwp->lwp_nostop++;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		if (lwp != NULL)
			lwp->lwp_nostop--;
		return (FC_NOMAP);
	}

	for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		res = SEGOP_FAULTA(seg, raddr);
		if (res != 0)
			break;
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	if (lwp != NULL)
		lwp->lwp_nostop--;
	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * it means that we should retry the fault.  Also wait
	 * a bit to let the deadlock-causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}

/*
 * Set the virtual mapping for the interval from [addr : addr + size)
 * in address space `as' to have the specified protection.
 * It is ok for the range to cross over several segments,
 * as long as they are contiguous.
 */
int
as_setprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	struct as_callback *cb;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0, writer = 0;
	caddr_t saveraddr;
	size_t saversize;

setprot_top:
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	saveraddr = raddr;
	saversize = rsize;

	/*
	 * Normally we only lock the as as a reader. But
	 * if due to setprot the segment driver needs to split
	 * a segment it will return IE_RETRY. Therefore we re-acquire
	 * the as lock as a writer so the segment driver can change
	 * the seg list. Also the segment driver will return IE_RETRY
	 * after it has changed the segment list so we therefore keep
	 * locking as a writer. Since these operations should be rare,
	 * we only want to lock as a writer when necessary.
	 */
	if (writer || avl_numnodes(&as->a_wpage) != 0) {
		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	} else {
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	}

	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;
		error = SEGOP_SETPROT(seg, raddr, ssize, prot);

		if (error == IE_NOMEM) {
			error = EAGAIN;
			break;
		}

		if (error == IE_RETRY) {
			AS_LOCK_EXIT(as, &as->a_lock);
			writer = 1;
			goto setprot_top;
		}

		if (error == EAGAIN) {
			/*
			 * Make sure we have a_lock as writer.
			 */
			if (writer == 0) {
				AS_LOCK_EXIT(as, &as->a_lock);
				writer = 1;
				goto setprot_top;
			}

			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range
			 *	pertains, the callback is invoked (unless it
			 *	already is in progress). The a_contents lock
			 *	must be dropped before the callback, so only
			 *	one callback can be done at a time. Go to the
			 *	top and do more until zero is returned. If
			 *	zero is returned, either there were no
			 *	callbacks for this event or they were already
			 *	in progress.
			 */
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_SETPROT_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as, &as->a_lock);
				as_execute_callback(as, cb, AS_SETPROT_EVENT);
			} else {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			}
			mutex_exit(&as->a_contents);
			goto setprot_top;
		} else if (error != 0)
			break;
	}
	if (error != 0) {
		as_setwatch(as);
	} else {
		as_setwatchprot(as, saveraddr, saversize, prot);
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	return (error);
}

/*
 * Check to make sure that the interval [addr, addr + size)
 * in address space `as' has at least the specified protection.
 * It is ok for the range to cross over several segments, as long
 * as they are contiguous.
 */
int
as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	/*
	 * This is ugly as sin...
	 * Normally, we only acquire the address space readers lock.
	 * However, if the address space has watchpoints present,
	 * we must acquire the writer lock on the address space for
	 * the benefit of as_clearwatchprot() and as_setwatchprot().
	 */
	if (avl_numnodes(&as->a_wpage) != 0)
		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	else
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
		if (error != 0)
			break;
	}
	as_setwatch(as);
	AS_LOCK_EXIT(as, &as->a_lock);
	return (error);
}
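
/*
 * Illustrative sketch (hypothetical mprotect-style caller): both routines
 * above take an arbitrary range, round it to pages and walk the contiguous
 * segments covering it:
 *
 *	error = as_setprot(p->p_as, addr, len, PROT_READ | PROT_WRITE);
 *	(ENOMEM indicates a hole in [addr, addr + len); EAGAIN indicates a
 *	resource shortage; other errors come from the segment drivers.)
 */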

int
as_unmap(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg, *seg_next;
	struct as_callback *cb;
	caddr_t raddr, eaddr;
	size_t ssize;
	int err;

top:
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
	    (uintptr_t)PAGEMASK);

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	/*
	 * Use as_findseg to find the first segment in the range, then
	 * step through the segments in order, following s_next.
	 */
	as_clearwatchprot(as, raddr, eaddr - raddr);

	for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
		if (eaddr <= seg->s_base)
			break;		/* eaddr was in a gap; all done */

		/* this is implied by the test above */
		ASSERT(raddr < eaddr);

		if (raddr < seg->s_base)
			raddr = seg->s_base;	/* raddr was in a gap */

		if (eaddr > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = eaddr - raddr;

		/*
		 * Save next segment pointer since seg can be
		 * destroyed during the segment unmap operation.
		 */
		seg_next = AS_SEGNEXT(as, seg);

		err = SEGOP_UNMAP(seg, raddr, ssize);
		if (err == EAGAIN) {
			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range
			 *	pertains, the callback is invoked (unless it
			 *	already is in progress). The a_contents lock
			 *	must be dropped before the callback, so only
			 *	one callback can be done at a time. Go to the
			 *	top and do more until zero is returned. If
			 *	zero is returned, either there were no
			 *	callbacks for this event or they were already
			 *	in progress.
			 */
			as_setwatch(as);
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_UNMAP_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as, &as->a_lock);
				as_execute_callback(as, cb, AS_UNMAP_EVENT);
			} else {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else if (err == IE_RETRY) {
			as_setwatch(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			goto top;
		} else if (err) {
			as_setwatch(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			return (-1);
		}

		as->a_size -= ssize;
		raddr += ssize;
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	return (0);
}

static int
as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t szc;
	uint_t nszc;
	int error;
	caddr_t a;
	caddr_t eaddr;
	size_t segsize;
	struct seg *seg;
	size_t pgsz;
	int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
	uint_t save_szcvec;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
	if (!do_off) {
		vn_a->offset = 0;
	}

	if (szcvec <= 1) {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			return (ENOMEM);
		}
		vn_a->szc = 0;
		error = (*crfp)(seg, vn_a);
		if (error != 0) {
			seg_free(seg);
		} else {
			as->a_size += size;
		}
		return (error);
	}

	eaddr = addr + size;
	save_szcvec = szcvec;
	szcvec >>= 1;
	szc = 0;
	nszc = 0;
	while (szcvec) {
		if ((szcvec & 0x1) == 0) {
			nszc++;
			szcvec >>= 1;
			continue;
		}
		nszc++;
		pgsz = page_get_pagesize(nszc);
		a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		if (a != addr) {
			ASSERT(a < eaddr);
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			as->a_size += segsize;
			*segcreated = 1;
			if (do_off) {
				vn_a->offset += segsize;
			}
			addr = a;
		}
		szc = nszc;
		szcvec >>= 1;
	}

	ASSERT(addr < eaddr);
	szcvec = save_szcvec | 1; /* add 8K pages */
	while (szcvec) {
		a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		ASSERT(a >= addr);
		if (a != addr) {
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			as->a_size += segsize;
			*segcreated = 1;
			if (do_off) {
				vn_a->offset += segsize;
			}
			addr = a;
		}
		szcvec &= ~(1 << szc);
		if (szcvec) {
			szc = highbit(szcvec) - 1;
			pgsz = page_get_pagesize(szc);
		}
	}
	ASSERT(addr == eaddr);

	return (0);
}
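
/*
 * Worked example for as_map_segvn_segs() (assuming sfmmu-style page sizes,
 * szc 0..3 == 8K/64K/512K/4M, and szcvec == 0xf): for addr == 0x70000 and
 * eaddr == 0xc90000 the first loop creates leading segments with growing
 * page sizes until addr is 4M aligned, and the second loop creates trailing
 * segments with shrinking page sizes until eaddr is reached:
 *
 *	[0x070000, 0x080000)  szc 1 (64K)
 *	[0x080000, 0x400000)  szc 2 (512K)
 *	[0x400000, 0xc00000)  szc 3 (4M)
 *	[0xc00000, 0xc80000)  szc 2 (512K)
 *	[0xc80000, 0xc90000)  szc 1 (64K)
 */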

static int
as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA);
	int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
	uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
	    type, 0);
	int error;
	struct seg *seg;
	struct vattr va;
	u_offset_t eoff;
	size_t save_size = 0;
	extern size_t textrepl_size_thresh;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp != NULL);
	ASSERT(vn_a->amp == NULL);

again:
	if (szcvec <= 1) {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			return (ENOMEM);
		}
		vn_a->szc = 0;
		error = (*crfp)(seg, vn_a);
		if (error != 0) {
			seg_free(seg);
		} else {
			as->a_size += size;
		}
		return (error);
	}

	va.va_mask = AT_SIZE;
	if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred, NULL) != 0) {
		szcvec = 0;
		goto again;
	}
	eoff = vn_a->offset & PAGEMASK;
	if (eoff >= va.va_size) {
		szcvec = 0;
		goto again;
	}
	eoff += size;
	if (btopr(va.va_size) < btopr(eoff)) {
		save_size = size;
		size = va.va_size - (vn_a->offset & PAGEMASK);
		size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
		szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
		    type, 0);
		if (szcvec <= 1) {
			size = save_size;
			goto again;
		}
	}

	if (size > textrepl_size_thresh) {
		vn_a->flags |= _MAP_TEXTREPL;
	}
	error = as_map_segvn_segs(as, addr, size, szcvec, crfp, vn_a,
	    segcreated);
	if (error != 0) {
		return (error);
	}
	if (save_size) {
		addr += size;
		size = save_size - size;
		szcvec = 0;
		goto again;
	}
	return (0);
}

/*
 * as_map_ansegs: shared or private anonymous memory.  Note that the flags
 * passed to map_pgszcvec cannot be MAP_INITDATA, for anon.
 */
static int
as_map_ansegs(struct as *as, caddr_t addr, size_t size,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t szcvec;
	uchar_t type;

	ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE);
	if (vn_a->type == MAP_SHARED) {
		type = MAPPGSZC_SHM;
	} else if (vn_a->type == MAP_PRIVATE) {
		if (vn_a->szc == AS_MAP_HEAP) {
			type = MAPPGSZC_HEAP;
		} else if (vn_a->szc == AS_MAP_STACK) {
			type = MAPPGSZC_STACK;
		} else {
			type = MAPPGSZC_PRIVM;
		}
	}
	szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ?
	    (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE),
	    (vn_a->flags & MAP_TEXT), type, 0);
	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp == NULL);

	return (as_map_segvn_segs(as, addr, size, szcvec,
	    crfp, vn_a, segcreated));
}

int
as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
{
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	return (as_map_locked(as, addr, size, crfp, argsp));
}

int
as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
    void *argsp)
{
	struct seg *seg = NULL;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error;
	int unmap = 0;
	struct proc *p = curproc;
	struct segvn_crargs crargs;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	/*
	 * check for wrap around
	 */
	if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
		AS_LOCK_EXIT(as, &as->a_lock);

		(void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
		crargs = *(struct segvn_crargs *)argsp;
		error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
		if (error != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			if (unmap) {
				(void) as_unmap(as, addr, size);
			}
			return (error);
		}
	} else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
		crargs = *(struct segvn_crargs *)argsp;
		error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
		if (error != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			if (unmap) {
				(void) as_unmap(as, addr, size);
			}
			return (error);
		}
	} else {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (ENOMEM);
		}

		error = (*crfp)(seg, argsp);
		if (error != 0) {
			seg_free(seg);
			AS_LOCK_EXIT(as, &as->a_lock);
			return (error);
		}
		/*
		 * Add size now so as_unmap will work if as_ctl fails.
		 */
		as->a_size += rsize;
	}

	as_setwatch(as);

	/*
	 * If the address space is locked,
	 * establish memory locks for the new segment.
	 */
	mutex_enter(&as->a_contents);
	if (AS_ISPGLCK(as)) {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as, &as->a_lock);
		error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
		if (error != 0)
			(void) as_unmap(as, addr, size);
	} else {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as, &as->a_lock);
	}
	return (error);
}


/*
 * Delete all segments in the address space marked with S_PURGE.
 * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
 * These segments are deleted as a first step before calls to as_gap(), so
 * that they don't affect mmap() or shmat().
 */
void
as_purge(struct as *as)
{
	struct seg *seg;
	struct seg *next_seg;

	/*
	 * the setting of NEEDSPURGE is protected by as_rangelock(), so
	 * no need to grab a_contents mutex for this check
	 */
	if ((as->a_flags & AS_NEEDSPURGE) == 0)
		return;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	next_seg = NULL;
	seg = AS_SEGFIRST(as);
	while (seg != NULL) {
		next_seg = AS_SEGNEXT(as, seg);
		if (seg->s_flags & S_PURGE)
			SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
		seg = next_seg;
	}
	AS_LOCK_EXIT(as, &as->a_lock);

	mutex_enter(&as->a_contents);
	as->a_flags &= ~AS_NEEDSPURGE;
	mutex_exit(&as->a_contents);
}

/*
 * Find a hole within [*basep, *basep + *lenp), which contains a mappable
 * range of addresses at least "minlen" long, where the base of the range is
 * at "off" phase from an "align" boundary and there is space for a
 * "redzone"-sized redzone on either side of the range.  Thus,
 * if align was 4M and off was 16k, the user wants a hole which will start
 * 16k into a 4M page.
 *
 * If flags specifies AH_HI, the hole will have the highest possible address
 * in the range.  We use the as->a_lastgap field to figure out where to
 * start looking for a gap.
 *
 * Otherwise, the gap will have the lowest possible address.
 *
 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
 *
 * If an adequate hole is found, *basep and *lenp are set to reflect the part of
 * the hole that is within range, and 0 is returned.  On failure, -1 is returned.
 *
 * NOTE: This routine is not correct when base+len overflows caddr_t.
 */
int
as_gap_aligned(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp,
    uint_t flags, caddr_t addr, size_t align, size_t redzone, size_t off)
{
	caddr_t lobound = *basep;
	caddr_t hibound = lobound + *lenp;
	struct seg *lseg, *hseg;
	caddr_t lo, hi;
	int forward;
	caddr_t save_base;
	size_t save_len;

	save_base = *basep;
	save_len = *lenp;
	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	if (AS_SEGFIRST(as) == NULL) {
		if (valid_va_range_aligned(basep, lenp, minlen, flags & AH_DIR,
		    align, redzone, off)) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (0);
		} else {
			AS_LOCK_EXIT(as, &as->a_lock);
			*basep = save_base;
			*lenp = save_len;
			return (-1);
		}
	}

	/*
	 * Set up to iterate over all the inter-segment holes in the given
	 * direction.  lseg is NULL for the lowest-addressed hole and hseg is
	 * NULL for the highest-addressed hole.  If moving backwards, we reset
	 * hseg to denote the highest-addressed segment.
	 */
	forward = (flags & AH_DIR) == AH_LO;
	if (forward) {
		hseg = as_findseg(as, lobound, 1);
		lseg = AS_SEGPREV(as, hseg);
	} else {

		/*
		 * If allocating at least as much as the last allocation,
		 * use a_lastgap's base as a better estimate of hibound.
		 */
		if (as->a_lastgap &&
		    minlen >= as->a_lastgap->s_size &&
		    hibound >= as->a_lastgap->s_base)
			hibound = as->a_lastgap->s_base;

		hseg = as_findseg(as, hibound, 1);
		if (hseg->s_base + hseg->s_size < hibound) {
			lseg = hseg;
			hseg = NULL;
		} else {
			lseg = AS_SEGPREV(as, hseg);
		}
	}

	for (;;) {
		/*
		 * Set lo and hi to the hole's boundaries.  (We should really
		 * use MAXADDR in place of hibound in the expression below,
		 * but can't express it easily; using hibound in its place is
		 * harmless.)
		 */
		lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
		hi = (hseg == NULL) ? hibound : hseg->s_base;
		/*
		 * If the iteration has moved past the interval from lobound
		 * to hibound it's pointless to continue.
		 */
		if ((forward && lo > hibound) || (!forward && hi < lobound))
			break;
		else if (lo > hibound || hi < lobound)
			goto cont;
		/*
		 * Candidate hole lies at least partially within the allowable
		 * range.  Restrict it to fall completely within that range,
		 * i.e., to [max(lo, lobound), min(hi, hibound)].
		 */
		if (lo < lobound)
			lo = lobound;
		if (hi > hibound)
			hi = hibound;
		/*
		 * Verify that the candidate hole is big enough and meets
		 * hardware constraints.
		 */
		*basep = lo;
		*lenp = hi - lo;
		if (valid_va_range_aligned(basep, lenp, minlen,
		    forward ? AH_LO : AH_HI, align, redzone, off) &&
		    ((flags & AH_CONTAIN) == 0 ||
		    (*basep <= addr && *basep + *lenp > addr))) {
			if (!forward)
				as->a_lastgap = hseg;
			if (hseg != NULL)
				as->a_lastgaphl = hseg;
			else
				as->a_lastgaphl = lseg;
			AS_LOCK_EXIT(as, &as->a_lock);
			return (0);
		}
	cont:
		/*
		 * Move to the next hole.
		 */
		if (forward) {
			lseg = hseg;
			if (lseg == NULL)
				break;
			hseg = AS_SEGNEXT(as, hseg);
		} else {
			hseg = lseg;
			if (hseg == NULL)
				break;
			lseg = AS_SEGPREV(as, lseg);
		}
	}
	*basep = save_base;
	*lenp = save_len;
	AS_LOCK_EXIT(as, &as->a_lock);
	return (-1);
}

/*
 * Find a hole of at least size minlen within [*basep, *basep + *lenp).
 *
 * If flags specifies AH_HI, the hole will have the highest possible address
 * in the range.  We use the as->a_lastgap field to figure out where to
 * start looking for a gap.
 *
 * Otherwise, the gap will have the lowest possible address.
 *
 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
 *
 * If an adequate hole is found, base and len are set to reflect the part of
 * the hole that is within range, and 0 is returned, otherwise,
 * -1 is returned.
 *
 * NOTE: This routine is not correct when base+len overflows caddr_t.
 */
int
as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
    caddr_t addr)
{

	return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0));
}

/*
 * Return the next range within [base, base + len) that is backed
 * with "real memory".  Skip holes and non-seg_vn segments.
 * We're lazy and only return one segment at a time.
1992  */
1993 int
1994 as_memory(struct as *as, caddr_t *basep, size_t *lenp)
1995 {
1996 	extern struct seg_ops segspt_shmops;	/* needs a header file */
1997 	struct seg *seg;
1998 	caddr_t addr, eaddr;
1999 	caddr_t segend;
2000 
2001 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2002 
2003 	addr = *basep;
2004 	eaddr = addr + *lenp;
2005 
2006 	seg = as_findseg(as, addr, 0);
2007 	if (seg != NULL)
2008 		addr = MAX(seg->s_base, addr);
2009 
2010 	for (;;) {
2011 		if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) {
2012 			AS_LOCK_EXIT(as, &as->a_lock);
2013 			return (EINVAL);
2014 		}
2015 
2016 		if (seg->s_ops == &segvn_ops) {
2017 			segend = seg->s_base + seg->s_size;
2018 			break;
2019 		}
2020 
2021 		/*
2022 		 * We do ISM by looking into the private data
2023 		 * to determine the real size of the segment.
2024 		 */
2025 		if (seg->s_ops == &segspt_shmops) {
2026 			segend = seg->s_base + spt_realsize(seg);
2027 			if (addr < segend)
2028 				break;
2029 		}
2030 
2031 		seg = AS_SEGNEXT(as, seg);
2032 
2033 		if (seg != NULL)
2034 			addr = seg->s_base;
2035 	}
2036 
2037 	*basep = addr;
2038 
2039 	if (segend > eaddr)
2040 		*lenp = eaddr - addr;
2041 	else
2042 		*lenp = segend - addr;
2043 
2044 	AS_LOCK_EXIT(as, &as->a_lock);
2045 	return (0);
2046 }
2047 
2048 /*
2049  * Swap the pages associated with the address space as out to
2050  * secondary storage, returning the number of bytes actually
2051  * swapped.
2052  *
2053  * The value returned is intended to correlate well with the process's
2054  * memory requirements. Its usefulness for this purpose depends on
2055  * how well the segment-level routines do at returning accurate
2056  * information.
2057  */
2058 size_t
2059 as_swapout(struct as *as)
2060 {
2061 	struct seg *seg;
2062 	size_t swpcnt = 0;
2063 
2064 	/*
2065 	 * Kernel-only processes have given up their address
2066 	 * spaces. Of course, we shouldn't be attempting to
2067 	 * swap out such processes in the first place...
2068 	 */
2069 	if (as == NULL)
2070 		return (0);
2071 
2072 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2073 
2074 	/* Prevent XHATs from attaching */
2075 	mutex_enter(&as->a_contents);
2076 	AS_SETBUSY(as);
2077 	mutex_exit(&as->a_contents);
2078 
2079 
2080 	/*
2081 	 * Free all mapping resources associated with the address
2082 	 * space. The segment-level swapout routines capitalize
2083 	 * on this unmapping by scavenging pages that have become
2084 	 * unmapped here.
2085 	 */
2086 	hat_swapout(as->a_hat);
2087 	if (as->a_xhat != NULL)
2088 		xhat_swapout_all(as);
2089 
2090 	mutex_enter(&as->a_contents);
2091 	AS_CLRBUSY(as);
2092 	mutex_exit(&as->a_contents);
2093 
2094 	/*
2095 	 * Call the swapout routines of all segments in the address
2096 	 * space to do the actual work, accumulating the amount of
2097 	 * space reclaimed.
2098 	 */
2099 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2100 		struct seg_ops *ov = seg->s_ops;
2101 
2102 		/*
2103 		 * We have to check to see if the seg has
2104 		 * an ops vector because the seg may have
2105 		 * been in the middle of being set up when
2106 		 * the process was picked for swapout.
2107 		 */
2108 		if ((ov != NULL) && (ov->swapout != NULL))
2109 			swpcnt += SEGOP_SWAPOUT(seg);
2110 	}
2111 	AS_LOCK_EXIT(as, &as->a_lock);
2112 	return (swpcnt);
2113 }
2114 
2115 /*
2116  * Determine whether data from the mappings in interval [addr, addr + size)
2117  * are in the primary memory (core) cache.
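 *
 * As an illustrative sketch only (the vec handling mirrors what an
 * mincore(2)-style caller would need; the names are made up), the routine
 * might be used as:
 *
 *	char *vec = kmem_alloc(btopr(len), KM_SLEEP);
 *	size_t inlen;
 *
 *	if (as_incore(as, addr, len, vec, &inlen) == 0) {
 *		... vec[i] now describes the i'th page of [addr, addr + len) ...
 *	}
 *	kmem_free(vec, btopr(len));
 *
 * On failure the routine returns -1 (or ENOMEM on wraparound) and *sizep
 * reflects only the bytes examined successfully.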
2118 */ 2119 int 2120 as_incore(struct as *as, caddr_t addr, 2121 size_t size, char *vec, size_t *sizep) 2122 { 2123 struct seg *seg; 2124 size_t ssize; 2125 caddr_t raddr; /* rounded down addr */ 2126 size_t rsize; /* rounded up size */ 2127 size_t isize; /* iteration size */ 2128 int error = 0; /* result, assume success */ 2129 2130 *sizep = 0; 2131 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2132 rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) - 2133 (size_t)raddr; 2134 2135 if (raddr + rsize < raddr) /* check for wraparound */ 2136 return (ENOMEM); 2137 2138 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2139 seg = as_segat(as, raddr); 2140 if (seg == NULL) { 2141 AS_LOCK_EXIT(as, &as->a_lock); 2142 return (-1); 2143 } 2144 2145 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2146 if (raddr >= seg->s_base + seg->s_size) { 2147 seg = AS_SEGNEXT(as, seg); 2148 if (seg == NULL || raddr != seg->s_base) { 2149 error = -1; 2150 break; 2151 } 2152 } 2153 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2154 ssize = seg->s_base + seg->s_size - raddr; 2155 else 2156 ssize = rsize; 2157 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec); 2158 if (isize != ssize) { 2159 error = -1; 2160 break; 2161 } 2162 vec += btopr(ssize); 2163 } 2164 AS_LOCK_EXIT(as, &as->a_lock); 2165 return (error); 2166 } 2167 2168 static void 2169 as_segunlock(struct seg *seg, caddr_t addr, int attr, 2170 ulong_t *bitmap, size_t position, size_t npages) 2171 { 2172 caddr_t range_start; 2173 size_t pos1 = position; 2174 size_t pos2; 2175 size_t size; 2176 size_t end_pos = npages + position; 2177 2178 while (bt_range(bitmap, &pos1, &pos2, end_pos)) { 2179 size = ptob((pos2 - pos1)); 2180 range_start = (caddr_t)((uintptr_t)addr + 2181 ptob(pos1 - position)); 2182 2183 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK, 2184 (ulong_t *)NULL, (size_t)NULL); 2185 pos1 = pos2; 2186 } 2187 } 2188 2189 static void 2190 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map, 2191 caddr_t raddr, size_t rsize) 2192 { 2193 struct seg *seg = as_segat(as, raddr); 2194 size_t ssize; 2195 2196 while (rsize != 0) { 2197 if (raddr >= seg->s_base + seg->s_size) 2198 seg = AS_SEGNEXT(as, seg); 2199 2200 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2201 ssize = seg->s_base + seg->s_size - raddr; 2202 else 2203 ssize = rsize; 2204 2205 as_segunlock(seg, raddr, attr, mlock_map, 0, btopr(ssize)); 2206 2207 rsize -= ssize; 2208 raddr += ssize; 2209 } 2210 } 2211 2212 /* 2213 * Cache control operations over the interval [addr, addr + size) in 2214 * address space "as". 2215 */ 2216 /*ARGSUSED*/ 2217 int 2218 as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr, 2219 uintptr_t arg, ulong_t *lock_map, size_t pos) 2220 { 2221 struct seg *seg; /* working segment */ 2222 caddr_t raddr; /* rounded down addr */ 2223 caddr_t initraddr; /* saved initial rounded down addr */ 2224 size_t rsize; /* rounded up size */ 2225 size_t initrsize; /* saved initial rounded up size */ 2226 size_t ssize; /* size of seg */ 2227 int error = 0; /* result */ 2228 size_t mlock_size; /* size of bitmap */ 2229 ulong_t *mlock_map; /* pointer to bitmap used */ 2230 /* to represent the locked */ 2231 /* pages. */ 2232 retry: 2233 if (error == IE_RETRY) 2234 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 2235 else 2236 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2237 2238 /* 2239 * If these are address space lock/unlock operations, loop over 2240 * all segments in the address space, as appropriate. 
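	 * (As a hedged illustration, with typical rather than caller-specific
	 * argument values: an mlockall()-style request would arrive here as
	 * something like as_ctl(as, NULL, 0, MC_LOCKAS, 0,
	 * MCL_CURRENT | MCL_FUTURE, (ulong_t *)NULL, (size_t)0);
	 * addr and size are not used for the address-space-wide operations.)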
2241 */ 2242 if (func == MC_LOCKAS) { 2243 size_t npages, idx; 2244 size_t rlen = 0; /* rounded as length */ 2245 2246 idx = pos; 2247 2248 if (arg & MCL_FUTURE) { 2249 mutex_enter(&as->a_contents); 2250 AS_SETPGLCK(as); 2251 mutex_exit(&as->a_contents); 2252 } 2253 if ((arg & MCL_CURRENT) == 0) { 2254 AS_LOCK_EXIT(as, &as->a_lock); 2255 return (0); 2256 } 2257 2258 seg = AS_SEGFIRST(as); 2259 if (seg == NULL) { 2260 AS_LOCK_EXIT(as, &as->a_lock); 2261 return (0); 2262 } 2263 2264 do { 2265 raddr = (caddr_t)((uintptr_t)seg->s_base & 2266 (uintptr_t)PAGEMASK); 2267 rlen += (((uintptr_t)(seg->s_base + seg->s_size) + 2268 PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr; 2269 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2270 2271 mlock_size = BT_BITOUL(btopr(rlen)); 2272 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size * 2273 sizeof (ulong_t), KM_NOSLEEP)) == NULL) { 2274 AS_LOCK_EXIT(as, &as->a_lock); 2275 return (EAGAIN); 2276 } 2277 2278 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 2279 error = SEGOP_LOCKOP(seg, seg->s_base, 2280 seg->s_size, attr, MC_LOCK, mlock_map, pos); 2281 if (error != 0) 2282 break; 2283 pos += seg_pages(seg); 2284 } 2285 2286 if (error) { 2287 for (seg = AS_SEGFIRST(as); seg != NULL; 2288 seg = AS_SEGNEXT(as, seg)) { 2289 2290 raddr = (caddr_t)((uintptr_t)seg->s_base & 2291 (uintptr_t)PAGEMASK); 2292 npages = seg_pages(seg); 2293 as_segunlock(seg, raddr, attr, mlock_map, 2294 idx, npages); 2295 idx += npages; 2296 } 2297 } 2298 2299 kmem_free(mlock_map, mlock_size * sizeof (ulong_t)); 2300 AS_LOCK_EXIT(as, &as->a_lock); 2301 goto lockerr; 2302 } else if (func == MC_UNLOCKAS) { 2303 mutex_enter(&as->a_contents); 2304 AS_CLRPGLCK(as); 2305 mutex_exit(&as->a_contents); 2306 2307 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 2308 error = SEGOP_LOCKOP(seg, seg->s_base, 2309 seg->s_size, attr, MC_UNLOCK, NULL, 0); 2310 if (error != 0) 2311 break; 2312 } 2313 2314 AS_LOCK_EXIT(as, &as->a_lock); 2315 goto lockerr; 2316 } 2317 2318 /* 2319 * Normalize addresses and sizes. 2320 */ 2321 initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2322 initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2323 (size_t)raddr; 2324 2325 if (raddr + rsize < raddr) { /* check for wraparound */ 2326 AS_LOCK_EXIT(as, &as->a_lock); 2327 return (ENOMEM); 2328 } 2329 2330 /* 2331 * Get initial segment. 2332 */ 2333 if ((seg = as_segat(as, raddr)) == NULL) { 2334 AS_LOCK_EXIT(as, &as->a_lock); 2335 return (ENOMEM); 2336 } 2337 2338 if (func == MC_LOCK) { 2339 mlock_size = BT_BITOUL(btopr(rsize)); 2340 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size * 2341 sizeof (ulong_t), KM_NOSLEEP)) == NULL) { 2342 AS_LOCK_EXIT(as, &as->a_lock); 2343 return (EAGAIN); 2344 } 2345 } 2346 2347 /* 2348 * Loop over all segments. If a hole in the address range is 2349 * discovered, then fail. For each segment, perform the appropriate 2350 * control operation. 2351 */ 2352 while (rsize != 0) { 2353 2354 /* 2355 * Make sure there's no hole, calculate the portion 2356 * of the next segment to be operated over. 
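		 * (For example, with hypothetical numbers: if raddr points
		 * to where 0x4000 bytes remain in the current segment and
		 * rsize is 0x8000, ssize is clipped to 0x4000 and the
		 * remaining 0x4000 is handled by later iterations.)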
2357 		 */
2358 		if (raddr >= seg->s_base + seg->s_size) {
2359 			seg = AS_SEGNEXT(as, seg);
2360 			if (seg == NULL || raddr != seg->s_base) {
2361 				if (func == MC_LOCK) {
2362 					as_unlockerr(as, attr, mlock_map,
2363 					    initraddr, initrsize - rsize);
2364 					kmem_free(mlock_map,
2365 					    mlock_size * sizeof (ulong_t));
2366 				}
2367 				AS_LOCK_EXIT(as, &as->a_lock);
2368 				return (ENOMEM);
2369 			}
2370 		}
2371 		if ((raddr + rsize) > (seg->s_base + seg->s_size))
2372 			ssize = seg->s_base + seg->s_size - raddr;
2373 		else
2374 			ssize = rsize;
2375 
2376 		/*
2377 		 * Dispatch on specific function.
2378 		 */
2379 		switch (func) {
2380 
2381 		/*
2382 		 * Synchronize cached data from mappings with backing
2383 		 * objects.
2384 		 */
2385 		case MC_SYNC:
2386 			if (error = SEGOP_SYNC(seg, raddr, ssize,
2387 			    attr, (uint_t)arg)) {
2388 				AS_LOCK_EXIT(as, &as->a_lock);
2389 				return (error);
2390 			}
2391 			break;
2392 
2393 		/*
2394 		 * Lock pages in memory.
2395 		 */
2396 		case MC_LOCK:
2397 			if (error = SEGOP_LOCKOP(seg, raddr, ssize,
2398 			    attr, func, mlock_map, pos)) {
2399 				as_unlockerr(as, attr, mlock_map, initraddr,
2400 				    initrsize - rsize + ssize);
2401 				kmem_free(mlock_map, mlock_size *
2402 				    sizeof (ulong_t));
2403 				AS_LOCK_EXIT(as, &as->a_lock);
2404 				goto lockerr;
2405 			}
2406 			break;
2407 
2408 		/*
2409 		 * Unlock mapped pages.
2410 		 */
2411 		case MC_UNLOCK:
2412 			(void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func,
2413 			    (ulong_t *)NULL, (size_t)NULL);
2414 			break;
2415 
2416 		/*
2417 		 * Store VM advice for mapped pages in segment layer.
2418 		 */
2419 		case MC_ADVISE:
2420 			error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg);
2421 
2422 			/*
2423 			 * Check for regular errors and special retry error
2424 			 */
2425 			if (error) {
2426 				if (error == IE_RETRY) {
2427 					/*
2428 					 * Need to acquire writers lock, so
2429 					 * have to drop readers lock and start
2430 					 * all over again
2431 					 */
2432 					AS_LOCK_EXIT(as, &as->a_lock);
2433 					goto retry;
2434 				} else if (error == IE_REATTACH) {
2435 					/*
2436 					 * Find segment for current address
2437 					 * because current segment just got
2438 					 * split or concatenated
2439 					 */
2440 					seg = as_segat(as, raddr);
2441 					if (seg == NULL) {
2442 						AS_LOCK_EXIT(as, &as->a_lock);
2443 						return (ENOMEM);
2444 					}
2445 				} else {
2446 					/*
2447 					 * Regular error
2448 					 */
2449 					AS_LOCK_EXIT(as, &as->a_lock);
2450 					return (error);
2451 				}
2452 			}
2453 			break;
2454 
2455 		/*
2456 		 * Can't happen.
2457 		 */
2458 		default:
2459 			panic("as_ctl: bad operation %d", func);
2460 			/*NOTREACHED*/
2461 		}
2462 
2463 		rsize -= ssize;
2464 		raddr += ssize;
2465 	}
2466 
2467 	if (func == MC_LOCK)
2468 		kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2469 	AS_LOCK_EXIT(as, &as->a_lock);
2470 	return (0);
2471 lockerr:
2472 
2473 	/*
2474 	 * If the lower levels returned EDEADLK for a segment lockop,
2475 	 * it means that we should retry the operation. Let's wait
2476 	 * a bit also to let the deadlock-causing condition clear.
2477 	 * This is part of a gross hack to work around a design flaw
2478 	 * in the ufs/sds logging code and should go away when the
2479 	 * logging code is re-designed to fix the problem. See bug
2480 	 * 4125102 for details of the problem.
2481 	 */
2482 	if (error == EDEADLK) {
2483 		delay(deadlk_wait);
2484 		error = 0;
2485 		goto retry;
2486 	}
2487 	return (error);
2488 }
2489 
2490 /*
2491  * Special code for exec to move the stack segment from its interim
2492  * place in the old address space to the right place in the new address space.
2493 */ 2494 /*ARGSUSED*/ 2495 int 2496 as_exec(struct as *oas, caddr_t ostka, size_t stksz, 2497 struct as *nas, caddr_t nstka, uint_t hatflag) 2498 { 2499 struct seg *stkseg; 2500 2501 AS_LOCK_ENTER(oas, &oas->a_lock, RW_WRITER); 2502 stkseg = as_segat(oas, ostka); 2503 stkseg = as_removeseg(oas, stkseg); 2504 ASSERT(stkseg != NULL); 2505 ASSERT(stkseg->s_base == ostka && stkseg->s_size == stksz); 2506 stkseg->s_as = nas; 2507 stkseg->s_base = nstka; 2508 2509 /* 2510 * It's ok to lock the address space we are about to exec to. 2511 */ 2512 AS_LOCK_ENTER(nas, &nas->a_lock, RW_WRITER); 2513 ASSERT(avl_numnodes(&nas->a_wpage) == 0); 2514 nas->a_size += stkseg->s_size; 2515 oas->a_size -= stkseg->s_size; 2516 (void) as_addseg(nas, stkseg); 2517 AS_LOCK_EXIT(nas, &nas->a_lock); 2518 AS_LOCK_EXIT(oas, &oas->a_lock); 2519 return (0); 2520 } 2521 2522 int 2523 fc_decode(faultcode_t fault_err) 2524 { 2525 int error = 0; 2526 2527 switch (FC_CODE(fault_err)) { 2528 case FC_OBJERR: 2529 error = FC_ERRNO(fault_err); 2530 break; 2531 case FC_PROT: 2532 error = EACCES; 2533 break; 2534 default: 2535 error = EFAULT; 2536 break; 2537 } 2538 return (error); 2539 } 2540 2541 /* 2542 * lock pages in a given address space. Return shadow list. If 2543 * the list is NULL, the MMU mapping is also locked. 2544 */ 2545 int 2546 as_pagelock(struct as *as, struct page ***ppp, caddr_t addr, 2547 size_t size, enum seg_rw rw) 2548 { 2549 size_t rsize; 2550 caddr_t base; 2551 caddr_t raddr; 2552 faultcode_t fault_err; 2553 struct seg *seg; 2554 int res; 2555 int prefaulted = 0; 2556 2557 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START, 2558 "as_pagelock_start: addr %p size %ld", addr, size); 2559 2560 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2561 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2562 (size_t)raddr; 2563 top: 2564 /* 2565 * if the request crosses two segments let 2566 * as_fault handle it. 2567 */ 2568 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2569 seg = as_findseg(as, addr, 0); 2570 if ((seg == NULL) || ((base = seg->s_base) > addr) || 2571 (addr + size) > base + seg->s_size) { 2572 AS_LOCK_EXIT(as, &as->a_lock); 2573 goto slow; 2574 } 2575 2576 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START, 2577 "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize); 2578 2579 /* 2580 * try to lock pages and pass back shadow list 2581 */ 2582 res = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw); 2583 2584 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end"); 2585 AS_LOCK_EXIT(as, &as->a_lock); 2586 if (res == 0) { 2587 return (0); 2588 } else if (res == ENOTSUP || prefaulted) { 2589 /* 2590 * (1) segment driver doesn't support PAGELOCK fastpath, or 2591 * (2) we've already tried fast path unsuccessfully after 2592 * faulting in the addr range below; system might be 2593 * thrashing or there may not be enough availrmem. 2594 */ 2595 goto slow; 2596 } 2597 2598 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_FAULT_START, 2599 "as_fault_start: addr %p size %ld", addr, size); 2600 2601 /* 2602 * we might get here because of some COW fault or non 2603 * existing page. Let as_fault deal with it. Just load 2604 * the page, don't lock the MMU mapping. 2605 */ 2606 fault_err = as_fault(as->a_hat, as, addr, size, F_INVAL, rw); 2607 if (fault_err != 0) { 2608 return (fc_decode(fault_err)); 2609 } 2610 2611 prefaulted = 1; 2612 2613 /* 2614 * try fast path again; since we've dropped a_lock, 2615 * we need to try the dance from the start to see if 2616 * the addr range is still valid. 
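	 * (For example, another thread may have unmapped or remapped part
	 * of the range while a_lock was dropped for the as_fault() call
	 * above, in which case the single-segment check fails and we fall
	 * back to the slow path.)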
2617 */ 2618 goto top; 2619 slow: 2620 /* 2621 * load the page and lock the MMU mapping. 2622 */ 2623 fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw); 2624 if (fault_err != 0) { 2625 return (fc_decode(fault_err)); 2626 } 2627 *ppp = NULL; 2628 2629 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end"); 2630 return (0); 2631 } 2632 2633 /* 2634 * unlock pages in a given address range 2635 */ 2636 void 2637 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size, 2638 enum seg_rw rw) 2639 { 2640 struct seg *seg; 2641 size_t rsize; 2642 caddr_t raddr; 2643 2644 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START, 2645 "as_pageunlock_start: addr %p size %ld", addr, size); 2646 2647 /* 2648 * if the shadow list is NULL, as_pagelock was 2649 * falling back to as_fault 2650 */ 2651 if (pp == NULL) { 2652 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw); 2653 return; 2654 } 2655 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2656 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2657 (size_t)raddr; 2658 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2659 seg = as_findseg(as, addr, 0); 2660 ASSERT(seg); 2661 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START, 2662 "seg_unlock_start: raddr %p rsize %ld", raddr, rsize); 2663 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw); 2664 AS_LOCK_EXIT(as, &as->a_lock); 2665 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end"); 2666 } 2667 2668 /* 2669 * reclaim cached pages in a given address range 2670 */ 2671 void 2672 as_pagereclaim(struct as *as, struct page **pp, caddr_t addr, 2673 size_t size, enum seg_rw rw) 2674 { 2675 struct seg *seg; 2676 size_t rsize; 2677 caddr_t raddr; 2678 2679 ASSERT(AS_READ_HELD(as, &as->a_lock)); 2680 ASSERT(pp != NULL); 2681 2682 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2683 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2684 (size_t)raddr; 2685 seg = as_findseg(as, addr, 0); 2686 ASSERT(seg); 2687 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGERECLAIM, rw); 2688 } 2689 2690 #define MAXPAGEFLIP 4 2691 #define MAXPAGEFLIPSIZ MAXPAGEFLIP*PAGESIZE 2692 2693 int 2694 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc, 2695 boolean_t wait) 2696 { 2697 struct seg *seg; 2698 size_t ssize; 2699 caddr_t raddr; /* rounded down addr */ 2700 size_t rsize; /* rounded up size */ 2701 int error = 0; 2702 size_t pgsz = page_get_pagesize(szc); 2703 2704 setpgsz_top: 2705 if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) { 2706 return (EINVAL); 2707 } 2708 2709 raddr = addr; 2710 rsize = size; 2711 2712 if (raddr + rsize < raddr) /* check for wraparound */ 2713 return (ENOMEM); 2714 2715 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 2716 as_clearwatchprot(as, raddr, rsize); 2717 seg = as_segat(as, raddr); 2718 if (seg == NULL) { 2719 as_setwatch(as); 2720 AS_LOCK_EXIT(as, &as->a_lock); 2721 return (ENOMEM); 2722 } 2723 2724 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2725 if (raddr >= seg->s_base + seg->s_size) { 2726 seg = AS_SEGNEXT(as, seg); 2727 if (seg == NULL || raddr != seg->s_base) { 2728 error = ENOMEM; 2729 break; 2730 } 2731 } 2732 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 2733 ssize = seg->s_base + seg->s_size - raddr; 2734 } else { 2735 ssize = rsize; 2736 } 2737 2738 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc); 2739 2740 if (error == IE_NOMEM) { 2741 error = EAGAIN; 2742 break; 2743 } 2744 2745 if (error == IE_RETRY) { 2746 AS_LOCK_EXIT(as, &as->a_lock); 2747 
goto setpgsz_top; 2748 } 2749 2750 if (error == ENOTSUP) { 2751 error = EINVAL; 2752 break; 2753 } 2754 2755 if (wait && (error == EAGAIN)) { 2756 /* 2757 * Memory is currently locked. It must be unlocked 2758 * before this operation can succeed through a retry. 2759 * The possible reasons for locked memory and 2760 * corresponding strategies for unlocking are: 2761 * (1) Normal I/O 2762 * wait for a signal that the I/O operation 2763 * has completed and the memory is unlocked. 2764 * (2) Asynchronous I/O 2765 * The aio subsystem does not unlock pages when 2766 * the I/O is completed. Those pages are unlocked 2767 * when the application calls aiowait/aioerror. 2768 * So, to prevent blocking forever, cv_broadcast() 2769 * is done to wake up aio_cleanup_thread. 2770 * Subsequently, segvn_reclaim will be called, and 2771 * that will do AS_CLRUNMAPWAIT() and wake us up. 2772 * (3) Long term page locking: 2773 * This is not relevant for as_setpagesize() 2774 * because we cannot change the page size for 2775 * driver memory. The attempt to do so will 2776 * fail with a different error than EAGAIN so 2777 * there's no need to trigger as callbacks like 2778 * as_unmap, as_setprot or as_free would do. 2779 */ 2780 mutex_enter(&as->a_contents); 2781 if (AS_ISUNMAPWAIT(as) == 0) { 2782 cv_broadcast(&as->a_cv); 2783 } 2784 AS_SETUNMAPWAIT(as); 2785 AS_LOCK_EXIT(as, &as->a_lock); 2786 while (AS_ISUNMAPWAIT(as)) { 2787 cv_wait(&as->a_cv, &as->a_contents); 2788 } 2789 mutex_exit(&as->a_contents); 2790 goto setpgsz_top; 2791 } else if (error != 0) { 2792 break; 2793 } 2794 } 2795 as_setwatch(as); 2796 AS_LOCK_EXIT(as, &as->a_lock); 2797 return (error); 2798 } 2799 2800 /* 2801 * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments 2802 * in its chunk where s_szc is less than the szc we want to set. 2803 */ 2804 static int 2805 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, 2806 int *retry) 2807 { 2808 struct seg *seg; 2809 size_t ssize; 2810 int error; 2811 2812 seg = as_segat(as, raddr); 2813 if (seg == NULL) { 2814 panic("as_iset3_default_lpsize: no seg"); 2815 } 2816 2817 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2818 if (raddr >= seg->s_base + seg->s_size) { 2819 seg = AS_SEGNEXT(as, seg); 2820 if (seg == NULL || raddr != seg->s_base) { 2821 panic("as_iset3_default_lpsize: as changed"); 2822 } 2823 } 2824 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 2825 ssize = seg->s_base + seg->s_size - raddr; 2826 } else { 2827 ssize = rsize; 2828 } 2829 2830 if (szc > seg->s_szc) { 2831 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc); 2832 /* Only retry on EINVAL segments that have no vnode. */ 2833 if (error == EINVAL) { 2834 vnode_t *vp = NULL; 2835 if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) && 2836 (SEGOP_GETVP(seg, raddr, &vp) != 0 || 2837 vp == NULL)) { 2838 *retry = 1; 2839 } else { 2840 *retry = 0; 2841 } 2842 } 2843 if (error) { 2844 return (error); 2845 } 2846 } 2847 } 2848 return (0); 2849 } 2850 2851 /* 2852 * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the 2853 * pagesize on each segment in its range, but if any fails with EINVAL, 2854 * then it reduces the pagesizes to the next size in the bitmap and 2855 * retries as_iset3_default_lpsize(). The reason why the code retries 2856 * smaller allowed sizes on EINVAL is because (a) the anon offset may not 2857 * match the bigger sizes, and (b) it's hard to get this offset (to begin 2858 * with) to pass to map_pgszcvec(). 
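 *
 * As a worked example (the bit values are hypothetical): if szcvec is 0x15
 * (page size codes 0, 2 and 4) and the first attempt at szc 4 fails with a
 * retryable EINVAL, szcvec becomes 0x05 and the next attempt uses
 * szc = highbit(0x05) - 1 = 2; if that also fails, szcvec drops to 0x01 and
 * EINVAL is returned to the caller.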
2859 */ 2860 static int 2861 as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc, 2862 uint_t szcvec) 2863 { 2864 int error; 2865 int retry; 2866 2867 for (;;) { 2868 error = as_iset3_default_lpsize(as, addr, size, szc, &retry); 2869 if (error == EINVAL && retry) { 2870 szcvec &= ~(1 << szc); 2871 if (szcvec <= 1) { 2872 return (EINVAL); 2873 } 2874 szc = highbit(szcvec) - 1; 2875 } else { 2876 return (error); 2877 } 2878 } 2879 } 2880 2881 /* 2882 * as_iset1_default_lpsize() breaks its chunk into areas where existing 2883 * segments have a smaller szc than we want to set. For each such area, 2884 * it calls as_iset2_default_lpsize() 2885 */ 2886 static int 2887 as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, 2888 uint_t szcvec) 2889 { 2890 struct seg *seg; 2891 size_t ssize; 2892 caddr_t setaddr = raddr; 2893 size_t setsize = 0; 2894 int set; 2895 int error; 2896 2897 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 2898 2899 seg = as_segat(as, raddr); 2900 if (seg == NULL) { 2901 panic("as_iset1_default_lpsize: no seg"); 2902 } 2903 if (seg->s_szc < szc) { 2904 set = 1; 2905 } else { 2906 set = 0; 2907 } 2908 2909 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) { 2910 if (raddr >= seg->s_base + seg->s_size) { 2911 seg = AS_SEGNEXT(as, seg); 2912 if (seg == NULL || raddr != seg->s_base) { 2913 panic("as_iset1_default_lpsize: as changed"); 2914 } 2915 if (seg->s_szc >= szc && set) { 2916 ASSERT(setsize != 0); 2917 error = as_iset2_default_lpsize(as, 2918 setaddr, setsize, szc, szcvec); 2919 if (error) { 2920 return (error); 2921 } 2922 set = 0; 2923 } else if (seg->s_szc < szc && !set) { 2924 setaddr = raddr; 2925 setsize = 0; 2926 set = 1; 2927 } 2928 } 2929 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 2930 ssize = seg->s_base + seg->s_size - raddr; 2931 } else { 2932 ssize = rsize; 2933 } 2934 } 2935 error = 0; 2936 if (set) { 2937 ASSERT(setsize != 0); 2938 error = as_iset2_default_lpsize(as, setaddr, setsize, 2939 szc, szcvec); 2940 } 2941 return (error); 2942 } 2943 2944 /* 2945 * as_iset_default_lpsize() breaks its chunk according to the size code bitmap 2946 * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each 2947 * chunk to as_iset1_default_lpsize(). 2948 */ 2949 static int 2950 as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags, 2951 int type) 2952 { 2953 int rtype = (type & MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM; 2954 uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, 2955 flags, rtype, 1); 2956 uint_t szc; 2957 uint_t nszc; 2958 int error; 2959 caddr_t a; 2960 caddr_t eaddr; 2961 size_t segsize; 2962 size_t pgsz; 2963 uint_t save_szcvec; 2964 2965 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 2966 ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); 2967 ASSERT(IS_P2ALIGNED(size, PAGESIZE)); 2968 2969 szcvec &= ~1; 2970 if (szcvec <= 1) { /* skip if base page size */ 2971 return (0); 2972 } 2973 2974 /* Get the pagesize of the first larger page size. 
	 */
2975 	szc = lowbit(szcvec) - 1;
2976 	pgsz = page_get_pagesize(szc);
2977 	eaddr = addr + size;
2978 	addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
2979 	eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
2980 
2981 	save_szcvec = szcvec;
2982 	szcvec >>= (szc + 1);
2983 	nszc = szc;
2984 	while (szcvec) {
2985 		if ((szcvec & 0x1) == 0) {
2986 			nszc++;
2987 			szcvec >>= 1;
2988 			continue;
2989 		}
2990 		nszc++;
2991 		pgsz = page_get_pagesize(nszc);
2992 		a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
2993 		if (a != addr) {
2994 			ASSERT(szc > 0);
2995 			ASSERT(a < eaddr);
2996 			segsize = a - addr;
2997 			error = as_iset1_default_lpsize(as, addr, segsize, szc,
2998 			    save_szcvec);
2999 			if (error) {
3000 				return (error);
3001 			}
3002 			addr = a;
3003 		}
3004 		szc = nszc;
3005 		szcvec >>= 1;
3006 	}
3007 
3008 	ASSERT(addr < eaddr);
3009 	szcvec = save_szcvec;
3010 	while (szcvec) {
3011 		a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
3012 		ASSERT(a >= addr);
3013 		if (a != addr) {
3014 			ASSERT(szc > 0);
3015 			segsize = a - addr;
3016 			error = as_iset1_default_lpsize(as, addr, segsize, szc,
3017 			    save_szcvec);
3018 			if (error) {
3019 				return (error);
3020 			}
3021 			addr = a;
3022 		}
3023 		szcvec &= ~(1 << szc);
3024 		if (szcvec) {
3025 			szc = highbit(szcvec) - 1;
3026 			pgsz = page_get_pagesize(szc);
3027 		}
3028 	}
3029 	ASSERT(addr == eaddr);
3030 
3031 	return (0);
3032 }
3033 
3034 /*
3035  * Set the default large page size for the range. Called via memcntl with
3036  * page size set to 0. as_set_default_lpsize breaks the range down into
3037  * chunks with the same type/flags, ignores non-segvn segments, and passes
3038  * each chunk to as_iset_default_lpsize().
3039  */
3040 int
3041 as_set_default_lpsize(struct as *as, caddr_t addr, size_t size)
3042 {
3043 	struct seg *seg;
3044 	caddr_t raddr;
3045 	size_t rsize;
3046 	size_t ssize;
3047 	int rtype, rflags;
3048 	int stype, sflags;
3049 	int error;
3050 	caddr_t setaddr;
3051 	size_t setsize;
3052 	int segvn;
3053 
3054 	if (size == 0)
3055 		return (0);
3056 
3057 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3058 again:
3059 	error = 0;
3060 
3061 	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3062 	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3063 	    (size_t)raddr;
3064 
3065 	if (raddr + rsize < raddr) {		/* check for wraparound */
3066 		AS_LOCK_EXIT(as, &as->a_lock);
3067 		return (ENOMEM);
3068 	}
3069 	as_clearwatchprot(as, raddr, rsize);
3070 	seg = as_segat(as, raddr);
3071 	if (seg == NULL) {
3072 		as_setwatch(as);
3073 		AS_LOCK_EXIT(as, &as->a_lock);
3074 		return (ENOMEM);
3075 	}
3076 	if (seg->s_ops == &segvn_ops) {
3077 		rtype = SEGOP_GETTYPE(seg, addr);
3078 		rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3079 		rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3080 		segvn = 1;
3081 	} else {
3082 		segvn = 0;
3083 	}
3084 	setaddr = raddr;
3085 	setsize = 0;
3086 
3087 	for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3088 		if (raddr >= (seg->s_base + seg->s_size)) {
3089 			seg = AS_SEGNEXT(as, seg);
3090 			if (seg == NULL || raddr != seg->s_base) {
3091 				error = ENOMEM;
3092 				break;
3093 			}
3094 			if (seg->s_ops == &segvn_ops) {
3095 				stype = SEGOP_GETTYPE(seg, raddr);
3096 				sflags = stype & (MAP_TEXT | MAP_INITDATA);
3097 				stype &= (MAP_SHARED | MAP_PRIVATE);
3098 				if (segvn && (rflags != sflags ||
3099 				    rtype != stype)) {
3100 					/*
3101 					 * The next segment is also segvn but
3102 					 * has different flags and/or type.
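					 * (For example, a MAP_PRIVATE text
					 * mapping followed by a MAP_SHARED
					 * mapping ends the current chunk here
					 * and starts a new one.)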
3103 */ 3104 ASSERT(setsize != 0); 3105 error = as_iset_default_lpsize(as, 3106 setaddr, setsize, rflags, rtype); 3107 if (error) { 3108 break; 3109 } 3110 rflags = sflags; 3111 rtype = stype; 3112 setaddr = raddr; 3113 setsize = 0; 3114 } else if (!segvn) { 3115 rflags = sflags; 3116 rtype = stype; 3117 setaddr = raddr; 3118 setsize = 0; 3119 segvn = 1; 3120 } 3121 } else if (segvn) { 3122 /* The next segment is not segvn. */ 3123 ASSERT(setsize != 0); 3124 error = as_iset_default_lpsize(as, 3125 setaddr, setsize, rflags, rtype); 3126 if (error) { 3127 break; 3128 } 3129 segvn = 0; 3130 } 3131 } 3132 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 3133 ssize = seg->s_base + seg->s_size - raddr; 3134 } else { 3135 ssize = rsize; 3136 } 3137 } 3138 if (error == 0 && segvn) { 3139 /* The last chunk when rsize == 0. */ 3140 ASSERT(setsize != 0); 3141 error = as_iset_default_lpsize(as, setaddr, setsize, 3142 rflags, rtype); 3143 } 3144 3145 if (error == IE_RETRY) { 3146 goto again; 3147 } else if (error == IE_NOMEM) { 3148 error = EAGAIN; 3149 } else if (error == ENOTSUP) { 3150 error = EINVAL; 3151 } else if (error == EAGAIN) { 3152 mutex_enter(&as->a_contents); 3153 if (AS_ISUNMAPWAIT(as) == 0) { 3154 cv_broadcast(&as->a_cv); 3155 } 3156 AS_SETUNMAPWAIT(as); 3157 AS_LOCK_EXIT(as, &as->a_lock); 3158 while (AS_ISUNMAPWAIT(as)) { 3159 cv_wait(&as->a_cv, &as->a_contents); 3160 } 3161 mutex_exit(&as->a_contents); 3162 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3163 goto again; 3164 } 3165 3166 as_setwatch(as); 3167 AS_LOCK_EXIT(as, &as->a_lock); 3168 return (error); 3169 } 3170 3171 /* 3172 * Setup all of the uninitialized watched pages that we can. 3173 */ 3174 void 3175 as_setwatch(struct as *as) 3176 { 3177 struct watched_page *pwp; 3178 struct seg *seg; 3179 caddr_t vaddr; 3180 uint_t prot; 3181 int err, retrycnt; 3182 3183 if (avl_numnodes(&as->a_wpage) == 0) 3184 return; 3185 3186 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3187 3188 for (pwp = avl_first(&as->a_wpage); pwp != NULL; 3189 pwp = AVL_NEXT(&as->a_wpage, pwp)) { 3190 retrycnt = 0; 3191 retry: 3192 vaddr = pwp->wp_vaddr; 3193 if (pwp->wp_oprot != 0 || /* already set up */ 3194 (seg = as_segat(as, vaddr)) == NULL || 3195 SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0) 3196 continue; 3197 3198 pwp->wp_oprot = prot; 3199 if (pwp->wp_read) 3200 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3201 if (pwp->wp_write) 3202 prot &= ~PROT_WRITE; 3203 if (pwp->wp_exec) 3204 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3205 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) { 3206 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot); 3207 if (err == IE_RETRY) { 3208 pwp->wp_oprot = 0; 3209 ASSERT(retrycnt == 0); 3210 retrycnt++; 3211 goto retry; 3212 } 3213 } 3214 pwp->wp_prot = prot; 3215 } 3216 } 3217 3218 /* 3219 * Clear all of the watched pages in the address space. 
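 * For example, a page being watched for writes was left mapped by
 * as_setwatch() with wp_prot == (wp_oprot & ~PROT_WRITE); this routine
 * restores the segment protections to wp_oprot and then zeroes the per-page
 * wp_oprot/wp_prot bookkeeping.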
3220 */ 3221 void 3222 as_clearwatch(struct as *as) 3223 { 3224 struct watched_page *pwp; 3225 struct seg *seg; 3226 caddr_t vaddr; 3227 uint_t prot; 3228 int err, retrycnt; 3229 3230 if (avl_numnodes(&as->a_wpage) == 0) 3231 return; 3232 3233 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3234 3235 for (pwp = avl_first(&as->a_wpage); pwp != NULL; 3236 pwp = AVL_NEXT(&as->a_wpage, pwp)) { 3237 retrycnt = 0; 3238 retry: 3239 vaddr = pwp->wp_vaddr; 3240 if (pwp->wp_oprot == 0 || /* not set up */ 3241 (seg = as_segat(as, vaddr)) == NULL) 3242 continue; 3243 3244 if ((prot = pwp->wp_oprot) != pwp->wp_prot) { 3245 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot); 3246 if (err == IE_RETRY) { 3247 ASSERT(retrycnt == 0); 3248 retrycnt++; 3249 goto retry; 3250 } 3251 } 3252 pwp->wp_oprot = 0; 3253 pwp->wp_prot = 0; 3254 } 3255 } 3256 3257 /* 3258 * Force a new setup for all the watched pages in the range. 3259 */ 3260 static void 3261 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot) 3262 { 3263 struct watched_page *pwp; 3264 struct watched_page tpw; 3265 caddr_t eaddr = addr + size; 3266 caddr_t vaddr; 3267 struct seg *seg; 3268 int err, retrycnt; 3269 uint_t wprot; 3270 avl_index_t where; 3271 3272 if (avl_numnodes(&as->a_wpage) == 0) 3273 return; 3274 3275 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3276 3277 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3278 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL) 3279 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER); 3280 3281 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3282 retrycnt = 0; 3283 vaddr = pwp->wp_vaddr; 3284 3285 wprot = prot; 3286 if (pwp->wp_read) 3287 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3288 if (pwp->wp_write) 3289 wprot &= ~PROT_WRITE; 3290 if (pwp->wp_exec) 3291 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3292 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) { 3293 retry: 3294 seg = as_segat(as, vaddr); 3295 if (seg == NULL) { 3296 panic("as_setwatchprot: no seg"); 3297 /*NOTREACHED*/ 3298 } 3299 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot); 3300 if (err == IE_RETRY) { 3301 ASSERT(retrycnt == 0); 3302 retrycnt++; 3303 goto retry; 3304 } 3305 } 3306 pwp->wp_oprot = prot; 3307 pwp->wp_prot = wprot; 3308 3309 pwp = AVL_NEXT(&as->a_wpage, pwp); 3310 } 3311 } 3312 3313 /* 3314 * Clear all of the watched pages in the range. 
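 * Callers in this file, such as as_setpagesize() and as_set_default_lpsize()
 * above, invoke this before changing the underlying mappings and call
 * as_setwatch() afterwards to re-establish the watchpoints.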
3315 */ 3316 static void 3317 as_clearwatchprot(struct as *as, caddr_t addr, size_t size) 3318 { 3319 caddr_t eaddr = addr + size; 3320 struct watched_page *pwp; 3321 struct watched_page tpw; 3322 uint_t prot; 3323 struct seg *seg; 3324 int err, retrycnt; 3325 avl_index_t where; 3326 3327 if (avl_numnodes(&as->a_wpage) == 0) 3328 return; 3329 3330 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3331 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL) 3332 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER); 3333 3334 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3335 3336 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3337 3338 if ((prot = pwp->wp_oprot) != 0) { 3339 retrycnt = 0; 3340 3341 if (prot != pwp->wp_prot) { 3342 retry: 3343 seg = as_segat(as, pwp->wp_vaddr); 3344 if (seg == NULL) 3345 continue; 3346 err = SEGOP_SETPROT(seg, pwp->wp_vaddr, 3347 PAGESIZE, prot); 3348 if (err == IE_RETRY) { 3349 ASSERT(retrycnt == 0); 3350 retrycnt++; 3351 goto retry; 3352 3353 } 3354 } 3355 pwp->wp_oprot = 0; 3356 pwp->wp_prot = 0; 3357 } 3358 3359 pwp = AVL_NEXT(&as->a_wpage, pwp); 3360 } 3361 } 3362 3363 void 3364 as_signal_proc(struct as *as, k_siginfo_t *siginfo) 3365 { 3366 struct proc *p; 3367 3368 mutex_enter(&pidlock); 3369 for (p = practive; p; p = p->p_next) { 3370 if (p->p_as == as) { 3371 mutex_enter(&p->p_lock); 3372 if (p->p_as == as) 3373 sigaddq(p, NULL, siginfo, KM_NOSLEEP); 3374 mutex_exit(&p->p_lock); 3375 } 3376 } 3377 mutex_exit(&pidlock); 3378 } 3379 3380 /* 3381 * return memory object ID 3382 */ 3383 int 3384 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp) 3385 { 3386 struct seg *seg; 3387 int sts; 3388 3389 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 3390 seg = as_segat(as, addr); 3391 if (seg == NULL) { 3392 AS_LOCK_EXIT(as, &as->a_lock); 3393 return (EFAULT); 3394 } 3395 /* 3396 * catch old drivers which may not support getmemid 3397 */ 3398 if (seg->s_ops->getmemid == NULL) { 3399 AS_LOCK_EXIT(as, &as->a_lock); 3400 return (ENODEV); 3401 } 3402 3403 sts = SEGOP_GETMEMID(seg, addr, memidp); 3404 3405 AS_LOCK_EXIT(as, &as->a_lock); 3406 return (sts); 3407 } 3408
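/*
 * Illustrative usage sketch (not part of the build; the buffer names are
 * hypothetical): a physio-style consumer typically brackets a transfer with
 * as_pagelock() and as_pageunlock():
 *
 *	struct page **pplist;
 *
 *	if (as_pagelock(as, &pplist, uaddr, ulen, S_WRITE) == 0) {
 *		... perform the transfer using the shadow list pplist,
 *		    or the locked MMU mappings when pplist is NULL ...
 *		as_pageunlock(as, pplist, uaddr, ulen, S_WRITE);
 *	}
 */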