/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015, Joyent, Inc.  All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

/*
 * VM - address spaces.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/sysmacros.h>
#include <sys/cpuvar.h>
#include <sys/sysinfo.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/tnf_probe.h>
#include <sys/vtrace.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>
#include <vm/seg_spt.h>
#include <vm/page.h>

clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */

static struct kmem_cache *as_cache;

static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
static void as_clearwatchprot(struct as *, caddr_t, size_t);
int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);


/*
 * Verifying the segment lists is very time-consuming; it may not be
 * desirable always to define VERIFY_SEGLIST when DEBUG is set.
 */
#ifdef DEBUG
#define	VERIFY_SEGLIST
int do_as_verify = 0;
#endif

/*
 * Allocate a new callback data structure entry and fill in the events of
 * interest, the address range of interest, and the callback argument.
 * Link the entry on the as->a_callbacks list.  A callback entry for the
 * entire address space may be specified with vaddr = 0 and size = -1.
 *
 * CALLERS RESPONSIBILITY: If not calling from within the process context for
 * the specified as, the caller must guarantee persistence of the specified as
 * for the duration of this function (eg. pages being locked within the as
 * will guarantee persistence).
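 *
 * A minimal usage sketch (illustrative only; mydrv_cb, mydrv_state and
 * mydrv_unlock_pages are hypothetical driver names, not part of this
 * interface):
 *
 *	static void
 *	mydrv_cb(struct as *cbas, void *arg, uint_t events)
 *	{
 *		mydrv_unlock_pages(arg);		// drop long-term locks
 *		(void) as_delete_callback(cbas, arg);	// done with this entry
 *	}
 *	...
 *	error = as_add_callback(as, mydrv_cb, mydrv_state, AS_UNMAP_EVENT,
 *	    vaddr, size, KM_SLEEP);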
 */
int
as_add_callback(struct as *as, void (*cb_func)(), void *arg, uint_t events,
    caddr_t vaddr, size_t size, int sleepflag)
{
	struct as_callback *current_head, *cb;
	caddr_t saddr;
	size_t rsize;

	/* callback function and an event are mandatory */
	if ((cb_func == NULL) || ((events & AS_ALL_EVENT) == 0))
		return (EINVAL);

	/* Adding a callback after as_free has been called is not allowed */
	if (as == &kas)
		return (ENOMEM);

	/*
	 * vaddr = 0 and size = -1 is used to indicate that the callback range
	 * is the entire address space so no rounding is done in that case.
	 */
	if (size != -1) {
		saddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
		rsize = (((size_t)(vaddr + size) + PAGEOFFSET) & PAGEMASK) -
		    (size_t)saddr;
		/* check for wraparound */
		if (saddr + rsize < saddr)
			return (ENOMEM);
	} else {
		if (vaddr != 0)
			return (EINVAL);
		saddr = vaddr;
		rsize = size;
	}

	/* Allocate and initialize a callback entry */
	cb = kmem_zalloc(sizeof (struct as_callback), sleepflag);
	if (cb == NULL)
		return (EAGAIN);

	cb->ascb_func = cb_func;
	cb->ascb_arg = arg;
	cb->ascb_events = events;
	cb->ascb_saddr = saddr;
	cb->ascb_len = rsize;

	/* Add the entry to the list */
	mutex_enter(&as->a_contents);
	current_head = as->a_callbacks;
	as->a_callbacks = cb;
	cb->ascb_next = current_head;

	/*
	 * The call to this function may lose in a race with
	 * a pertinent event - eg. a thread does long term memory locking
	 * but before the callback is added another thread executes as_unmap.
	 * A broadcast here resolves that.
	 */
	if ((cb->ascb_events & AS_UNMAPWAIT_EVENT) && AS_ISUNMAPWAIT(as)) {
		AS_CLRUNMAPWAIT(as);
		cv_broadcast(&as->a_cv);
	}

	mutex_exit(&as->a_contents);
	return (0);
}

/*
 * Search the callback list for an entry which pertains to arg.
 *
 * This is called from within the client upon completion of the callback.
 * RETURN VALUES:
 *	AS_CALLBACK_DELETED  (callback entry found and deleted)
 *	AS_CALLBACK_NOTFOUND (no callback entry found - this is ok)
 *	AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this
 *			entry will be made in as_do_callbacks)
 *
 * If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED
 * set, it indicates that as_do_callbacks is processing this entry.  The
 * AS_ALL_EVENT events are cleared in the entry, and a broadcast is made
 * to unblock as_do_callbacks, in case it is blocked.
 *
 * CALLERS RESPONSIBILITY: If not calling from within the process context for
 * the specified as, the caller must guarantee persistence of the specified as
 * for the duration of this function (eg. pages being locked within the as
 * will guarantee persistence).
 */
uint_t
as_delete_callback(struct as *as, void *arg)
{
	struct as_callback **prevcb = &as->a_callbacks;
	struct as_callback *cb;
	uint_t rc = AS_CALLBACK_NOTFOUND;

	mutex_enter(&as->a_contents);
	for (cb = as->a_callbacks; cb; prevcb = &cb->ascb_next, cb = *prevcb) {
		if (cb->ascb_arg != arg)
			continue;

		/*
		 * If the events indicate AS_CALLBACK_CALLED, just clear
		 * AS_ALL_EVENT in the events field and wakeup the thread
		 * that may be waiting in as_do_callbacks.  as_do_callbacks
		 * will take care of removing this entry from the list.
		 * In that case, return AS_CALLBACK_DELETE_DEFERRED.  Otherwise
		 * (AS_CALLBACK_CALLED not set), just remove it from the
		 * list, return the memory and return AS_CALLBACK_DELETED.
		 */
		if ((cb->ascb_events & AS_CALLBACK_CALLED) != 0) {
			/* leave AS_CALLBACK_CALLED */
			cb->ascb_events &= ~AS_ALL_EVENT;
			rc = AS_CALLBACK_DELETE_DEFERRED;
			cv_broadcast(&as->a_cv);
		} else {
			*prevcb = cb->ascb_next;
			kmem_free(cb, sizeof (struct as_callback));
			rc = AS_CALLBACK_DELETED;
		}
		break;
	}
	mutex_exit(&as->a_contents);
	return (rc);
}

/*
 * Searches the as callback list for a matching entry.
 * Returns a pointer to the first matching callback, or NULL if
 * nothing is found.
 * This function never sleeps so it is ok to call it with more
 * locks held than the (required) a_contents mutex.
 *
 * See also comment on as_do_callbacks below.
 */
static struct as_callback *
as_find_callback(struct as *as, uint_t events, caddr_t event_addr,
    size_t event_len)
{
	struct as_callback *cb;

	ASSERT(MUTEX_HELD(&as->a_contents));
	for (cb = as->a_callbacks; cb != NULL; cb = cb->ascb_next) {
		/*
		 * If the callback has not already been called, then
		 * check if events or address range pertains.  An event_len
		 * of zero means do an unconditional callback.
		 */
		if (((cb->ascb_events & AS_CALLBACK_CALLED) != 0) ||
		    ((event_len != 0) && (((cb->ascb_events & events) == 0) ||
		    (event_addr + event_len < cb->ascb_saddr) ||
		    (event_addr > (cb->ascb_saddr + cb->ascb_len))))) {
			continue;
		}
		break;
	}
	return (cb);
}

/*
 * Executes a given callback and removes it from the callback list for
 * this address space.
 * This function may sleep so the caller must drop all locks except
 * a_contents before calling this func.
 *
 * See also comments on as_do_callbacks below.
 */
static void
as_execute_callback(struct as *as, struct as_callback *cb,
    uint_t events)
{
	struct as_callback **prevcb;
	void *cb_arg;

	ASSERT(MUTEX_HELD(&as->a_contents) && (cb->ascb_events & events));
	cb->ascb_events |= AS_CALLBACK_CALLED;
	mutex_exit(&as->a_contents);
	(*cb->ascb_func)(as, cb->ascb_arg, events);
	mutex_enter(&as->a_contents);
	/*
	 * the callback function is required to delete the callback
	 * when the callback function determines it is OK for
	 * this thread to continue.  as_delete_callback will clear
	 * the AS_ALL_EVENT in the events field when it is deleted.
	 * If the callback function called as_delete_callback,
	 * events will already be cleared and there will be no blocking.
	 */
	while ((cb->ascb_events & events) != 0) {
		cv_wait(&as->a_cv, &as->a_contents);
	}
	/*
	 * This entry needs to be taken off the list.  Normally, the
	 * callback func itself does that, but unfortunately the list
	 * may have changed while the callback was running because the
	 * a_contents mutex was dropped and someone else other than the
	 * callback func itself could have called as_delete_callback,
	 * so we have to search to find this entry again.  The entry
	 * must have AS_CALLBACK_CALLED, and have the same 'arg'.
	 */
	cb_arg = cb->ascb_arg;
	prevcb = &as->a_callbacks;
	for (cb = as->a_callbacks; cb != NULL;
	    prevcb = &cb->ascb_next, cb = *prevcb) {
		if (((cb->ascb_events & AS_CALLBACK_CALLED) == 0) ||
		    (cb_arg != cb->ascb_arg)) {
			continue;
		}
		*prevcb = cb->ascb_next;
		kmem_free(cb, sizeof (struct as_callback));
		break;
	}
}

/*
 * Check the callback list for a matching event and intersection of
 * address range. If there is a match invoke the callback.  Skip an entry if:
 *    - a callback is already in progress for this entry (AS_CALLBACK_CALLED)
 *    - not an event of interest
 *    - not an address range of interest
 *
 * An event_len of zero indicates a request for an unconditional callback
 * (regardless of event), only the AS_CALLBACK_CALLED is checked.  The
 * a_contents lock must be dropped before a callback, so only one callback
 * can be done before returning.  Return -1 (true) if a callback was
 * executed and removed from the list, else return 0 (false).
 *
 * The logically separate parts, i.e. finding a matching callback and
 * executing a given callback have been separated into two functions
 * so that they can be called with different sets of locks held beyond
 * the always-required a_contents.  as_find_callback does not sleep so
 * it is ok to call it if more locks than a_contents (i.e. the a_lock
 * rwlock) are held.  as_execute_callback on the other hand may sleep
 * so all locks beyond a_contents must be dropped by the caller if one
 * does not want to end up comatose.
 */
static int
as_do_callbacks(struct as *as, uint_t events, caddr_t event_addr,
    size_t event_len)
{
	struct as_callback *cb;

	if ((cb = as_find_callback(as, events, event_addr, event_len))) {
		as_execute_callback(as, cb, events);
		return (-1);
	}
	return (0);
}

/*
 * Search for the segment containing addr. If a segment containing addr
 * exists, that segment is returned.  If no such segment exists, and
 * the list spans addresses greater than addr, then the first segment
 * whose base is greater than addr is returned; otherwise, NULL is
 * returned unless tail is true, in which case the last element of the
 * list is returned.
 *
 * a_seglast is used to cache the last found segment for repeated
 * searches to the same addr (which happens frequently).
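 *
 * A typical lookup pattern looks roughly like this (sketch only; error
 * handling elided):
 *
 *	AS_LOCK_ENTER(as, RW_READER);
 *	seg = as_findseg(as, addr, 0);
 *	if (seg != NULL && addr >= seg->s_base &&
 *	    addr < seg->s_base + seg->s_size)
 *		...		// addr is mapped by seg
 *	AS_LOCK_EXIT(as);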
 */
struct seg *
as_findseg(struct as *as, caddr_t addr, int tail)
{
	struct seg *seg = as->a_seglast;
	avl_index_t where;

	ASSERT(AS_LOCK_HELD(as));

	if (seg != NULL &&
	    seg->s_base <= addr &&
	    addr < seg->s_base + seg->s_size)
		return (seg);

	seg = avl_find(&as->a_segtree, &addr, &where);
	if (seg != NULL)
		return (as->a_seglast = seg);

	seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
	if (seg == NULL && tail)
		seg = avl_last(&as->a_segtree);
	return (as->a_seglast = seg);
}

#ifdef VERIFY_SEGLIST
/*
 * verify that the linked list is coherent
 */
static void
as_verify(struct as *as)
{
	struct seg *seg, *seglast, *p, *n;
	uint_t nsegs = 0;

	if (do_as_verify == 0)
		return;

	seglast = as->a_seglast;

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		ASSERT(seg->s_as == as);
		p = AS_SEGPREV(as, seg);
		n = AS_SEGNEXT(as, seg);
		ASSERT(p == NULL || p->s_as == as);
		ASSERT(p == NULL || p->s_base < seg->s_base);
		ASSERT(n == NULL || n->s_base > seg->s_base);
		ASSERT(n != NULL || seg == avl_last(&as->a_segtree));
		if (seg == seglast)
			seglast = NULL;
		nsegs++;
	}
	ASSERT(seglast == NULL);
	ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
}
#endif /* VERIFY_SEGLIST */

/*
 * Add a new segment to the address space. The avl_find()
 * may be expensive so we attempt to use the last segment accessed
 * in as_gap() as an insertion point.
 */
int
as_addseg(struct as *as, struct seg *newseg)
{
	struct seg *seg;
	caddr_t addr;
	caddr_t eaddr;
	avl_index_t where;

	ASSERT(AS_WRITE_HELD(as));

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (as->a_lastgaphl != NULL) {
		struct seg *hseg = NULL;
		struct seg *lseg = NULL;

		if (as->a_lastgaphl->s_base > newseg->s_base) {
			hseg = as->a_lastgaphl;
			lseg = AVL_PREV(&as->a_segtree, hseg);
		} else {
			lseg = as->a_lastgaphl;
			hseg = AVL_NEXT(&as->a_segtree, lseg);
		}

		if (hseg && lseg && lseg->s_base < newseg->s_base &&
		    hseg->s_base > newseg->s_base) {
			avl_insert_here(&as->a_segtree, newseg, lseg,
			    AVL_AFTER);
			as->a_lastgaphl = NULL;
			as->a_seglast = newseg;
			return (0);
		}
		as->a_lastgaphl = NULL;
	}

	addr = newseg->s_base;
	eaddr = addr + newseg->s_size;
again:

	seg = avl_find(&as->a_segtree, &addr, &where);

	if (seg == NULL)
		seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);

	if (seg == NULL)
		seg = avl_last(&as->a_segtree);

	if (seg != NULL) {
		caddr_t base = seg->s_base;

		/*
		 * If top of seg is below the requested address, then
		 * the insertion point is at the end of the linked list,
		 * and seg points to the tail of the list.  Otherwise,
		 * the insertion point is immediately before seg.
		 */
		if (base + seg->s_size > addr) {
			if (addr >= base || eaddr > base) {
#ifdef __sparc
				extern struct seg_ops segnf_ops;

				/*
				 * no-fault segs must disappear if overlaid.
				 * XXX need new segment type so
				 * we don't have to check s_ops
				 */
				if (seg->s_ops == &segnf_ops) {
					seg_unmap(seg);
					goto again;
				}
#endif
				return (-1);	/* overlapping segment */
			}
		}
	}
	as->a_seglast = newseg;
	avl_insert(&as->a_segtree, newseg, where);

#ifdef VERIFY_SEGLIST
	as_verify(as);
#endif
	return (0);
}

struct seg *
as_removeseg(struct as *as, struct seg *seg)
{
	avl_tree_t *t;

	ASSERT(AS_WRITE_HELD(as));

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (seg == NULL)
		return (NULL);

	t = &as->a_segtree;
	if (as->a_seglast == seg)
		as->a_seglast = NULL;
	as->a_lastgaphl = NULL;

	/*
	 * if this segment is at an address higher than
	 * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
	 */
	if (as->a_lastgap &&
	    (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
		as->a_lastgap = AVL_NEXT(t, seg);

	/*
	 * remove the segment from the seg tree
	 */
	avl_remove(t, seg);

#ifdef VERIFY_SEGLIST
	as_verify(as);
#endif
	return (seg);
}

/*
 * Find a segment containing addr.
 */
struct seg *
as_segat(struct as *as, caddr_t addr)
{
	struct seg *seg = as->a_seglast;

	ASSERT(AS_LOCK_HELD(as));

	if (seg != NULL && seg->s_base <= addr &&
	    addr < seg->s_base + seg->s_size)
		return (seg);

	seg = avl_find(&as->a_segtree, &addr, NULL);
	return (seg);
}

/*
 * Serialize all searches for holes in an address space to
 * prevent two or more threads from allocating the same virtual
 * address range.  The address space must not be "read/write"
 * locked by the caller since we may block.
 */
void
as_rangelock(struct as *as)
{
	mutex_enter(&as->a_contents);
	while (AS_ISCLAIMGAP(as))
		cv_wait(&as->a_cv, &as->a_contents);
	AS_SETCLAIMGAP(as);
	mutex_exit(&as->a_contents);
}

/*
 * Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads.
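 *
 * The usual pairing with as_rangelock() looks roughly like this (sketch;
 * mmap-style callers differ in detail, and crargs is hypothetical here):
 *
 *	as_rangelock(as);		// serialize hole selection
 *	if (as_gap(as, len, &base, &gaplen, AH_LO, NULL) == 0)
 *		error = as_map(as, base, len, segvn_create, &crargs);
 *	as_rangeunlock(as);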
 */
void
as_rangeunlock(struct as *as)
{
	mutex_enter(&as->a_contents);
	AS_CLRCLAIMGAP(as);
	cv_signal(&as->a_cv);
	mutex_exit(&as->a_contents);
}

/*
 * compare segments (or just an address) by segment address range
 */
static int
as_segcompar(const void *x, const void *y)
{
	struct seg *a = (struct seg *)x;
	struct seg *b = (struct seg *)y;

	if (a->s_base < b->s_base)
		return (-1);
	if (a->s_base >= b->s_base + b->s_size)
		return (1);
	return (0);
}


void
as_avlinit(struct as *as)
{
	avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg),
	    offsetof(struct seg, s_tree));
	avl_create(&as->a_wpage, wp_compare, sizeof (struct watched_page),
	    offsetof(struct watched_page, wp_link));
}

/*ARGSUSED*/
static int
as_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct as *as = buf;

	mutex_init(&as->a_contents, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&as->a_cv, NULL, CV_DEFAULT, NULL);
	rw_init(&as->a_lock, NULL, RW_DEFAULT, NULL);
	as_avlinit(as);
	return (0);
}

/*ARGSUSED1*/
static void
as_destructor(void *buf, void *cdrarg)
{
	struct as *as = buf;

	avl_destroy(&as->a_segtree);
	mutex_destroy(&as->a_contents);
	cv_destroy(&as->a_cv);
	rw_destroy(&as->a_lock);
}

void
as_init(void)
{
	as_cache = kmem_cache_create("as_cache", sizeof (struct as), 0,
	    as_constructor, as_destructor, NULL, NULL, NULL, 0);
}

/*
 * Allocate and initialize an address space data structure.
 * We call hat_alloc to allow any machine dependent
 * information in the hat structure to be initialized.
 */
struct as *
as_alloc(void)
{
	struct as *as;

	as = kmem_cache_alloc(as_cache, KM_SLEEP);

	as->a_flags = 0;
	as->a_vbits = 0;
	as->a_hrm = NULL;
	as->a_seglast = NULL;
	as->a_size = 0;
	as->a_resvsize = 0;
	as->a_updatedir = 0;
	gethrestime(&as->a_updatetime);
	as->a_objectdir = NULL;
	as->a_sizedir = 0;
	as->a_userlimit = (caddr_t)USERLIMIT;
	as->a_lastgap = NULL;
	as->a_lastgaphl = NULL;
	as->a_callbacks = NULL;
	as->a_proc = NULL;

	AS_LOCK_ENTER(as, RW_WRITER);
	as->a_hat = hat_alloc(as);	/* create hat for default system mmu */
	AS_LOCK_EXIT(as);

	return (as);
}

/*
 * Free an address space data structure.
 * Need to free the hat first and then
 * all the segments on this as and finally
 * the space for the as struct itself.
 */
void
as_free(struct as *as)
{
	struct hat *hat = as->a_hat;
	struct seg *seg, *next;
	boolean_t free_started = B_FALSE;

top:
	/*
	 * Invoke ALL callbacks. as_do_callbacks will do one callback
	 * per call, and not return (-1) until the callback has completed.
	 * When as_do_callbacks returns zero, all callbacks have completed.
	 */
	mutex_enter(&as->a_contents);
	while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
		;

	mutex_exit(&as->a_contents);
	AS_LOCK_ENTER(as, RW_WRITER);

	if (!free_started) {
		free_started = B_TRUE;
		hat_free_start(hat);
	}
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
		int err;

		next = AS_SEGNEXT(as, seg);
retry:
		err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
		if (err == EAGAIN) {
			mutex_enter(&as->a_contents);
			if (as->a_callbacks) {
				AS_LOCK_EXIT(as);
			} else if (!AS_ISNOUNMAPWAIT(as)) {
				/*
				 * Memory is currently locked. Wait for a
				 * cv_signal that it has been unlocked, then
				 * try the operation again.
				 */
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			} else {
				/*
				 * We may have raced with
				 * segvn_reclaim()/segspt_reclaim(). In this
				 * case clean nounmapwait flag and retry since
				 * softlockcnt in this segment may be already
				 * 0.  We don't drop as writer lock so our
				 * number of retries without sleeping should
				 * be very small. See segvn_reclaim() for
				 * more comments.
				 */
				AS_CLRNOUNMAPWAIT(as);
				mutex_exit(&as->a_contents);
				goto retry;
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else {
			/*
			 * We do not expect any other error return at this
			 * time. This is similar to an ASSERT in seg_unmap()
			 */
			ASSERT(err == 0);
		}
	}
	hat_free_end(hat);
	AS_LOCK_EXIT(as);

	/* /proc stuff */
	ASSERT(avl_numnodes(&as->a_wpage) == 0);
	if (as->a_objectdir) {
		kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
		as->a_objectdir = NULL;
		as->a_sizedir = 0;
	}

	/*
	 * Free the struct as back to kmem.  Assert it has no segments.
	 */
	ASSERT(avl_numnodes(&as->a_segtree) == 0);
	kmem_cache_free(as_cache, as);
}

int
as_dup(struct as *as, struct proc *forkedproc)
{
	struct as *newas;
	struct seg *seg, *newseg;
	size_t purgesize = 0;
	int error;

	AS_LOCK_ENTER(as, RW_WRITER);
	as_clearwatch(as);
	newas = as_alloc();
	newas->a_userlimit = as->a_userlimit;
	newas->a_proc = forkedproc;

	AS_LOCK_ENTER(newas, RW_WRITER);

	(void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {

		if (seg->s_flags & S_PURGE) {
			purgesize += seg->s_size;
			continue;
		}

		newseg = seg_alloc(newas, seg->s_base, seg->s_size);
		if (newseg == NULL) {
			AS_LOCK_EXIT(newas);
			as_setwatch(as);
			AS_LOCK_EXIT(as);
			as_free(newas);
			return (-1);
		}
		if ((error = SEGOP_DUP(seg, newseg)) != 0) {
			/*
			 * We call seg_free() on the new seg
			 * because the segment is not set up
			 * completely; i.e. it has no ops.
			 */
			as_setwatch(as);
			AS_LOCK_EXIT(as);
			seg_free(newseg);
			AS_LOCK_EXIT(newas);
			as_free(newas);
			return (error);
		}
		newas->a_size += seg->s_size;
	}
	newas->a_resvsize = as->a_resvsize - purgesize;

	error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);

	AS_LOCK_EXIT(newas);

	as_setwatch(as);
	AS_LOCK_EXIT(as);
	if (error != 0) {
		as_free(newas);
		return (error);
	}
	forkedproc->p_as = newas;
	return (0);
}

/*
 * Handle a ``fault'' at addr for size bytes.
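 *
 * The range is first rounded out to page boundaries.  For example (sketch,
 * assuming 4K pages), addr = 0x12f00 and size = 0x200 give
 *
 *	raddr = addr & PAGEMASK                                 = 0x12000
 *	rsize = ((addr + size + PAGEOFFSET) & PAGEMASK) - raddr = 0x2000
 *
 * i.e. the two pages covering [addr, addr + size).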
 */
faultcode_t
as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
    enum fault_type type, enum seg_rw rw)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	size_t ssize;
	faultcode_t res = 0;
	caddr_t addrsav;
	struct seg *segsav;
	int as_lock_held;
	klwp_t *lwp = ttolwp(curthread);



retry:
	/*
	 * Indicate that the lwp is not to be stopped while waiting for a
	 * pagefault.  This is to avoid deadlock while debugging a process
	 * via /proc over NFS (in particular).
	 */
	if (lwp != NULL)
		lwp->lwp_nostop++;

	/*
	 * same length must be used when we softlock and softunlock.  We
	 * don't support softunlocking lengths less than the original length
	 * when there is largepage support.  See seg_dev.c for more
	 * comments.
	 */
	switch (type) {

	case F_SOFTLOCK:
		CPU_STATS_ADD_K(vm, softlock, 1);
		break;

	case F_SOFTUNLOCK:
		break;

	case F_PROT:
		CPU_STATS_ADD_K(vm, prot_fault, 1);
		break;

	case F_INVAL:
		CPU_STATS_ENTER_K();
		CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
		if (as == &kas)
			CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
		CPU_STATS_EXIT_K();
		break;
	}

	/* Kernel probe */
	TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
	    tnf_opaque, address, addr,
	    tnf_fault_type, fault_type, type,
	    tnf_seg_access, access, rw);

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	/*
	 * XXX -- Don't grab the as lock for segkmap. We should grab it for
	 * correctness, but then we could be stuck holding this lock for
	 * a LONG time if the fault needs to be resolved on a slow
	 * filesystem, and then no-one will be able to exec new commands,
	 * as exec'ing requires the write lock on the as.
	 */
	if (as == &kas && segkmap && segkmap->s_base <= raddr &&
	    raddr + size < segkmap->s_base + segkmap->s_size) {
		seg = segkmap;
		as_lock_held = 0;
	} else {
		AS_LOCK_ENTER(as, RW_READER);

		seg = as_segat(as, raddr);
		if (seg == NULL) {
			AS_LOCK_EXIT(as);
			if (lwp != NULL)
				lwp->lwp_nostop--;
			return (FC_NOMAP);
		}

		as_lock_held = 1;
	}

	addrsav = raddr;
	segsav = seg;

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		if (raddr + rsize > seg->s_base + seg->s_size)
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);
		if (res != 0)
			break;
	}

	/*
	 * If we were SOFTLOCKing and encountered a failure,
	 * we must SOFTUNLOCK the range we already did. (Maybe we
	 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
	 * right here...)
	 */
	if (res != 0 && type == F_SOFTLOCK) {
		for (seg = segsav; addrsav < raddr; addrsav += ssize) {
			if (addrsav >= seg->s_base + seg->s_size)
				seg = AS_SEGNEXT(as, seg);
			ASSERT(seg != NULL);
			/*
			 * Now call the fault routine again to perform the
			 * unlock using S_OTHER instead of the rw variable
			 * since we never got a chance to touch the pages.
			 */
			if (raddr > seg->s_base + seg->s_size)
				ssize = seg->s_base + seg->s_size - addrsav;
			else
				ssize = raddr - addrsav;
			(void) SEGOP_FAULT(hat, seg, addrsav, ssize,
			    F_SOFTUNLOCK, S_OTHER);
		}
	}
	if (as_lock_held)
		AS_LOCK_EXIT(as);
	if (lwp != NULL)
		lwp->lwp_nostop--;

	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * it means that we should retry the fault.  Let's wait
	 * a bit also to let the deadlock-causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}



/*
 * Asynchronous ``fault'' at addr for size bytes.
 */
faultcode_t
as_faulta(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	faultcode_t res = 0;
	klwp_t *lwp = ttolwp(curthread);

retry:
	/*
	 * Indicate that the lwp is not to be stopped while waiting
	 * for a pagefault.  This is to avoid deadlock while debugging
	 * a process via /proc over NFS (in particular).
	 */
	if (lwp != NULL)
		lwp->lwp_nostop++;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	AS_LOCK_ENTER(as, RW_READER);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		AS_LOCK_EXIT(as);
		if (lwp != NULL)
			lwp->lwp_nostop--;
		return (FC_NOMAP);
	}

	for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		res = SEGOP_FAULTA(seg, raddr);
		if (res != 0)
			break;
	}
	AS_LOCK_EXIT(as);
	if (lwp != NULL)
		lwp->lwp_nostop--;
	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * it means that we should retry the fault.  Let's wait
	 * a bit also to let the deadlock-causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}

/*
 * Set the virtual mapping for the interval from [addr : addr + size)
 * in address space `as' to have the specified protection.
 * It is ok for the range to cross over several segments,
 * as long as they are contiguous.
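 *
 * Sketch of a typical caller (illustrative; roughly what an mprotect-style
 * path does, error handling elided):
 *
 *	error = as_setprot(p->p_as, (caddr_t)addr, len,
 *	    PROT_READ | PROT_WRITE | PROT_USER);
 *	if (error == ENOMEM)
 *		...		// part of the range was not mapped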
 */
int
as_setprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	struct as_callback *cb;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0, writer = 0;
	caddr_t saveraddr;
	size_t saversize;

setprot_top:
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	saveraddr = raddr;
	saversize = rsize;

	/*
	 * Normally we only lock the as as a reader. But
	 * if due to setprot the segment driver needs to split
	 * a segment it will return IE_RETRY. Therefore we re-acquire
	 * the as lock as a writer so the segment driver can change
	 * the seg list. Also the segment driver will return IE_RETRY
	 * after it has changed the segment list so we therefore keep
	 * locking as a writer. Since these operations should be rare
	 * we want to only lock as a writer when necessary.
	 */
	if (writer || avl_numnodes(&as->a_wpage) != 0) {
		AS_LOCK_ENTER(as, RW_WRITER);
	} else {
		AS_LOCK_ENTER(as, RW_READER);
	}

	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as);
		return (ENOMEM);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;
retry:
		error = SEGOP_SETPROT(seg, raddr, ssize, prot);

		if (error == IE_NOMEM) {
			error = EAGAIN;
			break;
		}

		if (error == IE_RETRY) {
			AS_LOCK_EXIT(as);
			writer = 1;
			goto setprot_top;
		}

		if (error == EAGAIN) {
			/*
			 * Make sure we have a_lock as writer.
			 */
			if (writer == 0) {
				AS_LOCK_EXIT(as);
				writer = 1;
				goto setprot_top;
			}

			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range pertains
			 *	the callback is invoked (unless it already is in
			 *	progress).
			 *	The a_contents lock must be dropped
			 *	before the callback, so only one callback can
			 *	be done at a time. Go to the top and do more
			 *	until zero is returned. If zero is returned,
			 *	either there were no callbacks for this event
			 *	or they were already in progress.
			 */
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_SETPROT_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as);
				as_execute_callback(as, cb, AS_SETPROT_EVENT);
			} else if (!AS_ISNOUNMAPWAIT(as)) {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			} else {
				/*
				 * We may have raced with
				 * segvn_reclaim()/segspt_reclaim(). In this
				 * case clean nounmapwait flag and retry since
				 * softlockcnt in this segment may be already
				 * 0.  We don't drop as writer lock so our
				 * number of retries without sleeping should
				 * be very small. See segvn_reclaim() for
				 * more comments.
				 */
				AS_CLRNOUNMAPWAIT(as);
				mutex_exit(&as->a_contents);
				goto retry;
			}
			mutex_exit(&as->a_contents);
			goto setprot_top;
		} else if (error != 0)
			break;
	}
	if (error != 0) {
		as_setwatch(as);
	} else {
		as_setwatchprot(as, saveraddr, saversize, prot);
	}
	AS_LOCK_EXIT(as);
	return (error);
}

/*
 * Check to make sure that the interval [addr, addr + size)
 * in address space `as' has at least the specified protection.
 * It is ok for the range to cross over several segments, as long
 * as they are contiguous.
 */
int
as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	/*
	 * This is ugly as sin...
	 * Normally, we only acquire the address space readers lock.
	 * However, if the address space has watchpoints present,
	 * we must acquire the writer lock on the address space for
	 * the benefit of as_clearwatchprot() and as_setwatchprot().
	 */
	if (avl_numnodes(&as->a_wpage) != 0)
		AS_LOCK_ENTER(as, RW_WRITER);
	else
		AS_LOCK_ENTER(as, RW_READER);
	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as);
		return (ENOMEM);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
		if (error != 0)
			break;
	}
	as_setwatch(as);
	AS_LOCK_EXIT(as);
	return (error);
}

int
as_unmap(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg, *seg_next;
	struct as_callback *cb;
	caddr_t raddr, eaddr;
	size_t ssize, rsize = 0;
	int err;

top:
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
	    (uintptr_t)PAGEMASK);

	AS_LOCK_ENTER(as, RW_WRITER);

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	/*
	 * Use as_findseg to find the first segment in the range, then
	 * step through the segments in order, following s_next.
	 */
	as_clearwatchprot(as, raddr, eaddr - raddr);

	for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
		if (eaddr <= seg->s_base)
			break;		/* eaddr was in a gap; all done */

		/* this is implied by the test above */
		ASSERT(raddr < eaddr);

		if (raddr < seg->s_base)
			raddr = seg->s_base;	/* raddr was in a gap */

		if (eaddr > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = eaddr - raddr;

		/*
		 * Save next segment pointer since seg can be
		 * destroyed during the segment unmap operation.
		 */
		seg_next = AS_SEGNEXT(as, seg);

		/*
		 * We didn't count /dev/null mappings, so ignore them here.
		 * We'll handle MAP_NORESERVE cases in segvn_unmap(). (Again,
		 * we have to do this check here while we have seg.)
		 */
		rsize = 0;
		if (!SEG_IS_DEVNULL_MAPPING(seg) &&
		    !SEG_IS_PARTIAL_RESV(seg))
			rsize = ssize;

retry:
		err = SEGOP_UNMAP(seg, raddr, ssize);
		if (err == EAGAIN) {
			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range pertains
			 *	the callback is invoked (unless it already is in
			 *	progress). The a_contents lock must be dropped
			 *	before the callback, so only one callback can
			 *	be done at a time. Go to the top and do more
			 *	until zero is returned. If zero is returned,
			 *	either there were no callbacks for this event
			 *	or they were already in progress.
			 */
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_UNMAP_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as);
				as_execute_callback(as, cb, AS_UNMAP_EVENT);
			} else if (!AS_ISNOUNMAPWAIT(as)) {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			} else {
				/*
				 * We may have raced with
				 * segvn_reclaim()/segspt_reclaim(). In this
				 * case clean nounmapwait flag and retry since
				 * softlockcnt in this segment may be already
				 * 0.  We don't drop as writer lock so our
				 * number of retries without sleeping should
				 * be very small. See segvn_reclaim() for
				 * more comments.
				 */
				AS_CLRNOUNMAPWAIT(as);
				mutex_exit(&as->a_contents);
				goto retry;
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else if (err == IE_RETRY) {
			AS_LOCK_EXIT(as);
			goto top;
		} else if (err) {
			as_setwatch(as);
			AS_LOCK_EXIT(as);
			return (-1);
		}

		as->a_size -= ssize;
		if (rsize)
			as->a_resvsize -= rsize;
		raddr += ssize;
	}
	AS_LOCK_EXIT(as);
	return (0);
}

static int
as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t szc;
	uint_t nszc;
	int error;
	caddr_t a;
	caddr_t eaddr;
	size_t segsize;
	struct seg *seg;
	size_t pgsz;
	int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
	uint_t save_szcvec;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
	if (!do_off) {
		vn_a->offset = 0;
	}

	if (szcvec <= 1) {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			return (ENOMEM);
		}
		vn_a->szc = 0;
		error = (*crfp)(seg, vn_a);
		if (error != 0) {
			seg_free(seg);
		} else {
			as->a_size += size;
			as->a_resvsize += size;
		}
		return (error);
	}

	eaddr = addr + size;
	save_szcvec = szcvec;
	szcvec >>= 1;
	szc = 0;
	nszc = 0;
	while (szcvec) {
		if ((szcvec & 0x1) == 0) {
			nszc++;
			szcvec >>= 1;
			continue;
		}
		nszc++;
		pgsz = page_get_pagesize(nszc);
		a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		if (a != addr) {
			ASSERT(a < eaddr);
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			as->a_size += segsize;
			as->a_resvsize += segsize;
			*segcreated = 1;
			if (do_off) {
				vn_a->offset += segsize;
			}
			addr = a;
		}
		szc = nszc;
		szcvec >>= 1;
	}

	ASSERT(addr < eaddr);
	szcvec = save_szcvec | 1; /* add 8K pages */
	while (szcvec) {
		a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		ASSERT(a >= addr);
		if (a != addr) {
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			as->a_size += segsize;
			as->a_resvsize += segsize;
			*segcreated = 1;
			if (do_off) {
				vn_a->offset += segsize;
			}
			addr = a;
		}
		szcvec &= ~(1 << szc);
		if (szcvec) {
			szc = highbit(szcvec) - 1;
			pgsz = page_get_pagesize(szc);
		}
	}
	ASSERT(addr == eaddr);

	return (0);
}

static int
as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA);
	int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
	uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
	    type, 0);
	int error;
	struct seg *seg;
	struct vattr va;
	u_offset_t eoff;
	size_t save_size = 0;
	extern size_t textrepl_size_thresh;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp != NULL);
	ASSERT(vn_a->amp == NULL);

again:
	if (szcvec <= 1) {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			return (ENOMEM);
		}
		vn_a->szc = 0;
		error = (*crfp)(seg, vn_a);
		if (error != 0) {
			seg_free(seg);
		} else {
			as->a_size += size;
			as->a_resvsize += size;
		}
		return (error);
	}

	va.va_mask = AT_SIZE;
	if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred, NULL) != 0) {
		szcvec = 0;
		goto again;
	}
	eoff = vn_a->offset & PAGEMASK;
	if (eoff >= va.va_size) {
		szcvec = 0;
		goto again;
	}
	eoff += size;
	if (btopr(va.va_size) < btopr(eoff)) {
		save_size = size;
		size = va.va_size - (vn_a->offset & PAGEMASK);
		size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
		szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
		    type, 0);
		if (szcvec <= 1) {
			size = save_size;
			goto again;
		}
	}

	if (size > textrepl_size_thresh) {
		vn_a->flags |= _MAP_TEXTREPL;
	}
	error = as_map_segvn_segs(as, addr, size, szcvec, crfp, vn_a,
	    segcreated);
	if (error != 0) {
		return (error);
	}
	if (save_size) {
		addr += size;
		size = save_size - size;
		szcvec = 0;
		goto again;
	}
	return (0);
}

/*
 * as_map_ansegs: shared or private anonymous memory.  Note that the flags
 * passed to map_pgszcvec cannot be MAP_INITDATA, for anon.
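 *
 * szcvec here is a bit vector of candidate page size codes: bit n set means
 * page size code n may be used.  As a worked example (sizes are platform
 * dependent), szcvec = 0x5 allows the base page size (szc 0) and the size
 * with code 2, and as_map_segvn_segs() carves the range so each piece is
 * naturally aligned for the largest size it can use.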
 */
static int
as_map_ansegs(struct as *as, caddr_t addr, size_t size,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	uint_t szcvec;
	uchar_t type;

	ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE);
	if (vn_a->type == MAP_SHARED) {
		type = MAPPGSZC_SHM;
	} else if (vn_a->type == MAP_PRIVATE) {
		if (vn_a->szc == AS_MAP_HEAP) {
			type = MAPPGSZC_HEAP;
		} else if (vn_a->szc == AS_MAP_STACK) {
			type = MAPPGSZC_STACK;
		} else {
			type = MAPPGSZC_PRIVM;
		}
	}
	szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ?
	    (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE),
	    (vn_a->flags & MAP_TEXT), type, 0);
	ASSERT(AS_WRITE_HELD(as));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp == NULL);

	return (as_map_segvn_segs(as, addr, size, szcvec,
	    crfp, vn_a, segcreated));
}

int
as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
{
	AS_LOCK_ENTER(as, RW_WRITER);
	return (as_map_locked(as, addr, size, crfp, argsp));
}

int
as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
    void *argsp)
{
	struct seg *seg = NULL;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error;
	int unmap = 0;
	/*
	 * The use of a_proc is preferred to handle the case where curproc is
	 * a door_call server and is allocating memory in the client's (a_proc)
	 * address space.
	 * When creating a shared memory segment a_proc will be NULL so we
	 * fall back to curproc in that case.
	 */
	struct proc *p = (as->a_proc == NULL) ? curproc : as->a_proc;
	struct segvn_crargs crargs;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	/*
	 * check for wrap around
	 */
	if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
		AS_LOCK_EXIT(as);
		return (ENOMEM);
	}

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
		AS_LOCK_EXIT(as);

		(void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
		crargs = *(struct segvn_crargs *)argsp;
		error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
		if (error != 0) {
			AS_LOCK_EXIT(as);
			if (unmap) {
				(void) as_unmap(as, addr, size);
			}
			return (error);
		}
	} else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
		crargs = *(struct segvn_crargs *)argsp;
		error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
		if (error != 0) {
			AS_LOCK_EXIT(as);
			if (unmap) {
				(void) as_unmap(as, addr, size);
			}
			return (error);
		}
	} else {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			AS_LOCK_EXIT(as);
			return (ENOMEM);
		}

		error = (*crfp)(seg, argsp);
		if (error != 0) {
			seg_free(seg);
			AS_LOCK_EXIT(as);
			return (error);
		}
		/*
		 * Add size now so as_unmap will work if as_ctl fails.
		 */
		as->a_size += rsize;
		as->a_resvsize += rsize;
	}

	as_setwatch(as);

	/*
	 * If the address space is locked,
	 * establish memory locks for the new segment.
	 */
	mutex_enter(&as->a_contents);
	if (AS_ISPGLCK(as)) {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as);
		error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
		if (error != 0)
			(void) as_unmap(as, addr, size);
	} else {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as);
	}
	return (error);
}


/*
 * Delete all segments in the address space marked with S_PURGE.
 * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
 * These segments are deleted as a first step before calls to as_gap(), so
 * that they don't affect mmap() or shmat().
 */
void
as_purge(struct as *as)
{
	struct seg *seg;
	struct seg *next_seg;

	/*
	 * the setting of NEEDSPURGE is protected by as_rangelock(), so
	 * no need to grab a_contents mutex for this check
	 */
	if ((as->a_flags & AS_NEEDSPURGE) == 0)
		return;

	AS_LOCK_ENTER(as, RW_WRITER);
	next_seg = NULL;
	seg = AS_SEGFIRST(as);
	while (seg != NULL) {
		next_seg = AS_SEGNEXT(as, seg);
		if (seg->s_flags & S_PURGE)
			SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
		seg = next_seg;
	}
	AS_LOCK_EXIT(as);

	mutex_enter(&as->a_contents);
	as->a_flags &= ~AS_NEEDSPURGE;
	mutex_exit(&as->a_contents);
}

/*
 * Find a hole within [*basep, *basep + *lenp), which contains a mappable
 * range of addresses at least "minlen" long, where the base of the range is
 * at "off" phase from an "align" boundary and there is space for a
 * "redzone"-sized redzone on either side of the range.  Thus,
 * if align was 4M and off was 16k, the user wants a hole which will start
 * 16k into a 4M page.
 *
 * If flags specifies AH_HI, the hole will have the highest possible address
 * in the range.  We use the as->a_lastgap field to figure out where to
 * start looking for a gap.
 *
 * Otherwise, the gap will have the lowest possible address.
 *
 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
 *
 * If an adequate hole is found, *basep and *lenp are set to reflect the part of
 * the hole that is within range, and 0 is returned.  On failure, -1 is returned.
 *
 * NOTE: This routine is not correct when base+len overflows caddr_t.
 */
int
as_gap_aligned(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp,
    uint_t flags, caddr_t addr, size_t align, size_t redzone, size_t off)
{
	caddr_t lobound = *basep;
	caddr_t hibound = lobound + *lenp;
	struct seg *lseg, *hseg;
	caddr_t lo, hi;
	int forward;
	caddr_t save_base;
	size_t save_len;
	size_t save_minlen;
	size_t save_redzone;
	int fast_path = 1;

	save_base = *basep;
	save_len = *lenp;
	save_minlen = minlen;
	save_redzone = redzone;

	/*
	 * For the first pass/fast_path, just add align and redzone into
	 * minlen since if we get an allocation, we can guarantee that it
	 * will fit the alignment and redzone requested.
	 * This increases the chance that hibound will be adjusted to
	 * a_lastgap->s_base which will likely allow us to find an
	 * acceptable hole in the address space quicker.
	 * If we can't find a hole with this fast_path, then we look for
	 * smaller holes in which the alignment and offset may allow
	 * the allocation to fit.
	 */
	minlen += align;
	minlen += 2 * redzone;
	redzone = 0;

	AS_LOCK_ENTER(as, RW_READER);
	if (AS_SEGFIRST(as) == NULL) {
		if (valid_va_range_aligned(basep, lenp, minlen, flags & AH_DIR,
		    align, redzone, off)) {
			AS_LOCK_EXIT(as);
			return (0);
		} else {
			AS_LOCK_EXIT(as);
			*basep = save_base;
			*lenp = save_len;
			return (-1);
		}
	}

retry:
	/*
	 * Set up to iterate over all the inter-segment holes in the given
	 * direction.  lseg is NULL for the lowest-addressed hole and hseg is
	 * NULL for the highest-addressed hole.  If moving backwards, we reset
	 * hseg to denote the highest-addressed segment.
	 */
	forward = (flags & AH_DIR) == AH_LO;
	if (forward) {
		hseg = as_findseg(as, lobound, 1);
		lseg = AS_SEGPREV(as, hseg);
	} else {

		/*
		 * If allocating at least as much as the last allocation,
		 * use a_lastgap's base as a better estimate of hibound.
		 */
		if (as->a_lastgap &&
		    minlen >= as->a_lastgap->s_size &&
		    hibound >= as->a_lastgap->s_base)
			hibound = as->a_lastgap->s_base;

		hseg = as_findseg(as, hibound, 1);
		if (hseg->s_base + hseg->s_size < hibound) {
			lseg = hseg;
			hseg = NULL;
		} else {
			lseg = AS_SEGPREV(as, hseg);
		}
	}

	for (;;) {
		/*
		 * Set lo and hi to the hole's boundaries.  (We should really
		 * use MAXADDR in place of hibound in the expression below,
		 * but can't express it easily; using hibound in its place is
		 * harmless.)
		 */
		lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
		hi = (hseg == NULL) ? hibound : hseg->s_base;
		/*
		 * If the iteration has moved past the interval from lobound
		 * to hibound it's pointless to continue.
		 */
		if ((forward && lo > hibound) || (!forward && hi < lobound))
			break;
		else if (lo > hibound || hi < lobound)
			goto cont;
		/*
		 * Candidate hole lies at least partially within the allowable
		 * range.  Restrict it to fall completely within that range,
		 * i.e., to [max(lo, lobound), min(hi, hibound)].
		 */
		if (lo < lobound)
			lo = lobound;
		if (hi > hibound)
			hi = hibound;
		/*
		 * Verify that the candidate hole is big enough and meets
		 * hardware constraints.  If the hole is too small, no need
		 * to do the further checks since they will fail.
		 */
		*basep = lo;
		*lenp = hi - lo;
		if (*lenp >= minlen && valid_va_range_aligned(basep, lenp,
		    minlen, forward ? AH_LO : AH_HI, align, redzone, off) &&
		    ((flags & AH_CONTAIN) == 0 ||
		    (*basep <= addr && *basep + *lenp > addr))) {
			if (!forward)
				as->a_lastgap = hseg;
			if (hseg != NULL)
				as->a_lastgaphl = hseg;
			else
				as->a_lastgaphl = lseg;
			AS_LOCK_EXIT(as);
			return (0);
		}
	cont:
		/*
		 * Move to the next hole.
1951 */ 1952 if (forward) { 1953 lseg = hseg; 1954 if (lseg == NULL) 1955 break; 1956 hseg = AS_SEGNEXT(as, hseg); 1957 } else { 1958 hseg = lseg; 1959 if (hseg == NULL) 1960 break; 1961 lseg = AS_SEGPREV(as, lseg); 1962 } 1963 } 1964 if (fast_path && (align != 0 || save_redzone != 0)) { 1965 fast_path = 0; 1966 minlen = save_minlen; 1967 redzone = save_redzone; 1968 goto retry; 1969 } 1970 *basep = save_base; 1971 *lenp = save_len; 1972 AS_LOCK_EXIT(as); 1973 return (-1); 1974 } 1975 1976 /* 1977 * Find a hole of at least size minlen within [*basep, *basep + *lenp). 1978 * 1979 * If flags specifies AH_HI, the hole will have the highest possible address 1980 * in the range. We use the as->a_lastgap field to figure out where to 1981 * start looking for a gap. 1982 * 1983 * Otherwise, the gap will have the lowest possible address. 1984 * 1985 * If flags specifies AH_CONTAIN, the hole will contain the address addr. 1986 * 1987 * If an adequate hole is found, base and len are set to reflect the part of 1988 * the hole that is within range, and 0 is returned, otherwise, 1989 * -1 is returned. 1990 * 1991 * NOTE: This routine is not correct when base+len overflows caddr_t. 1992 */ 1993 int 1994 as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags, 1995 caddr_t addr) 1996 { 1997 1998 return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0)); 1999 } 2000 2001 /* 2002 * Return the next range within [base, base + len) that is backed 2003 * with "real memory". Skip holes and non-seg_vn segments. 2004 * We're lazy and only return one segment at a time. 2005 */ 2006 int 2007 as_memory(struct as *as, caddr_t *basep, size_t *lenp) 2008 { 2009 extern struct seg_ops segspt_shmops; /* needs a header file */ 2010 struct seg *seg; 2011 caddr_t addr, eaddr; 2012 caddr_t segend; 2013 2014 AS_LOCK_ENTER(as, RW_READER); 2015 2016 addr = *basep; 2017 eaddr = addr + *lenp; 2018 2019 seg = as_findseg(as, addr, 0); 2020 if (seg != NULL) 2021 addr = MAX(seg->s_base, addr); 2022 2023 for (;;) { 2024 if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) { 2025 AS_LOCK_EXIT(as); 2026 return (EINVAL); 2027 } 2028 2029 if (seg->s_ops == &segvn_ops) { 2030 segend = seg->s_base + seg->s_size; 2031 break; 2032 } 2033 2034 /* 2035 * We do ISM by looking into the private data 2036 * to determine the real size of the segment. 2037 */ 2038 if (seg->s_ops == &segspt_shmops) { 2039 segend = seg->s_base + spt_realsize(seg); 2040 if (addr < segend) 2041 break; 2042 } 2043 2044 seg = AS_SEGNEXT(as, seg); 2045 2046 if (seg != NULL) 2047 addr = seg->s_base; 2048 } 2049 2050 *basep = addr; 2051 2052 if (segend > eaddr) 2053 *lenp = eaddr - addr; 2054 else 2055 *lenp = segend - addr; 2056 2057 AS_LOCK_EXIT(as); 2058 return (0); 2059 } 2060 2061 /* 2062 * Swap the pages associated with the address space as out to 2063 * secondary storage, returning the number of bytes actually 2064 * swapped. 2065 * 2066 * The value returned is intended to correlate well with the process's 2067 * memory requirements. Its usefulness for this purpose depends on 2068 * how well the segment-level routines do at returning accurate 2069 * information. 2070 */ 2071 size_t 2072 as_swapout(struct as *as) 2073 { 2074 struct seg *seg; 2075 size_t swpcnt = 0; 2076 2077 /* 2078 * Kernel-only processes have given up their address 2079 * spaces. Of course, we shouldn't be attempting to 2080 * swap out such processes in the first place... 
2081 */ 2082 if (as == NULL) 2083 return (0); 2084 2085 AS_LOCK_ENTER(as, RW_READER); 2086 2087 /* 2088 * Free all mapping resources associated with the address 2089 * space. The segment-level swapout routines capitalize 2090 * on this unmapping by scavenging pages that have become 2091 * unmapped here. 2092 */ 2093 hat_swapout(as->a_hat); 2094 2095 /* 2096 * Call the swapout routines of all segments in the address 2097 * space to do the actual work, accumulating the amount of 2098 * space reclaimed. 2099 */ 2100 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 2101 struct seg_ops *ov = seg->s_ops; 2102 2103 /* 2104 * We have to check to see if the seg has 2105 * an ops vector because the seg may have 2106 * been in the middle of being set up when 2107 * the process was picked for swapout. 2108 */ 2109 if ((ov != NULL) && (ov->swapout != NULL)) 2110 swpcnt += SEGOP_SWAPOUT(seg); 2111 } 2112 AS_LOCK_EXIT(as); 2113 return (swpcnt); 2114 } 2115 2116 /* 2117 * Determine whether data from the mappings in interval [addr, addr + size) 2118 * are in the primary memory (core) cache. 2119 */ 2120 int 2121 as_incore(struct as *as, caddr_t addr, 2122 size_t size, char *vec, size_t *sizep) 2123 { 2124 struct seg *seg; 2125 size_t ssize; 2126 caddr_t raddr; /* rounded down addr */ 2127 size_t rsize; /* rounded up size */ 2128 size_t isize; /* iteration size */ 2129 int error = 0; /* result, assume success */ 2130 2131 *sizep = 0; 2132 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2133 rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) - 2134 (size_t)raddr; 2135 2136 if (raddr + rsize < raddr) /* check for wraparound */ 2137 return (ENOMEM); 2138 2139 AS_LOCK_ENTER(as, RW_READER); 2140 seg = as_segat(as, raddr); 2141 if (seg == NULL) { 2142 AS_LOCK_EXIT(as); 2143 return (-1); 2144 } 2145 2146 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2147 if (raddr >= seg->s_base + seg->s_size) { 2148 seg = AS_SEGNEXT(as, seg); 2149 if (seg == NULL || raddr != seg->s_base) { 2150 error = -1; 2151 break; 2152 } 2153 } 2154 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2155 ssize = seg->s_base + seg->s_size - raddr; 2156 else 2157 ssize = rsize; 2158 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec); 2159 if (isize != ssize) { 2160 error = -1; 2161 break; 2162 } 2163 vec += btopr(ssize); 2164 } 2165 AS_LOCK_EXIT(as); 2166 return (error); 2167 } 2168 2169 static void 2170 as_segunlock(struct seg *seg, caddr_t addr, int attr, 2171 ulong_t *bitmap, size_t position, size_t npages) 2172 { 2173 caddr_t range_start; 2174 size_t pos1 = position; 2175 size_t pos2; 2176 size_t size; 2177 size_t end_pos = npages + position; 2178 2179 while (bt_range(bitmap, &pos1, &pos2, end_pos)) { 2180 size = ptob((pos2 - pos1)); 2181 range_start = (caddr_t)((uintptr_t)addr + 2182 ptob(pos1 - position)); 2183 2184 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK, 2185 (ulong_t *)NULL, (size_t)NULL); 2186 pos1 = pos2; 2187 } 2188 } 2189 2190 static void 2191 as_unlockerr(struct as *as, int attr, ulong_t *mlock_map, 2192 caddr_t raddr, size_t rsize) 2193 { 2194 struct seg *seg = as_segat(as, raddr); 2195 size_t ssize; 2196 2197 while (rsize != 0) { 2198 if (raddr >= seg->s_base + seg->s_size) 2199 seg = AS_SEGNEXT(as, seg); 2200 2201 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2202 ssize = seg->s_base + seg->s_size - raddr; 2203 else 2204 ssize = rsize; 2205 2206 as_segunlock(seg, raddr, attr, mlock_map, 0, btopr(ssize)); 2207 2208 rsize -= ssize; 2209 raddr += ssize; 2210
} 2211 } 2212 2213 /* 2214 * Cache control operations over the interval [addr, addr + size) in 2215 * address space "as". 2216 */ 2217 /*ARGSUSED*/ 2218 int 2219 as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr, 2220 uintptr_t arg, ulong_t *lock_map, size_t pos) 2221 { 2222 struct seg *seg; /* working segment */ 2223 caddr_t raddr; /* rounded down addr */ 2224 caddr_t initraddr; /* saved initial rounded down addr */ 2225 size_t rsize; /* rounded up size */ 2226 size_t initrsize; /* saved initial rounded up size */ 2227 size_t ssize; /* size of seg */ 2228 int error = 0; /* result */ 2229 size_t mlock_size; /* size of bitmap */ 2230 ulong_t *mlock_map; /* pointer to bitmap used */ 2231 /* to represent the locked */ 2232 /* pages. */ 2233 retry: 2234 if (error == IE_RETRY) 2235 AS_LOCK_ENTER(as, RW_WRITER); 2236 else 2237 AS_LOCK_ENTER(as, RW_READER); 2238 2239 /* 2240 * If these are address space lock/unlock operations, loop over 2241 * all segments in the address space, as appropriate. 2242 */ 2243 if (func == MC_LOCKAS) { 2244 size_t npages, idx; 2245 size_t rlen = 0; /* rounded as length */ 2246 2247 idx = pos; 2248 2249 if (arg & MCL_FUTURE) { 2250 mutex_enter(&as->a_contents); 2251 AS_SETPGLCK(as); 2252 mutex_exit(&as->a_contents); 2253 } 2254 if ((arg & MCL_CURRENT) == 0) { 2255 AS_LOCK_EXIT(as); 2256 return (0); 2257 } 2258 2259 seg = AS_SEGFIRST(as); 2260 if (seg == NULL) { 2261 AS_LOCK_EXIT(as); 2262 return (0); 2263 } 2264 2265 do { 2266 raddr = (caddr_t)((uintptr_t)seg->s_base & 2267 (uintptr_t)PAGEMASK); 2268 rlen += (((uintptr_t)(seg->s_base + seg->s_size) + 2269 PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr; 2270 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2271 2272 mlock_size = BT_BITOUL(btopr(rlen)); 2273 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size * 2274 sizeof (ulong_t), KM_NOSLEEP)) == NULL) { 2275 AS_LOCK_EXIT(as); 2276 return (EAGAIN); 2277 } 2278 2279 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 2280 error = SEGOP_LOCKOP(seg, seg->s_base, 2281 seg->s_size, attr, MC_LOCK, mlock_map, pos); 2282 if (error != 0) 2283 break; 2284 pos += seg_pages(seg); 2285 } 2286 2287 if (error) { 2288 for (seg = AS_SEGFIRST(as); seg != NULL; 2289 seg = AS_SEGNEXT(as, seg)) { 2290 2291 raddr = (caddr_t)((uintptr_t)seg->s_base & 2292 (uintptr_t)PAGEMASK); 2293 npages = seg_pages(seg); 2294 as_segunlock(seg, raddr, attr, mlock_map, 2295 idx, npages); 2296 idx += npages; 2297 } 2298 } 2299 2300 kmem_free(mlock_map, mlock_size * sizeof (ulong_t)); 2301 AS_LOCK_EXIT(as); 2302 goto lockerr; 2303 } else if (func == MC_UNLOCKAS) { 2304 mutex_enter(&as->a_contents); 2305 AS_CLRPGLCK(as); 2306 mutex_exit(&as->a_contents); 2307 2308 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 2309 error = SEGOP_LOCKOP(seg, seg->s_base, 2310 seg->s_size, attr, MC_UNLOCK, NULL, 0); 2311 if (error != 0) 2312 break; 2313 } 2314 2315 AS_LOCK_EXIT(as); 2316 goto lockerr; 2317 } 2318 2319 /* 2320 * Normalize addresses and sizes. 2321 */ 2322 initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2323 initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2324 (size_t)raddr; 2325 2326 if (raddr + rsize < raddr) { /* check for wraparound */ 2327 AS_LOCK_EXIT(as); 2328 return (ENOMEM); 2329 } 2330 2331 /* 2332 * Get initial segment. 
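 *
 * For MC_LOCK, a bitmap (mlock_map) with one bit per page of the
 * rounded range is allocated below; the per-segment lockops record
 * which pages they actually locked in it, so that on a partial
 * failure as_unlockerr() can walk the runs of set bits (via
 * as_segunlock()/bt_range()) and release only what was locked.
 * (Descriptive note added for clarity; the mechanism itself is
 * unchanged.)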
2333 */ 2334 if ((seg = as_segat(as, raddr)) == NULL) { 2335 AS_LOCK_EXIT(as); 2336 return (ENOMEM); 2337 } 2338 2339 if (func == MC_LOCK) { 2340 mlock_size = BT_BITOUL(btopr(rsize)); 2341 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size * 2342 sizeof (ulong_t), KM_NOSLEEP)) == NULL) { 2343 AS_LOCK_EXIT(as); 2344 return (EAGAIN); 2345 } 2346 } 2347 2348 /* 2349 * Loop over all segments. If a hole in the address range is 2350 * discovered, then fail. For each segment, perform the appropriate 2351 * control operation. 2352 */ 2353 while (rsize != 0) { 2354 2355 /* 2356 * Make sure there's no hole, calculate the portion 2357 * of the next segment to be operated over. 2358 */ 2359 if (raddr >= seg->s_base + seg->s_size) { 2360 seg = AS_SEGNEXT(as, seg); 2361 if (seg == NULL || raddr != seg->s_base) { 2362 if (func == MC_LOCK) { 2363 as_unlockerr(as, attr, mlock_map, 2364 initraddr, initrsize - rsize); 2365 kmem_free(mlock_map, 2366 mlock_size * sizeof (ulong_t)); 2367 } 2368 AS_LOCK_EXIT(as); 2369 return (ENOMEM); 2370 } 2371 } 2372 if ((raddr + rsize) > (seg->s_base + seg->s_size)) 2373 ssize = seg->s_base + seg->s_size - raddr; 2374 else 2375 ssize = rsize; 2376 2377 /* 2378 * Dispatch on specific function. 2379 */ 2380 switch (func) { 2381 2382 /* 2383 * Synchronize cached data from mappings with backing 2384 * objects. 2385 */ 2386 case MC_SYNC: 2387 if (error = SEGOP_SYNC(seg, raddr, ssize, 2388 attr, (uint_t)arg)) { 2389 AS_LOCK_EXIT(as); 2390 return (error); 2391 } 2392 break; 2393 2394 /* 2395 * Lock pages in memory. 2396 */ 2397 case MC_LOCK: 2398 if (error = SEGOP_LOCKOP(seg, raddr, ssize, 2399 attr, func, mlock_map, pos)) { 2400 as_unlockerr(as, attr, mlock_map, initraddr, 2401 initrsize - rsize + ssize); 2402 kmem_free(mlock_map, mlock_size * 2403 sizeof (ulong_t)); 2404 AS_LOCK_EXIT(as); 2405 goto lockerr; 2406 } 2407 break; 2408 2409 /* 2410 * Unlock mapped pages. 2411 */ 2412 case MC_UNLOCK: 2413 (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func, 2414 (ulong_t *)NULL, (size_t)NULL); 2415 break; 2416 2417 /* 2418 * Store VM advise for mapped pages in segment layer. 2419 */ 2420 case MC_ADVISE: 2421 error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg); 2422 2423 /* 2424 * Check for regular errors and special retry error 2425 */ 2426 if (error) { 2427 if (error == IE_RETRY) { 2428 /* 2429 * Need to acquire writers lock, so 2430 * have to drop readers lock and start 2431 * all over again 2432 */ 2433 AS_LOCK_EXIT(as); 2434 goto retry; 2435 } else if (error == IE_REATTACH) { 2436 /* 2437 * Find segment for current address 2438 * because current segment just got 2439 * split or concatenated 2440 */ 2441 seg = as_segat(as, raddr); 2442 if (seg == NULL) { 2443 AS_LOCK_EXIT(as); 2444 return (ENOMEM); 2445 } 2446 } else { 2447 /* 2448 * Regular error 2449 */ 2450 AS_LOCK_EXIT(as); 2451 return (error); 2452 } 2453 } 2454 break; 2455 2456 case MC_INHERIT_ZERO: 2457 if (seg->s_ops->inherit == NULL) { 2458 error = ENOTSUP; 2459 } else { 2460 error = SEGOP_INHERIT(seg, raddr, ssize, 2461 SEGP_INH_ZERO); 2462 } 2463 if (error != 0) { 2464 AS_LOCK_EXIT(as); 2465 return (error); 2466 } 2467 break; 2468 2469 /* 2470 * Can't happen. 
2471 */ 2472 default: 2473 panic("as_ctl: bad operation %d", func); 2474 /*NOTREACHED*/ 2475 } 2476 2477 rsize -= ssize; 2478 raddr += ssize; 2479 } 2480 2481 if (func == MC_LOCK) 2482 kmem_free(mlock_map, mlock_size * sizeof (ulong_t)); 2483 AS_LOCK_EXIT(as); 2484 return (0); 2485 lockerr: 2486 2487 /* 2488 * If the lower levels returned EDEADLK for a segment lockop, 2489 * it means that we should retry the operation. Let's wait 2490 * a bit also to let the deadlock causing condition clear. 2491 * This is part of a gross hack to work around a design flaw 2492 * in the ufs/sds logging code and should go away when the 2493 * logging code is re-designed to fix the problem. See bug 2494 * 4125102 for details of the problem. 2495 */ 2496 if (error == EDEADLK) { 2497 delay(deadlk_wait); 2498 error = 0; 2499 goto retry; 2500 } 2501 return (error); 2502 } 2503 2504 int 2505 fc_decode(faultcode_t fault_err) 2506 { 2507 int error = 0; 2508 2509 switch (FC_CODE(fault_err)) { 2510 case FC_OBJERR: 2511 error = FC_ERRNO(fault_err); 2512 break; 2513 case FC_PROT: 2514 error = EACCES; 2515 break; 2516 default: 2517 error = EFAULT; 2518 break; 2519 } 2520 return (error); 2521 } 2522 2523 /* 2524 * Pagelock pages from a range that spans more than 1 segment. Obtain shadow 2525 * lists from each segment and copy them to one contiguous shadow list (plist) 2526 * as expected by the caller. Save pointers to per segment shadow lists at 2527 * the tail of plist so that they can be used during as_pageunlock(). 2528 */ 2529 static int 2530 as_pagelock_segs(struct as *as, struct seg *seg, struct page ***ppp, 2531 caddr_t addr, size_t size, enum seg_rw rw) 2532 { 2533 caddr_t sv_addr = addr; 2534 size_t sv_size = size; 2535 struct seg *sv_seg = seg; 2536 ulong_t segcnt = 1; 2537 ulong_t cnt; 2538 size_t ssize; 2539 pgcnt_t npages = btop(size); 2540 page_t **plist; 2541 page_t **pl; 2542 int error; 2543 caddr_t eaddr; 2544 faultcode_t fault_err = 0; 2545 pgcnt_t pl_off; 2546 extern struct seg_ops segspt_shmops; 2547 2548 ASSERT(AS_LOCK_HELD(as)); 2549 ASSERT(seg != NULL); 2550 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size); 2551 ASSERT(addr + size > seg->s_base + seg->s_size); 2552 ASSERT(IS_P2ALIGNED(size, PAGESIZE)); 2553 ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); 2554 2555 /* 2556 * Count the number of segments covered by the range we are about to 2557 * lock. The segment count is used to size the shadow list we return 2558 * back to the caller. 2559 */ 2560 for (; size != 0; size -= ssize, addr += ssize) { 2561 if (addr >= seg->s_base + seg->s_size) { 2562 2563 seg = AS_SEGNEXT(as, seg); 2564 if (seg == NULL || addr != seg->s_base) { 2565 AS_LOCK_EXIT(as); 2566 return (EFAULT); 2567 } 2568 /* 2569 * Do a quick check if subsequent segments 2570 * will most likely support pagelock. 
2571 */ 2572 if (seg->s_ops == &segvn_ops) { 2573 vnode_t *vp; 2574 2575 if (SEGOP_GETVP(seg, addr, &vp) != 0 || 2576 vp != NULL) { 2577 AS_LOCK_EXIT(as); 2578 goto slow; 2579 } 2580 } else if (seg->s_ops != &segspt_shmops) { 2581 AS_LOCK_EXIT(as); 2582 goto slow; 2583 } 2584 segcnt++; 2585 } 2586 if (addr + size > seg->s_base + seg->s_size) { 2587 ssize = seg->s_base + seg->s_size - addr; 2588 } else { 2589 ssize = size; 2590 } 2591 } 2592 ASSERT(segcnt > 1); 2593 2594 plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP); 2595 2596 addr = sv_addr; 2597 size = sv_size; 2598 seg = sv_seg; 2599 2600 for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) { 2601 if (addr >= seg->s_base + seg->s_size) { 2602 seg = AS_SEGNEXT(as, seg); 2603 ASSERT(seg != NULL && addr == seg->s_base); 2604 cnt++; 2605 ASSERT(cnt < segcnt); 2606 } 2607 if (addr + size > seg->s_base + seg->s_size) { 2608 ssize = seg->s_base + seg->s_size - addr; 2609 } else { 2610 ssize = size; 2611 } 2612 pl = &plist[npages + cnt]; 2613 error = SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl, 2614 L_PAGELOCK, rw); 2615 if (error) { 2616 break; 2617 } 2618 ASSERT(plist[npages + cnt] != NULL); 2619 ASSERT(pl_off + btop(ssize) <= npages); 2620 bcopy(plist[npages + cnt], &plist[pl_off], 2621 btop(ssize) * sizeof (page_t *)); 2622 pl_off += btop(ssize); 2623 } 2624 2625 if (size == 0) { 2626 AS_LOCK_EXIT(as); 2627 ASSERT(cnt == segcnt - 1); 2628 *ppp = plist; 2629 return (0); 2630 } 2631 2632 /* 2633 * one of pagelock calls failed. The error type is in error variable. 2634 * Unlock what we've locked so far and retry with F_SOFTLOCK if error 2635 * type is either EFAULT or ENOTSUP. Otherwise just return the error 2636 * back to the caller. 2637 */ 2638 2639 eaddr = addr; 2640 seg = sv_seg; 2641 2642 for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) { 2643 if (addr >= seg->s_base + seg->s_size) { 2644 seg = AS_SEGNEXT(as, seg); 2645 ASSERT(seg != NULL && addr == seg->s_base); 2646 cnt++; 2647 ASSERT(cnt < segcnt); 2648 } 2649 if (eaddr > seg->s_base + seg->s_size) { 2650 ssize = seg->s_base + seg->s_size - addr; 2651 } else { 2652 ssize = eaddr - addr; 2653 } 2654 pl = &plist[npages + cnt]; 2655 ASSERT(*pl != NULL); 2656 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl, 2657 L_PAGEUNLOCK, rw); 2658 } 2659 2660 AS_LOCK_EXIT(as); 2661 2662 kmem_free(plist, (npages + segcnt) * sizeof (page_t *)); 2663 2664 if (error != ENOTSUP && error != EFAULT) { 2665 return (error); 2666 } 2667 2668 slow: 2669 /* 2670 * If we are here because pagelock failed due to the need to cow fault 2671 * in the pages we want to lock F_SOFTLOCK will do this job and in 2672 * next as_pagelock() call for this address range pagelock will 2673 * hopefully succeed. 2674 */ 2675 fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw); 2676 if (fault_err != 0) { 2677 return (fc_decode(fault_err)); 2678 } 2679 *ppp = NULL; 2680 2681 return (0); 2682 } 2683 2684 /* 2685 * lock pages in a given address space. Return shadow list. If 2686 * the list is NULL, the MMU mapping is also locked. 
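 *
 * Illustrative sketch (added by the editor, not from the original
 * source) of the expected pairing with as_pageunlock(); "uaddr" and
 * "len" are placeholder names:
 *
 *	struct page **pplist;
 *
 *	if (as_pagelock(as, &pplist, uaddr, len, S_WRITE) != 0)
 *		return (EFAULT);
 *	(... perform the transfer on the locked range ...)
 *	as_pageunlock(as, pplist, uaddr, len, S_WRITE);
 *
 * A NULL shadow list is the F_SOFTLOCK fallback case; as_pageunlock()
 * detects it and undoes the lock with F_SOFTUNLOCK.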
2687 */ 2688 int 2689 as_pagelock(struct as *as, struct page ***ppp, caddr_t addr, 2690 size_t size, enum seg_rw rw) 2691 { 2692 size_t rsize; 2693 caddr_t raddr; 2694 faultcode_t fault_err; 2695 struct seg *seg; 2696 int err; 2697 2698 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START, 2699 "as_pagelock_start: addr %p size %ld", addr, size); 2700 2701 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2702 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2703 (size_t)raddr; 2704 2705 /* 2706 * if the request crosses two segments let 2707 * as_fault handle it. 2708 */ 2709 AS_LOCK_ENTER(as, RW_READER); 2710 2711 seg = as_segat(as, raddr); 2712 if (seg == NULL) { 2713 AS_LOCK_EXIT(as); 2714 return (EFAULT); 2715 } 2716 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size); 2717 if (raddr + rsize > seg->s_base + seg->s_size) { 2718 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw)); 2719 } 2720 if (raddr + rsize <= raddr) { 2721 AS_LOCK_EXIT(as); 2722 return (EFAULT); 2723 } 2724 2725 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START, 2726 "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize); 2727 2728 /* 2729 * try to lock pages and pass back shadow list 2730 */ 2731 err = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw); 2732 2733 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end"); 2734 2735 AS_LOCK_EXIT(as); 2736 2737 if (err == 0 || (err != ENOTSUP && err != EFAULT)) { 2738 return (err); 2739 } 2740 2741 /* 2742 * Use F_SOFTLOCK to lock the pages because pagelock failed either due 2743 * to no pagelock support for this segment or pages need to be cow 2744 * faulted in. If a fault is needed, F_SOFTLOCK will do this job for 2745 * this as_pagelock() call and in the next as_pagelock() call for the 2746 * same address range pagelock will hopefully succeed. 2747 */ 2748 fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw); 2749 if (fault_err != 0) { 2750 return (fc_decode(fault_err)); 2751 } 2752 *ppp = NULL; 2753 2754 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end"); 2755 return (0); 2756 } 2757 2758 /* 2759 * unlock pages locked by as_pagelock_segs(). Retrieve per segment shadow 2760 * lists from the end of plist and call pageunlock interface for each segment. 2761 * Drop as lock and free plist.
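 *
 * The plist handed in here is the one built by as_pagelock_segs():
 * roughly, the first btop(size) entries are the flattened page_t
 * pointers returned to the caller, and the entries that follow hold
 * one shadow-list pointer per underlying segment, which is what gets
 * replayed with L_PAGEUNLOCK below. Schematically (descriptive note
 * added for clarity):
 *
 *	plist[0 .. npages - 1]			flattened page pointers
 *	plist[npages .. npages + segcnt - 1]	per-segment shadow lists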
2762 */ 2763 static void 2764 as_pageunlock_segs(struct as *as, struct seg *seg, caddr_t addr, size_t size, 2765 struct page **plist, enum seg_rw rw) 2766 { 2767 ulong_t cnt; 2768 caddr_t eaddr = addr + size; 2769 pgcnt_t npages = btop(size); 2770 size_t ssize; 2771 page_t **pl; 2772 2773 ASSERT(AS_LOCK_HELD(as)); 2774 ASSERT(seg != NULL); 2775 ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size); 2776 ASSERT(addr + size > seg->s_base + seg->s_size); 2777 ASSERT(IS_P2ALIGNED(size, PAGESIZE)); 2778 ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); 2779 ASSERT(plist != NULL); 2780 2781 for (cnt = 0; addr < eaddr; addr += ssize) { 2782 if (addr >= seg->s_base + seg->s_size) { 2783 seg = AS_SEGNEXT(as, seg); 2784 ASSERT(seg != NULL && addr == seg->s_base); 2785 cnt++; 2786 } 2787 if (eaddr > seg->s_base + seg->s_size) { 2788 ssize = seg->s_base + seg->s_size - addr; 2789 } else { 2790 ssize = eaddr - addr; 2791 } 2792 pl = &plist[npages + cnt]; 2793 ASSERT(*pl != NULL); 2794 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl, 2795 L_PAGEUNLOCK, rw); 2796 } 2797 ASSERT(cnt > 0); 2798 AS_LOCK_EXIT(as); 2799 2800 cnt++; 2801 kmem_free(plist, (npages + cnt) * sizeof (page_t *)); 2802 } 2803 2804 /* 2805 * unlock pages in a given address range 2806 */ 2807 void 2808 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size, 2809 enum seg_rw rw) 2810 { 2811 struct seg *seg; 2812 size_t rsize; 2813 caddr_t raddr; 2814 2815 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START, 2816 "as_pageunlock_start: addr %p size %ld", addr, size); 2817 2818 /* 2819 * if the shadow list is NULL, as_pagelock was 2820 * falling back to as_fault 2821 */ 2822 if (pp == NULL) { 2823 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw); 2824 return; 2825 } 2826 2827 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2828 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2829 (size_t)raddr; 2830 2831 AS_LOCK_ENTER(as, RW_READER); 2832 seg = as_segat(as, raddr); 2833 ASSERT(seg != NULL); 2834 2835 TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START, 2836 "seg_unlock_start: raddr %p rsize %ld", raddr, rsize); 2837 2838 ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size); 2839 if (raddr + rsize <= seg->s_base + seg->s_size) { 2840 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw); 2841 } else { 2842 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw); 2843 return; 2844 } 2845 AS_LOCK_EXIT(as); 2846 TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end"); 2847 } 2848 2849 int 2850 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc, 2851 boolean_t wait) 2852 { 2853 struct seg *seg; 2854 size_t ssize; 2855 caddr_t raddr; /* rounded down addr */ 2856 size_t rsize; /* rounded up size */ 2857 int error = 0; 2858 size_t pgsz = page_get_pagesize(szc); 2859 2860 setpgsz_top: 2861 if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) { 2862 return (EINVAL); 2863 } 2864 2865 raddr = addr; 2866 rsize = size; 2867 2868 if (raddr + rsize < raddr) /* check for wraparound */ 2869 return (ENOMEM); 2870 2871 AS_LOCK_ENTER(as, RW_WRITER); 2872 as_clearwatchprot(as, raddr, rsize); 2873 seg = as_segat(as, raddr); 2874 if (seg == NULL) { 2875 as_setwatch(as); 2876 AS_LOCK_EXIT(as); 2877 return (ENOMEM); 2878 } 2879 2880 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2881 if (raddr >= seg->s_base + seg->s_size) { 2882 seg = AS_SEGNEXT(as, seg); 2883 if (seg == NULL || raddr != seg->s_base) { 2884 error = ENOMEM; 2885 break; 2886 } 2887 } 2888 if 
((raddr + rsize) > (seg->s_base + seg->s_size)) { 2889 ssize = seg->s_base + seg->s_size - raddr; 2890 } else { 2891 ssize = rsize; 2892 } 2893 2894 retry: 2895 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc); 2896 2897 if (error == IE_NOMEM) { 2898 error = EAGAIN; 2899 break; 2900 } 2901 2902 if (error == IE_RETRY) { 2903 AS_LOCK_EXIT(as); 2904 goto setpgsz_top; 2905 } 2906 2907 if (error == ENOTSUP) { 2908 error = EINVAL; 2909 break; 2910 } 2911 2912 if (wait && (error == EAGAIN)) { 2913 /* 2914 * Memory is currently locked. It must be unlocked 2915 * before this operation can succeed through a retry. 2916 * The possible reasons for locked memory and 2917 * corresponding strategies for unlocking are: 2918 * (1) Normal I/O 2919 * wait for a signal that the I/O operation 2920 * has completed and the memory is unlocked. 2921 * (2) Asynchronous I/O 2922 * The aio subsystem does not unlock pages when 2923 * the I/O is completed. Those pages are unlocked 2924 * when the application calls aiowait/aioerror. 2925 * So, to prevent blocking forever, cv_broadcast() 2926 * is done to wake up aio_cleanup_thread. 2927 * Subsequently, segvn_reclaim will be called, and 2928 * that will do AS_CLRUNMAPWAIT() and wake us up. 2929 * (3) Long term page locking: 2930 * This is not relevant for as_setpagesize() 2931 * because we cannot change the page size for 2932 * driver memory. The attempt to do so will 2933 * fail with a different error than EAGAIN so 2934 * there's no need to trigger as callbacks like 2935 * as_unmap, as_setprot or as_free would do. 2936 */ 2937 mutex_enter(&as->a_contents); 2938 if (!AS_ISNOUNMAPWAIT(as)) { 2939 if (AS_ISUNMAPWAIT(as) == 0) { 2940 cv_broadcast(&as->a_cv); 2941 } 2942 AS_SETUNMAPWAIT(as); 2943 AS_LOCK_EXIT(as); 2944 while (AS_ISUNMAPWAIT(as)) { 2945 cv_wait(&as->a_cv, &as->a_contents); 2946 } 2947 } else { 2948 /* 2949 * We may have raced with 2950 * segvn_reclaim()/segspt_reclaim(). In this 2951 * case clean nounmapwait flag and retry since 2952 * softlockcnt in this segment may be already 2953 * 0. We don't drop as writer lock so our 2954 * number of retries without sleeping should 2955 * be very small. See segvn_reclaim() for 2956 * more comments. 2957 */ 2958 AS_CLRNOUNMAPWAIT(as); 2959 mutex_exit(&as->a_contents); 2960 goto retry; 2961 } 2962 mutex_exit(&as->a_contents); 2963 goto setpgsz_top; 2964 } else if (error != 0) { 2965 break; 2966 } 2967 } 2968 as_setwatch(as); 2969 AS_LOCK_EXIT(as); 2970 return (error); 2971 } 2972 2973 /* 2974 * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments 2975 * in its chunk where s_szc is less than the szc we want to set. 2976 */ 2977 static int 2978 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, 2979 int *retry) 2980 { 2981 struct seg *seg; 2982 size_t ssize; 2983 int error; 2984 2985 ASSERT(AS_WRITE_HELD(as)); 2986 2987 seg = as_segat(as, raddr); 2988 if (seg == NULL) { 2989 panic("as_iset3_default_lpsize: no seg"); 2990 } 2991 2992 for (; rsize != 0; rsize -= ssize, raddr += ssize) { 2993 if (raddr >= seg->s_base + seg->s_size) { 2994 seg = AS_SEGNEXT(as, seg); 2995 if (seg == NULL || raddr != seg->s_base) { 2996 panic("as_iset3_default_lpsize: as changed"); 2997 } 2998 } 2999 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 3000 ssize = seg->s_base + seg->s_size - raddr; 3001 } else { 3002 ssize = rsize; 3003 } 3004 3005 if (szc > seg->s_szc) { 3006 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc); 3007 /* Only retry on EINVAL segments that have no vnode. 
*/ 3008 if (error == EINVAL) { 3009 vnode_t *vp = NULL; 3010 if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) && 3011 (SEGOP_GETVP(seg, raddr, &vp) != 0 || 3012 vp == NULL)) { 3013 *retry = 1; 3014 } else { 3015 *retry = 0; 3016 } 3017 } 3018 if (error) { 3019 return (error); 3020 } 3021 } 3022 } 3023 return (0); 3024 } 3025 3026 /* 3027 * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the 3028 * pagesize on each segment in its range, but if any fails with EINVAL, 3029 * then it reduces the pagesizes to the next size in the bitmap and 3030 * retries as_iset3_default_lpsize(). The reason why the code retries 3031 * smaller allowed sizes on EINVAL is because (a) the anon offset may not 3032 * match the bigger sizes, and (b) it's hard to get this offset (to begin 3033 * with) to pass to map_pgszcvec(). 3034 */ 3035 static int 3036 as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc, 3037 uint_t szcvec) 3038 { 3039 int error; 3040 int retry; 3041 3042 ASSERT(AS_WRITE_HELD(as)); 3043 3044 for (;;) { 3045 error = as_iset3_default_lpsize(as, addr, size, szc, &retry); 3046 if (error == EINVAL && retry) { 3047 szcvec &= ~(1 << szc); 3048 if (szcvec <= 1) { 3049 return (EINVAL); 3050 } 3051 szc = highbit(szcvec) - 1; 3052 } else { 3053 return (error); 3054 } 3055 } 3056 } 3057 3058 /* 3059 * as_iset1_default_lpsize() breaks its chunk into areas where existing 3060 * segments have a smaller szc than we want to set. For each such area, 3061 * it calls as_iset2_default_lpsize() 3062 */ 3063 static int 3064 as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, 3065 uint_t szcvec) 3066 { 3067 struct seg *seg; 3068 size_t ssize; 3069 caddr_t setaddr = raddr; 3070 size_t setsize = 0; 3071 int set; 3072 int error; 3073 3074 ASSERT(AS_WRITE_HELD(as)); 3075 3076 seg = as_segat(as, raddr); 3077 if (seg == NULL) { 3078 panic("as_iset1_default_lpsize: no seg"); 3079 } 3080 if (seg->s_szc < szc) { 3081 set = 1; 3082 } else { 3083 set = 0; 3084 } 3085 3086 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) { 3087 if (raddr >= seg->s_base + seg->s_size) { 3088 seg = AS_SEGNEXT(as, seg); 3089 if (seg == NULL || raddr != seg->s_base) { 3090 panic("as_iset1_default_lpsize: as changed"); 3091 } 3092 if (seg->s_szc >= szc && set) { 3093 ASSERT(setsize != 0); 3094 error = as_iset2_default_lpsize(as, 3095 setaddr, setsize, szc, szcvec); 3096 if (error) { 3097 return (error); 3098 } 3099 set = 0; 3100 } else if (seg->s_szc < szc && !set) { 3101 setaddr = raddr; 3102 setsize = 0; 3103 set = 1; 3104 } 3105 } 3106 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 3107 ssize = seg->s_base + seg->s_size - raddr; 3108 } else { 3109 ssize = rsize; 3110 } 3111 } 3112 error = 0; 3113 if (set) { 3114 ASSERT(setsize != 0); 3115 error = as_iset2_default_lpsize(as, setaddr, setsize, 3116 szc, szcvec); 3117 } 3118 return (error); 3119 } 3120 3121 /* 3122 * as_iset_default_lpsize() breaks its chunk according to the size code bitmap 3123 * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each 3124 * chunk to as_iset1_default_lpsize(). 3125 */ 3126 static int 3127 as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags, 3128 int type) 3129 { 3130 int rtype = (type & MAP_SHARED) ? 
MAPPGSZC_SHM : MAPPGSZC_PRIVM; 3131 uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, 3132 flags, rtype, 1); 3133 uint_t szc; 3134 uint_t nszc; 3135 int error; 3136 caddr_t a; 3137 caddr_t eaddr; 3138 size_t segsize; 3139 size_t pgsz; 3140 uint_t save_szcvec; 3141 3142 ASSERT(AS_WRITE_HELD(as)); 3143 ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); 3144 ASSERT(IS_P2ALIGNED(size, PAGESIZE)); 3145 3146 szcvec &= ~1; 3147 if (szcvec <= 1) { /* skip if base page size */ 3148 return (0); 3149 } 3150 3151 /* Get the pagesize of the first larger page size. */ 3152 szc = lowbit(szcvec) - 1; 3153 pgsz = page_get_pagesize(szc); 3154 eaddr = addr + size; 3155 addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); 3156 eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); 3157 3158 save_szcvec = szcvec; 3159 szcvec >>= (szc + 1); 3160 nszc = szc; 3161 while (szcvec) { 3162 if ((szcvec & 0x1) == 0) { 3163 nszc++; 3164 szcvec >>= 1; 3165 continue; 3166 } 3167 nszc++; 3168 pgsz = page_get_pagesize(nszc); 3169 a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); 3170 if (a != addr) { 3171 ASSERT(szc > 0); 3172 ASSERT(a < eaddr); 3173 segsize = a - addr; 3174 error = as_iset1_default_lpsize(as, addr, segsize, szc, 3175 save_szcvec); 3176 if (error) { 3177 return (error); 3178 } 3179 addr = a; 3180 } 3181 szc = nszc; 3182 szcvec >>= 1; 3183 } 3184 3185 ASSERT(addr < eaddr); 3186 szcvec = save_szcvec; 3187 while (szcvec) { 3188 a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); 3189 ASSERT(a >= addr); 3190 if (a != addr) { 3191 ASSERT(szc > 0); 3192 segsize = a - addr; 3193 error = as_iset1_default_lpsize(as, addr, segsize, szc, 3194 save_szcvec); 3195 if (error) { 3196 return (error); 3197 } 3198 addr = a; 3199 } 3200 szcvec &= ~(1 << szc); 3201 if (szcvec) { 3202 szc = highbit(szcvec) - 1; 3203 pgsz = page_get_pagesize(szc); 3204 } 3205 } 3206 ASSERT(addr == eaddr); 3207 3208 return (0); 3209 } 3210 3211 /* 3212 * Set the default large page size for the range. Called via memcntl with 3213 * page size set to 0. as_set_default_lpsize breaks the range down into 3214 * chunks with the same type/flags, ignores non-segvn segments, and passes 3215 * each chunk to as_iset_default_lpsize().
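 *
 * Illustrative sketch (added by the editor, not from the original
 * source) of how user code typically reaches this path through
 * memcntl(2); "addr" and "len" here are placeholders:
 *
 *	struct memcntl_mha mha;
 *
 *	mha.mha_cmd = MHA_MAPSIZE_VA;
 *	mha.mha_flags = 0;
 *	mha.mha_pagesize = 0;	(0 requests the default large page size)
 *	(void) memcntl(addr, len, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0);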
3216 */ 3217 int 3218 as_set_default_lpsize(struct as *as, caddr_t addr, size_t size) 3219 { 3220 struct seg *seg; 3221 caddr_t raddr; 3222 size_t rsize; 3223 size_t ssize; 3224 int rtype, rflags; 3225 int stype, sflags; 3226 int error; 3227 caddr_t setaddr; 3228 size_t setsize; 3229 int segvn; 3230 3231 if (size == 0) 3232 return (0); 3233 3234 AS_LOCK_ENTER(as, RW_WRITER); 3235 again: 3236 error = 0; 3237 3238 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3239 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 3240 (size_t)raddr; 3241 3242 if (raddr + rsize < raddr) { /* check for wraparound */ 3243 AS_LOCK_EXIT(as); 3244 return (ENOMEM); 3245 } 3246 as_clearwatchprot(as, raddr, rsize); 3247 seg = as_segat(as, raddr); 3248 if (seg == NULL) { 3249 as_setwatch(as); 3250 AS_LOCK_EXIT(as); 3251 return (ENOMEM); 3252 } 3253 if (seg->s_ops == &segvn_ops) { 3254 rtype = SEGOP_GETTYPE(seg, addr); 3255 rflags = rtype & (MAP_TEXT | MAP_INITDATA); 3256 rtype = rtype & (MAP_SHARED | MAP_PRIVATE); 3257 segvn = 1; 3258 } else { 3259 segvn = 0; 3260 } 3261 setaddr = raddr; 3262 setsize = 0; 3263 3264 for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) { 3265 if (raddr >= (seg->s_base + seg->s_size)) { 3266 seg = AS_SEGNEXT(as, seg); 3267 if (seg == NULL || raddr != seg->s_base) { 3268 error = ENOMEM; 3269 break; 3270 } 3271 if (seg->s_ops == &segvn_ops) { 3272 stype = SEGOP_GETTYPE(seg, raddr); 3273 sflags = stype & (MAP_TEXT | MAP_INITDATA); 3274 stype &= (MAP_SHARED | MAP_PRIVATE); 3275 if (segvn && (rflags != sflags || 3276 rtype != stype)) { 3277 /* 3278 * The next segment is also segvn but 3279 * has different flags and/or type. 3280 */ 3281 ASSERT(setsize != 0); 3282 error = as_iset_default_lpsize(as, 3283 setaddr, setsize, rflags, rtype); 3284 if (error) { 3285 break; 3286 } 3287 rflags = sflags; 3288 rtype = stype; 3289 setaddr = raddr; 3290 setsize = 0; 3291 } else if (!segvn) { 3292 rflags = sflags; 3293 rtype = stype; 3294 setaddr = raddr; 3295 setsize = 0; 3296 segvn = 1; 3297 } 3298 } else if (segvn) { 3299 /* The next segment is not segvn. */ 3300 ASSERT(setsize != 0); 3301 error = as_iset_default_lpsize(as, 3302 setaddr, setsize, rflags, rtype); 3303 if (error) { 3304 break; 3305 } 3306 segvn = 0; 3307 } 3308 } 3309 if ((raddr + rsize) > (seg->s_base + seg->s_size)) { 3310 ssize = seg->s_base + seg->s_size - raddr; 3311 } else { 3312 ssize = rsize; 3313 } 3314 } 3315 if (error == 0 && segvn) { 3316 /* The last chunk when rsize == 0. */ 3317 ASSERT(setsize != 0); 3318 error = as_iset_default_lpsize(as, setaddr, setsize, 3319 rflags, rtype); 3320 } 3321 3322 if (error == IE_RETRY) { 3323 goto again; 3324 } else if (error == IE_NOMEM) { 3325 error = EAGAIN; 3326 } else if (error == ENOTSUP) { 3327 error = EINVAL; 3328 } else if (error == EAGAIN) { 3329 mutex_enter(&as->a_contents); 3330 if (!AS_ISNOUNMAPWAIT(as)) { 3331 if (AS_ISUNMAPWAIT(as) == 0) { 3332 cv_broadcast(&as->a_cv); 3333 } 3334 AS_SETUNMAPWAIT(as); 3335 AS_LOCK_EXIT(as); 3336 while (AS_ISUNMAPWAIT(as)) { 3337 cv_wait(&as->a_cv, &as->a_contents); 3338 } 3339 mutex_exit(&as->a_contents); 3340 AS_LOCK_ENTER(as, RW_WRITER); 3341 } else { 3342 /* 3343 * We may have raced with 3344 * segvn_reclaim()/segspt_reclaim(). In this case 3345 * clean nounmapwait flag and retry since softlockcnt 3346 * in this segment may be already 0. We don't drop as 3347 * writer lock so our number of retries without 3348 * sleeping should be very small. See segvn_reclaim() 3349 * for more comments. 
3350 */ 3351 AS_CLRNOUNMAPWAIT(as); 3352 mutex_exit(&as->a_contents); 3353 } 3354 goto again; 3355 } 3356 3357 as_setwatch(as); 3358 AS_LOCK_EXIT(as); 3359 return (error); 3360 } 3361 3362 /* 3363 * Setup all of the uninitialized watched pages that we can. 3364 */ 3365 void 3366 as_setwatch(struct as *as) 3367 { 3368 struct watched_page *pwp; 3369 struct seg *seg; 3370 caddr_t vaddr; 3371 uint_t prot; 3372 int err, retrycnt; 3373 3374 if (avl_numnodes(&as->a_wpage) == 0) 3375 return; 3376 3377 ASSERT(AS_WRITE_HELD(as)); 3378 3379 for (pwp = avl_first(&as->a_wpage); pwp != NULL; 3380 pwp = AVL_NEXT(&as->a_wpage, pwp)) { 3381 retrycnt = 0; 3382 retry: 3383 vaddr = pwp->wp_vaddr; 3384 if (pwp->wp_oprot != 0 || /* already set up */ 3385 (seg = as_segat(as, vaddr)) == NULL || 3386 SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0) 3387 continue; 3388 3389 pwp->wp_oprot = prot; 3390 if (pwp->wp_read) 3391 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3392 if (pwp->wp_write) 3393 prot &= ~PROT_WRITE; 3394 if (pwp->wp_exec) 3395 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3396 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) { 3397 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot); 3398 if (err == IE_RETRY) { 3399 pwp->wp_oprot = 0; 3400 ASSERT(retrycnt == 0); 3401 retrycnt++; 3402 goto retry; 3403 } 3404 } 3405 pwp->wp_prot = prot; 3406 } 3407 } 3408 3409 /* 3410 * Clear all of the watched pages in the address space. 3411 */ 3412 void 3413 as_clearwatch(struct as *as) 3414 { 3415 struct watched_page *pwp; 3416 struct seg *seg; 3417 caddr_t vaddr; 3418 uint_t prot; 3419 int err, retrycnt; 3420 3421 if (avl_numnodes(&as->a_wpage) == 0) 3422 return; 3423 3424 ASSERT(AS_WRITE_HELD(as)); 3425 3426 for (pwp = avl_first(&as->a_wpage); pwp != NULL; 3427 pwp = AVL_NEXT(&as->a_wpage, pwp)) { 3428 retrycnt = 0; 3429 retry: 3430 vaddr = pwp->wp_vaddr; 3431 if (pwp->wp_oprot == 0 || /* not set up */ 3432 (seg = as_segat(as, vaddr)) == NULL) 3433 continue; 3434 3435 if ((prot = pwp->wp_oprot) != pwp->wp_prot) { 3436 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot); 3437 if (err == IE_RETRY) { 3438 ASSERT(retrycnt == 0); 3439 retrycnt++; 3440 goto retry; 3441 } 3442 } 3443 pwp->wp_oprot = 0; 3444 pwp->wp_prot = 0; 3445 } 3446 } 3447 3448 /* 3449 * Force a new setup for all the watched pages in the range. 
3450 */ 3451 static void 3452 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot) 3453 { 3454 struct watched_page *pwp; 3455 struct watched_page tpw; 3456 caddr_t eaddr = addr + size; 3457 caddr_t vaddr; 3458 struct seg *seg; 3459 int err, retrycnt; 3460 uint_t wprot; 3461 avl_index_t where; 3462 3463 if (avl_numnodes(&as->a_wpage) == 0) 3464 return; 3465 3466 ASSERT(AS_WRITE_HELD(as)); 3467 3468 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3469 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL) 3470 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER); 3471 3472 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3473 retrycnt = 0; 3474 vaddr = pwp->wp_vaddr; 3475 3476 wprot = prot; 3477 if (pwp->wp_read) 3478 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3479 if (pwp->wp_write) 3480 wprot &= ~PROT_WRITE; 3481 if (pwp->wp_exec) 3482 wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3483 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) { 3484 retry: 3485 seg = as_segat(as, vaddr); 3486 if (seg == NULL) { 3487 panic("as_setwatchprot: no seg"); 3488 /*NOTREACHED*/ 3489 } 3490 err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot); 3491 if (err == IE_RETRY) { 3492 ASSERT(retrycnt == 0); 3493 retrycnt++; 3494 goto retry; 3495 } 3496 } 3497 pwp->wp_oprot = prot; 3498 pwp->wp_prot = wprot; 3499 3500 pwp = AVL_NEXT(&as->a_wpage, pwp); 3501 } 3502 } 3503 3504 /* 3505 * Clear all of the watched pages in the range. 3506 */ 3507 static void 3508 as_clearwatchprot(struct as *as, caddr_t addr, size_t size) 3509 { 3510 caddr_t eaddr = addr + size; 3511 struct watched_page *pwp; 3512 struct watched_page tpw; 3513 uint_t prot; 3514 struct seg *seg; 3515 int err, retrycnt; 3516 avl_index_t where; 3517 3518 if (avl_numnodes(&as->a_wpage) == 0) 3519 return; 3520 3521 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3522 if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL) 3523 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER); 3524 3525 ASSERT(AS_WRITE_HELD(as)); 3526 3527 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3528 3529 if ((prot = pwp->wp_oprot) != 0) { 3530 retrycnt = 0; 3531 3532 if (prot != pwp->wp_prot) { 3533 retry: 3534 seg = as_segat(as, pwp->wp_vaddr); 3535 if (seg == NULL) 3536 continue; 3537 err = SEGOP_SETPROT(seg, pwp->wp_vaddr, 3538 PAGESIZE, prot); 3539 if (err == IE_RETRY) { 3540 ASSERT(retrycnt == 0); 3541 retrycnt++; 3542 goto retry; 3543 3544 } 3545 } 3546 pwp->wp_oprot = 0; 3547 pwp->wp_prot = 0; 3548 } 3549 3550 pwp = AVL_NEXT(&as->a_wpage, pwp); 3551 } 3552 } 3553 3554 void 3555 as_signal_proc(struct as *as, k_siginfo_t *siginfo) 3556 { 3557 struct proc *p; 3558 3559 mutex_enter(&pidlock); 3560 for (p = practive; p; p = p->p_next) { 3561 if (p->p_as == as) { 3562 mutex_enter(&p->p_lock); 3563 if (p->p_as == as) 3564 sigaddq(p, NULL, siginfo, KM_NOSLEEP); 3565 mutex_exit(&p->p_lock); 3566 } 3567 } 3568 mutex_exit(&pidlock); 3569 } 3570 3571 /* 3572 * return memory object ID 3573 */ 3574 int 3575 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp) 3576 { 3577 struct seg *seg; 3578 int sts; 3579 3580 AS_LOCK_ENTER(as, RW_READER); 3581 seg = as_segat(as, addr); 3582 if (seg == NULL) { 3583 AS_LOCK_EXIT(as); 3584 return (EFAULT); 3585 } 3586 /* 3587 * catch old drivers which may not support getmemid 3588 */ 3589 if (seg->s_ops->getmemid == NULL) { 3590 AS_LOCK_EXIT(as); 3591 return (ENODEV); 3592 } 3593 3594 sts = SEGOP_GETMEMID(seg, addr, memidp); 3595 3596 AS_LOCK_EXIT(as); 3597 return (sts); 3598 } 3599
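/*
 * Editor's illustrative sketch, not part of the original source: the
 * address/size normalization repeated throughout this file (e.g. in
 * as_incore(), as_ctl() and as_pagelock() above) follows this pattern:
 *
 *	caddr_t raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
 *	size_t rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
 *	    (size_t)raddr;
 *
 * raddr is addr rounded down to a page boundary and rsize is rounded up
 * so that [raddr, raddr + rsize) covers all of [addr, addr + size) in
 * whole pages; the "raddr + rsize < raddr" check that usually follows
 * catches wraparound of the rounded range.
 */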