/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * segkp is a segment driver that administers the allocation and deallocation
 * of pageable variable size chunks of kernel virtual address space. Each
 * allocated resource is page-aligned.
 *
 * The user may specify whether the resource should be initialized to 0,
 * include a redzone, or locked in memory.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/mman.h>
#include <sys/vnode.h>
#include <sys/cmn_err.h>
#include <sys/swap.h>
#include <sys/tuneable.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/cred.h>
#include <sys/dumphdr.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/stack.h>
#include <sys/atomic.h>
#include <sys/archsystm.h>
#include <sys/lgrp.h>

#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kp.h>
#include <vm/seg_kmem.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <vm/hat.h>
#include <sys/bitmap.h>

/*
 * Private seg op routines
 */
static void	segkp_badop(void);
static void	segkp_dump(struct seg *seg);
static int	segkp_checkprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t prot);
static int	segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
static int	segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
			struct page ***page, enum lock_type type,
			enum seg_rw rw);
static void	segkp_insert(struct seg *seg, struct segkp_data *kpd);
static void	segkp_delete(struct seg *seg, struct segkp_data *kpd);
static caddr_t	segkp_get_internal(struct seg *seg, size_t len, uint_t flags,
			struct segkp_data **tkpd, struct anon_map *amp);
static void	segkp_release_internal(struct seg *seg,
			struct segkp_data *kpd, size_t len);
static int	segkp_unlock(struct hat *hat, struct seg *seg, caddr_t vaddr,
			size_t len, struct segkp_data *kpd, uint_t flags);
static int	segkp_load(struct hat *hat, struct seg *seg, caddr_t vaddr,
			size_t len, struct segkp_data *kpd, uint_t flags);
static struct segkp_data *segkp_find(struct seg *seg, caddr_t vaddr);
static int	segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t	*segkp_getpolicy(struct seg *seg,
    caddr_t addr);
static int	segkp_capable(struct seg *seg, segcapability_t capability);

/*
 * Lock used to protect the hash table(s) and caches.
 */
static kmutex_t	segkp_lock;

/*
 * The segkp caches
 */
static struct segkp_cache segkp_cache[SEGKP_MAX_CACHE];

#define	SEGKP_BADOP(t)	(t(*)())segkp_badop

/*
 * When there are fewer than red_minavail bytes left on the stack,
 * segkp_map_red() will map in the redzone (if called).  5000 seems
 * to work reasonably well...
 */
long		red_minavail = 5000;

/*
 * will be set to 1 for 32 bit x86 systems only, in startup.c
 */
int	segkp_fromheap = 0;
ulong_t *segkp_bitmap;

/*
 * If segkp_map_red() is called with the redzone already mapped and
 * with less than RED_DEEP_THRESHOLD bytes available on the stack,
 * then the stack situation has become quite serious; if much more stack
 * is consumed, we have the potential of scrogging the next thread/LWP
 * structure.  To help debug the "can't happen" panics which may
 * result from this condition, we record hrestime and the calling thread
 * in red_deep_hires and red_deep_thread respectively.
 */
#define	RED_DEEP_THRESHOLD	2000

hrtime_t	red_deep_hires;
kthread_t	*red_deep_thread;

uint32_t	red_nmapped;
uint32_t	red_closest = UINT_MAX;
uint32_t	red_ndoubles;

pgcnt_t anon_segkp_pages_locked;	/* See vm/anon.h */
pgcnt_t anon_segkp_pages_resv;		/* anon reserved by seg_kp */

static struct	seg_ops segkp_ops = {
	SEGKP_BADOP(int),		/* dup */
	SEGKP_BADOP(int),		/* unmap */
	SEGKP_BADOP(void),		/* free */
	segkp_fault,
	SEGKP_BADOP(faultcode_t),	/* faulta */
	SEGKP_BADOP(int),		/* setprot */
	segkp_checkprot,
	segkp_kluster,
	SEGKP_BADOP(size_t),		/* swapout */
	SEGKP_BADOP(int),		/* sync */
	SEGKP_BADOP(size_t),		/* incore */
	SEGKP_BADOP(int),		/* lockop */
	SEGKP_BADOP(int),		/* getprot */
	SEGKP_BADOP(u_offset_t),	/* getoffset */
	SEGKP_BADOP(int),		/* gettype */
	SEGKP_BADOP(int),		/* getvp */
	SEGKP_BADOP(int),		/* advise */
	segkp_dump,			/* dump */
	segkp_pagelock,			/* pagelock */
	SEGKP_BADOP(int),		/* setpgsz */
	segkp_getmemid,			/* getmemid */
	segkp_getpolicy,		/* getpolicy */
	segkp_capable,			/* capable */
	seg_inherit_notsup		/* inherit */
};

static void
segkp_badop(void)
{
	panic("segkp_badop");
	/*NOTREACHED*/
}

static void segkpinit_mem_config(struct seg *);

static uint32_t segkp_indel;

/*
 * Allocate the segment specific private data struct and fill it in
 * with the per kp segment mutex, anon ptr. array and hash table.
 */
int
segkp_create(struct seg *seg)
{
	struct segkp_segdata *kpsd;
	size_t	np;

	ASSERT(seg != NULL && seg->s_as == &kas);
	ASSERT(RW_WRITE_HELD(&seg->s_as->a_lock));

	if (seg->s_size & PAGEOFFSET) {
		panic("Bad segkp size");
		/*NOTREACHED*/
	}

	kpsd = kmem_zalloc(sizeof (struct segkp_segdata), KM_SLEEP);

	/*
	 * Allocate the virtual memory for segkp and initialize it
	 */
	if (segkp_fromheap) {
		np = btop(kvseg.s_size);
		segkp_bitmap = kmem_zalloc(BT_SIZEOFMAP(np), KM_SLEEP);
		kpsd->kpsd_arena = vmem_create("segkp", NULL, 0, PAGESIZE,
		    vmem_alloc, vmem_free, heap_arena, 5 * PAGESIZE, VM_SLEEP);
	} else {
		segkp_bitmap = NULL;
		np = btop(seg->s_size);
		kpsd->kpsd_arena = vmem_create("segkp", seg->s_base,
		    seg->s_size, PAGESIZE, NULL, NULL, NULL, 5 * PAGESIZE,
		    VM_SLEEP);
	}

	kpsd->kpsd_anon = anon_create(np, ANON_SLEEP | ANON_ALLOC_FORCE);

	kpsd->kpsd_hash = kmem_zalloc(SEGKP_HASHSZ * sizeof (struct segkp *),
	    KM_SLEEP);
	seg->s_data = (void *)kpsd;
	seg->s_ops = &segkp_ops;
	segkpinit_mem_config(seg);
	return (0);
}

/*
 * Find a free 'freelist' and initialize it with the appropriate attributes
 */
void *
segkp_cache_init(struct seg *seg, int maxsize, size_t len, uint_t flags)
{
	int i;

	if ((flags & KPD_NO_ANON) && !(flags & KPD_LOCKED))
		return ((void *)-1);

	mutex_enter(&segkp_lock);
	for (i = 0; i < SEGKP_MAX_CACHE; i++) {
		if (segkp_cache[i].kpf_inuse)
			continue;
		segkp_cache[i].kpf_inuse = 1;
		segkp_cache[i].kpf_max = maxsize;
		segkp_cache[i].kpf_flags = flags;
		segkp_cache[i].kpf_seg = seg;
		segkp_cache[i].kpf_len = len;
		mutex_exit(&segkp_lock);
		return ((void *)(uintptr_t)i);
	}
	mutex_exit(&segkp_lock);
	return ((void *)-1);
}

/*
 * Free all the cache resources.
 */
void
segkp_cache_free(void)
{
	struct segkp_data *kpd;
	struct seg *seg;
	int i;

	mutex_enter(&segkp_lock);
	for (i = 0; i < SEGKP_MAX_CACHE; i++) {
		if (!segkp_cache[i].kpf_inuse)
			continue;
		/*
		 * Disconnect the freelist and process each element
		 */
		kpd = segkp_cache[i].kpf_list;
		seg = segkp_cache[i].kpf_seg;
		segkp_cache[i].kpf_list = NULL;
		segkp_cache[i].kpf_count = 0;
		mutex_exit(&segkp_lock);

		while (kpd != NULL) {
			struct segkp_data *next;

			next = kpd->kp_next;
			segkp_release_internal(seg, kpd, kpd->kp_len);
			kpd = next;
		}
		mutex_enter(&segkp_lock);
	}
	mutex_exit(&segkp_lock);
}
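/*
 * Illustrative sketch of how the cache interface above might be used by a
 * subsystem that repeatedly needs identically sized segkp resources.  The
 * cookie variable, freelist depth, size, and flag combination shown here
 * are hypothetical and are not taken from an actual caller:
 *
 *	void *stk_cookie;
 *
 *	stk_cookie = segkp_cache_init(segkp, 24, DEFAULTSTKSZ,
 *	    KPD_HASREDZONE | KPD_LOCKED);
 *
 *	caddr_t stk = segkp_cache_get(stk_cookie);
 *	...
 *	segkp_release(segkp, stk);
 *
 * segkp_cache_init() returns (void *)-1 if no cache slot is available or if
 * KPD_NO_ANON is requested without KPD_LOCKED, so callers are expected to
 * check the cookie before using it.  segkp_release() returns the resource
 * to the freelist when the cache is not already at kpf_max.
 */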
/*
 * There are 2 entries into segkp_get_internal. The first includes a cookie
 * used to access a pool of cached segkp resources. The second does not
 * use the cache.
 */
caddr_t
segkp_get(struct seg *seg, size_t len, uint_t flags)
{
	struct segkp_data *kpd = NULL;

	if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
		kpd->kp_cookie = -1;
		return (stom(kpd->kp_base, flags));
	}
	return (NULL);
}

/*
 * Return a 'cached' segkp address
 */
caddr_t
segkp_cache_get(void *cookie)
{
	struct segkp_cache *freelist = NULL;
	struct segkp_data *kpd = NULL;
	int index = (int)(uintptr_t)cookie;
	struct seg *seg;
	size_t len;
	uint_t flags;

	if (index < 0 || index >= SEGKP_MAX_CACHE)
		return (NULL);
	freelist = &segkp_cache[index];

	mutex_enter(&segkp_lock);
	seg = freelist->kpf_seg;
	flags = freelist->kpf_flags;
	if (freelist->kpf_list != NULL) {
		kpd = freelist->kpf_list;
		freelist->kpf_list = kpd->kp_next;
		freelist->kpf_count--;
		mutex_exit(&segkp_lock);
		kpd->kp_next = NULL;
		segkp_insert(seg, kpd);
		return (stom(kpd->kp_base, flags));
	}
	len = freelist->kpf_len;
	mutex_exit(&segkp_lock);
	if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
		kpd->kp_cookie = index;
		return (stom(kpd->kp_base, flags));
	}
	return (NULL);
}

caddr_t
segkp_get_withanonmap(
	struct seg *seg,
	size_t len,
	uint_t flags,
	struct anon_map *amp)
{
	struct segkp_data *kpd = NULL;

	ASSERT(amp != NULL);
	flags |= KPD_HASAMP;
	if (segkp_get_internal(seg, len, flags, &kpd, amp) != NULL) {
		kpd->kp_cookie = -1;
		return (stom(kpd->kp_base, flags));
	}
	return (NULL);
}

/*
 * This does the real work of segkp allocation.
 * Return to client base addr. len must be page-aligned. A null value is
 * returned if there are no more vm resources (e.g. pages, swap). The len
 * and base recorded in the private data structure include the redzone
 * and the redzone length (if applicable). If the user requests a redzone,
 * either the first or last page is left unmapped, depending on whether
 * stacks grow toward low or high memory.
 *
 * The client may also specify a no-wait flag. If that is set, then the
 * request will choose a non-blocking path when requesting resources.
 * The default is to make the client wait.
 */
static caddr_t
segkp_get_internal(
	struct seg *seg,
	size_t len,
	uint_t flags,
	struct segkp_data **tkpd,
	struct anon_map *amp)
{
	struct segkp_segdata	*kpsd = (struct segkp_segdata *)seg->s_data;
	struct segkp_data	*kpd;
	caddr_t vbase = NULL;	/* always first virtual, may not be mapped */
	pgcnt_t np = 0;		/* number of pages in the resource */
	pgcnt_t segkpindex;
	long i;
	caddr_t va;
	pgcnt_t pages = 0;
	ulong_t anon_idx = 0;
	int kmflag = (flags & KPD_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
	caddr_t s_base = (segkp_fromheap) ? kvseg.s_base : seg->s_base;

	segkpindex = 0;
	if (len & PAGEOFFSET) {
		panic("segkp_get: len is not page-aligned");
		/*NOTREACHED*/
	}

	ASSERT(((flags & KPD_HASAMP) == 0) == (amp == NULL));

	/* Only allow KPD_NO_ANON if we are going to lock it down */
	if ((flags & (KPD_LOCKED|KPD_NO_ANON)) == KPD_NO_ANON)
		return (NULL);

	if ((kpd = kmem_zalloc(sizeof (struct segkp_data), kmflag)) == NULL)
		return (NULL);
	/*
	 * Fix up the len to reflect the REDZONE if applicable
	 */
	if (flags & KPD_HASREDZONE)
		len += PAGESIZE;
	np = btop(len);

	vbase = vmem_alloc(SEGKP_VMEM(seg), len, kmflag | VM_BESTFIT);
	if (vbase == NULL) {
		kmem_free(kpd, sizeof (struct segkp_data));
		return (NULL);
	}

	/* If locking, reserve physical memory */
	if (flags & KPD_LOCKED) {
		pages = btop(SEGKP_MAPLEN(len, flags));
		if (page_resv(pages, kmflag) == 0) {
			vmem_free(SEGKP_VMEM(seg), vbase, len);
			kmem_free(kpd, sizeof (struct segkp_data));
			return (NULL);
		}
		if ((flags & KPD_NO_ANON) == 0)
			atomic_add_long(&anon_segkp_pages_locked, pages);
	}

	/*
	 * Reserve sufficient swap space for this vm resource.  We'll
	 * actually allocate it in the loop below, but reserving it
	 * here allows us to back out more gracefully than if we
	 * had an allocation failure in the body of the loop.
	 *
	 * Note that we don't need swap space for the red zone page.
	 */
	if (amp != NULL) {
		/*
		 * The swap reservation has been done, if required, and the
		 * anon_hdr is separate.
		 */
		anon_idx = 0;
		kpd->kp_anon_idx = anon_idx;
		kpd->kp_anon = amp->ahp;

		TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
		    kpd, vbase, len, flags, 1);

	} else if ((flags & KPD_NO_ANON) == 0) {
		if (anon_resv_zone(SEGKP_MAPLEN(len, flags), NULL) == 0) {
			if (flags & KPD_LOCKED) {
				atomic_add_long(&anon_segkp_pages_locked,
				    -pages);
				page_unresv(pages);
			}
			vmem_free(SEGKP_VMEM(seg), vbase, len);
			kmem_free(kpd, sizeof (struct segkp_data));
			return (NULL);
		}
		atomic_add_long(&anon_segkp_pages_resv,
		    btop(SEGKP_MAPLEN(len, flags)));
		anon_idx = ((uintptr_t)(vbase - s_base)) >> PAGESHIFT;
		kpd->kp_anon_idx = anon_idx;
		kpd->kp_anon = kpsd->kpsd_anon;

		TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
		    kpd, vbase, len, flags, 1);
	} else {
		kpd->kp_anon = NULL;
		kpd->kp_anon_idx = 0;
	}

	/*
	 * Allocate page and anon resources for the virtual address range
	 * except the redzone
	 */
	if (segkp_fromheap)
		segkpindex = btop((uintptr_t)(vbase - kvseg.s_base));
	for (i = 0, va = vbase; i < np; i++, va += PAGESIZE) {
		page_t		*pl[2];
		struct vnode	*vp;
		anoff_t		off;
		int		err;
		page_t		*pp = NULL;

		/*
		 * Mark this page to be a segkp page in the bitmap.
		 */
		if (segkp_fromheap) {
			BT_ATOMIC_SET(segkp_bitmap, segkpindex);
			segkpindex++;
		}

		/*
		 * If this page is the red zone page, we don't need swap
		 * space for it.  Note that we skip over the code that
		 * establishes MMU mappings, so that the page remains
		 * invalid.
		 */
		if ((flags & KPD_HASREDZONE) && KPD_REDZONE(kpd) == i)
			continue;

		if (kpd->kp_anon != NULL) {
			struct anon *ap;

			ASSERT(anon_get_ptr(kpd->kp_anon, anon_idx + i)
			    == NULL);
			/*
			 * Determine the "vp" and "off" of the anon slot.
			 */
			ap = anon_alloc(NULL, 0);
			if (amp != NULL)
				ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
			(void) anon_set_ptr(kpd->kp_anon, anon_idx + i,
			    ap, ANON_SLEEP);
			if (amp != NULL)
				ANON_LOCK_EXIT(&amp->a_rwlock);
			swap_xlate(ap, &vp, &off);

			/*
			 * Create a page with the specified identity.  The
			 * page is returned with the "shared" lock held.
			 */
			err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE,
			    NULL, pl, PAGESIZE, seg, va, S_CREATE,
			    kcred, NULL);
			if (err) {
				/*
				 * XXX - This should not fail.
				 */
				panic("segkp_get: no pages");
				/*NOTREACHED*/
			}
			pp = pl[0];
		} else {
			ASSERT(page_exists(&kvp,
			    (u_offset_t)(uintptr_t)va) == NULL);

			if ((pp = page_create_va(&kvp,
			    (u_offset_t)(uintptr_t)va, PAGESIZE,
			    (flags & KPD_NOWAIT ? 0 : PG_WAIT) | PG_EXCL |
			    PG_NORELOC, seg, va)) == NULL) {
				/*
				 * Legitimize resource; then destroy it.
				 * Easier than trying to unwind here.
				 */
				kpd->kp_flags = flags;
				kpd->kp_base = vbase;
				kpd->kp_len = len;
				segkp_release_internal(seg, kpd, va - vbase);
				return (NULL);
			}
			page_io_unlock(pp);
		}

		if (flags & KPD_ZERO)
			pagezero(pp, 0, PAGESIZE);

		/*
		 * Load and lock an MMU translation for the page.
		 */
		hat_memload(seg->s_as->a_hat, va, pp, (PROT_READ|PROT_WRITE),
		    ((flags & KPD_LOCKED) ? HAT_LOAD_LOCK : HAT_LOAD));

		/*
		 * Now, release lock on the page.
		 */
		if (flags & KPD_LOCKED) {
			/*
			 * Indicate to page_retire framework that this
			 * page can only be retired when it is freed.
			 */
			PP_SETRAF(pp);
			page_downgrade(pp);
		} else
			page_unlock(pp);
	}

	kpd->kp_flags = flags;
	kpd->kp_base = vbase;
	kpd->kp_len = len;
	segkp_insert(seg, kpd);
	*tkpd = kpd;
	return (stom(kpd->kp_base, flags));
}
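/*
 * A worked example of the length/redzone bookkeeping above (illustrative
 * only; the exact redzone page and client address are determined by the
 * KPD_REDZONE() and stom() macros in <vm/seg_kp.h>):
 *
 *	A request for a 16K resource with KPD_HASREDZONE grows kp_len to
 *	16K + PAGESIZE.  One page of that range is deliberately left
 *	unmapped to serve as the redzone, so SEGKP_MAPLEN() reports 16K of
 *	mapped space and stom() converts kp_base into the address actually
 *	handed to the client.  With downward-growing stacks the unmapped
 *	page lies below the mapped portion, so overrunning the stack faults
 *	on the redzone instead of silently corrupting whatever is adjacent
 *	in the segment.
 */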
/*
 * Release the resource to the cache if the pool (designated by the cookie)
 * has less than the maximum allowable.  If inserted in the cache,
 * segkp_delete ensures the element is taken off of the active list.
 */
void
segkp_release(struct seg *seg, caddr_t vaddr)
{
	struct segkp_cache *freelist;
	struct segkp_data *kpd = NULL;

	if ((kpd = segkp_find(seg, vaddr)) == NULL) {
		panic("segkp_release: null kpd");
		/*NOTREACHED*/
	}

	if (kpd->kp_cookie != -1) {
		freelist = &segkp_cache[kpd->kp_cookie];
		mutex_enter(&segkp_lock);
		if (!segkp_indel && freelist->kpf_count < freelist->kpf_max) {
			segkp_delete(seg, kpd);
			kpd->kp_next = freelist->kpf_list;
			freelist->kpf_list = kpd;
			freelist->kpf_count++;
			mutex_exit(&segkp_lock);
			return;
		} else {
			mutex_exit(&segkp_lock);
			kpd->kp_cookie = -1;
		}
	}
	segkp_release_internal(seg, kpd, kpd->kp_len);
}

/*
 * Free the entire resource. segkp_unlock gets called with the start of the
 * mapped portion of the resource. The length is the size of the mapped
 * portion.
 */
static void
segkp_release_internal(struct seg *seg, struct segkp_data *kpd, size_t len)
{
	caddr_t		va;
	long		i;
	long		redzone;
	size_t		np;
	page_t		*pp;
	struct vnode	*vp;
	anoff_t		off;
	struct anon	*ap;
	pgcnt_t		segkpindex;

	segkpindex = 0;
	ASSERT(kpd != NULL);
	ASSERT((kpd->kp_flags & KPD_HASAMP) == 0 || kpd->kp_cookie == -1);
	np = btop(len);

	/* Remove from active hash list */
	if (kpd->kp_cookie == -1) {
		mutex_enter(&segkp_lock);
		segkp_delete(seg, kpd);
		mutex_exit(&segkp_lock);
	}

	/*
	 * Precompute redzone page index.
	 */
	redzone = -1;
	if (kpd->kp_flags & KPD_HASREDZONE)
		redzone = KPD_REDZONE(kpd);

	va = kpd->kp_base;

	hat_unload(seg->s_as->a_hat, va, (np << PAGESHIFT),
	    ((kpd->kp_flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
	/*
	 * Free up those anon resources that are quiescent.
	 */
	if (segkp_fromheap)
		segkpindex = btop((uintptr_t)(va - kvseg.s_base));
	for (i = 0; i < np; i++, va += PAGESIZE) {

		/*
		 * Clear the bit for this page from the bitmap.
		 */
		if (segkp_fromheap) {
			BT_ATOMIC_CLEAR(segkp_bitmap, segkpindex);
			segkpindex++;
		}

		if (i == redzone)
			continue;
		if (kpd->kp_anon) {
			/*
			 * Free up anon resources and destroy the
			 * associated pages.
			 *
			 * Release the lock if there is one. Have to get the
			 * page to do this, unfortunately.
			 */
			if (kpd->kp_flags & KPD_LOCKED) {
				ap = anon_get_ptr(kpd->kp_anon,
				    kpd->kp_anon_idx + i);
				swap_xlate(ap, &vp, &off);
				/* Find the shared-locked page. */
				pp = page_find(vp, (u_offset_t)off);
				if (pp == NULL) {
					panic("segkp_release: "
					    "kp_anon: no page to unlock ");
					/*NOTREACHED*/
				}
				if (PP_ISRAF(pp))
					PP_CLRRAF(pp);

				page_unlock(pp);
			}
			if ((kpd->kp_flags & KPD_HASAMP) == 0) {
				anon_free(kpd->kp_anon, kpd->kp_anon_idx + i,
				    PAGESIZE);
				anon_unresv_zone(PAGESIZE, NULL);
				atomic_dec_ulong(&anon_segkp_pages_resv);
			}
			TRACE_5(TR_FAC_VM,
			    TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
			    kpd, va, PAGESIZE, 0, 0);
		} else {
			if (kpd->kp_flags & KPD_LOCKED) {
				pp = page_find(&kvp, (u_offset_t)(uintptr_t)va);
				if (pp == NULL) {
					panic("segkp_release: "
					    "no page to unlock");
					/*NOTREACHED*/
				}
				if (PP_ISRAF(pp))
					PP_CLRRAF(pp);
				/*
				 * We should just upgrade the lock here
				 * but there is no upgrade that waits.
				 */
				page_unlock(pp);
			}
			pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)va,
			    SE_EXCL);
			if (pp != NULL)
				page_destroy(pp, 0);
		}
	}

	/* If locked, release physical memory reservation */
	if (kpd->kp_flags & KPD_LOCKED) {
		pgcnt_t pages = btop(SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
		if ((kpd->kp_flags & KPD_NO_ANON) == 0)
			atomic_add_long(&anon_segkp_pages_locked, -pages);
		page_unresv(pages);
	}

	vmem_free(SEGKP_VMEM(seg), kpd->kp_base, kpd->kp_len);
	kmem_free(kpd, sizeof (struct segkp_data));
}

/*
 * segkp_map_red() will check the current frame pointer against the
 * stack base.  If the amount of stack remaining is questionable
 * (less than red_minavail), then segkp_map_red() will map in the redzone
 * and return 1.  Otherwise, it will return 0.  segkp_map_red() can
 * _only_ be called when:
 *
 *   - it is safe to sleep on page_create_va().
 *   - the caller is non-swappable.
 *
 * It is up to the caller to remember whether segkp_map_red() successfully
 * mapped the redzone, and, if so, to call segkp_unmap_red() at a later
 * time.  Note that the caller must _remain_ non-swappable until after
 * calling segkp_unmap_red().
 *
 * Currently, this routine is only called from pagefault() (which necessarily
 * satisfies the above conditions).
 */
#if defined(STACK_GROWTH_DOWN)
int
segkp_map_red(void)
{
	uintptr_t fp = STACK_BIAS + (uintptr_t)getfp();
#ifndef _LP64
	caddr_t stkbase;
#endif

	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Optimize for the common case where we simply return.
	 */
	if ((curthread->t_red_pp == NULL) &&
	    (fp - (uintptr_t)curthread->t_stkbase >= red_minavail))
		return (0);

#if defined(_LP64)
	/*
	 * XXX	We probably need something better than this.
	 */
	panic("kernel stack overflow");
	/*NOTREACHED*/
#else /* _LP64 */
	if (curthread->t_red_pp == NULL) {
		page_t *red_pp;
		struct seg kseg;

		caddr_t red_va = (caddr_t)
		    (((uintptr_t)curthread->t_stkbase & (uintptr_t)PAGEMASK) -
		    PAGESIZE);

		ASSERT(page_exists(&kvp, (u_offset_t)(uintptr_t)red_va) ==
		    NULL);

		/*
		 * Allocate the physical page for the red page.
		 */
		/*
		 * No PG_NORELOC here to avoid waits. Unlikely to get
		 * a relocate happening in the short time the page exists
		 * and it will be OK anyway.
		 */

		kseg.s_as = &kas;
		red_pp = page_create_va(&kvp, (u_offset_t)(uintptr_t)red_va,
		    PAGESIZE, PG_WAIT | PG_EXCL, &kseg, red_va);
		ASSERT(red_pp != NULL);

		/*
		 * So we now have a page to jam into the redzone...
		 */
		page_io_unlock(red_pp);

		hat_memload(kas.a_hat, red_va, red_pp,
		    (PROT_READ|PROT_WRITE), HAT_LOAD_LOCK);
		page_downgrade(red_pp);

		/*
		 * The page is left SE_SHARED locked so we can hold on to
		 * the page_t pointer.
		 */
		curthread->t_red_pp = red_pp;

		atomic_inc_32(&red_nmapped);
		while (fp - (uintptr_t)curthread->t_stkbase < red_closest) {
			(void) atomic_cas_32(&red_closest, red_closest,
			    (uint32_t)(fp - (uintptr_t)curthread->t_stkbase));
		}
		return (1);
	}

	stkbase = (caddr_t)(((uintptr_t)curthread->t_stkbase &
	    (uintptr_t)PAGEMASK) - PAGESIZE);

	atomic_inc_32(&red_ndoubles);

	if (fp - (uintptr_t)stkbase < RED_DEEP_THRESHOLD) {
		/*
		 * Oh boy.  We're already deep within the mapped-in
		 * redzone page, and the caller is trying to prepare
		 * for a deep stack run.  We're running without a
		 * redzone right now: if the caller plows off the
		 * end of the stack, it'll plow another thread or
		 * LWP structure.  That situation could result in
		 * a very hard-to-debug panic, so, in the spirit of
		 * recording the name of one's killer in one's own
		 * blood, we're going to record hrestime and the calling
		 * thread.
		 */
		red_deep_hires = hrestime.tv_nsec;
		red_deep_thread = curthread;
	}

	/*
	 * If this is a DEBUG kernel, and we've run too deep for comfort, toss.
	 */
	ASSERT(fp - (uintptr_t)stkbase >= RED_DEEP_THRESHOLD);
	return (0);
#endif /* _LP64 */
}

void
segkp_unmap_red(void)
{
	page_t *pp;
	caddr_t red_va = (caddr_t)(((uintptr_t)curthread->t_stkbase &
	    (uintptr_t)PAGEMASK) - PAGESIZE);

	ASSERT(curthread->t_red_pp != NULL);
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Because we locked the mapping down, we can't simply rely
	 * on page_destroy() to clean everything up; we need to call
	 * hat_unload() to explicitly unlock the mapping resources.
	 */
	hat_unload(kas.a_hat, red_va, PAGESIZE, HAT_UNLOAD_UNLOCK);

	pp = curthread->t_red_pp;

	ASSERT(pp == page_find(&kvp, (u_offset_t)(uintptr_t)red_va));

	/*
	 * Need to upgrade the SE_SHARED lock to SE_EXCL.
	 */
	if (!page_tryupgrade(pp)) {
		/*
		 * As there is no wait for upgrade, release the
		 * SE_SHARED lock and wait for SE_EXCL.
		 */
		page_unlock(pp);
		pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)red_va, SE_EXCL);
		/* pp may be NULL here, hence the test below */
	}

	/*
	 * Destroy the page, with dontfree set to zero (i.e. free it).
	 */
	if (pp != NULL)
		page_destroy(pp, 0);
	curthread->t_red_pp = NULL;
}
#else
#error Red stacks only supported with downwards stack growth.
#endif

/*
 * Handle a fault on an address corresponding to one of the
 * resources in the segkp segment.
 */
faultcode_t
segkp_fault(
	struct hat *hat,
	struct seg *seg,
	caddr_t vaddr,
	size_t len,
	enum fault_type type,
	enum seg_rw rw)
{
	struct segkp_data	*kpd = NULL;
	int			err;

	ASSERT(seg->s_as == &kas && RW_READ_HELD(&seg->s_as->a_lock));

	/*
	 * Sanity checks.
	 */
	if (type == F_PROT) {
		panic("segkp_fault: unexpected F_PROT fault");
		/*NOTREACHED*/
	}

	if ((kpd = segkp_find(seg, vaddr)) == NULL)
		return (FC_NOMAP);

	mutex_enter(&kpd->kp_lock);

	if (type == F_SOFTLOCK) {
		ASSERT(!(kpd->kp_flags & KPD_LOCKED));
		/*
		 * The F_SOFTLOCK case has more stringent
		 * range requirements: the given range must exactly coincide
		 * with the resource's mapped portion.  Note reference to
		 * redzone is handled since vaddr would not equal base.
		 */
		if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
		    len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
			mutex_exit(&kpd->kp_lock);
			return (FC_MAKE_ERR(EFAULT));
		}

		if ((err = segkp_load(hat, seg, vaddr, len, kpd, KPD_LOCKED))) {
			mutex_exit(&kpd->kp_lock);
			return (FC_MAKE_ERR(err));
		}
		kpd->kp_flags |= KPD_LOCKED;
		mutex_exit(&kpd->kp_lock);
		return (0);
	}

	if (type == F_INVAL) {
		ASSERT(!(kpd->kp_flags & KPD_NO_ANON));

		/*
		 * Check if we touched the redzone. Somewhat optimistic
		 * here if we are touching the redzone of our own stack
		 * since we wouldn't have a stack to get this far...
		 */
		if ((kpd->kp_flags & KPD_HASREDZONE) &&
		    btop((uintptr_t)(vaddr - kpd->kp_base)) == KPD_REDZONE(kpd))
			panic("segkp_fault: accessing redzone");

		/*
		 * This fault may occur while the page is being F_SOFTLOCK'ed.
		 * Return since a 2nd segkp_load is unnecessary and also would
		 * result in the page being locked twice and eventually
		 * hang the thread_reaper thread.
		 */
		if (kpd->kp_flags & KPD_LOCKED) {
			mutex_exit(&kpd->kp_lock);
			return (0);
		}

		err = segkp_load(hat, seg, vaddr, len, kpd, kpd->kp_flags);
		mutex_exit(&kpd->kp_lock);
		return (err ? FC_MAKE_ERR(err) : 0);
	}

	if (type == F_SOFTUNLOCK) {
		uint_t	flags;

		/*
		 * Make sure the addr is LOCKED and it has anon backing
		 * before unlocking
		 */
		if ((kpd->kp_flags & (KPD_LOCKED|KPD_NO_ANON)) != KPD_LOCKED) {
			panic("segkp_fault: bad unlock");
			/*NOTREACHED*/
		}

		if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
		    len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
			panic("segkp_fault: bad range");
			/*NOTREACHED*/
		}

		if (rw == S_WRITE)
			flags = kpd->kp_flags | KPD_WRITEDIRTY;
		else
			flags = kpd->kp_flags;
		err = segkp_unlock(hat, seg, vaddr, len, kpd, flags);
		kpd->kp_flags &= ~KPD_LOCKED;
		mutex_exit(&kpd->kp_lock);
		return (err ? FC_MAKE_ERR(err) : 0);
	}
	mutex_exit(&kpd->kp_lock);
	panic("segkp_fault: bogus fault type: %d\n", type);
	/*NOTREACHED*/
}

/*
 * Check that the given protections suffice over the range specified by
 * vaddr and len.  For this segment type, the only issue is whether or
 * not the range lies completely within the mapped part of an allocated
 * resource.
 */
/* ARGSUSED */
static int
segkp_checkprot(struct seg *seg, caddr_t vaddr, size_t len, uint_t prot)
{
	struct segkp_data *kpd = NULL;
	caddr_t mbase;
	size_t mlen;

	if ((kpd = segkp_find(seg, vaddr)) == NULL)
		return (EACCES);

	mutex_enter(&kpd->kp_lock);
	mbase = stom(kpd->kp_base, kpd->kp_flags);
	mlen = SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags);
	if (len > mlen || vaddr < mbase ||
	    ((vaddr + len) > (mbase + mlen))) {
		mutex_exit(&kpd->kp_lock);
		return (EACCES);
	}
	mutex_exit(&kpd->kp_lock);
	return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For seg_u we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

/*
 * Load and possibly lock intra-slot resources in the range given by
 * vaddr and len.
 */
static int
segkp_load(
	struct hat *hat,
	struct seg *seg,
	caddr_t vaddr,
	size_t len,
	struct segkp_data *kpd,
	uint_t flags)
{
	caddr_t va;
	caddr_t vlim;
	ulong_t i;
	uint_t lock;

	ASSERT(MUTEX_HELD(&kpd->kp_lock));

	len = P2ROUNDUP(len, PAGESIZE);

	/* If locking, reserve physical memory */
	if (flags & KPD_LOCKED) {
		pgcnt_t pages = btop(len);
		if ((kpd->kp_flags & KPD_NO_ANON) == 0)
			atomic_add_long(&anon_segkp_pages_locked, pages);
		(void) page_resv(pages, KM_SLEEP);
	}

	/*
	 * Loop through the pages in the given range.
	 */
	va = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	vaddr = va;
	vlim = va + len;
	lock = flags & KPD_LOCKED;
	i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
	for (; va < vlim; va += PAGESIZE, i++) {
		page_t *pl[2];	/* second element NULL terminator */
		struct vnode *vp;
		anoff_t off;
		int err;
		struct anon *ap;

		/*
		 * Summon the page.  If it's not resident, arrange
		 * for synchronous i/o to pull it in.
		 */
		ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
		swap_xlate(ap, &vp, &off);

		/*
		 * The returned page list will have exactly one entry,
		 * which is returned to us already kept.
		 */
		err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE, NULL,
		    pl, PAGESIZE, seg, va, S_READ, kcred, NULL);

		if (err) {
			/*
			 * Back out of what we've done so far.
			 */
			(void) segkp_unlock(hat, seg, vaddr,
			    (va - vaddr), kpd, flags);
			return (err);
		}

		/*
		 * Load an MMU translation for the page.
		 */
		hat_memload(hat, va, pl[0], (PROT_READ|PROT_WRITE),
		    lock ? HAT_LOAD_LOCK : HAT_LOAD);

		if (!lock) {
			/*
			 * Now, release "shared" lock on the page.
			 */
			page_unlock(pl[0]);
		}
	}
	return (0);
}

/*
 * At the very least, unload the mmu translations and unlock the range if
 * locked.  Can be called with the KPD_WRITEDIRTY flag, which specifies that
 * any dirty pages should be written to disk.
 */
static int
segkp_unlock(
	struct hat *hat,
	struct seg *seg,
	caddr_t vaddr,
	size_t len,
	struct segkp_data *kpd,
	uint_t flags)
{
	caddr_t va;
	caddr_t vlim;
	ulong_t i;
	struct page *pp;
	struct vnode *vp;
	anoff_t off;
	struct anon *ap;

#ifdef lint
	seg = seg;
#endif /* lint */

	ASSERT(MUTEX_HELD(&kpd->kp_lock));

	/*
	 * Loop through the pages in the given range. It is assumed
	 * segkp_unlock is called with a page-aligned base.
	 */
	va = vaddr;
	vlim = va + len;
	i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
	hat_unload(hat, va, len,
	    ((flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
	for (; va < vlim; va += PAGESIZE, i++) {
		/*
		 * Find the page associated with this part of the
		 * slot, tracking it down through its associated swap
		 * space.
		 */
		ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
		swap_xlate(ap, &vp, &off);

		if (flags & KPD_LOCKED) {
			if ((pp = page_find(vp, off)) == NULL) {
				if (flags & KPD_LOCKED) {
					panic("segkp_softunlock: missing page");
					/*NOTREACHED*/
				}
			}
		} else {
			/*
			 * Nothing to do if the slot is not locked and the
			 * page doesn't exist.
			 */
			if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL)
				continue;
		}

		/*
		 * If the page doesn't have any translations, is
		 * dirty and not being shared, then push it out
		 * asynchronously and avoid waiting for the
		 * pageout daemon to do it for us.
		 *
		 * XXX - Do we really need to get the "exclusive"
		 * lock via an upgrade?
		 */
		if ((flags & KPD_WRITEDIRTY) && !hat_page_is_mapped(pp) &&
		    hat_ismod(pp) && page_tryupgrade(pp)) {
			/*
			 * Hold the vnode before releasing the page lock to
			 * prevent it from being freed and re-used by some
			 * other thread.
			 */
			VN_HOLD(vp);
			page_unlock(pp);

			/*
			 * Want most powerful credentials we can get so
			 * use kcred.
			 */
			(void) VOP_PUTPAGE(vp, (offset_t)off, PAGESIZE,
			    B_ASYNC | B_FREE, kcred, NULL);
			VN_RELE(vp);
		} else {
			page_unlock(pp);
		}
	}

	/* If unlocking, release physical memory */
	if (flags & KPD_LOCKED) {
		pgcnt_t pages = btopr(len);
		if ((kpd->kp_flags & KPD_NO_ANON) == 0)
			atomic_add_long(&anon_segkp_pages_locked, -pages);
		page_unresv(pages);
	}
	return (0);
}

/*
 * Insert the kpd in the hash table.
 */
static void
segkp_insert(struct seg *seg, struct segkp_data *kpd)
{
	struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
	int index;

	/*
	 * Insert the kpd based on the address that will be returned
	 * via segkp_release.
	 */
	index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
	mutex_enter(&segkp_lock);
	kpd->kp_next = kpsd->kpsd_hash[index];
	kpsd->kpsd_hash[index] = kpd;
	mutex_exit(&segkp_lock);
}

/*
 * Remove kpd from the hash table.
 */
static void
segkp_delete(struct seg *seg, struct segkp_data *kpd)
{
	struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
	struct segkp_data **kpp;
	int index;

	ASSERT(MUTEX_HELD(&segkp_lock));

	index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
	for (kpp = &kpsd->kpsd_hash[index];
	    *kpp != NULL; kpp = &((*kpp)->kp_next)) {
		if (*kpp == kpd) {
			*kpp = kpd->kp_next;
			return;
		}
	}
	panic("segkp_delete: unable to find element to delete");
	/*NOTREACHED*/
}

/*
 * Find the kpd associated with a vaddr.
 *
 * Most of the callers of segkp_find will pass the vaddr that
 * hashes to the desired index, but there are cases where
 * this is not true in which case we have to (potentially) scan
 * the whole table looking for it. This should be very rare
 * (e.g. a segkp_fault(F_INVAL) on an address somewhere in the
 * middle of the segkp_data region).
 */
static struct segkp_data *
segkp_find(struct seg *seg, caddr_t vaddr)
{
	struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
	struct segkp_data *kpd;
	int	i;
	int	stop;

	i = stop = SEGKP_HASH(vaddr);
	mutex_enter(&segkp_lock);
	do {
		for (kpd = kpsd->kpsd_hash[i]; kpd != NULL;
		    kpd = kpd->kp_next) {
			if (vaddr >= kpd->kp_base &&
			    vaddr < kpd->kp_base + kpd->kp_len) {
				mutex_exit(&segkp_lock);
				return (kpd);
			}
		}
		if (--i < 0)
			i = SEGKP_HASHSZ - 1;	/* Wrap */
	} while (i != stop);
	mutex_exit(&segkp_lock);
	return (NULL);		/* Not found */
}
/*
 * Returns size of swappable area.
 */
size_t
swapsize(caddr_t v)
{
	struct segkp_data *kpd;

	if ((kpd = segkp_find(segkp, v)) != NULL)
		return (SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
	else
		return (0);
}

/*
 * Dump out all the active segkp pages
 */
static void
segkp_dump(struct seg *seg)
{
	int i;
	struct segkp_data *kpd;
	struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;

	for (i = 0; i < SEGKP_HASHSZ; i++) {
		for (kpd = kpsd->kpsd_hash[i];
		    kpd != NULL; kpd = kpd->kp_next) {
			pfn_t pfn;
			caddr_t addr;
			caddr_t eaddr;

			addr = kpd->kp_base;
			eaddr = addr + kpd->kp_len;
			while (addr < eaddr) {
				ASSERT(seg->s_as == &kas);
				pfn = hat_getpfnum(seg->s_as->a_hat, addr);
				if (pfn != PFN_INVALID)
					dump_addpage(seg->s_as, addr, pfn);
				addr += PAGESIZE;
				dump_timeleft = dump_timeout;
			}
		}
	}
}

/*ARGSUSED*/
static int
segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

/*ARGSUSED*/
static int
segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	return (ENODEV);
}

/*ARGSUSED*/
static lgrp_mem_policy_info_t *
segkp_getpolicy(struct seg *seg, caddr_t addr)
{
	return (NULL);
}

/*ARGSUSED*/
static int
segkp_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}

#include <sys/mem_config.h>

/*ARGSUSED*/
static void
segkp_mem_config_post_add(void *arg, pgcnt_t delta_pages)
{}

/*
 * During memory delete, turn off caches so that pages are not held.
 * A better solution may be to unlock the pages while they are
 * in the cache so that they may be collected naturally.
 */

/*ARGSUSED*/
static int
segkp_mem_config_pre_del(void *arg, pgcnt_t delta_pages)
{
	atomic_inc_32(&segkp_indel);
	segkp_cache_free();
	return (0);
}

/*ARGSUSED*/
static void
segkp_mem_config_post_del(void *arg, pgcnt_t delta_pages, int cancelled)
{
	atomic_dec_32(&segkp_indel);
}

static kphysm_setup_vector_t segkp_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	segkp_mem_config_post_add,
	segkp_mem_config_pre_del,
	segkp_mem_config_post_del,
};

static void
segkpinit_mem_config(struct seg *seg)
{
	int ret;

	ret = kphysm_setup_func_register(&segkp_mem_config_vec, (void *)seg);
	ASSERT(ret == 0);
}