1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2019 Joyent, Inc. 24 * Copyright (c) 2016 by Delphix. All rights reserved. 25 */ 26 27 #include <sys/param.h> 28 #include <sys/user.h> 29 #include <sys/mman.h> 30 #include <sys/kmem.h> 31 #include <sys/sysmacros.h> 32 #include <sys/cmn_err.h> 33 #include <sys/systm.h> 34 #include <sys/tuneable.h> 35 #include <vm/hat.h> 36 #include <vm/seg.h> 37 #include <vm/as.h> 38 #include <vm/anon.h> 39 #include <vm/page.h> 40 #include <sys/buf.h> 41 #include <sys/swap.h> 42 #include <sys/atomic.h> 43 #include <vm/seg_spt.h> 44 #include <sys/debug.h> 45 #include <sys/vtrace.h> 46 #include <sys/shm.h> 47 #include <sys/shm_impl.h> 48 #include <sys/lgrp.h> 49 #include <sys/vmsystm.h> 50 #include <sys/policy.h> 51 #include <sys/project.h> 52 #include <sys/tnf_probe.h> 53 #include <sys/zone.h> 54 55 #define SEGSPTADDR (caddr_t)0x0 56 57 /* 58 * # pages used for spt 59 */ 60 size_t spt_used; 61 62 /* 63 * See spt_setminfree(). 64 */ 65 pgcnt_t segspt_minfree = 0; 66 size_t segspt_minfree_clamp = (1UL << 30); /* 1GB in bytes */ 67 68 static int segspt_create(struct seg **segpp, void *argsp); 69 static int segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize); 70 static void segspt_free(struct seg *seg); 71 static void segspt_free_pages(struct seg *seg, caddr_t addr, size_t len); 72 static lgrp_mem_policy_info_t *segspt_getpolicy(struct seg *seg, caddr_t addr); 73 74 /* ARGSUSED */ 75 __NORETURN static int 76 segspt_badop_dup(struct seg *seg __unused, struct seg *newseg __unused) 77 { 78 panic("%s called", __func__); 79 } 80 81 /* ARGSUSED */ 82 __NORETURN static faultcode_t 83 segspt_badop_fault(struct hat *hat, struct seg *seg, caddr_t addr, 84 size_t len, enum fault_type type, enum seg_rw rw) 85 { 86 panic("%s called", __func__); 87 } 88 89 /* ARGSUSED */ 90 __NORETURN static faultcode_t 91 segspt_badop_faulta(struct seg *seg __unused, caddr_t addr __unused) 92 { 93 panic("%s called", __func__); 94 } 95 96 /* ARGSUSED */ 97 __NORETURN static int 98 segspt_badop_prot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 99 { 100 panic("%s called", __func__); 101 } 102 103 /* ARGSUSED */ 104 __NORETURN static int 105 segspt_badop_checkprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot) 106 { 107 panic("%s called", __func__); 108 } 109 110 /* ARGSUSED */ 111 __NORETURN static int 112 segspt_badop_kluster(struct seg *seg, caddr_t addr, ssize_t delta) 113 { 114 panic("%s called", __func__); 115 } 116 117 /* ARGSUSED */ 118 __NORETURN static size_t 119 segspt_badop_swapout(struct seg *seg) 120 { 121 panic("%s called", __func__); 122 } 123 124 /* ARGSUSED */ 125 __NORETURN static int 126 segspt_badop_sync(struct seg *seg, caddr_t addr, size_t len, int attr, 127 uint_t flags) 128 { 129 panic("%s called", __func__); 130 } 131 132 /* ARGSUSED */ 133 __NORETURN 134 static size_t 135 segspt_badop_incore(struct seg *seg, caddr_t addr, size_t len, char *vec) 136 { 137 panic("%s called", __func__); 138 } 139 140 /* ARGSUSED */ 141 __NORETURN static int 142 segspt_badop_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, 143 int op, ulong_t *lockmap, size_t pos) 144 { 145 panic("%s called", __func__); 146 } 147 148 /* ARGSUSED */ 149 __NORETURN static int 150 segspt_badop_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 151 { 152 panic("%s called", __func__); 153 } 154 155 /* ARGSUSED */ 156 __NORETURN static u_offset_t 157 segspt_badop_getoffset(struct seg *seg, caddr_t addr) 158 { 159 panic("%s called", __func__); 160 } 161 162 /* ARGSUSED */ 163 __NORETURN static int 164 segspt_badop_gettype(struct seg *seg, caddr_t addr) 165 { 166 panic("%s called", __func__); 167 } 168 169 /* ARGSUSED */ 170 __NORETURN static int 171 segspt_badop_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 172 { 173 panic("%s called", __func__); 174 } 175 176 /* ARGSUSED */ 177 __NORETURN static int 178 segspt_badop_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) 179 { 180 panic("%s called", __func__); 181 } 182 183 /* ARGSUSED */ 184 __NORETURN static void 185 segspt_badop_dump(struct seg *seg) 186 { 187 panic("%s called", __func__); 188 } 189 190 /* ARGSUSED */ 191 __NORETURN static int 192 segspt_badop_pagelock(struct seg *seg, caddr_t addr, size_t len, 193 struct page ***ppp, enum lock_type type, enum seg_rw rw) 194 { 195 panic("%s called", __func__); 196 } 197 198 /* ARGSUSED */ 199 __NORETURN static int 200 segspt_badop_setpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc) 201 { 202 panic("%s called", __func__); 203 } 204 205 /* ARGSUSED */ 206 __NORETURN static int 207 segspt_badop_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) 208 { 209 panic("%s called", __func__); 210 } 211 212 /* ARGSUSED */ 213 __NORETURN static int 214 segspt_badop_capable(struct seg *seg, segcapability_t capability) 215 { 216 panic("%s called", __func__); 217 } 218 219 struct seg_ops segspt_ops = { 220 segspt_badop_dup, /* dup */ 221 segspt_unmap, 222 segspt_free, 223 segspt_badop_fault, /* fault */ 224 segspt_badop_faulta, /* faulta */ 225 segspt_badop_prot, /* setprot */ 226 segspt_badop_checkprot, /* checkprot */ 227 segspt_badop_kluster, /* kluster */ 228 segspt_badop_swapout, /* swapout */ 229 segspt_badop_sync, /* sync */ 230 segspt_badop_incore, /* incore */ 231 segspt_badop_lockop, /* lockop */ 232 segspt_badop_getprot, /* getprot */ 233 segspt_badop_getoffset, /* getoffset */ 234 segspt_badop_gettype, /* gettype */ 235 segspt_badop_getvp, /* getvp */ 236 segspt_badop_advise, /* advise */ 237 segspt_badop_dump, /* dump */ 238 segspt_badop_pagelock, /* pagelock */ 239 segspt_badop_setpgsz, /* setpgsz */ 240 segspt_badop_getmemid, /* getmemid */ 241 segspt_getpolicy, /* getpolicy */ 242 segspt_badop_capable, /* capable */ 243 seg_inherit_notsup /* inherit */ 244 }; 245 246 static int segspt_shmdup(struct seg *seg, struct seg *newseg); 247 static int segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize); 248 static void segspt_shmfree(struct seg *seg); 249 static faultcode_t segspt_shmfault(struct hat *hat, struct seg *seg, 250 caddr_t addr, size_t len, enum fault_type type, enum seg_rw rw); 251 static faultcode_t segspt_shmfaulta(struct seg *seg, caddr_t addr); 252 static int segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, 253 uint_t prot); 254 static int segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, 255 uint_t prot); 256 static int segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta); 257 static size_t segspt_shmswapout(struct seg *seg); 258 static size_t segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, 259 char *vec); 260 static int segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, 261 int attr, uint_t flags); 262 static int segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, 263 int attr, int op, ulong_t *lockmap, size_t pos); 264 static int segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, 265 uint_t *protv); 266 static u_offset_t segspt_shmgetoffset(struct seg *seg, caddr_t addr); 267 static int segspt_shmgettype(struct seg *seg, caddr_t addr); 268 static int segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp); 269 static int segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, 270 uint_t behav); 271 static void segspt_shmdump(struct seg *seg); 272 static int segspt_shmpagelock(struct seg *, caddr_t, size_t, 273 struct page ***, enum lock_type, enum seg_rw); 274 static int segspt_shmsetpgsz(struct seg *, caddr_t, size_t, uint_t); 275 static int segspt_shmgetmemid(struct seg *, caddr_t, memid_t *); 276 static lgrp_mem_policy_info_t *segspt_shmgetpolicy(struct seg *, caddr_t); 277 static int segspt_shmcapable(struct seg *, segcapability_t); 278 279 struct seg_ops segspt_shmops = { 280 segspt_shmdup, 281 segspt_shmunmap, 282 segspt_shmfree, 283 segspt_shmfault, 284 segspt_shmfaulta, 285 segspt_shmsetprot, 286 segspt_shmcheckprot, 287 segspt_shmkluster, 288 segspt_shmswapout, 289 segspt_shmsync, 290 segspt_shmincore, 291 segspt_shmlockop, 292 segspt_shmgetprot, 293 segspt_shmgetoffset, 294 segspt_shmgettype, 295 segspt_shmgetvp, 296 segspt_shmadvise, /* advise */ 297 segspt_shmdump, 298 segspt_shmpagelock, 299 segspt_shmsetpgsz, 300 segspt_shmgetmemid, 301 segspt_shmgetpolicy, 302 segspt_shmcapable, 303 seg_inherit_notsup 304 }; 305 306 static void segspt_purge(struct seg *seg); 307 static int segspt_reclaim(void *, caddr_t, size_t, struct page **, 308 enum seg_rw, int); 309 static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len, 310 page_t **ppa); 311 312 /* 313 * This value corresponds to headroom in availrmem that ISM can never allocate 314 * (but others can). The original intent here was to prevent ISM from locking 315 * all of the remaining availrmem into memory, making forward progress 316 * difficult. It's not clear how much this matters on modern systems. 317 * 318 * The traditional default value of 5% of total memory is used, except on 319 * systems where that quickly gets ridiculous: in that case we clamp at a rather 320 * arbitrary value of 1GB. 321 * 322 * Note that since this is called lazily on the first sptcreate(), in theory, 323 * this could represent a very small value if the system is heavily loaded 324 * already. In practice, the first ISM user is pretty likely to come along 325 * earlier during the system's operation. 326 * 327 * This never gets re-figured. 328 */ 329 static void 330 spt_setminfree(void) 331 { 332 segspt_minfree = availrmem / 20; 333 334 if (segspt_minfree_clamp != 0 && 335 segspt_minfree > (segspt_minfree_clamp / PAGESIZE)) 336 segspt_minfree = segspt_minfree_clamp / PAGESIZE; 337 } 338 339 int 340 sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp, 341 uint_t prot, uint_t flags, uint_t share_szc) 342 { 343 int err; 344 struct as *newas; 345 struct segspt_crargs sptcargs; 346 347 #ifdef DEBUG 348 TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */, 349 tnf_ulong, size, size ); 350 #endif 351 if (segspt_minfree == 0) 352 spt_setminfree(); 353 354 if (!hat_supported(HAT_SHARED_PT, (void *)0)) 355 return (EINVAL); 356 357 /* 358 * get a new as for this shared memory segment 359 */ 360 newas = as_alloc(); 361 newas->a_proc = NULL; 362 sptcargs.amp = amp; 363 sptcargs.prot = prot; 364 sptcargs.flags = flags; 365 sptcargs.szc = share_szc; 366 /* 367 * create a shared page table (spt) segment 368 */ 369 370 if (err = as_map(newas, SEGSPTADDR, size, segspt_create, &sptcargs)) { 371 as_free(newas); 372 return (err); 373 } 374 *sptseg = sptcargs.seg_spt; 375 return (0); 376 } 377 378 void 379 sptdestroy(struct as *as, struct anon_map *amp) 380 { 381 382 #ifdef DEBUG 383 TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */); 384 #endif 385 (void) as_unmap(as, SEGSPTADDR, amp->size); 386 as_free(as); 387 } 388 389 /* 390 * called from seg_free(). 391 * free (i.e., unlock, unmap, return to free list) 392 * all the pages in the given seg. 393 */ 394 void 395 segspt_free(struct seg *seg) 396 { 397 struct spt_data *sptd = (struct spt_data *)seg->s_data; 398 399 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 400 401 if (sptd != NULL) { 402 if (sptd->spt_realsize) 403 segspt_free_pages(seg, seg->s_base, sptd->spt_realsize); 404 405 if (sptd->spt_ppa_lckcnt) { 406 kmem_free(sptd->spt_ppa_lckcnt, 407 sizeof (*sptd->spt_ppa_lckcnt) 408 * btopr(sptd->spt_amp->size)); 409 } 410 kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp)); 411 cv_destroy(&sptd->spt_cv); 412 mutex_destroy(&sptd->spt_lock); 413 kmem_free(sptd, sizeof (*sptd)); 414 } 415 } 416 417 /*ARGSUSED*/ 418 static int 419 segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr, 420 uint_t flags) 421 { 422 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 423 424 return (0); 425 } 426 427 /*ARGSUSED*/ 428 static size_t 429 segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec) 430 { 431 caddr_t eo_seg; 432 pgcnt_t npages; 433 struct shm_data *shmd = (struct shm_data *)seg->s_data; 434 struct seg *sptseg; 435 struct spt_data *sptd; 436 437 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 438 #ifdef lint 439 seg = seg; 440 #endif 441 sptseg = shmd->shm_sptseg; 442 sptd = sptseg->s_data; 443 444 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { 445 eo_seg = addr + len; 446 while (addr < eo_seg) { 447 /* page exists, and it's locked. */ 448 *vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED | 449 SEG_PAGE_ANON; 450 addr += PAGESIZE; 451 } 452 return (len); 453 } else { 454 struct anon_map *amp = shmd->shm_amp; 455 struct anon *ap; 456 page_t *pp; 457 pgcnt_t anon_index; 458 struct vnode *vp; 459 u_offset_t off; 460 ulong_t i; 461 int ret; 462 anon_sync_obj_t cookie; 463 464 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 465 anon_index = seg_page(seg, addr); 466 npages = btopr(len); 467 if (anon_index + npages > btopr(shmd->shm_amp->size)) { 468 return (EINVAL); 469 } 470 ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 471 for (i = 0; i < npages; i++, anon_index++) { 472 ret = 0; 473 anon_array_enter(amp, anon_index, &cookie); 474 ap = anon_get_ptr(amp->ahp, anon_index); 475 if (ap != NULL) { 476 swap_xlate(ap, &vp, &off); 477 anon_array_exit(&cookie); 478 pp = page_lookup_nowait(vp, off, SE_SHARED); 479 if (pp != NULL) { 480 ret |= SEG_PAGE_INCORE | SEG_PAGE_ANON; 481 page_unlock(pp); 482 } 483 } else { 484 anon_array_exit(&cookie); 485 } 486 if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) { 487 ret |= SEG_PAGE_LOCKED; 488 } 489 *vec++ = (char)ret; 490 } 491 ANON_LOCK_EXIT(&->a_rwlock); 492 return (len); 493 } 494 } 495 496 static int 497 segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize) 498 { 499 size_t share_size; 500 501 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 502 503 /* 504 * seg.s_size may have been rounded up to the largest page size 505 * in shmat(). 506 * XXX This should be cleanedup. sptdestroy should take a length 507 * argument which should be the same as sptcreate. Then 508 * this rounding would not be needed (or is done in shm.c) 509 * Only the check for full segment will be needed. 510 * 511 * XXX -- shouldn't raddr == 0 always? These tests don't seem 512 * to be useful at all. 513 */ 514 share_size = page_get_pagesize(seg->s_szc); 515 ssize = P2ROUNDUP(ssize, share_size); 516 517 if (raddr == seg->s_base && ssize == seg->s_size) { 518 seg_free(seg); 519 return (0); 520 } else 521 return (EINVAL); 522 } 523 524 int 525 segspt_create(struct seg **segpp, void *argsp) 526 { 527 struct seg *seg = *segpp; 528 int err; 529 caddr_t addr = seg->s_base; 530 struct spt_data *sptd; 531 struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp; 532 struct anon_map *amp = sptcargs->amp; 533 struct kshmid *sp = amp->a_sp; 534 struct cred *cred = CRED(); 535 ulong_t i, j, anon_index = 0; 536 pgcnt_t npages = btopr(amp->size); 537 struct vnode *vp; 538 page_t **ppa; 539 uint_t hat_flags; 540 size_t pgsz; 541 pgcnt_t pgcnt; 542 caddr_t a; 543 pgcnt_t pidx; 544 size_t sz; 545 proc_t *procp = curproc; 546 rctl_qty_t lockedbytes = 0; 547 kproject_t *proj; 548 549 /* 550 * We are holding the a_lock on the underlying dummy as, 551 * so we can make calls to the HAT layer. 552 */ 553 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 554 ASSERT(sp != NULL); 555 556 #ifdef DEBUG 557 TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */, 558 tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size); 559 #endif 560 if ((sptcargs->flags & SHM_PAGEABLE) == 0) { 561 if (err = anon_swap_adjust(npages)) 562 return (err); 563 } 564 err = ENOMEM; 565 566 if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL) 567 goto out1; 568 569 ppa = NULL; 570 if ((sptcargs->flags & SHM_PAGEABLE) == 0) { 571 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages), 572 KM_NOSLEEP)) == NULL) 573 goto out2; 574 } 575 576 mutex_init(&sptd->spt_lock, NULL, MUTEX_DEFAULT, NULL); 577 578 if ((vp = kmem_zalloc(sizeof (*vp), KM_NOSLEEP)) == NULL) 579 goto out3; 580 581 seg->s_ops = &segspt_ops; 582 sptd->spt_vp = vp; 583 sptd->spt_amp = amp; 584 sptd->spt_prot = sptcargs->prot; 585 sptd->spt_flags = sptcargs->flags; 586 seg->s_data = (caddr_t)sptd; 587 sptd->spt_ppa = NULL; 588 sptd->spt_ppa_lckcnt = NULL; 589 seg->s_szc = sptcargs->szc; 590 cv_init(&sptd->spt_cv, NULL, CV_DEFAULT, NULL); 591 sptd->spt_gen = 0; 592 593 ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); 594 if (seg->s_szc > amp->a_szc) { 595 amp->a_szc = seg->s_szc; 596 } 597 ANON_LOCK_EXIT(&->a_rwlock); 598 599 /* 600 * Set policy to affect initial allocation of pages in 601 * anon_map_createpages() 602 */ 603 (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, amp, anon_index, 604 NULL, 0, ptob(npages)); 605 606 if (sptcargs->flags & SHM_PAGEABLE) { 607 size_t share_sz; 608 pgcnt_t new_npgs, more_pgs; 609 struct anon_hdr *nahp; 610 zone_t *zone; 611 612 share_sz = page_get_pagesize(seg->s_szc); 613 if (!IS_P2ALIGNED(amp->size, share_sz)) { 614 /* 615 * We are rounding up the size of the anon array 616 * on 4 M boundary because we always create 4 M 617 * of page(s) when locking, faulting pages and we 618 * don't have to check for all corner cases e.g. 619 * if there is enough space to allocate 4 M 620 * page. 621 */ 622 new_npgs = btop(P2ROUNDUP(amp->size, share_sz)); 623 more_pgs = new_npgs - npages; 624 625 /* 626 * The zone will never be NULL, as a fully created 627 * shm always has an owning zone. 628 */ 629 zone = sp->shm_perm.ipc_zone_ref.zref_zone; 630 ASSERT(zone != NULL); 631 if (anon_resv_zone(ptob(more_pgs), zone) == 0) { 632 err = ENOMEM; 633 goto out4; 634 } 635 636 nahp = anon_create(new_npgs, ANON_SLEEP); 637 ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); 638 (void) anon_copy_ptr(amp->ahp, 0, nahp, 0, npages, 639 ANON_SLEEP); 640 anon_release(amp->ahp, npages); 641 amp->ahp = nahp; 642 ASSERT(amp->swresv == ptob(npages)); 643 amp->swresv = amp->size = ptob(new_npgs); 644 ANON_LOCK_EXIT(&->a_rwlock); 645 npages = new_npgs; 646 } 647 648 sptd->spt_ppa_lckcnt = kmem_zalloc(npages * 649 sizeof (*sptd->spt_ppa_lckcnt), KM_SLEEP); 650 sptd->spt_pcachecnt = 0; 651 sptd->spt_realsize = ptob(npages); 652 sptcargs->seg_spt = seg; 653 return (0); 654 } 655 656 /* 657 * get array of pages for each anon slot in amp 658 */ 659 if ((err = anon_map_createpages(amp, anon_index, ptob(npages), ppa, 660 seg, addr, S_CREATE, cred)) != 0) 661 goto out4; 662 663 mutex_enter(&sp->shm_mlock); 664 665 /* May be partially locked, so, count bytes to charge for locking */ 666 for (i = 0; i < npages; i++) 667 if (ppa[i]->p_lckcnt == 0) 668 lockedbytes += PAGESIZE; 669 670 proj = sp->shm_perm.ipc_proj; 671 672 if (lockedbytes > 0) { 673 mutex_enter(&procp->p_lock); 674 if (rctl_incr_locked_mem(procp, proj, lockedbytes, 0)) { 675 mutex_exit(&procp->p_lock); 676 mutex_exit(&sp->shm_mlock); 677 for (i = 0; i < npages; i++) 678 page_unlock(ppa[i]); 679 err = ENOMEM; 680 goto out4; 681 } 682 mutex_exit(&procp->p_lock); 683 } 684 685 /* 686 * addr is initial address corresponding to the first page on ppa list 687 */ 688 for (i = 0; i < npages; i++) { 689 /* attempt to lock all pages */ 690 if (page_pp_lock(ppa[i], 0, 1) == 0) { 691 /* 692 * if unable to lock any page, unlock all 693 * of them and return error 694 */ 695 for (j = 0; j < i; j++) 696 page_pp_unlock(ppa[j], 0, 1); 697 for (i = 0; i < npages; i++) 698 page_unlock(ppa[i]); 699 rctl_decr_locked_mem(NULL, proj, lockedbytes, 0); 700 mutex_exit(&sp->shm_mlock); 701 err = ENOMEM; 702 goto out4; 703 } 704 } 705 mutex_exit(&sp->shm_mlock); 706 707 /* 708 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK 709 * for the entire life of the segment. For example platforms 710 * that do not support Dynamic Reconfiguration. 711 */ 712 hat_flags = HAT_LOAD_SHARE; 713 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL)) 714 hat_flags |= HAT_LOAD_LOCK; 715 716 /* 717 * Load translations one lare page at a time 718 * to make sure we don't create mappings bigger than 719 * segment's size code in case underlying pages 720 * are shared with segvn's segment that uses bigger 721 * size code than we do. 722 */ 723 pgsz = page_get_pagesize(seg->s_szc); 724 pgcnt = page_get_pagecnt(seg->s_szc); 725 for (a = addr, pidx = 0; pidx < npages; a += pgsz, pidx += pgcnt) { 726 sz = MIN(pgsz, ptob(npages - pidx)); 727 hat_memload_array(seg->s_as->a_hat, a, sz, 728 &ppa[pidx], sptd->spt_prot, hat_flags); 729 } 730 731 /* 732 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP, 733 * we will leave the pages locked SE_SHARED for the life 734 * of the ISM segment. This will prevent any calls to 735 * hat_pageunload() on this ISM segment for those platforms. 736 */ 737 if (!(hat_flags & HAT_LOAD_LOCK)) { 738 /* 739 * On platforms that support HAT_DYNAMIC_ISM_UNMAP, 740 * we no longer need to hold the SE_SHARED lock on the pages, 741 * since L_PAGELOCK and F_SOFTLOCK calls will grab the 742 * SE_SHARED lock on the pages as necessary. 743 */ 744 for (i = 0; i < npages; i++) 745 page_unlock(ppa[i]); 746 } 747 sptd->spt_pcachecnt = 0; 748 kmem_free(ppa, ((sizeof (page_t *)) * npages)); 749 sptd->spt_realsize = ptob(npages); 750 atomic_add_long(&spt_used, npages); 751 sptcargs->seg_spt = seg; 752 return (0); 753 754 out4: 755 seg->s_data = NULL; 756 kmem_free(vp, sizeof (*vp)); 757 cv_destroy(&sptd->spt_cv); 758 out3: 759 mutex_destroy(&sptd->spt_lock); 760 if ((sptcargs->flags & SHM_PAGEABLE) == 0) 761 kmem_free(ppa, (sizeof (*ppa) * npages)); 762 out2: 763 kmem_free(sptd, sizeof (*sptd)); 764 out1: 765 if ((sptcargs->flags & SHM_PAGEABLE) == 0) 766 anon_swap_restore(npages); 767 return (err); 768 } 769 770 /*ARGSUSED*/ 771 void 772 segspt_free_pages(struct seg *seg, caddr_t addr, size_t len) 773 { 774 struct page *pp; 775 struct spt_data *sptd = (struct spt_data *)seg->s_data; 776 pgcnt_t npages; 777 ulong_t anon_idx; 778 struct anon_map *amp; 779 struct anon *ap; 780 struct vnode *vp; 781 u_offset_t off; 782 uint_t hat_flags; 783 int root = 0; 784 pgcnt_t pgs, curnpgs = 0; 785 page_t *rootpp; 786 rctl_qty_t unlocked_bytes = 0; 787 kproject_t *proj; 788 kshmid_t *sp; 789 790 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 791 792 len = P2ROUNDUP(len, PAGESIZE); 793 794 npages = btop(len); 795 796 hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP; 797 if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) || 798 (sptd->spt_flags & SHM_PAGEABLE)) { 799 hat_flags = HAT_UNLOAD_UNMAP; 800 } 801 802 hat_unload(seg->s_as->a_hat, addr, len, hat_flags); 803 804 amp = sptd->spt_amp; 805 if (sptd->spt_flags & SHM_PAGEABLE) 806 npages = btop(amp->size); 807 808 ASSERT(amp != NULL); 809 810 proj = NULL; 811 rootpp = NULL; 812 sp = NULL; 813 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { 814 sp = amp->a_sp; 815 proj = sp->shm_perm.ipc_proj; 816 mutex_enter(&sp->shm_mlock); 817 } 818 for (anon_idx = 0; anon_idx < npages; anon_idx++) { 819 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { 820 if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) { 821 panic("segspt_free_pages: null app"); 822 /*NOTREACHED*/ 823 } 824 } else { 825 if ((ap = anon_get_next_ptr(amp->ahp, &anon_idx)) 826 == NULL) 827 continue; 828 } 829 ASSERT(ANON_ISBUSY(anon_get_slot(amp->ahp, anon_idx)) == 0); 830 swap_xlate(ap, &vp, &off); 831 832 /* 833 * If this platform supports HAT_DYNAMIC_ISM_UNMAP, 834 * the pages won't be having SE_SHARED lock at this 835 * point. 836 * 837 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP, 838 * the pages are still held SE_SHARED locked from the 839 * original segspt_create() 840 * 841 * Our goal is to get SE_EXCL lock on each page, remove 842 * permanent lock on it and invalidate the page. 843 */ 844 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { 845 if (hat_flags == HAT_UNLOAD_UNMAP) 846 pp = page_lookup(vp, off, SE_EXCL); 847 else { 848 if ((pp = page_find(vp, off)) == NULL) { 849 panic("segspt_free_pages: " 850 "page not locked"); 851 /*NOTREACHED*/ 852 } 853 if (!page_tryupgrade(pp)) { 854 page_unlock(pp); 855 pp = page_lookup(vp, off, SE_EXCL); 856 } 857 } 858 if (pp == NULL) { 859 panic("segspt_free_pages: " 860 "page not in the system"); 861 /*NOTREACHED*/ 862 } 863 ASSERT(pp->p_lckcnt > 0); 864 page_pp_unlock(pp, 0, 1); 865 if (pp->p_lckcnt == 0) 866 unlocked_bytes += PAGESIZE; 867 } else { 868 if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL) 869 continue; 870 } 871 /* 872 * It's logical to invalidate the pages here as in most cases 873 * these were created by segspt. 874 */ 875 if (pp->p_szc != 0) { 876 if (root == 0) { 877 ASSERT(curnpgs == 0); 878 root = 1; 879 rootpp = pp; 880 pgs = curnpgs = page_get_pagecnt(pp->p_szc); 881 ASSERT(pgs > 1); 882 ASSERT(IS_P2ALIGNED(pgs, pgs)); 883 ASSERT(!(page_pptonum(pp) & (pgs - 1))); 884 curnpgs--; 885 } else if ((page_pptonum(pp) & (pgs - 1)) == pgs - 1) { 886 ASSERT(curnpgs == 1); 887 ASSERT(page_pptonum(pp) == 888 page_pptonum(rootpp) + (pgs - 1)); 889 page_destroy_pages(rootpp); 890 root = 0; 891 curnpgs = 0; 892 } else { 893 ASSERT(curnpgs > 1); 894 ASSERT(page_pptonum(pp) == 895 page_pptonum(rootpp) + (pgs - curnpgs)); 896 curnpgs--; 897 } 898 } else { 899 if (root != 0 || curnpgs != 0) { 900 panic("segspt_free_pages: bad large page"); 901 /*NOTREACHED*/ 902 } 903 /* 904 * Before destroying the pages, we need to take care 905 * of the rctl locked memory accounting. For that 906 * we need to calculte the unlocked_bytes. 907 */ 908 if (pp->p_lckcnt > 0) 909 unlocked_bytes += PAGESIZE; 910 /*LINTED: constant in conditional context */ 911 VN_DISPOSE(pp, B_INVAL, 0, kcred); 912 } 913 } 914 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { 915 if (unlocked_bytes > 0) 916 rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0); 917 mutex_exit(&sp->shm_mlock); 918 } 919 if (root != 0 || curnpgs != 0) { 920 panic("segspt_free_pages: bad large page"); 921 /*NOTREACHED*/ 922 } 923 924 /* 925 * mark that pages have been released 926 */ 927 sptd->spt_realsize = 0; 928 929 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { 930 atomic_add_long(&spt_used, -npages); 931 anon_swap_restore(npages); 932 } 933 } 934 935 /* 936 * Get memory allocation policy info for specified address in given segment 937 */ 938 static lgrp_mem_policy_info_t * 939 segspt_getpolicy(struct seg *seg, caddr_t addr) 940 { 941 struct anon_map *amp; 942 ulong_t anon_index; 943 lgrp_mem_policy_info_t *policy_info; 944 struct spt_data *spt_data; 945 946 ASSERT(seg != NULL); 947 948 /* 949 * Get anon_map from segspt 950 * 951 * Assume that no lock needs to be held on anon_map, since 952 * it should be protected by its reference count which must be 953 * nonzero for an existing segment 954 * Need to grab readers lock on policy tree though 955 */ 956 spt_data = (struct spt_data *)seg->s_data; 957 if (spt_data == NULL) 958 return (NULL); 959 amp = spt_data->spt_amp; 960 ASSERT(amp->refcnt != 0); 961 962 /* 963 * Get policy info 964 * 965 * Assume starting anon index of 0 966 */ 967 anon_index = seg_page(seg, addr); 968 policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0); 969 970 return (policy_info); 971 } 972 973 /* 974 * DISM only. 975 * Return locked pages over a given range. 976 * 977 * We will cache all DISM locked pages and save the pplist for the 978 * entire segment in the ppa field of the underlying DISM segment structure. 979 * Later, during a call to segspt_reclaim() we will use this ppa array 980 * to page_unlock() all of the pages and then we will free this ppa list. 981 */ 982 /*ARGSUSED*/ 983 static int 984 segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len, 985 struct page ***ppp, enum lock_type type, enum seg_rw rw) 986 { 987 struct shm_data *shmd = (struct shm_data *)seg->s_data; 988 struct seg *sptseg = shmd->shm_sptseg; 989 struct spt_data *sptd = sptseg->s_data; 990 pgcnt_t pg_idx, npages, tot_npages, npgs; 991 struct page **pplist, **pl, **ppa, *pp; 992 struct anon_map *amp; 993 spgcnt_t an_idx; 994 int ret = ENOTSUP; 995 uint_t pl_built = 0; 996 struct anon *ap; 997 struct vnode *vp; 998 u_offset_t off; 999 pgcnt_t claim_availrmem = 0; 1000 uint_t szc; 1001 1002 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 1003 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK); 1004 1005 /* 1006 * We want to lock/unlock the entire ISM segment. Therefore, 1007 * we will be using the underlying sptseg and it's base address 1008 * and length for the caching arguments. 1009 */ 1010 ASSERT(sptseg); 1011 ASSERT(sptd); 1012 1013 pg_idx = seg_page(seg, addr); 1014 npages = btopr(len); 1015 1016 /* 1017 * check if the request is larger than number of pages covered 1018 * by amp 1019 */ 1020 if (pg_idx + npages > btopr(sptd->spt_amp->size)) { 1021 *ppp = NULL; 1022 return (ENOTSUP); 1023 } 1024 1025 if (type == L_PAGEUNLOCK) { 1026 ASSERT(sptd->spt_ppa != NULL); 1027 1028 seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size, 1029 sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim); 1030 1031 /* 1032 * If someone is blocked while unmapping, we purge 1033 * segment page cache and thus reclaim pplist synchronously 1034 * without waiting for seg_pasync_thread. This speeds up 1035 * unmapping in cases where munmap(2) is called, while 1036 * raw async i/o is still in progress or where a thread 1037 * exits on data fault in a multithreaded application. 1038 */ 1039 if ((sptd->spt_flags & DISM_PPA_CHANGED) || 1040 (AS_ISUNMAPWAIT(seg->s_as) && 1041 shmd->shm_softlockcnt > 0)) { 1042 segspt_purge(seg); 1043 } 1044 return (0); 1045 } 1046 1047 /* The L_PAGELOCK case ... */ 1048 1049 if (sptd->spt_flags & DISM_PPA_CHANGED) { 1050 segspt_purge(seg); 1051 /* 1052 * for DISM ppa needs to be rebuild since 1053 * number of locked pages could be changed 1054 */ 1055 *ppp = NULL; 1056 return (ENOTSUP); 1057 } 1058 1059 /* 1060 * First try to find pages in segment page cache, without 1061 * holding the segment lock. 1062 */ 1063 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size, 1064 S_WRITE, SEGP_FORCE_WIRED); 1065 if (pplist != NULL) { 1066 ASSERT(sptd->spt_ppa != NULL); 1067 ASSERT(sptd->spt_ppa == pplist); 1068 ppa = sptd->spt_ppa; 1069 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) { 1070 if (ppa[an_idx] == NULL) { 1071 seg_pinactive(seg, NULL, seg->s_base, 1072 sptd->spt_amp->size, ppa, 1073 S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim); 1074 *ppp = NULL; 1075 return (ENOTSUP); 1076 } 1077 if ((szc = ppa[an_idx]->p_szc) != 0) { 1078 npgs = page_get_pagecnt(szc); 1079 an_idx = P2ROUNDUP(an_idx + 1, npgs); 1080 } else { 1081 an_idx++; 1082 } 1083 } 1084 /* 1085 * Since we cache the entire DISM segment, we want to 1086 * set ppp to point to the first slot that corresponds 1087 * to the requested addr, i.e. pg_idx. 1088 */ 1089 *ppp = &(sptd->spt_ppa[pg_idx]); 1090 return (0); 1091 } 1092 1093 mutex_enter(&sptd->spt_lock); 1094 /* 1095 * try to find pages in segment page cache with mutex 1096 */ 1097 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size, 1098 S_WRITE, SEGP_FORCE_WIRED); 1099 if (pplist != NULL) { 1100 ASSERT(sptd->spt_ppa != NULL); 1101 ASSERT(sptd->spt_ppa == pplist); 1102 ppa = sptd->spt_ppa; 1103 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) { 1104 if (ppa[an_idx] == NULL) { 1105 mutex_exit(&sptd->spt_lock); 1106 seg_pinactive(seg, NULL, seg->s_base, 1107 sptd->spt_amp->size, ppa, 1108 S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim); 1109 *ppp = NULL; 1110 return (ENOTSUP); 1111 } 1112 if ((szc = ppa[an_idx]->p_szc) != 0) { 1113 npgs = page_get_pagecnt(szc); 1114 an_idx = P2ROUNDUP(an_idx + 1, npgs); 1115 } else { 1116 an_idx++; 1117 } 1118 } 1119 /* 1120 * Since we cache the entire DISM segment, we want to 1121 * set ppp to point to the first slot that corresponds 1122 * to the requested addr, i.e. pg_idx. 1123 */ 1124 mutex_exit(&sptd->spt_lock); 1125 *ppp = &(sptd->spt_ppa[pg_idx]); 1126 return (0); 1127 } 1128 if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size, 1129 SEGP_FORCE_WIRED) == SEGP_FAIL) { 1130 mutex_exit(&sptd->spt_lock); 1131 *ppp = NULL; 1132 return (ENOTSUP); 1133 } 1134 1135 /* 1136 * No need to worry about protections because DISM pages are always rw. 1137 */ 1138 pl = pplist = NULL; 1139 amp = sptd->spt_amp; 1140 1141 /* 1142 * Do we need to build the ppa array? 1143 */ 1144 if (sptd->spt_ppa == NULL) { 1145 pgcnt_t lpg_cnt = 0; 1146 1147 pl_built = 1; 1148 tot_npages = btopr(sptd->spt_amp->size); 1149 1150 ASSERT(sptd->spt_pcachecnt == 0); 1151 pplist = kmem_zalloc(sizeof (page_t *) * tot_npages, KM_SLEEP); 1152 pl = pplist; 1153 1154 ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); 1155 for (an_idx = 0; an_idx < tot_npages; ) { 1156 ap = anon_get_ptr(amp->ahp, an_idx); 1157 /* 1158 * Cache only mlocked pages. For large pages 1159 * if one (constituent) page is mlocked 1160 * all pages for that large page 1161 * are cached also. This is for quick 1162 * lookups of ppa array; 1163 */ 1164 if ((ap != NULL) && (lpg_cnt != 0 || 1165 (sptd->spt_ppa_lckcnt[an_idx] != 0))) { 1166 1167 swap_xlate(ap, &vp, &off); 1168 pp = page_lookup(vp, off, SE_SHARED); 1169 ASSERT(pp != NULL); 1170 if (lpg_cnt == 0) { 1171 lpg_cnt++; 1172 /* 1173 * For a small page, we are done -- 1174 * lpg_count is reset to 0 below. 1175 * 1176 * For a large page, we are guaranteed 1177 * to find the anon structures of all 1178 * constituent pages and a non-zero 1179 * lpg_cnt ensures that we don't test 1180 * for mlock for these. We are done 1181 * when lpg_count reaches (npgs + 1). 1182 * If we are not the first constituent 1183 * page, restart at the first one. 1184 */ 1185 npgs = page_get_pagecnt(pp->p_szc); 1186 if (!IS_P2ALIGNED(an_idx, npgs)) { 1187 an_idx = P2ALIGN(an_idx, npgs); 1188 page_unlock(pp); 1189 continue; 1190 } 1191 } 1192 if (++lpg_cnt > npgs) 1193 lpg_cnt = 0; 1194 1195 /* 1196 * availrmem is decremented only 1197 * for unlocked pages 1198 */ 1199 if (sptd->spt_ppa_lckcnt[an_idx] == 0) 1200 claim_availrmem++; 1201 pplist[an_idx] = pp; 1202 } 1203 an_idx++; 1204 } 1205 ANON_LOCK_EXIT(&->a_rwlock); 1206 1207 if (claim_availrmem) { 1208 mutex_enter(&freemem_lock); 1209 if (availrmem < tune.t_minarmem + claim_availrmem) { 1210 mutex_exit(&freemem_lock); 1211 ret = ENOTSUP; 1212 claim_availrmem = 0; 1213 goto insert_fail; 1214 } else { 1215 availrmem -= claim_availrmem; 1216 } 1217 mutex_exit(&freemem_lock); 1218 } 1219 1220 sptd->spt_ppa = pl; 1221 } else { 1222 /* 1223 * We already have a valid ppa[]. 1224 */ 1225 pl = sptd->spt_ppa; 1226 } 1227 1228 ASSERT(pl != NULL); 1229 1230 ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size, 1231 sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED, 1232 segspt_reclaim); 1233 if (ret == SEGP_FAIL) { 1234 /* 1235 * seg_pinsert failed. We return 1236 * ENOTSUP, so that the as_pagelock() code will 1237 * then try the slower F_SOFTLOCK path. 1238 */ 1239 if (pl_built) { 1240 /* 1241 * No one else has referenced the ppa[]. 1242 * We created it and we need to destroy it. 1243 */ 1244 sptd->spt_ppa = NULL; 1245 } 1246 ret = ENOTSUP; 1247 goto insert_fail; 1248 } 1249 1250 /* 1251 * In either case, we increment softlockcnt on the 'real' segment. 1252 */ 1253 sptd->spt_pcachecnt++; 1254 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt))); 1255 1256 ppa = sptd->spt_ppa; 1257 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) { 1258 if (ppa[an_idx] == NULL) { 1259 mutex_exit(&sptd->spt_lock); 1260 seg_pinactive(seg, NULL, seg->s_base, 1261 sptd->spt_amp->size, 1262 pl, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim); 1263 *ppp = NULL; 1264 return (ENOTSUP); 1265 } 1266 if ((szc = ppa[an_idx]->p_szc) != 0) { 1267 npgs = page_get_pagecnt(szc); 1268 an_idx = P2ROUNDUP(an_idx + 1, npgs); 1269 } else { 1270 an_idx++; 1271 } 1272 } 1273 /* 1274 * We can now drop the sptd->spt_lock since the ppa[] 1275 * exists and we have incremented pacachecnt. 1276 */ 1277 mutex_exit(&sptd->spt_lock); 1278 1279 /* 1280 * Since we cache the entire segment, we want to 1281 * set ppp to point to the first slot that corresponds 1282 * to the requested addr, i.e. pg_idx. 1283 */ 1284 *ppp = &(sptd->spt_ppa[pg_idx]); 1285 return (0); 1286 1287 insert_fail: 1288 /* 1289 * We will only reach this code if we tried and failed. 1290 * 1291 * And we can drop the lock on the dummy seg, once we've failed 1292 * to set up a new ppa[]. 1293 */ 1294 mutex_exit(&sptd->spt_lock); 1295 1296 if (pl_built) { 1297 if (claim_availrmem) { 1298 mutex_enter(&freemem_lock); 1299 availrmem += claim_availrmem; 1300 mutex_exit(&freemem_lock); 1301 } 1302 1303 /* 1304 * We created pl and we need to destroy it. 1305 */ 1306 pplist = pl; 1307 for (an_idx = 0; an_idx < tot_npages; an_idx++) { 1308 if (pplist[an_idx] != NULL) 1309 page_unlock(pplist[an_idx]); 1310 } 1311 kmem_free(pl, sizeof (page_t *) * tot_npages); 1312 } 1313 1314 if (shmd->shm_softlockcnt <= 0) { 1315 if (AS_ISUNMAPWAIT(seg->s_as)) { 1316 mutex_enter(&seg->s_as->a_contents); 1317 if (AS_ISUNMAPWAIT(seg->s_as)) { 1318 AS_CLRUNMAPWAIT(seg->s_as); 1319 cv_broadcast(&seg->s_as->a_cv); 1320 } 1321 mutex_exit(&seg->s_as->a_contents); 1322 } 1323 } 1324 *ppp = NULL; 1325 return (ret); 1326 } 1327 1328 1329 1330 /* 1331 * return locked pages over a given range. 1332 * 1333 * We will cache the entire ISM segment and save the pplist for the 1334 * entire segment in the ppa field of the underlying ISM segment structure. 1335 * Later, during a call to segspt_reclaim() we will use this ppa array 1336 * to page_unlock() all of the pages and then we will free this ppa list. 1337 */ 1338 /*ARGSUSED*/ 1339 static int 1340 segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len, 1341 struct page ***ppp, enum lock_type type, enum seg_rw rw) 1342 { 1343 struct shm_data *shmd = (struct shm_data *)seg->s_data; 1344 struct seg *sptseg = shmd->shm_sptseg; 1345 struct spt_data *sptd = sptseg->s_data; 1346 pgcnt_t np, page_index, npages; 1347 caddr_t a, spt_base; 1348 struct page **pplist, **pl, *pp; 1349 struct anon_map *amp; 1350 ulong_t anon_index; 1351 int ret = ENOTSUP; 1352 uint_t pl_built = 0; 1353 struct anon *ap; 1354 struct vnode *vp; 1355 u_offset_t off; 1356 1357 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 1358 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK); 1359 1360 1361 /* 1362 * We want to lock/unlock the entire ISM segment. Therefore, 1363 * we will be using the underlying sptseg and it's base address 1364 * and length for the caching arguments. 1365 */ 1366 ASSERT(sptseg); 1367 ASSERT(sptd); 1368 1369 if (sptd->spt_flags & SHM_PAGEABLE) { 1370 return (segspt_dismpagelock(seg, addr, len, ppp, type, rw)); 1371 } 1372 1373 page_index = seg_page(seg, addr); 1374 npages = btopr(len); 1375 1376 /* 1377 * check if the request is larger than number of pages covered 1378 * by amp 1379 */ 1380 if (page_index + npages > btopr(sptd->spt_amp->size)) { 1381 *ppp = NULL; 1382 return (ENOTSUP); 1383 } 1384 1385 if (type == L_PAGEUNLOCK) { 1386 1387 ASSERT(sptd->spt_ppa != NULL); 1388 1389 seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size, 1390 sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim); 1391 1392 /* 1393 * If someone is blocked while unmapping, we purge 1394 * segment page cache and thus reclaim pplist synchronously 1395 * without waiting for seg_pasync_thread. This speeds up 1396 * unmapping in cases where munmap(2) is called, while 1397 * raw async i/o is still in progress or where a thread 1398 * exits on data fault in a multithreaded application. 1399 */ 1400 if (AS_ISUNMAPWAIT(seg->s_as) && (shmd->shm_softlockcnt > 0)) { 1401 segspt_purge(seg); 1402 } 1403 return (0); 1404 } 1405 1406 /* The L_PAGELOCK case... */ 1407 1408 /* 1409 * First try to find pages in segment page cache, without 1410 * holding the segment lock. 1411 */ 1412 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size, 1413 S_WRITE, SEGP_FORCE_WIRED); 1414 if (pplist != NULL) { 1415 ASSERT(sptd->spt_ppa == pplist); 1416 ASSERT(sptd->spt_ppa[page_index]); 1417 /* 1418 * Since we cache the entire ISM segment, we want to 1419 * set ppp to point to the first slot that corresponds 1420 * to the requested addr, i.e. page_index. 1421 */ 1422 *ppp = &(sptd->spt_ppa[page_index]); 1423 return (0); 1424 } 1425 1426 mutex_enter(&sptd->spt_lock); 1427 1428 /* 1429 * try to find pages in segment page cache 1430 */ 1431 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size, 1432 S_WRITE, SEGP_FORCE_WIRED); 1433 if (pplist != NULL) { 1434 ASSERT(sptd->spt_ppa == pplist); 1435 /* 1436 * Since we cache the entire segment, we want to 1437 * set ppp to point to the first slot that corresponds 1438 * to the requested addr, i.e. page_index. 1439 */ 1440 mutex_exit(&sptd->spt_lock); 1441 *ppp = &(sptd->spt_ppa[page_index]); 1442 return (0); 1443 } 1444 1445 if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size, 1446 SEGP_FORCE_WIRED) == SEGP_FAIL) { 1447 mutex_exit(&sptd->spt_lock); 1448 *ppp = NULL; 1449 return (ENOTSUP); 1450 } 1451 1452 /* 1453 * No need to worry about protections because ISM pages 1454 * are always rw. 1455 */ 1456 pl = pplist = NULL; 1457 1458 /* 1459 * Do we need to build the ppa array? 1460 */ 1461 if (sptd->spt_ppa == NULL) { 1462 ASSERT(sptd->spt_ppa == pplist); 1463 1464 spt_base = sptseg->s_base; 1465 pl_built = 1; 1466 1467 /* 1468 * availrmem is decremented once during anon_swap_adjust() 1469 * and is incremented during the anon_unresv(), which is 1470 * called from shm_rm_amp() when the segment is destroyed. 1471 */ 1472 amp = sptd->spt_amp; 1473 ASSERT(amp != NULL); 1474 1475 /* pcachecnt is protected by sptd->spt_lock */ 1476 ASSERT(sptd->spt_pcachecnt == 0); 1477 pplist = kmem_zalloc(sizeof (page_t *) 1478 * btopr(sptd->spt_amp->size), KM_SLEEP); 1479 pl = pplist; 1480 1481 anon_index = seg_page(sptseg, spt_base); 1482 1483 ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); 1484 for (a = spt_base; a < (spt_base + sptd->spt_amp->size); 1485 a += PAGESIZE, anon_index++, pplist++) { 1486 ap = anon_get_ptr(amp->ahp, anon_index); 1487 ASSERT(ap != NULL); 1488 swap_xlate(ap, &vp, &off); 1489 pp = page_lookup(vp, off, SE_SHARED); 1490 ASSERT(pp != NULL); 1491 *pplist = pp; 1492 } 1493 ANON_LOCK_EXIT(&->a_rwlock); 1494 1495 if (a < (spt_base + sptd->spt_amp->size)) { 1496 ret = ENOTSUP; 1497 goto insert_fail; 1498 } 1499 sptd->spt_ppa = pl; 1500 } else { 1501 /* 1502 * We already have a valid ppa[]. 1503 */ 1504 pl = sptd->spt_ppa; 1505 } 1506 1507 ASSERT(pl != NULL); 1508 1509 ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size, 1510 sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED, 1511 segspt_reclaim); 1512 if (ret == SEGP_FAIL) { 1513 /* 1514 * seg_pinsert failed. We return 1515 * ENOTSUP, so that the as_pagelock() code will 1516 * then try the slower F_SOFTLOCK path. 1517 */ 1518 if (pl_built) { 1519 /* 1520 * No one else has referenced the ppa[]. 1521 * We created it and we need to destroy it. 1522 */ 1523 sptd->spt_ppa = NULL; 1524 } 1525 ret = ENOTSUP; 1526 goto insert_fail; 1527 } 1528 1529 /* 1530 * In either case, we increment softlockcnt on the 'real' segment. 1531 */ 1532 sptd->spt_pcachecnt++; 1533 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt))); 1534 1535 /* 1536 * We can now drop the sptd->spt_lock since the ppa[] 1537 * exists and we have incremented pacachecnt. 1538 */ 1539 mutex_exit(&sptd->spt_lock); 1540 1541 /* 1542 * Since we cache the entire segment, we want to 1543 * set ppp to point to the first slot that corresponds 1544 * to the requested addr, i.e. page_index. 1545 */ 1546 *ppp = &(sptd->spt_ppa[page_index]); 1547 return (0); 1548 1549 insert_fail: 1550 /* 1551 * We will only reach this code if we tried and failed. 1552 * 1553 * And we can drop the lock on the dummy seg, once we've failed 1554 * to set up a new ppa[]. 1555 */ 1556 mutex_exit(&sptd->spt_lock); 1557 1558 if (pl_built) { 1559 /* 1560 * We created pl and we need to destroy it. 1561 */ 1562 pplist = pl; 1563 np = (((uintptr_t)(a - spt_base)) >> PAGESHIFT); 1564 while (np) { 1565 page_unlock(*pplist); 1566 np--; 1567 pplist++; 1568 } 1569 kmem_free(pl, sizeof (page_t *) * btopr(sptd->spt_amp->size)); 1570 } 1571 if (shmd->shm_softlockcnt <= 0) { 1572 if (AS_ISUNMAPWAIT(seg->s_as)) { 1573 mutex_enter(&seg->s_as->a_contents); 1574 if (AS_ISUNMAPWAIT(seg->s_as)) { 1575 AS_CLRUNMAPWAIT(seg->s_as); 1576 cv_broadcast(&seg->s_as->a_cv); 1577 } 1578 mutex_exit(&seg->s_as->a_contents); 1579 } 1580 } 1581 *ppp = NULL; 1582 return (ret); 1583 } 1584 1585 /* 1586 * purge any cached pages in the I/O page cache 1587 */ 1588 static void 1589 segspt_purge(struct seg *seg) 1590 { 1591 seg_ppurge(seg, NULL, SEGP_FORCE_WIRED); 1592 } 1593 1594 static int 1595 segspt_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist, 1596 enum seg_rw rw, int async) 1597 { 1598 struct seg *seg = (struct seg *)ptag; 1599 struct shm_data *shmd = (struct shm_data *)seg->s_data; 1600 struct seg *sptseg; 1601 struct spt_data *sptd; 1602 pgcnt_t npages, i, free_availrmem = 0; 1603 int done = 0; 1604 1605 #ifdef lint 1606 addr = addr; 1607 #endif 1608 sptseg = shmd->shm_sptseg; 1609 sptd = sptseg->s_data; 1610 npages = (len >> PAGESHIFT); 1611 ASSERT(npages); 1612 ASSERT(sptd->spt_pcachecnt != 0); 1613 ASSERT(sptd->spt_ppa == pplist); 1614 ASSERT(npages == btopr(sptd->spt_amp->size)); 1615 ASSERT(async || AS_LOCK_HELD(seg->s_as)); 1616 1617 /* 1618 * Acquire the lock on the dummy seg and destroy the 1619 * ppa array IF this is the last pcachecnt. 1620 */ 1621 mutex_enter(&sptd->spt_lock); 1622 if (--sptd->spt_pcachecnt == 0) { 1623 for (i = 0; i < npages; i++) { 1624 if (pplist[i] == NULL) { 1625 continue; 1626 } 1627 if (rw == S_WRITE) { 1628 hat_setrefmod(pplist[i]); 1629 } else { 1630 hat_setref(pplist[i]); 1631 } 1632 if ((sptd->spt_flags & SHM_PAGEABLE) && 1633 (sptd->spt_ppa_lckcnt[i] == 0)) 1634 free_availrmem++; 1635 page_unlock(pplist[i]); 1636 } 1637 if ((sptd->spt_flags & SHM_PAGEABLE) && free_availrmem) { 1638 mutex_enter(&freemem_lock); 1639 availrmem += free_availrmem; 1640 mutex_exit(&freemem_lock); 1641 } 1642 /* 1643 * Since we want to cach/uncache the entire ISM segment, 1644 * we will track the pplist in a segspt specific field 1645 * ppa, that is initialized at the time we add an entry to 1646 * the cache. 1647 */ 1648 ASSERT(sptd->spt_pcachecnt == 0); 1649 kmem_free(pplist, sizeof (page_t *) * npages); 1650 sptd->spt_ppa = NULL; 1651 sptd->spt_flags &= ~DISM_PPA_CHANGED; 1652 sptd->spt_gen++; 1653 cv_broadcast(&sptd->spt_cv); 1654 done = 1; 1655 } 1656 mutex_exit(&sptd->spt_lock); 1657 1658 /* 1659 * If we are pcache async thread or called via seg_ppurge_wiredpp() we 1660 * may not hold AS lock (in this case async argument is not 0). This 1661 * means if softlockcnt drops to 0 after the decrement below address 1662 * space may get freed. We can't allow it since after softlock 1663 * derement to 0 we still need to access as structure for possible 1664 * wakeup of unmap waiters. To prevent the disappearance of as we take 1665 * this segment's shm_segfree_syncmtx. segspt_shmfree() also takes 1666 * this mutex as a barrier to make sure this routine completes before 1667 * segment is freed. 1668 * 1669 * The second complication we have to deal with in async case is a 1670 * possibility of missed wake up of unmap wait thread. When we don't 1671 * hold as lock here we may take a_contents lock before unmap wait 1672 * thread that was first to see softlockcnt was still not 0. As a 1673 * result we'll fail to wake up an unmap wait thread. To avoid this 1674 * race we set nounmapwait flag in as structure if we drop softlockcnt 1675 * to 0 if async is not 0. unmapwait thread 1676 * will not block if this flag is set. 1677 */ 1678 if (async) 1679 mutex_enter(&shmd->shm_segfree_syncmtx); 1680 1681 /* 1682 * Now decrement softlockcnt. 1683 */ 1684 ASSERT(shmd->shm_softlockcnt > 0); 1685 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt))); 1686 1687 if (shmd->shm_softlockcnt <= 0) { 1688 if (async || AS_ISUNMAPWAIT(seg->s_as)) { 1689 mutex_enter(&seg->s_as->a_contents); 1690 if (async) 1691 AS_SETNOUNMAPWAIT(seg->s_as); 1692 if (AS_ISUNMAPWAIT(seg->s_as)) { 1693 AS_CLRUNMAPWAIT(seg->s_as); 1694 cv_broadcast(&seg->s_as->a_cv); 1695 } 1696 mutex_exit(&seg->s_as->a_contents); 1697 } 1698 } 1699 1700 if (async) 1701 mutex_exit(&shmd->shm_segfree_syncmtx); 1702 1703 return (done); 1704 } 1705 1706 /* 1707 * Do a F_SOFTUNLOCK call over the range requested. 1708 * The range must have already been F_SOFTLOCK'ed. 1709 * 1710 * The calls to acquire and release the anon map lock mutex were 1711 * removed in order to avoid a deadly embrace during a DR 1712 * memory delete operation. (Eg. DR blocks while waiting for a 1713 * exclusive lock on a page that is being used for kaio; the 1714 * thread that will complete the kaio and call segspt_softunlock 1715 * blocks on the anon map lock; another thread holding the anon 1716 * map lock blocks on another page lock via the segspt_shmfault 1717 * -> page_lookup -> page_lookup_create -> page_lock_es code flow.) 1718 * 1719 * The appropriateness of the removal is based upon the following: 1720 * 1. If we are holding a segment's reader lock and the page is held 1721 * shared, then the corresponding element in anonmap which points to 1722 * anon struct cannot change and there is no need to acquire the 1723 * anonymous map lock. 1724 * 2. Threads in segspt_softunlock have a reader lock on the segment 1725 * and already have the shared page lock, so we are guaranteed that 1726 * the anon map slot cannot change and therefore can call anon_get_ptr() 1727 * without grabbing the anonymous map lock. 1728 * 3. Threads that softlock a shared page break copy-on-write, even if 1729 * its a read. Thus cow faults can be ignored with respect to soft 1730 * unlocking, since the breaking of cow means that the anon slot(s) will 1731 * not be shared. 1732 */ 1733 static void 1734 segspt_softunlock(struct seg *seg, caddr_t sptseg_addr, 1735 size_t len, enum seg_rw rw) 1736 { 1737 struct shm_data *shmd = (struct shm_data *)seg->s_data; 1738 struct seg *sptseg; 1739 struct spt_data *sptd; 1740 page_t *pp; 1741 caddr_t adr; 1742 struct vnode *vp; 1743 u_offset_t offset; 1744 ulong_t anon_index; 1745 struct anon_map *amp; /* XXX - for locknest */ 1746 struct anon *ap = NULL; 1747 pgcnt_t npages; 1748 1749 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 1750 1751 sptseg = shmd->shm_sptseg; 1752 sptd = sptseg->s_data; 1753 1754 /* 1755 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK 1756 * and therefore their pages are SE_SHARED locked 1757 * for the entire life of the segment. 1758 */ 1759 if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) && 1760 ((sptd->spt_flags & SHM_PAGEABLE) == 0)) { 1761 goto softlock_decrement; 1762 } 1763 1764 /* 1765 * Any thread is free to do a page_find and 1766 * page_unlock() on the pages within this seg. 1767 * 1768 * We are already holding the as->a_lock on the user's 1769 * real segment, but we need to hold the a_lock on the 1770 * underlying dummy as. This is mostly to satisfy the 1771 * underlying HAT layer. 1772 */ 1773 AS_LOCK_ENTER(sptseg->s_as, RW_READER); 1774 hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len); 1775 AS_LOCK_EXIT(sptseg->s_as); 1776 1777 amp = sptd->spt_amp; 1778 ASSERT(amp != NULL); 1779 anon_index = seg_page(sptseg, sptseg_addr); 1780 1781 for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) { 1782 ap = anon_get_ptr(amp->ahp, anon_index++); 1783 ASSERT(ap != NULL); 1784 swap_xlate(ap, &vp, &offset); 1785 1786 /* 1787 * Use page_find() instead of page_lookup() to 1788 * find the page since we know that it has a 1789 * "shared" lock. 1790 */ 1791 pp = page_find(vp, offset); 1792 ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1)); 1793 if (pp == NULL) { 1794 panic("segspt_softunlock: " 1795 "addr %p, ap %p, vp %p, off %llx", 1796 (void *)adr, (void *)ap, (void *)vp, offset); 1797 /*NOTREACHED*/ 1798 } 1799 1800 if (rw == S_WRITE) { 1801 hat_setrefmod(pp); 1802 } else if (rw != S_OTHER) { 1803 hat_setref(pp); 1804 } 1805 page_unlock(pp); 1806 } 1807 1808 softlock_decrement: 1809 npages = btopr(len); 1810 ASSERT(shmd->shm_softlockcnt >= npages); 1811 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -npages); 1812 if (shmd->shm_softlockcnt == 0) { 1813 /* 1814 * All SOFTLOCKS are gone. Wakeup any waiting 1815 * unmappers so they can try again to unmap. 1816 * Check for waiters first without the mutex 1817 * held so we don't always grab the mutex on 1818 * softunlocks. 1819 */ 1820 if (AS_ISUNMAPWAIT(seg->s_as)) { 1821 mutex_enter(&seg->s_as->a_contents); 1822 if (AS_ISUNMAPWAIT(seg->s_as)) { 1823 AS_CLRUNMAPWAIT(seg->s_as); 1824 cv_broadcast(&seg->s_as->a_cv); 1825 } 1826 mutex_exit(&seg->s_as->a_contents); 1827 } 1828 } 1829 } 1830 1831 int 1832 segspt_shmattach(struct seg **segpp, void *argsp) 1833 { 1834 struct seg *seg = *segpp; 1835 struct shm_data *shmd_arg = (struct shm_data *)argsp; 1836 struct shm_data *shmd; 1837 struct anon_map *shm_amp = shmd_arg->shm_amp; 1838 struct spt_data *sptd; 1839 int error = 0; 1840 1841 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 1842 1843 shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP); 1844 if (shmd == NULL) 1845 return (ENOMEM); 1846 1847 shmd->shm_sptas = shmd_arg->shm_sptas; 1848 shmd->shm_amp = shm_amp; 1849 shmd->shm_sptseg = shmd_arg->shm_sptseg; 1850 1851 (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0, 1852 NULL, 0, seg->s_size); 1853 1854 mutex_init(&shmd->shm_segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL); 1855 1856 seg->s_data = (void *)shmd; 1857 seg->s_ops = &segspt_shmops; 1858 seg->s_szc = shmd->shm_sptseg->s_szc; 1859 sptd = shmd->shm_sptseg->s_data; 1860 1861 if (sptd->spt_flags & SHM_PAGEABLE) { 1862 if ((shmd->shm_vpage = kmem_zalloc(btopr(shm_amp->size), 1863 KM_NOSLEEP)) == NULL) { 1864 seg->s_data = (void *)NULL; 1865 kmem_free(shmd, (sizeof (*shmd))); 1866 return (ENOMEM); 1867 } 1868 shmd->shm_lckpgs = 0; 1869 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) { 1870 if ((error = hat_share(seg->s_as->a_hat, seg->s_base, 1871 shmd_arg->shm_sptas->a_hat, SEGSPTADDR, 1872 seg->s_size, seg->s_szc)) != 0) { 1873 kmem_free(shmd->shm_vpage, 1874 btopr(shm_amp->size)); 1875 } 1876 } 1877 } else { 1878 error = hat_share(seg->s_as->a_hat, seg->s_base, 1879 shmd_arg->shm_sptas->a_hat, SEGSPTADDR, 1880 seg->s_size, seg->s_szc); 1881 } 1882 if (error) { 1883 seg->s_szc = 0; 1884 seg->s_data = (void *)NULL; 1885 kmem_free(shmd, (sizeof (*shmd))); 1886 } else { 1887 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER); 1888 shm_amp->refcnt++; 1889 ANON_LOCK_EXIT(&shm_amp->a_rwlock); 1890 } 1891 return (error); 1892 } 1893 1894 int 1895 segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize) 1896 { 1897 struct shm_data *shmd = (struct shm_data *)seg->s_data; 1898 int reclaim = 1; 1899 1900 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 1901 retry: 1902 if (shmd->shm_softlockcnt > 0) { 1903 if (reclaim == 1) { 1904 segspt_purge(seg); 1905 reclaim = 0; 1906 goto retry; 1907 } 1908 return (EAGAIN); 1909 } 1910 1911 if (ssize != seg->s_size) { 1912 #ifdef DEBUG 1913 cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n", 1914 ssize, seg->s_size); 1915 #endif 1916 return (EINVAL); 1917 } 1918 1919 (void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK, 1920 NULL, 0); 1921 hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc); 1922 1923 seg_free(seg); 1924 1925 return (0); 1926 } 1927 1928 void 1929 segspt_shmfree(struct seg *seg) 1930 { 1931 struct shm_data *shmd = (struct shm_data *)seg->s_data; 1932 struct anon_map *shm_amp = shmd->shm_amp; 1933 1934 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 1935 1936 (void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0, 1937 MC_UNLOCK, NULL, 0); 1938 1939 /* 1940 * Need to increment refcnt when attaching 1941 * and decrement when detaching because of dup(). 1942 */ 1943 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER); 1944 shm_amp->refcnt--; 1945 ANON_LOCK_EXIT(&shm_amp->a_rwlock); 1946 1947 if (shmd->shm_vpage) { /* only for DISM */ 1948 kmem_free(shmd->shm_vpage, btopr(shm_amp->size)); 1949 shmd->shm_vpage = NULL; 1950 } 1951 1952 /* 1953 * Take shm_segfree_syncmtx lock to let segspt_reclaim() finish if it's 1954 * still working with this segment without holding as lock. 1955 */ 1956 ASSERT(shmd->shm_softlockcnt == 0); 1957 mutex_enter(&shmd->shm_segfree_syncmtx); 1958 mutex_destroy(&shmd->shm_segfree_syncmtx); 1959 1960 kmem_free(shmd, sizeof (*shmd)); 1961 } 1962 1963 /*ARGSUSED*/ 1964 int 1965 segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 1966 { 1967 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 1968 1969 /* 1970 * Shared page table is more than shared mapping. 1971 * Individual process sharing page tables can't change prot 1972 * because there is only one set of page tables. 1973 * This will be allowed after private page table is 1974 * supported. 1975 */ 1976 /* need to return correct status error? */ 1977 return (0); 1978 } 1979 1980 1981 faultcode_t 1982 segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr, 1983 size_t len, enum fault_type type, enum seg_rw rw) 1984 { 1985 struct shm_data *shmd = (struct shm_data *)seg->s_data; 1986 struct seg *sptseg = shmd->shm_sptseg; 1987 struct as *curspt = shmd->shm_sptas; 1988 struct spt_data *sptd = sptseg->s_data; 1989 pgcnt_t npages; 1990 size_t size; 1991 caddr_t segspt_addr, shm_addr; 1992 page_t **ppa; 1993 int i; 1994 ulong_t an_idx = 0; 1995 int err = 0; 1996 int dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0); 1997 size_t pgsz; 1998 pgcnt_t pgcnt; 1999 caddr_t a; 2000 pgcnt_t pidx; 2001 2002 #ifdef lint 2003 hat = hat; 2004 #endif 2005 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 2006 2007 /* 2008 * Because of the way spt is implemented 2009 * the realsize of the segment does not have to be 2010 * equal to the segment size itself. The segment size is 2011 * often in multiples of a page size larger than PAGESIZE. 2012 * The realsize is rounded up to the nearest PAGESIZE 2013 * based on what the user requested. This is a bit of 2014 * ungliness that is historical but not easily fixed 2015 * without re-designing the higher levels of ISM. 2016 */ 2017 ASSERT(addr >= seg->s_base); 2018 if (((addr + len) - seg->s_base) > sptd->spt_realsize) 2019 return (FC_NOMAP); 2020 /* 2021 * For all of the following cases except F_PROT, we need to 2022 * make any necessary adjustments to addr and len 2023 * and get all of the necessary page_t's into an array called ppa[]. 2024 * 2025 * The code in shmat() forces base addr and len of ISM segment 2026 * to be aligned to largest page size supported. Therefore, 2027 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large 2028 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK 2029 * in large pagesize chunks, or else we will screw up the HAT 2030 * layer by calling hat_memload_array() with differing page sizes 2031 * over a given virtual range. 2032 */ 2033 pgsz = page_get_pagesize(sptseg->s_szc); 2034 pgcnt = page_get_pagecnt(sptseg->s_szc); 2035 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz); 2036 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz); 2037 npages = btopr(size); 2038 2039 /* 2040 * Now we need to convert from addr in segshm to addr in segspt. 2041 */ 2042 an_idx = seg_page(seg, shm_addr); 2043 segspt_addr = sptseg->s_base + ptob(an_idx); 2044 2045 ASSERT((segspt_addr + ptob(npages)) <= 2046 (sptseg->s_base + sptd->spt_realsize)); 2047 ASSERT(segspt_addr < (sptseg->s_base + sptseg->s_size)); 2048 2049 switch (type) { 2050 2051 case F_SOFTLOCK: 2052 2053 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages); 2054 /* 2055 * Fall through to the F_INVAL case to load up the hat layer 2056 * entries with the HAT_LOAD_LOCK flag. 2057 */ 2058 /* FALLTHRU */ 2059 case F_INVAL: 2060 2061 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC)) 2062 return (FC_NOMAP); 2063 2064 ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP); 2065 2066 err = spt_anon_getpages(sptseg, segspt_addr, size, ppa); 2067 if (err != 0) { 2068 if (type == F_SOFTLOCK) { 2069 atomic_add_long((ulong_t *)( 2070 &(shmd->shm_softlockcnt)), -npages); 2071 } 2072 goto dism_err; 2073 } 2074 AS_LOCK_ENTER(sptseg->s_as, RW_READER); 2075 a = segspt_addr; 2076 pidx = 0; 2077 if (type == F_SOFTLOCK) { 2078 2079 /* 2080 * Load up the translation keeping it 2081 * locked and don't unlock the page. 2082 */ 2083 for (; pidx < npages; a += pgsz, pidx += pgcnt) { 2084 hat_memload_array(sptseg->s_as->a_hat, 2085 a, pgsz, &ppa[pidx], sptd->spt_prot, 2086 HAT_LOAD_LOCK | HAT_LOAD_SHARE); 2087 } 2088 } else { 2089 /* 2090 * Migrate pages marked for migration 2091 */ 2092 if (lgrp_optimizations()) 2093 page_migrate(seg, shm_addr, ppa, npages); 2094 2095 for (; pidx < npages; a += pgsz, pidx += pgcnt) { 2096 hat_memload_array(sptseg->s_as->a_hat, 2097 a, pgsz, &ppa[pidx], 2098 sptd->spt_prot, 2099 HAT_LOAD_SHARE); 2100 } 2101 2102 /* 2103 * And now drop the SE_SHARED lock(s). 2104 */ 2105 if (dyn_ism_unmap) { 2106 for (i = 0; i < npages; i++) { 2107 page_unlock(ppa[i]); 2108 } 2109 } 2110 } 2111 2112 if (!dyn_ism_unmap) { 2113 if (hat_share(seg->s_as->a_hat, shm_addr, 2114 curspt->a_hat, segspt_addr, ptob(npages), 2115 seg->s_szc) != 0) { 2116 panic("hat_share err in DISM fault"); 2117 /* NOTREACHED */ 2118 } 2119 if (type == F_INVAL) { 2120 for (i = 0; i < npages; i++) { 2121 page_unlock(ppa[i]); 2122 } 2123 } 2124 } 2125 AS_LOCK_EXIT(sptseg->s_as); 2126 dism_err: 2127 kmem_free(ppa, npages * sizeof (page_t *)); 2128 return (err); 2129 2130 case F_SOFTUNLOCK: 2131 2132 /* 2133 * This is a bit ugly, we pass in the real seg pointer, 2134 * but the segspt_addr is the virtual address within the 2135 * dummy seg. 2136 */ 2137 segspt_softunlock(seg, segspt_addr, size, rw); 2138 return (0); 2139 2140 case F_PROT: 2141 2142 /* 2143 * This takes care of the unusual case where a user 2144 * allocates a stack in shared memory and a register 2145 * window overflow is written to that stack page before 2146 * it is otherwise modified. 2147 * 2148 * We can get away with this because ISM segments are 2149 * always rw. Other than this unusual case, there 2150 * should be no instances of protection violations. 2151 */ 2152 return (0); 2153 2154 default: 2155 #ifdef DEBUG 2156 panic("segspt_dismfault default type?"); 2157 #else 2158 return (FC_NOMAP); 2159 #endif 2160 } 2161 } 2162 2163 2164 faultcode_t 2165 segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr, 2166 size_t len, enum fault_type type, enum seg_rw rw) 2167 { 2168 struct shm_data *shmd = (struct shm_data *)seg->s_data; 2169 struct seg *sptseg = shmd->shm_sptseg; 2170 struct as *curspt = shmd->shm_sptas; 2171 struct spt_data *sptd = sptseg->s_data; 2172 pgcnt_t npages; 2173 size_t size; 2174 caddr_t sptseg_addr, shm_addr; 2175 page_t *pp, **ppa; 2176 int i; 2177 u_offset_t offset; 2178 ulong_t anon_index = 0; 2179 struct vnode *vp; 2180 struct anon_map *amp; /* XXX - for locknest */ 2181 struct anon *ap = NULL; 2182 size_t pgsz; 2183 pgcnt_t pgcnt; 2184 caddr_t a; 2185 pgcnt_t pidx; 2186 size_t sz; 2187 2188 #ifdef lint 2189 hat = hat; 2190 #endif 2191 2192 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 2193 2194 if (sptd->spt_flags & SHM_PAGEABLE) { 2195 return (segspt_dismfault(hat, seg, addr, len, type, rw)); 2196 } 2197 2198 /* 2199 * Because of the way spt is implemented 2200 * the realsize of the segment does not have to be 2201 * equal to the segment size itself. The segment size is 2202 * often in multiples of a page size larger than PAGESIZE. 2203 * The realsize is rounded up to the nearest PAGESIZE 2204 * based on what the user requested. This is a bit of 2205 * ungliness that is historical but not easily fixed 2206 * without re-designing the higher levels of ISM. 2207 */ 2208 ASSERT(addr >= seg->s_base); 2209 if (((addr + len) - seg->s_base) > sptd->spt_realsize) 2210 return (FC_NOMAP); 2211 /* 2212 * For all of the following cases except F_PROT, we need to 2213 * make any necessary adjustments to addr and len 2214 * and get all of the necessary page_t's into an array called ppa[]. 2215 * 2216 * The code in shmat() forces base addr and len of ISM segment 2217 * to be aligned to largest page size supported. Therefore, 2218 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large 2219 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK 2220 * in large pagesize chunks, or else we will screw up the HAT 2221 * layer by calling hat_memload_array() with differing page sizes 2222 * over a given virtual range. 2223 */ 2224 pgsz = page_get_pagesize(sptseg->s_szc); 2225 pgcnt = page_get_pagecnt(sptseg->s_szc); 2226 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz); 2227 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz); 2228 npages = btopr(size); 2229 2230 /* 2231 * Now we need to convert from addr in segshm to addr in segspt. 2232 */ 2233 anon_index = seg_page(seg, shm_addr); 2234 sptseg_addr = sptseg->s_base + ptob(anon_index); 2235 2236 /* 2237 * And now we may have to adjust npages downward if we have 2238 * exceeded the realsize of the segment or initial anon 2239 * allocations. 2240 */ 2241 if ((sptseg_addr + ptob(npages)) > 2242 (sptseg->s_base + sptd->spt_realsize)) 2243 size = (sptseg->s_base + sptd->spt_realsize) - sptseg_addr; 2244 2245 npages = btopr(size); 2246 2247 ASSERT(sptseg_addr < (sptseg->s_base + sptseg->s_size)); 2248 ASSERT((sptd->spt_flags & SHM_PAGEABLE) == 0); 2249 2250 switch (type) { 2251 2252 case F_SOFTLOCK: 2253 2254 /* 2255 * availrmem is decremented once during anon_swap_adjust() 2256 * and is incremented during the anon_unresv(), which is 2257 * called from shm_rm_amp() when the segment is destroyed. 2258 */ 2259 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages); 2260 /* 2261 * Some platforms assume that ISM pages are SE_SHARED 2262 * locked for the entire life of the segment. 2263 */ 2264 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) 2265 return (0); 2266 /* 2267 * Fall through to the F_INVAL case to load up the hat layer 2268 * entries with the HAT_LOAD_LOCK flag. 2269 */ 2270 2271 /* FALLTHRU */ 2272 case F_INVAL: 2273 2274 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC)) 2275 return (FC_NOMAP); 2276 2277 /* 2278 * Some platforms that do NOT support DYNAMIC_ISM_UNMAP 2279 * may still rely on this call to hat_share(). That 2280 * would imply that those hat's can fault on a 2281 * HAT_LOAD_LOCK translation, which would seem 2282 * contradictory. 2283 */ 2284 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) { 2285 if (hat_share(seg->s_as->a_hat, seg->s_base, 2286 curspt->a_hat, sptseg->s_base, 2287 sptseg->s_size, sptseg->s_szc) != 0) { 2288 panic("hat_share error in ISM fault"); 2289 /*NOTREACHED*/ 2290 } 2291 return (0); 2292 } 2293 ppa = kmem_zalloc(sizeof (page_t *) * npages, KM_SLEEP); 2294 2295 /* 2296 * I see no need to lock the real seg, 2297 * here, because all of our work will be on the underlying 2298 * dummy seg. 2299 * 2300 * sptseg_addr and npages now account for large pages. 2301 */ 2302 amp = sptd->spt_amp; 2303 ASSERT(amp != NULL); 2304 anon_index = seg_page(sptseg, sptseg_addr); 2305 2306 ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 2307 for (i = 0; i < npages; i++) { 2308 ap = anon_get_ptr(amp->ahp, anon_index++); 2309 ASSERT(ap != NULL); 2310 swap_xlate(ap, &vp, &offset); 2311 pp = page_lookup(vp, offset, SE_SHARED); 2312 ASSERT(pp != NULL); 2313 ppa[i] = pp; 2314 } 2315 ANON_LOCK_EXIT(&->a_rwlock); 2316 ASSERT(i == npages); 2317 2318 /* 2319 * We are already holding the as->a_lock on the user's 2320 * real segment, but we need to hold the a_lock on the 2321 * underlying dummy as. This is mostly to satisfy the 2322 * underlying HAT layer. 2323 */ 2324 AS_LOCK_ENTER(sptseg->s_as, RW_READER); 2325 a = sptseg_addr; 2326 pidx = 0; 2327 if (type == F_SOFTLOCK) { 2328 /* 2329 * Load up the translation keeping it 2330 * locked and don't unlock the page. 2331 */ 2332 for (; pidx < npages; a += pgsz, pidx += pgcnt) { 2333 sz = MIN(pgsz, ptob(npages - pidx)); 2334 hat_memload_array(sptseg->s_as->a_hat, a, 2335 sz, &ppa[pidx], sptd->spt_prot, 2336 HAT_LOAD_LOCK | HAT_LOAD_SHARE); 2337 } 2338 } else { 2339 /* 2340 * Migrate pages marked for migration. 2341 */ 2342 if (lgrp_optimizations()) 2343 page_migrate(seg, shm_addr, ppa, npages); 2344 2345 for (; pidx < npages; a += pgsz, pidx += pgcnt) { 2346 sz = MIN(pgsz, ptob(npages - pidx)); 2347 hat_memload_array(sptseg->s_as->a_hat, 2348 a, sz, &ppa[pidx], 2349 sptd->spt_prot, HAT_LOAD_SHARE); 2350 } 2351 2352 /* 2353 * And now drop the SE_SHARED lock(s). 2354 */ 2355 for (i = 0; i < npages; i++) 2356 page_unlock(ppa[i]); 2357 } 2358 AS_LOCK_EXIT(sptseg->s_as); 2359 2360 kmem_free(ppa, sizeof (page_t *) * npages); 2361 return (0); 2362 case F_SOFTUNLOCK: 2363 2364 /* 2365 * This is a bit ugly, we pass in the real seg pointer, 2366 * but the sptseg_addr is the virtual address within the 2367 * dummy seg. 2368 */ 2369 segspt_softunlock(seg, sptseg_addr, ptob(npages), rw); 2370 return (0); 2371 2372 case F_PROT: 2373 2374 /* 2375 * This takes care of the unusual case where a user 2376 * allocates a stack in shared memory and a register 2377 * window overflow is written to that stack page before 2378 * it is otherwise modified. 2379 * 2380 * We can get away with this because ISM segments are 2381 * always rw. Other than this unusual case, there 2382 * should be no instances of protection violations. 2383 */ 2384 return (0); 2385 2386 default: 2387 #ifdef DEBUG 2388 cmn_err(CE_WARN, "segspt_shmfault default type?"); 2389 #endif 2390 return (FC_NOMAP); 2391 } 2392 } 2393 2394 /*ARGSUSED*/ 2395 static faultcode_t 2396 segspt_shmfaulta(struct seg *seg, caddr_t addr) 2397 { 2398 return (0); 2399 } 2400 2401 /*ARGSUSED*/ 2402 static int 2403 segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta) 2404 { 2405 return (0); 2406 } 2407 2408 /*ARGSUSED*/ 2409 static size_t 2410 segspt_shmswapout(struct seg *seg) 2411 { 2412 return (0); 2413 } 2414 2415 /* 2416 * duplicate the shared page tables 2417 */ 2418 int 2419 segspt_shmdup(struct seg *seg, struct seg *newseg) 2420 { 2421 struct shm_data *shmd = (struct shm_data *)seg->s_data; 2422 struct anon_map *amp = shmd->shm_amp; 2423 struct shm_data *shmd_new; 2424 struct seg *spt_seg = shmd->shm_sptseg; 2425 struct spt_data *sptd = spt_seg->s_data; 2426 int error = 0; 2427 2428 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 2429 2430 shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP); 2431 newseg->s_data = (void *)shmd_new; 2432 shmd_new->shm_sptas = shmd->shm_sptas; 2433 shmd_new->shm_amp = amp; 2434 shmd_new->shm_sptseg = shmd->shm_sptseg; 2435 newseg->s_ops = &segspt_shmops; 2436 newseg->s_szc = seg->s_szc; 2437 ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc); 2438 2439 ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); 2440 amp->refcnt++; 2441 ANON_LOCK_EXIT(&->a_rwlock); 2442 2443 if (sptd->spt_flags & SHM_PAGEABLE) { 2444 shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP); 2445 shmd_new->shm_lckpgs = 0; 2446 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) { 2447 if ((error = hat_share(newseg->s_as->a_hat, 2448 newseg->s_base, shmd->shm_sptas->a_hat, SEGSPTADDR, 2449 seg->s_size, seg->s_szc)) != 0) { 2450 kmem_free(shmd_new->shm_vpage, 2451 btopr(amp->size)); 2452 } 2453 } 2454 return (error); 2455 } else { 2456 return (hat_share(newseg->s_as->a_hat, newseg->s_base, 2457 shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size, 2458 seg->s_szc)); 2459 2460 } 2461 } 2462 2463 /*ARGSUSED*/ 2464 int 2465 segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot) 2466 { 2467 struct shm_data *shmd = (struct shm_data *)seg->s_data; 2468 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 2469 2470 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 2471 2472 /* 2473 * ISM segment is always rw. 2474 */ 2475 return (((sptd->spt_prot & prot) != prot) ? EACCES : 0); 2476 } 2477 2478 /* 2479 * Return an array of locked large pages, for empty slots allocate 2480 * private zero-filled anon pages. 2481 */ 2482 static int 2483 spt_anon_getpages( 2484 struct seg *sptseg, 2485 caddr_t sptaddr, 2486 size_t len, 2487 page_t *ppa[]) 2488 { 2489 struct spt_data *sptd = sptseg->s_data; 2490 struct anon_map *amp = sptd->spt_amp; 2491 enum seg_rw rw = sptd->spt_prot; 2492 uint_t szc = sptseg->s_szc; 2493 size_t pg_sz, share_sz = page_get_pagesize(szc); 2494 pgcnt_t lp_npgs; 2495 caddr_t lp_addr, e_sptaddr; 2496 uint_t vpprot, ppa_szc = 0; 2497 struct vpage *vpage = NULL; 2498 ulong_t j, ppa_idx; 2499 int err, ierr = 0; 2500 pgcnt_t an_idx; 2501 anon_sync_obj_t cookie; 2502 int anon_locked = 0; 2503 pgcnt_t amp_pgs; 2504 2505 2506 ASSERT(IS_P2ALIGNED(sptaddr, share_sz) && IS_P2ALIGNED(len, share_sz)); 2507 ASSERT(len != 0); 2508 2509 pg_sz = share_sz; 2510 lp_npgs = btop(pg_sz); 2511 lp_addr = sptaddr; 2512 e_sptaddr = sptaddr + len; 2513 an_idx = seg_page(sptseg, sptaddr); 2514 ppa_idx = 0; 2515 2516 ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 2517 2518 amp_pgs = page_get_pagecnt(amp->a_szc); 2519 2520 /*CONSTCOND*/ 2521 while (1) { 2522 for (; lp_addr < e_sptaddr; 2523 an_idx += lp_npgs, lp_addr += pg_sz, ppa_idx += lp_npgs) { 2524 2525 /* 2526 * If we're currently locked, and we get to a new 2527 * page, unlock our current anon chunk. 2528 */ 2529 if (anon_locked && P2PHASE(an_idx, amp_pgs) == 0) { 2530 anon_array_exit(&cookie); 2531 anon_locked = 0; 2532 } 2533 if (!anon_locked) { 2534 anon_array_enter(amp, an_idx, &cookie); 2535 anon_locked = 1; 2536 } 2537 ppa_szc = (uint_t)-1; 2538 ierr = anon_map_getpages(amp, an_idx, szc, sptseg, 2539 lp_addr, sptd->spt_prot, &vpprot, &ppa[ppa_idx], 2540 &ppa_szc, vpage, rw, 0, segvn_anypgsz, 0, kcred); 2541 2542 if (ierr != 0) { 2543 if (ierr > 0) { 2544 err = FC_MAKE_ERR(ierr); 2545 goto lpgs_err; 2546 } 2547 break; 2548 } 2549 } 2550 if (lp_addr == e_sptaddr) { 2551 break; 2552 } 2553 ASSERT(lp_addr < e_sptaddr); 2554 2555 /* 2556 * ierr == -1 means we failed to allocate a large page. 2557 * so do a size down operation. 2558 * 2559 * ierr == -2 means some other process that privately shares 2560 * pages with this process has allocated a larger page and we 2561 * need to retry with larger pages. So do a size up 2562 * operation. This relies on the fact that large pages are 2563 * never partially shared i.e. if we share any constituent 2564 * page of a large page with another process we must share the 2565 * entire large page. Note this cannot happen for SOFTLOCK 2566 * case, unless current address (lpaddr) is at the beginning 2567 * of the next page size boundary because the other process 2568 * couldn't have relocated locked pages. 2569 */ 2570 ASSERT(ierr == -1 || ierr == -2); 2571 if (segvn_anypgsz) { 2572 ASSERT(ierr == -2 || szc != 0); 2573 ASSERT(ierr == -1 || szc < sptseg->s_szc); 2574 szc = (ierr == -1) ? szc - 1 : szc + 1; 2575 } else { 2576 /* 2577 * For faults and segvn_anypgsz == 0 2578 * we need to be careful not to loop forever 2579 * if existing page is found with szc other 2580 * than 0 or seg->s_szc. This could be due 2581 * to page relocations on behalf of DR or 2582 * more likely large page creation. For this 2583 * case simply re-size to existing page's szc 2584 * if returned by anon_map_getpages(). 2585 */ 2586 if (ppa_szc == (uint_t)-1) { 2587 szc = (ierr == -1) ? 0 : sptseg->s_szc; 2588 } else { 2589 ASSERT(ppa_szc <= sptseg->s_szc); 2590 ASSERT(ierr == -2 || ppa_szc < szc); 2591 ASSERT(ierr == -1 || ppa_szc > szc); 2592 szc = ppa_szc; 2593 } 2594 } 2595 pg_sz = page_get_pagesize(szc); 2596 lp_npgs = btop(pg_sz); 2597 ASSERT(IS_P2ALIGNED(lp_addr, pg_sz)); 2598 } 2599 if (anon_locked) { 2600 anon_array_exit(&cookie); 2601 } 2602 ANON_LOCK_EXIT(&->a_rwlock); 2603 return (0); 2604 2605 lpgs_err: 2606 if (anon_locked) { 2607 anon_array_exit(&cookie); 2608 } 2609 ANON_LOCK_EXIT(&->a_rwlock); 2610 for (j = 0; j < ppa_idx; j++) 2611 page_unlock(ppa[j]); 2612 return (err); 2613 } 2614 2615 /* 2616 * count the number of bytes in a set of spt pages that are currently not 2617 * locked 2618 */ 2619 static rctl_qty_t 2620 spt_unlockedbytes(pgcnt_t npages, page_t **ppa) 2621 { 2622 ulong_t i; 2623 rctl_qty_t unlocked = 0; 2624 2625 for (i = 0; i < npages; i++) { 2626 if (ppa[i]->p_lckcnt == 0) 2627 unlocked += PAGESIZE; 2628 } 2629 return (unlocked); 2630 } 2631 2632 extern u_longlong_t randtick(void); 2633 /* number of locks to reserve/skip by spt_lockpages() and spt_unlockpages() */ 2634 #define NLCK (NCPU_P2) 2635 /* Random number with a range [0, n-1], n must be power of two */ 2636 #define RAND_P2(n) \ 2637 ((((long)curthread >> PTR24_LSB) ^ (long)randtick()) & ((n) - 1)) 2638 2639 int 2640 spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages, 2641 page_t **ppa, ulong_t *lockmap, size_t pos, 2642 rctl_qty_t *locked) 2643 { 2644 struct shm_data *shmd = seg->s_data; 2645 struct spt_data *sptd = shmd->shm_sptseg->s_data; 2646 ulong_t i; 2647 int kernel; 2648 pgcnt_t nlck = 0; 2649 int rv = 0; 2650 int use_reserved = 1; 2651 2652 /* return the number of bytes actually locked */ 2653 *locked = 0; 2654 2655 /* 2656 * To avoid contention on freemem_lock, availrmem and pages_locked 2657 * global counters are updated only every nlck locked pages instead of 2658 * every time. Reserve nlck locks up front and deduct from this 2659 * reservation for each page that requires a lock. When the reservation 2660 * is consumed, reserve again. nlck is randomized, so the competing 2661 * threads do not fall into a cyclic lock contention pattern. When 2662 * memory is low, the lock ahead is disabled, and instead page_pp_lock() 2663 * is used to lock pages. 2664 */ 2665 for (i = 0; i < npages; anon_index++, pos++, i++) { 2666 if (nlck == 0 && use_reserved == 1) { 2667 nlck = NLCK + RAND_P2(NLCK); 2668 /* if fewer loops left, decrease nlck */ 2669 nlck = MIN(nlck, npages - i); 2670 /* 2671 * Reserve nlck locks up front and deduct from this 2672 * reservation for each page that requires a lock. When 2673 * the reservation is consumed, reserve again. 2674 */ 2675 mutex_enter(&freemem_lock); 2676 if ((availrmem - nlck) < pages_pp_maximum) { 2677 /* Do not do advance memory reserves */ 2678 use_reserved = 0; 2679 } else { 2680 availrmem -= nlck; 2681 pages_locked += nlck; 2682 } 2683 mutex_exit(&freemem_lock); 2684 } 2685 if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) { 2686 if (sptd->spt_ppa_lckcnt[anon_index] < 2687 (ushort_t)DISM_LOCK_MAX) { 2688 if (++sptd->spt_ppa_lckcnt[anon_index] == 2689 (ushort_t)DISM_LOCK_MAX) { 2690 cmn_err(CE_WARN, 2691 "DISM page lock limit " 2692 "reached on DISM offset 0x%lx\n", 2693 anon_index << PAGESHIFT); 2694 } 2695 kernel = (sptd->spt_ppa && 2696 sptd->spt_ppa[anon_index]); 2697 if (!page_pp_lock(ppa[i], 0, kernel || 2698 use_reserved)) { 2699 sptd->spt_ppa_lckcnt[anon_index]--; 2700 rv = EAGAIN; 2701 break; 2702 } 2703 /* if this is a newly locked page, count it */ 2704 if (ppa[i]->p_lckcnt == 1) { 2705 if (kernel == 0 && use_reserved == 1) 2706 nlck--; 2707 *locked += PAGESIZE; 2708 } 2709 shmd->shm_lckpgs++; 2710 shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED; 2711 if (lockmap != NULL) 2712 BT_SET(lockmap, pos); 2713 } 2714 } 2715 } 2716 /* Return unused lock reservation */ 2717 if (nlck != 0 && use_reserved == 1) { 2718 mutex_enter(&freemem_lock); 2719 availrmem += nlck; 2720 pages_locked -= nlck; 2721 mutex_exit(&freemem_lock); 2722 } 2723 2724 return (rv); 2725 } 2726 2727 int 2728 spt_unlockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages, 2729 rctl_qty_t *unlocked) 2730 { 2731 struct shm_data *shmd = seg->s_data; 2732 struct spt_data *sptd = shmd->shm_sptseg->s_data; 2733 struct anon_map *amp = sptd->spt_amp; 2734 struct anon *ap; 2735 struct vnode *vp; 2736 u_offset_t off; 2737 struct page *pp; 2738 int kernel; 2739 anon_sync_obj_t cookie; 2740 ulong_t i; 2741 pgcnt_t nlck = 0; 2742 pgcnt_t nlck_limit = NLCK; 2743 2744 ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 2745 for (i = 0; i < npages; i++, anon_index++) { 2746 if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) { 2747 anon_array_enter(amp, anon_index, &cookie); 2748 ap = anon_get_ptr(amp->ahp, anon_index); 2749 ASSERT(ap); 2750 2751 swap_xlate(ap, &vp, &off); 2752 anon_array_exit(&cookie); 2753 pp = page_lookup(vp, off, SE_SHARED); 2754 ASSERT(pp); 2755 /* 2756 * availrmem is decremented only for pages which are not 2757 * in seg pcache, for pages in seg pcache availrmem was 2758 * decremented in _dismpagelock() 2759 */ 2760 kernel = (sptd->spt_ppa && sptd->spt_ppa[anon_index]); 2761 ASSERT(pp->p_lckcnt > 0); 2762 2763 /* 2764 * lock page but do not change availrmem, we do it 2765 * ourselves every nlck loops. 2766 */ 2767 page_pp_unlock(pp, 0, 1); 2768 if (pp->p_lckcnt == 0) { 2769 if (kernel == 0) 2770 nlck++; 2771 *unlocked += PAGESIZE; 2772 } 2773 page_unlock(pp); 2774 shmd->shm_vpage[anon_index] &= ~DISM_PG_LOCKED; 2775 sptd->spt_ppa_lckcnt[anon_index]--; 2776 shmd->shm_lckpgs--; 2777 } 2778 2779 /* 2780 * To reduce freemem_lock contention, do not update availrmem 2781 * until at least NLCK pages have been unlocked. 2782 * 1. No need to update if nlck is zero 2783 * 2. Always update if the last iteration 2784 */ 2785 if (nlck > 0 && (nlck == nlck_limit || i == npages - 1)) { 2786 mutex_enter(&freemem_lock); 2787 availrmem += nlck; 2788 pages_locked -= nlck; 2789 mutex_exit(&freemem_lock); 2790 nlck = 0; 2791 nlck_limit = NLCK + RAND_P2(NLCK); 2792 } 2793 } 2794 ANON_LOCK_EXIT(&->a_rwlock); 2795 2796 return (0); 2797 } 2798 2799 /*ARGSUSED*/ 2800 static int 2801 segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, 2802 int attr, int op, ulong_t *lockmap, size_t pos) 2803 { 2804 struct shm_data *shmd = seg->s_data; 2805 struct seg *sptseg = shmd->shm_sptseg; 2806 struct spt_data *sptd = sptseg->s_data; 2807 struct kshmid *sp = sptd->spt_amp->a_sp; 2808 pgcnt_t npages, a_npages; 2809 page_t **ppa; 2810 pgcnt_t an_idx, a_an_idx, ppa_idx; 2811 caddr_t spt_addr, a_addr; /* spt and aligned address */ 2812 size_t a_len; /* aligned len */ 2813 size_t share_sz; 2814 ulong_t i; 2815 int sts = 0; 2816 rctl_qty_t unlocked = 0; 2817 rctl_qty_t locked = 0; 2818 struct proc *p = curproc; 2819 kproject_t *proj; 2820 2821 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 2822 ASSERT(sp != NULL); 2823 2824 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { 2825 return (0); 2826 } 2827 2828 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2829 an_idx = seg_page(seg, addr); 2830 npages = btopr(len); 2831 2832 if (an_idx + npages > btopr(shmd->shm_amp->size)) { 2833 return (ENOMEM); 2834 } 2835 2836 /* 2837 * A shm's project never changes, so no lock needed. 2838 * The shm has a hold on the project, so it will not go away. 2839 * Since we have a mapping to shm within this zone, we know 2840 * that the zone will not go away. 2841 */ 2842 proj = sp->shm_perm.ipc_proj; 2843 2844 if (op == MC_LOCK) { 2845 2846 /* 2847 * Need to align addr and size request if they are not 2848 * aligned so we can always allocate large page(s) however 2849 * we only lock what was requested in initial request. 2850 */ 2851 share_sz = page_get_pagesize(sptseg->s_szc); 2852 a_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz); 2853 a_len = P2ROUNDUP((uintptr_t)(((addr + len) - a_addr)), 2854 share_sz); 2855 a_npages = btop(a_len); 2856 a_an_idx = seg_page(seg, a_addr); 2857 spt_addr = sptseg->s_base + ptob(a_an_idx); 2858 ppa_idx = an_idx - a_an_idx; 2859 2860 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * a_npages), 2861 KM_NOSLEEP)) == NULL) { 2862 return (ENOMEM); 2863 } 2864 2865 /* 2866 * Don't cache any new pages for IO and 2867 * flush any cached pages. 2868 */ 2869 mutex_enter(&sptd->spt_lock); 2870 if (sptd->spt_ppa != NULL) 2871 sptd->spt_flags |= DISM_PPA_CHANGED; 2872 2873 sts = spt_anon_getpages(sptseg, spt_addr, a_len, ppa); 2874 if (sts != 0) { 2875 mutex_exit(&sptd->spt_lock); 2876 kmem_free(ppa, ((sizeof (page_t *)) * a_npages)); 2877 return (sts); 2878 } 2879 2880 mutex_enter(&sp->shm_mlock); 2881 /* enforce locked memory rctl */ 2882 unlocked = spt_unlockedbytes(npages, &ppa[ppa_idx]); 2883 2884 mutex_enter(&p->p_lock); 2885 if (rctl_incr_locked_mem(p, proj, unlocked, 0)) { 2886 mutex_exit(&p->p_lock); 2887 sts = EAGAIN; 2888 } else { 2889 mutex_exit(&p->p_lock); 2890 sts = spt_lockpages(seg, an_idx, npages, 2891 &ppa[ppa_idx], lockmap, pos, &locked); 2892 2893 /* 2894 * correct locked count if not all pages could be 2895 * locked 2896 */ 2897 if ((unlocked - locked) > 0) { 2898 rctl_decr_locked_mem(NULL, proj, 2899 (unlocked - locked), 0); 2900 } 2901 } 2902 /* 2903 * unlock pages 2904 */ 2905 for (i = 0; i < a_npages; i++) 2906 page_unlock(ppa[i]); 2907 if (sptd->spt_ppa != NULL) 2908 sptd->spt_flags |= DISM_PPA_CHANGED; 2909 mutex_exit(&sp->shm_mlock); 2910 mutex_exit(&sptd->spt_lock); 2911 2912 kmem_free(ppa, ((sizeof (page_t *)) * a_npages)); 2913 2914 } else if (op == MC_UNLOCK) { /* unlock */ 2915 page_t **ppa; 2916 2917 mutex_enter(&sptd->spt_lock); 2918 if (shmd->shm_lckpgs == 0) { 2919 mutex_exit(&sptd->spt_lock); 2920 return (0); 2921 } 2922 /* 2923 * Don't cache new IO pages. 2924 */ 2925 if (sptd->spt_ppa != NULL) 2926 sptd->spt_flags |= DISM_PPA_CHANGED; 2927 2928 mutex_enter(&sp->shm_mlock); 2929 sts = spt_unlockpages(seg, an_idx, npages, &unlocked); 2930 if ((ppa = sptd->spt_ppa) != NULL) 2931 sptd->spt_flags |= DISM_PPA_CHANGED; 2932 mutex_exit(&sptd->spt_lock); 2933 2934 rctl_decr_locked_mem(NULL, proj, unlocked, 0); 2935 mutex_exit(&sp->shm_mlock); 2936 2937 if (ppa != NULL) 2938 seg_ppurge_wiredpp(ppa); 2939 } 2940 return (sts); 2941 } 2942 2943 /*ARGSUSED*/ 2944 int 2945 segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 2946 { 2947 struct shm_data *shmd = (struct shm_data *)seg->s_data; 2948 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 2949 spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1; 2950 2951 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 2952 2953 /* 2954 * ISM segment is always rw. 2955 */ 2956 while (--pgno >= 0) 2957 *protv++ = sptd->spt_prot; 2958 return (0); 2959 } 2960 2961 /*ARGSUSED*/ 2962 u_offset_t 2963 segspt_shmgetoffset(struct seg *seg, caddr_t addr) 2964 { 2965 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 2966 2967 /* Offset does not matter in ISM memory */ 2968 2969 return ((u_offset_t)0); 2970 } 2971 2972 /* ARGSUSED */ 2973 int 2974 segspt_shmgettype(struct seg *seg, caddr_t addr) 2975 { 2976 struct shm_data *shmd = (struct shm_data *)seg->s_data; 2977 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 2978 2979 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 2980 2981 /* 2982 * The shared memory mapping is always MAP_SHARED, SWAP is only 2983 * reserved for DISM 2984 */ 2985 return (MAP_SHARED | 2986 ((sptd->spt_flags & SHM_PAGEABLE) ? 0 : MAP_NORESERVE)); 2987 } 2988 2989 /*ARGSUSED*/ 2990 int 2991 segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 2992 { 2993 struct shm_data *shmd = (struct shm_data *)seg->s_data; 2994 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 2995 2996 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 2997 2998 *vpp = sptd->spt_vp; 2999 return (0); 3000 } 3001 3002 /* 3003 * We need to wait for pending IO to complete to a DISM segment in order for 3004 * pages to get kicked out of the seg_pcache. 120 seconds should be more 3005 * than enough time to wait. 3006 */ 3007 static clock_t spt_pcache_wait = 120; 3008 3009 /*ARGSUSED*/ 3010 static int 3011 segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) 3012 { 3013 struct shm_data *shmd = (struct shm_data *)seg->s_data; 3014 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 3015 struct anon_map *amp; 3016 pgcnt_t pg_idx; 3017 ushort_t gen; 3018 clock_t end_lbolt; 3019 int writer; 3020 page_t **ppa; 3021 3022 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 3023 3024 if (behav == MADV_FREE || behav == MADV_PURGE) { 3025 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) 3026 return (0); 3027 3028 amp = sptd->spt_amp; 3029 pg_idx = seg_page(seg, addr); 3030 3031 mutex_enter(&sptd->spt_lock); 3032 if ((ppa = sptd->spt_ppa) == NULL) { 3033 mutex_exit(&sptd->spt_lock); 3034 ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 3035 (void) anon_disclaim(amp, pg_idx, len, behav, NULL); 3036 ANON_LOCK_EXIT(&->a_rwlock); 3037 return (0); 3038 } 3039 3040 sptd->spt_flags |= DISM_PPA_CHANGED; 3041 gen = sptd->spt_gen; 3042 3043 mutex_exit(&sptd->spt_lock); 3044 3045 /* 3046 * Purge all DISM cached pages 3047 */ 3048 seg_ppurge_wiredpp(ppa); 3049 3050 /* 3051 * Drop the AS_LOCK so that other threads can grab it 3052 * in the as_pageunlock path and hopefully get the segment 3053 * kicked out of the seg_pcache. We bump the shm_softlockcnt 3054 * to keep this segment resident. 3055 */ 3056 writer = AS_WRITE_HELD(seg->s_as); 3057 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt))); 3058 AS_LOCK_EXIT(seg->s_as); 3059 3060 mutex_enter(&sptd->spt_lock); 3061 3062 end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait); 3063 3064 /* 3065 * Try to wait for pages to get kicked out of the seg_pcache. 3066 */ 3067 while (sptd->spt_gen == gen && 3068 (sptd->spt_flags & DISM_PPA_CHANGED) && 3069 ddi_get_lbolt() < end_lbolt) { 3070 if (!cv_timedwait_sig(&sptd->spt_cv, 3071 &sptd->spt_lock, end_lbolt)) { 3072 break; 3073 } 3074 } 3075 3076 mutex_exit(&sptd->spt_lock); 3077 3078 /* Regrab the AS_LOCK and release our hold on the segment */ 3079 AS_LOCK_ENTER(seg->s_as, writer ? RW_WRITER : RW_READER); 3080 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt))); 3081 if (shmd->shm_softlockcnt <= 0) { 3082 if (AS_ISUNMAPWAIT(seg->s_as)) { 3083 mutex_enter(&seg->s_as->a_contents); 3084 if (AS_ISUNMAPWAIT(seg->s_as)) { 3085 AS_CLRUNMAPWAIT(seg->s_as); 3086 cv_broadcast(&seg->s_as->a_cv); 3087 } 3088 mutex_exit(&seg->s_as->a_contents); 3089 } 3090 } 3091 3092 ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 3093 (void) anon_disclaim(amp, pg_idx, len, behav, NULL); 3094 ANON_LOCK_EXIT(&->a_rwlock); 3095 } else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP || 3096 behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) { 3097 int already_set; 3098 ulong_t anon_index; 3099 lgrp_mem_policy_t policy; 3100 caddr_t shm_addr; 3101 size_t share_size; 3102 size_t size; 3103 struct seg *sptseg = shmd->shm_sptseg; 3104 caddr_t sptseg_addr; 3105 3106 /* 3107 * Align address and length to page size of underlying segment 3108 */ 3109 share_size = page_get_pagesize(shmd->shm_sptseg->s_szc); 3110 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size); 3111 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), 3112 share_size); 3113 3114 amp = shmd->shm_amp; 3115 anon_index = seg_page(seg, shm_addr); 3116 3117 /* 3118 * And now we may have to adjust size downward if we have 3119 * exceeded the realsize of the segment or initial anon 3120 * allocations. 3121 */ 3122 sptseg_addr = sptseg->s_base + ptob(anon_index); 3123 if ((sptseg_addr + size) > 3124 (sptseg->s_base + sptd->spt_realsize)) 3125 size = (sptseg->s_base + sptd->spt_realsize) - 3126 sptseg_addr; 3127 3128 /* 3129 * Set memory allocation policy for this segment 3130 */ 3131 policy = lgrp_madv_to_policy(behav, len, MAP_SHARED); 3132 already_set = lgrp_shm_policy_set(policy, amp, anon_index, 3133 NULL, 0, len); 3134 3135 /* 3136 * If random memory allocation policy set already, 3137 * don't bother reapplying it. 3138 */ 3139 if (already_set && !LGRP_MEM_POLICY_REAPPLICABLE(policy)) 3140 return (0); 3141 3142 /* 3143 * Mark any existing pages in the given range for 3144 * migration, flushing the I/O page cache, and using 3145 * underlying segment to calculate anon index and get 3146 * anonmap and vnode pointer from 3147 */ 3148 if (shmd->shm_softlockcnt > 0) 3149 segspt_purge(seg); 3150 3151 page_mark_migrate(seg, shm_addr, size, amp, 0, NULL, 0, 0); 3152 } 3153 3154 return (0); 3155 } 3156 3157 /*ARGSUSED*/ 3158 void 3159 segspt_shmdump(struct seg *seg) 3160 { 3161 /* no-op for ISM segment */ 3162 } 3163 3164 /*ARGSUSED*/ 3165 static int 3166 segspt_shmsetpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc) 3167 { 3168 return (ENOTSUP); 3169 } 3170 3171 /* 3172 * get a memory ID for an addr in a given segment 3173 */ 3174 static int 3175 segspt_shmgetmemid(struct seg *seg, caddr_t addr, memid_t *memidp) 3176 { 3177 struct shm_data *shmd = (struct shm_data *)seg->s_data; 3178 struct anon *ap; 3179 size_t anon_index; 3180 struct anon_map *amp = shmd->shm_amp; 3181 struct spt_data *sptd = shmd->shm_sptseg->s_data; 3182 struct seg *sptseg = shmd->shm_sptseg; 3183 anon_sync_obj_t cookie; 3184 3185 anon_index = seg_page(seg, addr); 3186 3187 if (addr > (seg->s_base + sptd->spt_realsize)) { 3188 return (EFAULT); 3189 } 3190 3191 ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 3192 anon_array_enter(amp, anon_index, &cookie); 3193 ap = anon_get_ptr(amp->ahp, anon_index); 3194 if (ap == NULL) { 3195 struct page *pp; 3196 caddr_t spt_addr = sptseg->s_base + ptob(anon_index); 3197 3198 pp = anon_zero(sptseg, spt_addr, &ap, kcred); 3199 if (pp == NULL) { 3200 anon_array_exit(&cookie); 3201 ANON_LOCK_EXIT(&->a_rwlock); 3202 return (ENOMEM); 3203 } 3204 (void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP); 3205 page_unlock(pp); 3206 } 3207 anon_array_exit(&cookie); 3208 ANON_LOCK_EXIT(&->a_rwlock); 3209 memidp->val[0] = (uintptr_t)ap; 3210 memidp->val[1] = (uintptr_t)addr & PAGEOFFSET; 3211 return (0); 3212 } 3213 3214 /* 3215 * Get memory allocation policy info for specified address in given segment 3216 */ 3217 static lgrp_mem_policy_info_t * 3218 segspt_shmgetpolicy(struct seg *seg, caddr_t addr) 3219 { 3220 struct anon_map *amp; 3221 ulong_t anon_index; 3222 lgrp_mem_policy_info_t *policy_info; 3223 struct shm_data *shm_data; 3224 3225 ASSERT(seg != NULL); 3226 3227 /* 3228 * Get anon_map from segshm 3229 * 3230 * Assume that no lock needs to be held on anon_map, since 3231 * it should be protected by its reference count which must be 3232 * nonzero for an existing segment 3233 * Need to grab readers lock on policy tree though 3234 */ 3235 shm_data = (struct shm_data *)seg->s_data; 3236 if (shm_data == NULL) 3237 return (NULL); 3238 amp = shm_data->shm_amp; 3239 ASSERT(amp->refcnt != 0); 3240 3241 /* 3242 * Get policy info 3243 * 3244 * Assume starting anon index of 0 3245 */ 3246 anon_index = seg_page(seg, addr); 3247 policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0); 3248 3249 return (policy_info); 3250 } 3251 3252 /*ARGSUSED*/ 3253 static int 3254 segspt_shmcapable(struct seg *seg, segcapability_t capability) 3255 { 3256 return (0); 3257 } 3258