1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
24 */
25
26 #include <sys/param.h>
27 #include <sys/user.h>
28 #include <sys/mman.h>
29 #include <sys/kmem.h>
30 #include <sys/sysmacros.h>
31 #include <sys/cmn_err.h>
32 #include <sys/systm.h>
33 #include <sys/tuneable.h>
34 #include <vm/hat.h>
35 #include <vm/seg.h>
36 #include <vm/as.h>
37 #include <vm/anon.h>
38 #include <vm/page.h>
39 #include <sys/buf.h>
40 #include <sys/swap.h>
41 #include <sys/atomic.h>
42 #include <vm/seg_spt.h>
43 #include <sys/debug.h>
44 #include <sys/vtrace.h>
45 #include <sys/shm.h>
46 #include <sys/shm_impl.h>
47 #include <sys/lgrp.h>
48 #include <sys/vmsystm.h>
49 #include <sys/policy.h>
50 #include <sys/project.h>
51 #include <sys/tnf_probe.h>
52 #include <sys/zone.h>
53
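/*
 * All spt segments are mapped at this fixed address within their
 * private (dummy) address space; see sptcreate() below.
 */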
54 #define SEGSPTADDR (caddr_t)0x0
55
56 /*
57 * # pages used for spt
58 */
59 size_t spt_used;
60
61 /*
62 * segspt_minfree is the memory left for the system after ISM
63 * has locked its pages; it is set to 5% of availrmem in
64 * sptcreate() when ISM is created. ISM should not use more
65 * than ~90% of availrmem; if it does, the performance
66 * of the system may decrease. Machines with large memories may
67 * be able to use more memory for ISM, so we set the default
68 * segspt_minfree to 5% (which gives ISM at most 95% of availrmem).
69 * If somebody wants even more memory for ISM (risking hanging
70 * the system) they can patch segspt_minfree to a smaller number.
71 */
72 pgcnt_t segspt_minfree = 0;
73
74 static int segspt_create(struct seg *seg, caddr_t argsp);
75 static int segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize);
76 static void segspt_free(struct seg *seg);
77 static void segspt_free_pages(struct seg *seg, caddr_t addr, size_t len);
78 static lgrp_mem_policy_info_t *segspt_getpolicy(struct seg *seg, caddr_t addr);
79
80 static void
81 segspt_badop()
82 {
83 panic("segspt_badop called");
84 /*NOTREACHED*/
85 }
86
87 #define SEGSPT_BADOP(t) (t(*)())segspt_badop
88
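/*
 * Ops vector for the dummy spt segment itself. Apart from unmap, free
 * and getpolicy (and the unsupported inherit op), no other operation is
 * expected on this segment, so the remaining entries panic via
 * segspt_badop().
 */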
89 struct seg_ops segspt_ops = {
90 SEGSPT_BADOP(int), /* dup */
91 segspt_unmap,
92 segspt_free,
93 SEGSPT_BADOP(int), /* fault */
94 SEGSPT_BADOP(faultcode_t), /* faulta */
95 SEGSPT_BADOP(int), /* setprot */
96 SEGSPT_BADOP(int), /* checkprot */
97 SEGSPT_BADOP(int), /* kluster */
98 SEGSPT_BADOP(size_t), /* swapout */
99 SEGSPT_BADOP(int), /* sync */
100 SEGSPT_BADOP(size_t), /* incore */
101 SEGSPT_BADOP(int), /* lockop */
102 SEGSPT_BADOP(int), /* getprot */
103 SEGSPT_BADOP(u_offset_t), /* getoffset */
104 SEGSPT_BADOP(int), /* gettype */
105 SEGSPT_BADOP(int), /* getvp */
106 SEGSPT_BADOP(int), /* advise */
107 SEGSPT_BADOP(void), /* dump */
108 SEGSPT_BADOP(int), /* pagelock */
109 SEGSPT_BADOP(int), /* setpgsz */
110 SEGSPT_BADOP(int), /* getmemid */
111 segspt_getpolicy, /* getpolicy */
112 SEGSPT_BADOP(int), /* capable */
113 seg_inherit_notsup /* inherit */
114 };
115
116 static int segspt_shmdup(struct seg *seg, struct seg *newseg);
117 static int segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize);
118 static void segspt_shmfree(struct seg *seg);
119 static faultcode_t segspt_shmfault(struct hat *hat, struct seg *seg,
120 caddr_t addr, size_t len, enum fault_type type, enum seg_rw rw);
121 static faultcode_t segspt_shmfaulta(struct seg *seg, caddr_t addr);
122 static int segspt_shmsetprot(register struct seg *seg, register caddr_t addr,
123 register size_t len, register uint_t prot);
124 static int segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size,
125 uint_t prot);
126 static int segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta);
127 static size_t segspt_shmswapout(struct seg *seg);
128 static size_t segspt_shmincore(struct seg *seg, caddr_t addr, size_t len,
129 register char *vec);
130 static int segspt_shmsync(struct seg *seg, register caddr_t addr, size_t len,
131 int attr, uint_t flags);
132 static int segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
133 int attr, int op, ulong_t *lockmap, size_t pos);
134 static int segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len,
135 uint_t *protv);
136 static u_offset_t segspt_shmgetoffset(struct seg *seg, caddr_t addr);
137 static int segspt_shmgettype(struct seg *seg, caddr_t addr);
138 static int segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
139 static int segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len,
140 uint_t behav);
141 static void segspt_shmdump(struct seg *seg);
142 static int segspt_shmpagelock(struct seg *, caddr_t, size_t,
143 struct page ***, enum lock_type, enum seg_rw);
144 static int segspt_shmsetpgsz(struct seg *, caddr_t, size_t, uint_t);
145 static int segspt_shmgetmemid(struct seg *, caddr_t, memid_t *);
146 static lgrp_mem_policy_info_t *segspt_shmgetpolicy(struct seg *, caddr_t);
147 static int segspt_shmcapable(struct seg *, segcapability_t);
148
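/*
 * Ops vector for the per-process segments attached via segspt_shmattach();
 * these operate on shm_data and forward most of their work to the
 * underlying spt segment.
 */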
149 struct seg_ops segspt_shmops = {
150 segspt_shmdup,
151 segspt_shmunmap,
152 segspt_shmfree,
153 segspt_shmfault,
154 segspt_shmfaulta,
155 segspt_shmsetprot,
156 segspt_shmcheckprot,
157 segspt_shmkluster,
158 segspt_shmswapout,
159 segspt_shmsync,
160 segspt_shmincore,
161 segspt_shmlockop,
162 segspt_shmgetprot,
163 segspt_shmgetoffset,
164 segspt_shmgettype,
165 segspt_shmgetvp,
166 segspt_shmadvise, /* advise */
167 segspt_shmdump,
168 segspt_shmpagelock,
169 segspt_shmsetpgsz,
170 segspt_shmgetmemid,
171 segspt_shmgetpolicy,
172 segspt_shmcapable,
173 seg_inherit_notsup
174 };
175
176 static void segspt_purge(struct seg *seg);
177 static int segspt_reclaim(void *, caddr_t, size_t, struct page **,
178 enum seg_rw, int);
179 static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len,
180 page_t **ppa);
181
182
183
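/*
 * Create the dummy address space and the spt segment within it that
 * will hold the shared pages; on success *sptseg is set to the new
 * segment.
 */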
184 /*ARGSUSED*/
185 int
186 sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
187 uint_t prot, uint_t flags, uint_t share_szc)
188 {
189 int err;
190 struct as *newas;
191 struct segspt_crargs sptcargs;
192
193 #ifdef DEBUG
194 TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */,
195 tnf_ulong, size, size );
196 #endif
197 if (segspt_minfree == 0) /* leave min 5% of availrmem */
198 segspt_minfree = availrmem/20; /* for the system */
199
200 if (!hat_supported(HAT_SHARED_PT, (void *)0))
201 return (EINVAL);
202
203 /*
204 * get a new as for this shared memory segment
205 */
206 newas = as_alloc();
207 newas->a_proc = NULL;
208 sptcargs.amp = amp;
209 sptcargs.prot = prot;
210 sptcargs.flags = flags;
211 sptcargs.szc = share_szc;
212 /*
213 * create a shared page table (spt) segment
214 */
215
216 if (err = as_map(newas, SEGSPTADDR, size, segspt_create, &sptcargs)) {
217 as_free(newas);
218 return (err);
219 }
220 *sptseg = sptcargs.seg_spt;
221 return (0);
222 }
223
224 void
225 sptdestroy(struct as *as, struct anon_map *amp)
226 {
227
228 #ifdef DEBUG
229 TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */);
230 #endif
231 (void) as_unmap(as, SEGSPTADDR, amp->size);
232 as_free(as);
233 }
234
235 /*
236 * Called from seg_free().
237 * Free (i.e., unlock, unmap, return to the free list)
238 * all the pages in the given seg.
239 */
240 void
241 segspt_free(struct seg *seg)
242 {
243 struct spt_data *sptd = (struct spt_data *)seg->s_data;
244
245 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
246
247 if (sptd != NULL) {
248 if (sptd->spt_realsize)
249 segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);
250
251 if (sptd->spt_ppa_lckcnt)
252 kmem_free(sptd->spt_ppa_lckcnt,
253 sizeof (*sptd->spt_ppa_lckcnt)
254 * btopr(sptd->spt_amp->size));
255 kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
256 cv_destroy(&sptd->spt_cv);
257 mutex_destroy(&sptd->spt_lock);
258 kmem_free(sptd, sizeof (*sptd));
259 }
260 }
261
262 /*ARGSUSED*/
263 static int
264 segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
265 uint_t flags)
266 {
267 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
268
269 return (0);
270 }
271
272 /*ARGSUSED*/
273 static size_t
274 segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
275 {
276 caddr_t eo_seg;
277 pgcnt_t npages;
278 struct shm_data *shmd = (struct shm_data *)seg->s_data;
279 struct seg *sptseg;
280 struct spt_data *sptd;
281
282 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
283 #ifdef lint
284 seg = seg;
285 #endif
286 sptseg = shmd->shm_sptseg;
287 sptd = sptseg->s_data;
288
289 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
290 eo_seg = addr + len;
291 while (addr < eo_seg) {
292 /* page exists, and it's locked. */
293 *vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
294 SEG_PAGE_ANON;
295 addr += PAGESIZE;
296 }
297 return (len);
298 } else {
299 struct anon_map *amp = shmd->shm_amp;
300 struct anon *ap;
301 page_t *pp;
302 pgcnt_t anon_index;
303 struct vnode *vp;
304 u_offset_t off;
305 ulong_t i;
306 int ret;
307 anon_sync_obj_t cookie;
308
309 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
310 anon_index = seg_page(seg, addr);
311 npages = btopr(len);
312 if (anon_index + npages > btopr(shmd->shm_amp->size)) {
313 return (EINVAL);
314 }
315 ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
316 for (i = 0; i < npages; i++, anon_index++) {
317 ret = 0;
318 anon_array_enter(amp, anon_index, &cookie);
319 ap = anon_get_ptr(amp->ahp, anon_index);
320 if (ap != NULL) {
321 swap_xlate(ap, &vp, &off);
322 anon_array_exit(&cookie);
323 pp = page_lookup_nowait(vp, off, SE_SHARED);
324 if (pp != NULL) {
325 ret |= SEG_PAGE_INCORE | SEG_PAGE_ANON;
326 page_unlock(pp);
327 }
328 } else {
329 anon_array_exit(&cookie);
330 }
331 if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
332 ret |= SEG_PAGE_LOCKED;
333 }
334 *vec++ = (char)ret;
335 }
336 ANON_LOCK_EXIT(&amp->a_rwlock);
337 return (len);
338 }
339 }
340
341 static int
342 segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
343 {
344 size_t share_size;
345
346 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
347
348 /*
349 * seg.s_size may have been rounded up to the largest page size
350 * in shmat().
351 * XXX This should be cleaned up. sptdestroy should take a length
352 * argument which should be the same as sptcreate. Then
353 * this rounding would not be needed (or is done in shm.c)
354 * Only the check for full segment will be needed.
355 *
356 * XXX -- shouldn't raddr == 0 always? These tests don't seem
357 * to be useful at all.
358 */
359 share_size = page_get_pagesize(seg->s_szc);
360 ssize = P2ROUNDUP(ssize, share_size);
361
362 if (raddr == seg->s_base && ssize == seg->s_size) {
363 seg_free(seg);
364 return (0);
365 } else
366 return (EINVAL);
367 }
368
369 int
370 segspt_create(struct seg *seg, caddr_t argsp)
371 {
372 int err;
373 caddr_t addr = seg->s_base;
374 struct spt_data *sptd;
375 struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp;
376 struct anon_map *amp = sptcargs->amp;
377 struct kshmid *sp = amp->a_sp;
378 struct cred *cred = CRED();
379 ulong_t i, j, anon_index = 0;
380 pgcnt_t npages = btopr(amp->size);
381 struct vnode *vp;
382 page_t **ppa;
383 uint_t hat_flags;
384 size_t pgsz;
385 pgcnt_t pgcnt;
386 caddr_t a;
387 pgcnt_t pidx;
388 size_t sz;
389 proc_t *procp = curproc;
390 rctl_qty_t lockedbytes = 0;
391 kproject_t *proj;
392
393 /*
394 * We are holding the a_lock on the underlying dummy as,
395 * so we can make calls to the HAT layer.
396 */
397 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
398 ASSERT(sp != NULL);
399
400 #ifdef DEBUG
401 TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
402 tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size);
403 #endif
404 if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
405 if (err = anon_swap_adjust(npages))
406 return (err);
407 }
408 err = ENOMEM;
409
410 if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
411 goto out1;
412
413 if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
414 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
415 KM_NOSLEEP)) == NULL)
416 goto out2;
417 }
418
419 mutex_init(&sptd->spt_lock, NULL, MUTEX_DEFAULT, NULL);
420
421 if ((vp = kmem_zalloc(sizeof (*vp), KM_NOSLEEP)) == NULL)
422 goto out3;
423
424 seg->s_ops = &segspt_ops;
425 sptd->spt_vp = vp;
426 sptd->spt_amp = amp;
427 sptd->spt_prot = sptcargs->prot;
428 sptd->spt_flags = sptcargs->flags;
429 seg->s_data = (caddr_t)sptd;
430 sptd->spt_ppa = NULL;
431 sptd->spt_ppa_lckcnt = NULL;
432 seg->s_szc = sptcargs->szc;
433 cv_init(&sptd->spt_cv, NULL, CV_DEFAULT, NULL);
434 sptd->spt_gen = 0;
435
436 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
437 if (seg->s_szc > amp->a_szc) {
438 amp->a_szc = seg->s_szc;
439 }
440 ANON_LOCK_EXIT(&amp->a_rwlock);
441
442 /*
443 * Set policy to affect initial allocation of pages in
444 * anon_map_createpages()
445 */
446 (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, amp, anon_index,
447 NULL, 0, ptob(npages));
448
449 if (sptcargs->flags & SHM_PAGEABLE) {
450 size_t share_sz;
451 pgcnt_t new_npgs, more_pgs;
452 struct anon_hdr *nahp;
453 zone_t *zone;
454
455 share_sz = page_get_pagesize(seg->s_szc);
456 if (!IS_P2ALIGNED(amp->size, share_sz)) {
457 /*
458 * We round the size of the anon array up to a
459 * 4M boundary because we always create pages in
460 * 4M chunks when locking and faulting, so we
461 * don't have to check all the corner cases, e.g.
462 * whether there is enough space to allocate a
463 * 4M page.
464 */
465 new_npgs = btop(P2ROUNDUP(amp->size, share_sz));
466 more_pgs = new_npgs - npages;
467
468 /*
469 * The zone will never be NULL, as a fully created
470 * shm always has an owning zone.
471 */
472 zone = sp->shm_perm.ipc_zone_ref.zref_zone;
473 ASSERT(zone != NULL);
474 if (anon_resv_zone(ptob(more_pgs), zone) == 0) {
475 err = ENOMEM;
476 goto out4;
477 }
478
479 nahp = anon_create(new_npgs, ANON_SLEEP);
480 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
481 (void) anon_copy_ptr(amp->ahp, 0, nahp, 0, npages,
482 ANON_SLEEP);
483 anon_release(amp->ahp, npages);
484 amp->ahp = nahp;
485 ASSERT(amp->swresv == ptob(npages));
486 amp->swresv = amp->size = ptob(new_npgs);
487 ANON_LOCK_EXIT(&amp->a_rwlock);
488 npages = new_npgs;
489 }
490
491 sptd->spt_ppa_lckcnt = kmem_zalloc(npages *
492 sizeof (*sptd->spt_ppa_lckcnt), KM_SLEEP);
493 sptd->spt_pcachecnt = 0;
494 sptd->spt_realsize = ptob(npages);
495 sptcargs->seg_spt = seg;
496 return (0);
497 }
498
499 /*
500 * get array of pages for each anon slot in amp
501 */
502 if ((err = anon_map_createpages(amp, anon_index, ptob(npages), ppa,
503 seg, addr, S_CREATE, cred)) != 0)
504 goto out4;
505
506 mutex_enter(&sp->shm_mlock);
507
508 /* May be partially locked, so count bytes to charge for locking */
509 for (i = 0; i < npages; i++)
510 if (ppa[i]->p_lckcnt == 0)
511 lockedbytes += PAGESIZE;
512
513 proj = sp->shm_perm.ipc_proj;
514
515 if (lockedbytes > 0) {
516 mutex_enter(&procp->p_lock);
517 if (rctl_incr_locked_mem(procp, proj, lockedbytes, 0)) {
518 mutex_exit(&procp->p_lock);
519 mutex_exit(&sp->shm_mlock);
520 for (i = 0; i < npages; i++)
521 page_unlock(ppa[i]);
522 err = ENOMEM;
523 goto out4;
524 }
525 mutex_exit(&procp->p_lock);
526 }
527
528 /*
529 * addr is the initial address corresponding to the first page on the ppa list
530 */
531 for (i = 0; i < npages; i++) {
532 /* attempt to lock all pages */
533 if (page_pp_lock(ppa[i], 0, 1) == 0) {
534 /*
535 * if unable to lock any page, unlock all
536 * of them and return error
537 */
538 for (j = 0; j < i; j++)
539 page_pp_unlock(ppa[j], 0, 1);
540 for (i = 0; i < npages; i++)
541 page_unlock(ppa[i]);
542 rctl_decr_locked_mem(NULL, proj, lockedbytes, 0);
543 mutex_exit(&sp->shm_mlock);
544 err = ENOMEM;
545 goto out4;
546 }
547 }
548 mutex_exit(&sp->shm_mlock);
549
550 /*
551 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
552 * for the entire life of the segment, for example platforms
553 * that do not support Dynamic Reconfiguration.
554 */
555 hat_flags = HAT_LOAD_SHARE;
556 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL))
557 hat_flags |= HAT_LOAD_LOCK;
558
559 /*
560 * Load translations one large page at a time
561 * to make sure we don't create mappings bigger than
562 * the segment's size code, in case the underlying pages
563 * are shared with a segvn segment that uses a bigger
564 * size code than we do.
565 */
566 pgsz = page_get_pagesize(seg->s_szc);
567 pgcnt = page_get_pagecnt(seg->s_szc);
568 for (a = addr, pidx = 0; pidx < npages; a += pgsz, pidx += pgcnt) {
569 sz = MIN(pgsz, ptob(npages - pidx));
570 hat_memload_array(seg->s_as->a_hat, a, sz,
571 &ppa[pidx], sptd->spt_prot, hat_flags);
572 }
573
574 /*
575 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
576 * we will leave the pages locked SE_SHARED for the life
577 * of the ISM segment. This will prevent any calls to
578 * hat_pageunload() on this ISM segment for those platforms.
579 */
580 if (!(hat_flags & HAT_LOAD_LOCK)) {
581 /*
582 * On platforms that support HAT_DYNAMIC_ISM_UNMAP,
583 * we no longer need to hold the SE_SHARED lock on the pages,
584 * since L_PAGELOCK and F_SOFTLOCK calls will grab the
585 * SE_SHARED lock on the pages as necessary.
586 */
587 for (i = 0; i < npages; i++)
588 page_unlock(ppa[i]);
589 }
590 sptd->spt_pcachecnt = 0;
591 kmem_free(ppa, ((sizeof (page_t *)) * npages));
592 sptd->spt_realsize = ptob(npages);
593 atomic_add_long(&spt_used, npages);
594 sptcargs->seg_spt = seg;
595 return (0);
596
597 out4:
598 seg->s_data = NULL;
599 kmem_free(vp, sizeof (*vp));
600 cv_destroy(&sptd->spt_cv);
601 out3:
602 mutex_destroy(&sptd->spt_lock);
603 if ((sptcargs->flags & SHM_PAGEABLE) == 0)
604 kmem_free(ppa, (sizeof (*ppa) * npages));
605 out2:
606 kmem_free(sptd, sizeof (*sptd));
607 out1:
608 if ((sptcargs->flags & SHM_PAGEABLE) == 0)
609 anon_swap_restore(npages);
610 return (err);
611 }
612
613 /*ARGSUSED*/
614 void
615 segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
616 {
617 struct page *pp;
618 struct spt_data *sptd = (struct spt_data *)seg->s_data;
619 pgcnt_t npages;
620 ulong_t anon_idx;
621 struct anon_map *amp;
622 struct anon *ap;
623 struct vnode *vp;
624 u_offset_t off;
625 uint_t hat_flags;
626 int root = 0;
627 pgcnt_t pgs, curnpgs = 0;
628 page_t *rootpp;
629 rctl_qty_t unlocked_bytes = 0;
630 kproject_t *proj;
631 kshmid_t *sp;
632
633 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
634
635 len = P2ROUNDUP(len, PAGESIZE);
636
637 npages = btop(len);
638
639 hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP;
640 if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
641 (sptd->spt_flags & SHM_PAGEABLE)) {
642 hat_flags = HAT_UNLOAD_UNMAP;
643 }
644
645 hat_unload(seg->s_as->a_hat, addr, len, hat_flags);
646
647 amp = sptd->spt_amp;
648 if (sptd->spt_flags & SHM_PAGEABLE)
649 npages = btop(amp->size);
650
651 ASSERT(amp != NULL);
652
653 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
654 sp = amp->a_sp;
655 proj = sp->shm_perm.ipc_proj;
656 mutex_enter(&sp->shm_mlock);
657 }
658 for (anon_idx = 0; anon_idx < npages; anon_idx++) {
659 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
660 if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
661 panic("segspt_free_pages: null app");
662 /*NOTREACHED*/
663 }
664 } else {
665 if ((ap = anon_get_next_ptr(amp->ahp, &anon_idx))
666 == NULL)
667 continue;
668 }
669 ASSERT(ANON_ISBUSY(anon_get_slot(amp->ahp, anon_idx)) == 0);
670 swap_xlate(ap, &vp, &off);
671
672 /*
673 * If this platform supports HAT_DYNAMIC_ISM_UNMAP,
674 * the pages won't be held SE_SHARED locked at this
675 * point.
676 *
677 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
678 * the pages are still held SE_SHARED locked from the
679 * original segspt_create().
680 *
681 * Our goal is to get an SE_EXCL lock on each page, remove
682 * the permanent lock on it and invalidate the page.
683 */
684 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
685 if (hat_flags == HAT_UNLOAD_UNMAP)
686 pp = page_lookup(vp, off, SE_EXCL);
687 else {
688 if ((pp = page_find(vp, off)) == NULL) {
689 panic("segspt_free_pages: "
690 "page not locked");
691 /*NOTREACHED*/
692 }
693 if (!page_tryupgrade(pp)) {
694 page_unlock(pp);
695 pp = page_lookup(vp, off, SE_EXCL);
696 }
697 }
698 if (pp == NULL) {
699 panic("segspt_free_pages: "
700 "page not in the system");
701 /*NOTREACHED*/
702 }
703 ASSERT(pp->p_lckcnt > 0);
704 page_pp_unlock(pp, 0, 1);
705 if (pp->p_lckcnt == 0)
706 unlocked_bytes += PAGESIZE;
707 } else {
708 if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL)
709 continue;
710 }
711 /*
712 * It's logical to invalidate the pages here as in most cases
713 * these were created by segspt.
714 */
715 if (pp->p_szc != 0) {
716 if (root == 0) {
717 ASSERT(curnpgs == 0);
718 root = 1;
719 rootpp = pp;
720 pgs = curnpgs = page_get_pagecnt(pp->p_szc);
721 ASSERT(pgs > 1);
722 ASSERT(IS_P2ALIGNED(pgs, pgs));
723 ASSERT(!(page_pptonum(pp) & (pgs - 1)));
724 curnpgs--;
725 } else if ((page_pptonum(pp) & (pgs - 1)) == pgs - 1) {
726 ASSERT(curnpgs == 1);
727 ASSERT(page_pptonum(pp) ==
728 page_pptonum(rootpp) + (pgs - 1));
729 page_destroy_pages(rootpp);
730 root = 0;
731 curnpgs = 0;
732 } else {
733 ASSERT(curnpgs > 1);
734 ASSERT(page_pptonum(pp) ==
735 page_pptonum(rootpp) + (pgs - curnpgs));
736 curnpgs--;
737 }
738 } else {
739 if (root != 0 || curnpgs != 0) {
740 panic("segspt_free_pages: bad large page");
741 /*NOTREACHED*/
742 }
743 /*
744 * Before destroying the pages, we need to take care
745 * of the rctl locked memory accounting. For that
746 * we need to calculate the unlocked_bytes.
747 */
748 if (pp->p_lckcnt > 0)
749 unlocked_bytes += PAGESIZE;
750 /*LINTED: constant in conditional context */
751 VN_DISPOSE(pp, B_INVAL, 0, kcred);
752 }
753 }
754 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
755 if (unlocked_bytes > 0)
756 rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
757 mutex_exit(&sp->shm_mlock);
758 }
759 if (root != 0 || curnpgs != 0) {
760 panic("segspt_free_pages: bad large page");
761 /*NOTREACHED*/
762 }
763
764 /*
765 * mark that pages have been released
766 */
767 sptd->spt_realsize = 0;
768
769 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
770 atomic_add_long(&spt_used, -npages);
771 anon_swap_restore(npages);
772 }
773 }
774
775 /*
776 * Get memory allocation policy info for specified address in given segment
777 */
778 static lgrp_mem_policy_info_t *
779 segspt_getpolicy(struct seg *seg, caddr_t addr)
780 {
781 struct anon_map *amp;
782 ulong_t anon_index;
783 lgrp_mem_policy_info_t *policy_info;
784 struct spt_data *spt_data;
785
786 ASSERT(seg != NULL);
787
788 /*
789 * Get anon_map from segspt
790 *
791 * Assume that no lock needs to be held on anon_map, since
792 * it should be protected by its reference count which must be
793 * nonzero for an existing segment.
794 * Need to grab the readers lock on the policy tree though.
795 */
796 spt_data = (struct spt_data *)seg->s_data;
797 if (spt_data == NULL)
798 return (NULL);
799 amp = spt_data->spt_amp;
800 ASSERT(amp->refcnt != 0);
801
802 /*
803 * Get policy info
804 *
805 * Assume starting anon index of 0
806 */
807 anon_index = seg_page(seg, addr);
808 policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);
809
810 return (policy_info);
811 }
812
813 /*
814 * DISM only.
815 * Return locked pages over a given range.
816 *
817 * We will cache all DISM locked pages and save the pplist for the
818 * entire segment in the ppa field of the underlying DISM segment structure.
819 * Later, during a call to segspt_reclaim() we will use this ppa array
820 * to page_unlock() all of the pages and then we will free this ppa list.
821 */
822 /*ARGSUSED*/
823 static int
824 segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len,
825 struct page ***ppp, enum lock_type type, enum seg_rw rw)
826 {
827 struct shm_data *shmd = (struct shm_data *)seg->s_data;
828 struct seg *sptseg = shmd->shm_sptseg;
829 struct spt_data *sptd = sptseg->s_data;
830 pgcnt_t pg_idx, npages, tot_npages, npgs;
831 struct page **pplist, **pl, **ppa, *pp;
832 struct anon_map *amp;
833 spgcnt_t an_idx;
834 int ret = ENOTSUP;
835 uint_t pl_built = 0;
836 struct anon *ap;
837 struct vnode *vp;
838 u_offset_t off;
839 pgcnt_t claim_availrmem = 0;
840 uint_t szc;
841
842 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
843 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
844
845 /*
846 * We want to lock/unlock the entire ISM segment. Therefore,
847 * we will be using the underlying sptseg and its base address
848 * and length for the caching arguments.
849 */
850 ASSERT(sptseg);
851 ASSERT(sptd);
852
853 pg_idx = seg_page(seg, addr);
854 npages = btopr(len);
855
856 /*
857 * check if the request is larger than number of pages covered
858 * by amp
859 */
860 if (pg_idx + npages > btopr(sptd->spt_amp->size)) {
861 *ppp = NULL;
862 return (ENOTSUP);
863 }
864
865 if (type == L_PAGEUNLOCK) {
866 ASSERT(sptd->spt_ppa != NULL);
867
868 seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size,
869 sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
870
871 /*
872 * If someone is blocked while unmapping, we purge
873 * segment page cache and thus reclaim pplist synchronously
874 * without waiting for seg_pasync_thread. This speeds up
875 * unmapping in cases where munmap(2) is called, while
876 * raw async i/o is still in progress or where a thread
877 * exits on data fault in a multithreaded application.
878 */
879 if ((sptd->spt_flags & DISM_PPA_CHANGED) ||
880 (AS_ISUNMAPWAIT(seg->s_as) &&
881 shmd->shm_softlockcnt > 0)) {
882 segspt_purge(seg);
883 }
884 return (0);
885 }
886
887 /* The L_PAGELOCK case ... */
888
889 if (sptd->spt_flags & DISM_PPA_CHANGED) {
890 segspt_purge(seg);
891 /*
892 * for DISM the ppa array needs to be rebuilt since
893 * the number of locked pages could have changed
894 */
895 *ppp = NULL;
896 return (ENOTSUP);
897 }
898
899 /*
900 * First try to find pages in segment page cache, without
901 * holding the segment lock.
902 */
903 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
904 S_WRITE, SEGP_FORCE_WIRED);
905 if (pplist != NULL) {
906 ASSERT(sptd->spt_ppa != NULL);
907 ASSERT(sptd->spt_ppa == pplist);
908 ppa = sptd->spt_ppa;
909 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
910 if (ppa[an_idx] == NULL) {
911 seg_pinactive(seg, NULL, seg->s_base,
912 sptd->spt_amp->size, ppa,
913 S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
914 *ppp = NULL;
915 return (ENOTSUP);
916 }
917 if ((szc = ppa[an_idx]->p_szc) != 0) {
918 npgs = page_get_pagecnt(szc);
919 an_idx = P2ROUNDUP(an_idx + 1, npgs);
920 } else {
921 an_idx++;
922 }
923 }
924 /*
925 * Since we cache the entire DISM segment, we want to
926 * set ppp to point to the first slot that corresponds
927 * to the requested addr, i.e. pg_idx.
928 */
929 *ppp = &(sptd->spt_ppa[pg_idx]);
930 return (0);
931 }
932
933 mutex_enter(&sptd->spt_lock);
934 /*
935 * try to find pages in segment page cache with mutex
936 */
937 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
938 S_WRITE, SEGP_FORCE_WIRED);
939 if (pplist != NULL) {
940 ASSERT(sptd->spt_ppa != NULL);
941 ASSERT(sptd->spt_ppa == pplist);
942 ppa = sptd->spt_ppa;
943 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
944 if (ppa[an_idx] == NULL) {
945 mutex_exit(&sptd->spt_lock);
946 seg_pinactive(seg, NULL, seg->s_base,
947 sptd->spt_amp->size, ppa,
948 S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
949 *ppp = NULL;
950 return (ENOTSUP);
951 }
952 if ((szc = ppa[an_idx]->p_szc) != 0) {
953 npgs = page_get_pagecnt(szc);
954 an_idx = P2ROUNDUP(an_idx + 1, npgs);
955 } else {
956 an_idx++;
957 }
958 }
959 /*
960 * Since we cache the entire DISM segment, we want to
961 * set ppp to point to the first slot that corresponds
962 * to the requested addr, i.e. pg_idx.
963 */
964 mutex_exit(&sptd->spt_lock);
965 *ppp = &(sptd->spt_ppa[pg_idx]);
966 return (0);
967 }
968 if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size,
969 SEGP_FORCE_WIRED) == SEGP_FAIL) {
970 mutex_exit(&sptd->spt_lock);
971 *ppp = NULL;
972 return (ENOTSUP);
973 }
974
975 /*
976 * No need to worry about protections because DISM pages are always rw.
977 */
978 pl = pplist = NULL;
979 amp = sptd->spt_amp;
980
981 /*
982 * Do we need to build the ppa array?
983 */
984 if (sptd->spt_ppa == NULL) {
985 pgcnt_t lpg_cnt = 0;
986
987 pl_built = 1;
988 tot_npages = btopr(sptd->spt_amp->size);
989
990 ASSERT(sptd->spt_pcachecnt == 0);
991 pplist = kmem_zalloc(sizeof (page_t *) * tot_npages, KM_SLEEP);
992 pl = pplist;
993
994 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
995 for (an_idx = 0; an_idx < tot_npages; ) {
996 ap = anon_get_ptr(amp->ahp, an_idx);
997 /*
998 * Cache only mlocked pages. For large pages,
999 * if one (constituent) page is mlocked then all
1000 * pages of that large page are cached as well.
1001 * This allows quick lookups in the ppa
1002 * array.
1003 */
1004 if ((ap != NULL) && (lpg_cnt != 0 ||
1005 (sptd->spt_ppa_lckcnt[an_idx] != 0))) {
1006
1007 swap_xlate(ap, &vp, &off);
1008 pp = page_lookup(vp, off, SE_SHARED);
1009 ASSERT(pp != NULL);
1010 if (lpg_cnt == 0) {
1011 lpg_cnt++;
1012 /*
1013 * For a small page, we are done --
1014 * lpg_cnt is reset to 0 below.
1015 *
1016 * For a large page, we are guaranteed
1017 * to find the anon structures of all
1018 * constituent pages and a non-zero
1019 * lpg_cnt ensures that we don't test
1020 * for mlock for these. We are done
1021 * when lpg_cnt reaches (npgs + 1).
1022 * If we are not the first constituent
1023 * page, restart at the first one.
1024 */
1025 npgs = page_get_pagecnt(pp->p_szc);
1026 if (!IS_P2ALIGNED(an_idx, npgs)) {
1027 an_idx = P2ALIGN(an_idx, npgs);
1028 page_unlock(pp);
1029 continue;
1030 }
1031 }
1032 if (++lpg_cnt > npgs)
1033 lpg_cnt = 0;
1034
1035 /*
1036 * availrmem is decremented only
1037 * for unlocked pages
1038 */
1039 if (sptd->spt_ppa_lckcnt[an_idx] == 0)
1040 claim_availrmem++;
1041 pplist[an_idx] = pp;
1042 }
1043 an_idx++;
1044 }
1045 ANON_LOCK_EXIT(&amp->a_rwlock);
1046
1047 if (claim_availrmem) {
1048 mutex_enter(&freemem_lock);
1049 if (availrmem < tune.t_minarmem + claim_availrmem) {
1050 mutex_exit(&freemem_lock);
1051 ret = ENOTSUP;
1052 claim_availrmem = 0;
1053 goto insert_fail;
1054 } else {
1055 availrmem -= claim_availrmem;
1056 }
1057 mutex_exit(&freemem_lock);
1058 }
1059
1060 sptd->spt_ppa = pl;
1061 } else {
1062 /*
1063 * We already have a valid ppa[].
1064 */
1065 pl = sptd->spt_ppa;
1066 }
1067
1068 ASSERT(pl != NULL);
1069
1070 ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size,
1071 sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED,
1072 segspt_reclaim);
1073 if (ret == SEGP_FAIL) {
1074 /*
1075 * seg_pinsert failed. We return
1076 * ENOTSUP, so that the as_pagelock() code will
1077 * then try the slower F_SOFTLOCK path.
1078 */
1079 if (pl_built) {
1080 /*
1081 * No one else has referenced the ppa[].
1082 * We created it and we need to destroy it.
1083 */
1084 sptd->spt_ppa = NULL;
1085 }
1086 ret = ENOTSUP;
1087 goto insert_fail;
1088 }
1089
1090 /*
1091 * In either case, we increment softlockcnt on the 'real' segment.
1092 */
1093 sptd->spt_pcachecnt++;
1094 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
1095
1096 ppa = sptd->spt_ppa;
1097 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
1098 if (ppa[an_idx] == NULL) {
1099 mutex_exit(&sptd->spt_lock);
1100 seg_pinactive(seg, NULL, seg->s_base,
1101 sptd->spt_amp->size,
1102 pl, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
1103 *ppp = NULL;
1104 return (ENOTSUP);
1105 }
1106 if ((szc = ppa[an_idx]->p_szc) != 0) {
1107 npgs = page_get_pagecnt(szc);
1108 an_idx = P2ROUNDUP(an_idx + 1, npgs);
1109 } else {
1110 an_idx++;
1111 }
1112 }
1113 /*
1114 * We can now drop the sptd->spt_lock since the ppa[]
1115 * exists and we have incremented pcachecnt.
1116 */
1117 mutex_exit(&sptd->spt_lock);
1118
1119 /*
1120 * Since we cache the entire segment, we want to
1121 * set ppp to point to the first slot that corresponds
1122 * to the requested addr, i.e. pg_idx.
1123 */
1124 *ppp = &(sptd->spt_ppa[pg_idx]);
1125 return (0);
1126
1127 insert_fail:
1128 /*
1129 * We will only reach this code if we tried and failed.
1130 *
1131 * And we can drop the lock on the dummy seg, once we've failed
1132 * to set up a new ppa[].
1133 */
1134 mutex_exit(&sptd->spt_lock);
1135
1136 if (pl_built) {
1137 if (claim_availrmem) {
1138 mutex_enter(&freemem_lock);
1139 availrmem += claim_availrmem;
1140 mutex_exit(&freemem_lock);
1141 }
1142
1143 /*
1144 * We created pl and we need to destroy it.
1145 */
1146 pplist = pl;
1147 for (an_idx = 0; an_idx < tot_npages; an_idx++) {
1148 if (pplist[an_idx] != NULL)
1149 page_unlock(pplist[an_idx]);
1150 }
1151 kmem_free(pl, sizeof (page_t *) * tot_npages);
1152 }
1153
1154 if (shmd->shm_softlockcnt <= 0) {
1155 if (AS_ISUNMAPWAIT(seg->s_as)) {
1156 mutex_enter(&seg->s_as->a_contents);
1157 if (AS_ISUNMAPWAIT(seg->s_as)) {
1158 AS_CLRUNMAPWAIT(seg->s_as);
1159 cv_broadcast(&seg->s_as->a_cv);
1160 }
1161 mutex_exit(&seg->s_as->a_contents);
1162 }
1163 }
1164 *ppp = NULL;
1165 return (ret);
1166 }
1167
1168
1169
1170 /*
1171 * return locked pages over a given range.
1172 *
1173 * We will cache the entire ISM segment and save the pplist for the
1174 * entire segment in the ppa field of the underlying ISM segment structure.
1175 * Later, during a call to segspt_reclaim() we will use this ppa array
1176 * to page_unlock() all of the pages and then we will free this ppa list.
1177 */
1178 /*ARGSUSED*/
1179 static int
1180 segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
1181 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1182 {
1183 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1184 struct seg *sptseg = shmd->shm_sptseg;
1185 struct spt_data *sptd = sptseg->s_data;
1186 pgcnt_t np, page_index, npages;
1187 caddr_t a, spt_base;
1188 struct page **pplist, **pl, *pp;
1189 struct anon_map *amp;
1190 ulong_t anon_index;
1191 int ret = ENOTSUP;
1192 uint_t pl_built = 0;
1193 struct anon *ap;
1194 struct vnode *vp;
1195 u_offset_t off;
1196
1197 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1198 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
1199
1200
1201 /*
1202 * We want to lock/unlock the entire ISM segment. Therefore,
1203 * we will be using the underlying sptseg and its base address
1204 * and length for the caching arguments.
1205 */
1206 ASSERT(sptseg);
1207 ASSERT(sptd);
1208
1209 if (sptd->spt_flags & SHM_PAGEABLE) {
1210 return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
1211 }
1212
1213 page_index = seg_page(seg, addr);
1214 npages = btopr(len);
1215
1216 /*
1217 * check if the request is larger than number of pages covered
1218 * by amp
1219 */
1220 if (page_index + npages > btopr(sptd->spt_amp->size)) {
1221 *ppp = NULL;
1222 return (ENOTSUP);
1223 }
1224
1225 if (type == L_PAGEUNLOCK) {
1226
1227 ASSERT(sptd->spt_ppa != NULL);
1228
1229 seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size,
1230 sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
1231
1232 /*
1233 * If someone is blocked while unmapping, we purge
1234 * segment page cache and thus reclaim pplist synchronously
1235 * without waiting for seg_pasync_thread. This speeds up
1236 * unmapping in cases where munmap(2) is called, while
1237 * raw async i/o is still in progress or where a thread
1238 * exits on data fault in a multithreaded application.
1239 */
1240 if (AS_ISUNMAPWAIT(seg->s_as) && (shmd->shm_softlockcnt > 0)) {
1241 segspt_purge(seg);
1242 }
1243 return (0);
1244 }
1245
1246 /* The L_PAGELOCK case... */
1247
1248 /*
1249 * First try to find pages in segment page cache, without
1250 * holding the segment lock.
1251 */
1252 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
1253 S_WRITE, SEGP_FORCE_WIRED);
1254 if (pplist != NULL) {
1255 ASSERT(sptd->spt_ppa == pplist);
1256 ASSERT(sptd->spt_ppa[page_index]);
1257 /*
1258 * Since we cache the entire ISM segment, we want to
1259 * set ppp to point to the first slot that corresponds
1260 * to the requested addr, i.e. page_index.
1261 */
1262 *ppp = &(sptd->spt_ppa[page_index]);
1263 return (0);
1264 }
1265
1266 mutex_enter(&sptd->spt_lock);
1267
1268 /*
1269 * try to find pages in segment page cache
1270 */
1271 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
1272 S_WRITE, SEGP_FORCE_WIRED);
1273 if (pplist != NULL) {
1274 ASSERT(sptd->spt_ppa == pplist);
1275 /*
1276 * Since we cache the entire segment, we want to
1277 * set ppp to point to the first slot that corresponds
1278 * to the requested addr, i.e. page_index.
1279 */
1280 mutex_exit(&sptd->spt_lock);
1281 *ppp = &(sptd->spt_ppa[page_index]);
1282 return (0);
1283 }
1284
1285 if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size,
1286 SEGP_FORCE_WIRED) == SEGP_FAIL) {
1287 mutex_exit(&sptd->spt_lock);
1288 *ppp = NULL;
1289 return (ENOTSUP);
1290 }
1291
1292 /*
1293 * No need to worry about protections because ISM pages
1294 * are always rw.
1295 */
1296 pl = pplist = NULL;
1297
1298 /*
1299 * Do we need to build the ppa array?
1300 */
1301 if (sptd->spt_ppa == NULL) {
1302 ASSERT(sptd->spt_ppa == pplist);
1303
1304 spt_base = sptseg->s_base;
1305 pl_built = 1;
1306
1307 /*
1308 * availrmem is decremented once during anon_swap_adjust()
1309 * and is incremented during the anon_unresv(), which is
1310 * called from shm_rm_amp() when the segment is destroyed.
1311 */
1312 amp = sptd->spt_amp;
1313 ASSERT(amp != NULL);
1314
1315 /* pcachecnt is protected by sptd->spt_lock */
1316 ASSERT(sptd->spt_pcachecnt == 0);
1317 pplist = kmem_zalloc(sizeof (page_t *)
1318 * btopr(sptd->spt_amp->size), KM_SLEEP);
1319 pl = pplist;
1320
1321 anon_index = seg_page(sptseg, spt_base);
1322
1323 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
1324 for (a = spt_base; a < (spt_base + sptd->spt_amp->size);
1325 a += PAGESIZE, anon_index++, pplist++) {
1326 ap = anon_get_ptr(amp->ahp, anon_index);
1327 ASSERT(ap != NULL);
1328 swap_xlate(ap, &vp, &off);
1329 pp = page_lookup(vp, off, SE_SHARED);
1330 ASSERT(pp != NULL);
1331 *pplist = pp;
1332 }
1333 ANON_LOCK_EXIT(&amp->a_rwlock);
1334
1335 if (a < (spt_base + sptd->spt_amp->size)) {
1336 ret = ENOTSUP;
1337 goto insert_fail;
1338 }
1339 sptd->spt_ppa = pl;
1340 } else {
1341 /*
1342 * We already have a valid ppa[].
1343 */
1344 pl = sptd->spt_ppa;
1345 }
1346
1347 ASSERT(pl != NULL);
1348
1349 ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size,
1350 sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED,
1351 segspt_reclaim);
1352 if (ret == SEGP_FAIL) {
1353 /*
1354 * seg_pinsert failed. We return
1355 * ENOTSUP, so that the as_pagelock() code will
1356 * then try the slower F_SOFTLOCK path.
1357 */
1358 if (pl_built) {
1359 /*
1360 * No one else has referenced the ppa[].
1361 * We created it and we need to destroy it.
1362 */
1363 sptd->spt_ppa = NULL;
1364 }
1365 ret = ENOTSUP;
1366 goto insert_fail;
1367 }
1368
1369 /*
1370 * In either case, we increment softlockcnt on the 'real' segment.
1371 */
1372 sptd->spt_pcachecnt++;
1373 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
1374
1375 /*
1376 * We can now drop the sptd->spt_lock since the ppa[]
1377 * exists and we have incremented pcachecnt.
1378 */
1379 mutex_exit(&sptd->spt_lock);
1380
1381 /*
1382 * Since we cache the entire segment, we want to
1383 * set ppp to point to the first slot that corresponds
1384 * to the requested addr, i.e. page_index.
1385 */
1386 *ppp = &(sptd->spt_ppa[page_index]);
1387 return (0);
1388
1389 insert_fail:
1390 /*
1391 * We will only reach this code if we tried and failed.
1392 *
1393 * And we can drop the lock on the dummy seg, once we've failed
1394 * to set up a new ppa[].
1395 */
1396 mutex_exit(&sptd->spt_lock);
1397
1398 if (pl_built) {
1399 /*
1400 * We created pl and we need to destroy it.
1401 */
1402 pplist = pl;
1403 np = (((uintptr_t)(a - spt_base)) >> PAGESHIFT);
1404 while (np) {
1405 page_unlock(*pplist);
1406 np--;
1407 pplist++;
1408 }
1409 kmem_free(pl, sizeof (page_t *) * btopr(sptd->spt_amp->size));
1410 }
1411 if (shmd->shm_softlockcnt <= 0) {
1412 if (AS_ISUNMAPWAIT(seg->s_as)) {
1413 mutex_enter(&seg->s_as->a_contents);
1414 if (AS_ISUNMAPWAIT(seg->s_as)) {
1415 AS_CLRUNMAPWAIT(seg->s_as);
1416 cv_broadcast(&seg->s_as->a_cv);
1417 }
1418 mutex_exit(&seg->s_as->a_contents);
1419 }
1420 }
1421 *ppp = NULL;
1422 return (ret);
1423 }
1424
1425 /*
1426 * purge any cached pages in the I/O page cache
1427 */
1428 static void
1429 segspt_purge(struct seg *seg)
1430 {
1431 seg_ppurge(seg, NULL, SEGP_FORCE_WIRED);
1432 }
1433
1434 static int
1435 segspt_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist,
1436 enum seg_rw rw, int async)
1437 {
1438 struct seg *seg = (struct seg *)ptag;
1439 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1440 struct seg *sptseg;
1441 struct spt_data *sptd;
1442 pgcnt_t npages, i, free_availrmem = 0;
1443 int done = 0;
1444
1445 #ifdef lint
1446 addr = addr;
1447 #endif
1448 sptseg = shmd->shm_sptseg;
1449 sptd = sptseg->s_data;
1450 npages = (len >> PAGESHIFT);
1451 ASSERT(npages);
1452 ASSERT(sptd->spt_pcachecnt != 0);
1453 ASSERT(sptd->spt_ppa == pplist);
1454 ASSERT(npages == btopr(sptd->spt_amp->size));
1455 ASSERT(async || AS_LOCK_HELD(seg->s_as));
1456
1457 /*
1458 * Acquire the lock on the dummy seg and destroy the
1459 * ppa array IF this is the last pcachecnt.
1460 */
1461 mutex_enter(&sptd->spt_lock);
1462 if (--sptd->spt_pcachecnt == 0) {
1463 for (i = 0; i < npages; i++) {
1464 if (pplist[i] == NULL) {
1465 continue;
1466 }
1467 if (rw == S_WRITE) {
1468 hat_setrefmod(pplist[i]);
1469 } else {
1470 hat_setref(pplist[i]);
1471 }
1472 if ((sptd->spt_flags & SHM_PAGEABLE) &&
1473 (sptd->spt_ppa_lckcnt[i] == 0))
1474 free_availrmem++;
1475 page_unlock(pplist[i]);
1476 }
1477 if ((sptd->spt_flags & SHM_PAGEABLE) && free_availrmem) {
1478 mutex_enter(&freemem_lock);
1479 availrmem += free_availrmem;
1480 mutex_exit(&freemem_lock);
1481 }
1482 /*
1483 * Since we want to cache/uncache the entire ISM segment,
1484 * we will track the pplist in a segspt specific field
1485 * ppa, that is initialized at the time we add an entry to
1486 * the cache.
1487 */
1488 ASSERT(sptd->spt_pcachecnt == 0);
1489 kmem_free(pplist, sizeof (page_t *) * npages);
1490 sptd->spt_ppa = NULL;
1491 sptd->spt_flags &= ~DISM_PPA_CHANGED;
1492 sptd->spt_gen++;
1493 cv_broadcast(&sptd->spt_cv);
1494 done = 1;
1495 }
1496 mutex_exit(&sptd->spt_lock);
1497
1498 /*
1499 * If we are the pcache async thread, or were called via
1500 * seg_ppurge_wiredpp(), we may not hold the AS lock (in this case the
1501 * async argument is not 0). This means that if softlockcnt drops to 0
1502 * after the decrement below, the address space may get freed. We can't
1503 * allow that, since after the softlock decrement to 0 we still need to
1504 * access the as structure for a possible wakeup of unmap waiters. To
1505 * prevent the disappearance of the as we take this segment's
1506 * shm_segfree_syncmtx. segspt_shmfree() also takes this mutex as a
1507 * barrier to make sure this routine completes before the segment is freed.
1508 *
1509 * The second complication we have to deal with in the async case is
1510 * the possibility of a missed wake up of an unmap wait thread. When we
1511 * don't hold the as lock here we may take the a_contents lock before
1512 * the unmap wait thread that was first to see that softlockcnt was
1513 * still not 0. As a result we'll fail to wake up the unmap wait
1514 * thread. To avoid this race we set the nounmapwait flag in the as
1515 * structure if we drop softlockcnt to 0 when async is not 0. The
1516 * unmapwait thread will not block if this flag is set.
1517 */
1518 if (async)
1519 mutex_enter(&shmd->shm_segfree_syncmtx);
1520
1521 /*
1522 * Now decrement softlockcnt.
1523 */
1524 ASSERT(shmd->shm_softlockcnt > 0);
1525 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
1526
1527 if (shmd->shm_softlockcnt <= 0) {
1528 if (async || AS_ISUNMAPWAIT(seg->s_as)) {
1529 mutex_enter(&seg->s_as->a_contents);
1530 if (async)
1531 AS_SETNOUNMAPWAIT(seg->s_as);
1532 if (AS_ISUNMAPWAIT(seg->s_as)) {
1533 AS_CLRUNMAPWAIT(seg->s_as);
1534 cv_broadcast(&seg->s_as->a_cv);
1535 }
1536 mutex_exit(&seg->s_as->a_contents);
1537 }
1538 }
1539
1540 if (async)
1541 mutex_exit(&shmd->shm_segfree_syncmtx);
1542
1543 return (done);
1544 }
1545
1546 /*
1547 * Do an F_SOFTUNLOCK call over the range requested.
1548 * The range must have already been F_SOFTLOCK'ed.
1549 *
1550 * The calls to acquire and release the anon map lock mutex were
1551 * removed in order to avoid a deadly embrace during a DR
1552 * memory delete operation. (E.g. DR blocks while waiting for an
1553 * exclusive lock on a page that is being used for kaio; the
1554 * thread that will complete the kaio and call segspt_softunlock
1555 * blocks on the anon map lock; another thread holding the anon
1556 * map lock blocks on another page lock via the segspt_shmfault
1557 * -> page_lookup -> page_lookup_create -> page_lock_es code flow.)
1558 *
1559 * The appropriateness of the removal is based upon the following:
1560 * 1. If we are holding a segment's reader lock and the page is held
1561 * shared, then the corresponding element in anonmap which points to
1562 * anon struct cannot change and there is no need to acquire the
1563 * anonymous map lock.
1564 * 2. Threads in segspt_softunlock have a reader lock on the segment
1565 * and already have the shared page lock, so we are guaranteed that
1566 * the anon map slot cannot change and therefore can call anon_get_ptr()
1567 * without grabbing the anonymous map lock.
1568 * 3. Threads that softlock a shared page break copy-on-write, even if
1569 * it's a read. Thus cow faults can be ignored with respect to soft
1570 * unlocking, since the breaking of cow means that the anon slot(s) will
1571 * not be shared.
1572 */
1573 static void
1574 segspt_softunlock(struct seg *seg, caddr_t sptseg_addr,
1575 size_t len, enum seg_rw rw)
1576 {
1577 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1578 struct seg *sptseg;
1579 struct spt_data *sptd;
1580 page_t *pp;
1581 caddr_t adr;
1582 struct vnode *vp;
1583 u_offset_t offset;
1584 ulong_t anon_index;
1585 struct anon_map *amp; /* XXX - for locknest */
1586 struct anon *ap = NULL;
1587 pgcnt_t npages;
1588
1589 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1590
1591 sptseg = shmd->shm_sptseg;
1592 sptd = sptseg->s_data;
1593
1594 /*
1595 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
1596 * and therefore their pages are SE_SHARED locked
1597 * for the entire life of the segment.
1598 */
1599 if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
1600 ((sptd->spt_flags & SHM_PAGEABLE) == 0)) {
1601 goto softlock_decrement;
1602 }
1603
1604 /*
1605 * Any thread is free to do a page_find and
1606 * page_unlock() on the pages within this seg.
1607 *
1608 * We are already holding the as->a_lock on the user's
1609 * real segment, but we need to hold the a_lock on the
1610 * underlying dummy as. This is mostly to satisfy the
1611 * underlying HAT layer.
1612 */
1613 AS_LOCK_ENTER(sptseg->s_as, RW_READER);
1614 hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len);
1615 AS_LOCK_EXIT(sptseg->s_as);
1616
1617 amp = sptd->spt_amp;
1618 ASSERT(amp != NULL);
1619 anon_index = seg_page(sptseg, sptseg_addr);
1620
1621 for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) {
1622 ap = anon_get_ptr(amp->ahp, anon_index++);
1623 ASSERT(ap != NULL);
1624 swap_xlate(ap, &vp, &offset);
1625
1626 /*
1627 * Use page_find() instead of page_lookup() to
1628 * find the page since we know that it has a
1629 * "shared" lock.
1630 */
1631 pp = page_find(vp, offset);
1632 ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1));
1633 if (pp == NULL) {
1634 panic("segspt_softunlock: "
1635 "addr %p, ap %p, vp %p, off %llx",
1636 (void *)adr, (void *)ap, (void *)vp, offset);
1637 /*NOTREACHED*/
1638 }
1639
1640 if (rw == S_WRITE) {
1641 hat_setrefmod(pp);
1642 } else if (rw != S_OTHER) {
1643 hat_setref(pp);
1644 }
1645 page_unlock(pp);
1646 }
1647
1648 softlock_decrement:
1649 npages = btopr(len);
1650 ASSERT(shmd->shm_softlockcnt >= npages);
1651 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -npages);
1652 if (shmd->shm_softlockcnt == 0) {
1653 /*
1654 * All SOFTLOCKS are gone. Wakeup any waiting
1655 * unmappers so they can try again to unmap.
1656 * Check for waiters first without the mutex
1657 * held so we don't always grab the mutex on
1658 * softunlocks.
1659 */
1660 if (AS_ISUNMAPWAIT(seg->s_as)) {
1661 mutex_enter(&seg->s_as->a_contents);
1662 if (AS_ISUNMAPWAIT(seg->s_as)) {
1663 AS_CLRUNMAPWAIT(seg->s_as);
1664 cv_broadcast(&seg->s_as->a_cv);
1665 }
1666 mutex_exit(&seg->s_as->a_contents);
1667 }
1668 }
1669 }
1670
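/*
 * Attach to the shared spt segment: allocate and initialize the
 * per-process shm_data, set up the DISM vpage array if the segment is
 * pageable, and share the spt HAT mappings with this process's address
 * space.
 */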
1671 int
1672 segspt_shmattach(struct seg *seg, caddr_t *argsp)
1673 {
1674 struct shm_data *shmd_arg = (struct shm_data *)argsp;
1675 struct shm_data *shmd;
1676 struct anon_map *shm_amp = shmd_arg->shm_amp;
1677 struct spt_data *sptd;
1678 int error = 0;
1679
1680 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1681
1682 shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP);
1683 if (shmd == NULL)
1684 return (ENOMEM);
1685
1686 shmd->shm_sptas = shmd_arg->shm_sptas;
1687 shmd->shm_amp = shm_amp;
1688 shmd->shm_sptseg = shmd_arg->shm_sptseg;
1689
1690 (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0,
1691 NULL, 0, seg->s_size);
1692
1693 mutex_init(&shmd->shm_segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL);
1694
1695 seg->s_data = (void *)shmd;
1696 seg->s_ops = &segspt_shmops;
1697 seg->s_szc = shmd->shm_sptseg->s_szc;
1698 sptd = shmd->shm_sptseg->s_data;
1699
1700 if (sptd->spt_flags & SHM_PAGEABLE) {
1701 if ((shmd->shm_vpage = kmem_zalloc(btopr(shm_amp->size),
1702 KM_NOSLEEP)) == NULL) {
1703 seg->s_data = (void *)NULL;
1704 kmem_free(shmd, (sizeof (*shmd)));
1705 return (ENOMEM);
1706 }
1707 shmd->shm_lckpgs = 0;
1708 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
1709 if ((error = hat_share(seg->s_as->a_hat, seg->s_base,
1710 shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
1711 seg->s_size, seg->s_szc)) != 0) {
1712 kmem_free(shmd->shm_vpage,
1713 btopr(shm_amp->size));
1714 }
1715 }
1716 } else {
1717 error = hat_share(seg->s_as->a_hat, seg->s_base,
1718 shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
1719 seg->s_size, seg->s_szc);
1720 }
1721 if (error) {
1722 seg->s_szc = 0;
1723 seg->s_data = (void *)NULL;
1724 kmem_free(shmd, (sizeof (*shmd)));
1725 } else {
1726 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1727 shm_amp->refcnt++;
1728 ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1729 }
1730 return (error);
1731 }
1732
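/*
 * Detach this process from the shared segment: purge cached pages if
 * softlocks are outstanding, unshare the HAT mappings and free the
 * per-process segment.
 */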
1733 int
1734 segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize)
1735 {
1736 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1737 int reclaim = 1;
1738
1739 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1740 retry:
1741 if (shmd->shm_softlockcnt > 0) {
1742 if (reclaim == 1) {
1743 segspt_purge(seg);
1744 reclaim = 0;
1745 goto retry;
1746 }
1747 return (EAGAIN);
1748 }
1749
1750 if (ssize != seg->s_size) {
1751 #ifdef DEBUG
1752 cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n",
1753 ssize, seg->s_size);
1754 #endif
1755 return (EINVAL);
1756 }
1757
1758 (void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK,
1759 NULL, 0);
1760 hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc);
1761
1762 seg_free(seg);
1763
1764 return (0);
1765 }
1766
1767 void
1768 segspt_shmfree(struct seg *seg)
1769 {
1770 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1771 struct anon_map *shm_amp = shmd->shm_amp;
1772
1773 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1774
1775 (void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0,
1776 MC_UNLOCK, NULL, 0);
1777
1778 /*
1779 * Need to increment refcnt when attaching
1780 * and decrement when detaching because of dup().
1781 */
1782 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1783 shm_amp->refcnt--;
1784 ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1785
1786 if (shmd->shm_vpage) { /* only for DISM */
1787 kmem_free(shmd->shm_vpage, btopr(shm_amp->size));
1788 shmd->shm_vpage = NULL;
1789 }
1790
1791 /*
1792 * Take shm_segfree_syncmtx lock to let segspt_reclaim() finish if it's
1793 * still working with this segment without holding as lock.
1794 */
1795 ASSERT(shmd->shm_softlockcnt == 0);
1796 mutex_enter(&shmd->shm_segfree_syncmtx);
1797 mutex_destroy(&shmd->shm_segfree_syncmtx);
1798
1799 kmem_free(shmd, sizeof (*shmd));
1800 }
1801
1802 /*ARGSUSED*/
1803 int
1804 segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
1805 {
1806 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1807
1808 /*
1809 * Shared page table is more than shared mapping.
1810 * Individual processes sharing page tables can't change prot
1811 * because there is only one set of page tables.
1812 * This will be allowed after private page table is
1813 * supported.
1814 */
1815 /* need to return correct status error? */
1816 return (0);
1817 }
1818
1819
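/*
 * Handle faults on a DISM segment: bring in the needed pages via
 * spt_anon_getpages() and load (and, for F_SOFTLOCK, lock) the HAT
 * translations in large-page-sized chunks.
 */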
1820 faultcode_t
1821 segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
1822 size_t len, enum fault_type type, enum seg_rw rw)
1823 {
1824 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1825 struct seg *sptseg = shmd->shm_sptseg;
1826 struct as *curspt = shmd->shm_sptas;
1827 struct spt_data *sptd = sptseg->s_data;
1828 pgcnt_t npages;
1829 size_t size;
1830 caddr_t segspt_addr, shm_addr;
1831 page_t **ppa;
1832 int i;
1833 ulong_t an_idx = 0;
1834 int err = 0;
1835 int dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0);
1836 size_t pgsz;
1837 pgcnt_t pgcnt;
1838 caddr_t a;
1839 pgcnt_t pidx;
1840
1841 #ifdef lint
1842 hat = hat;
1843 #endif
1844 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1845
1846 /*
1847 * Because of the way spt is implemented
1848 * the realsize of the segment does not have to be
1849 * equal to the segment size itself. The segment size is
1850 * often in multiples of a page size larger than PAGESIZE.
1851 * The realsize is rounded up to the nearest PAGESIZE
1852 * based on what the user requested. This is a bit of
1853 * ugliness that is historical but not easily fixed
1854 * without re-designing the higher levels of ISM.
1855 */
1856 ASSERT(addr >= seg->s_base);
1857 if (((addr + len) - seg->s_base) > sptd->spt_realsize)
1858 return (FC_NOMAP);
1859 /*
1860 * For all of the following cases except F_PROT, we need to
1861 * make any necessary adjustments to addr and len
1862 * and get all of the necessary page_t's into an array called ppa[].
1863 *
1864 * The code in shmat() forces base addr and len of ISM segment
1865 * to be aligned to largest page size supported. Therefore,
1866 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
1867 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
1868 * in large pagesize chunks, or else we will screw up the HAT
1869 * layer by calling hat_memload_array() with differing page sizes
1870 * over a given virtual range.
1871 */
1872 pgsz = page_get_pagesize(sptseg->s_szc);
1873 pgcnt = page_get_pagecnt(sptseg->s_szc);
1874 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
1875 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
1876 npages = btopr(size);
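	/*
	 * Illustrative example (page size assumed for illustration only):
	 * with a 4M underlying large page and a large-page-aligned
	 * seg->s_base, a fault at s_base + 0x401000 for 0x2000 bytes gives
	 * shm_addr == s_base + 0x400000 and size == 4M, so npages covers
	 * every constituent PAGESIZE page of that chunk and
	 * hat_memload_array() below always operates on whole large pages.
	 */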
1877
1878 /*
1879 * Now we need to convert from addr in segshm to addr in segspt.
1880 */
1881 an_idx = seg_page(seg, shm_addr);
1882 segspt_addr = sptseg->s_base + ptob(an_idx);
1883
1884 ASSERT((segspt_addr + ptob(npages)) <=
1885 (sptseg->s_base + sptd->spt_realsize));
1886 ASSERT(segspt_addr < (sptseg->s_base + sptseg->s_size));
1887
1888 switch (type) {
1889
1890 case F_SOFTLOCK:
1891
1892 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
1893 /*
1894 * Fall through to the F_INVAL case to load up the hat layer
1895 * entries with the HAT_LOAD_LOCK flag.
1896 */
1897 /* FALLTHRU */
1898 case F_INVAL:
1899
1900 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
1901 return (FC_NOMAP);
1902
1903 ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
1904
1905 err = spt_anon_getpages(sptseg, segspt_addr, size, ppa);
1906 if (err != 0) {
1907 if (type == F_SOFTLOCK) {
1908 atomic_add_long((ulong_t *)(
1909 &(shmd->shm_softlockcnt)), -npages);
1910 }
1911 goto dism_err;
1912 }
1913 AS_LOCK_ENTER(sptseg->s_as, RW_READER);
1914 a = segspt_addr;
1915 pidx = 0;
1916 if (type == F_SOFTLOCK) {
1917
1918 /*
1919 * Load up the translation keeping it
1920 * locked and don't unlock the page.
1921 */
1922 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
1923 hat_memload_array(sptseg->s_as->a_hat,
1924 a, pgsz, &ppa[pidx], sptd->spt_prot,
1925 HAT_LOAD_LOCK | HAT_LOAD_SHARE);
1926 }
1927 } else {
1928 /*
1929 * Migrate pages marked for migration
1930 */
1931 if (lgrp_optimizations())
1932 page_migrate(seg, shm_addr, ppa, npages);
1933
1934 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
1935 hat_memload_array(sptseg->s_as->a_hat,
1936 a, pgsz, &ppa[pidx],
1937 sptd->spt_prot,
1938 HAT_LOAD_SHARE);
1939 }
1940
1941 /*
1942 * And now drop the SE_SHARED lock(s).
1943 */
1944 if (dyn_ism_unmap) {
1945 for (i = 0; i < npages; i++) {
1946 page_unlock(ppa[i]);
1947 }
1948 }
1949 }
1950
1951 if (!dyn_ism_unmap) {
1952 if (hat_share(seg->s_as->a_hat, shm_addr,
1953 curspt->a_hat, segspt_addr, ptob(npages),
1954 seg->s_szc) != 0) {
1955 panic("hat_share err in DISM fault");
1956 /* NOTREACHED */
1957 }
1958 if (type == F_INVAL) {
1959 for (i = 0; i < npages; i++) {
1960 page_unlock(ppa[i]);
1961 }
1962 }
1963 }
1964 AS_LOCK_EXIT(sptseg->s_as);
1965 dism_err:
1966 kmem_free(ppa, npages * sizeof (page_t *));
1967 return (err);
1968
1969 case F_SOFTUNLOCK:
1970
1971 /*
1972 * This is a bit ugly, we pass in the real seg pointer,
1973 * but the segspt_addr is the virtual address within the
1974 * dummy seg.
1975 */
1976 segspt_softunlock(seg, segspt_addr, size, rw);
1977 return (0);
1978
1979 case F_PROT:
1980
1981 /*
1982 * This takes care of the unusual case where a user
1983 * allocates a stack in shared memory and a register
1984 * window overflow is written to that stack page before
1985 * it is otherwise modified.
1986 *
1987 * We can get away with this because ISM segments are
1988 * always rw. Other than this unusual case, there
1989 * should be no instances of protection violations.
1990 */
1991 return (0);
1992
1993 default:
1994 #ifdef DEBUG
1995 panic("segspt_dismfault default type?");
1996 #else
1997 return (FC_NOMAP);
1998 #endif
1999 }
2000 }
2001
2002
2003 faultcode_t
2004 segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
2005 size_t len, enum fault_type type, enum seg_rw rw)
2006 {
2007 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2008 struct seg *sptseg = shmd->shm_sptseg;
2009 struct as *curspt = shmd->shm_sptas;
2010 struct spt_data *sptd = sptseg->s_data;
2011 pgcnt_t npages;
2012 size_t size;
2013 caddr_t sptseg_addr, shm_addr;
2014 page_t *pp, **ppa;
2015 int i;
2016 u_offset_t offset;
2017 ulong_t anon_index = 0;
2018 struct vnode *vp;
2019 struct anon_map *amp; /* XXX - for locknest */
2020 struct anon *ap = NULL;
2021 size_t pgsz;
2022 pgcnt_t pgcnt;
2023 caddr_t a;
2024 pgcnt_t pidx;
2025 size_t sz;
2026
2027 #ifdef lint
2028 hat = hat;
2029 #endif
2030
2031 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2032
2033 if (sptd->spt_flags & SHM_PAGEABLE) {
2034 return (segspt_dismfault(hat, seg, addr, len, type, rw));
2035 }
2036
2037 /*
2038 * Because of the way spt is implemented
2039 * the realsize of the segment does not have to be
2040 * equal to the segment size itself. The segment size is
2041 * often in multiples of a page size larger than PAGESIZE.
2042 * The realsize is rounded up to the nearest PAGESIZE
2043 * based on what the user requested. This is a bit of
2044 	 * ugliness that is historical but not easily fixed
2045 * without re-designing the higher levels of ISM.
2046 */
2047 ASSERT(addr >= seg->s_base);
2048 if (((addr + len) - seg->s_base) > sptd->spt_realsize)
2049 return (FC_NOMAP);
2050 /*
2051 * For all of the following cases except F_PROT, we need to
2052 * make any necessary adjustments to addr and len
2053 * and get all of the necessary page_t's into an array called ppa[].
2054 *
2055 * The code in shmat() forces base addr and len of ISM segment
2056 * to be aligned to largest page size supported. Therefore,
2057 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
2058 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
2059 * in large pagesize chunks, or else we will screw up the HAT
2060 * layer by calling hat_memload_array() with differing page sizes
2061 * over a given virtual range.
2062 */
2063 pgsz = page_get_pagesize(sptseg->s_szc);
2064 pgcnt = page_get_pagecnt(sptseg->s_szc);
2065 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
2066 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
2067 npages = btopr(size);
2068
2069 /*
2070 * Now we need to convert from addr in segshm to addr in segspt.
2071 */
2072 anon_index = seg_page(seg, shm_addr);
2073 sptseg_addr = sptseg->s_base + ptob(anon_index);
2074
2075 /*
2076 * And now we may have to adjust npages downward if we have
2077 * exceeded the realsize of the segment or initial anon
2078 * allocations.
2079 */
2080 if ((sptseg_addr + ptob(npages)) >
2081 (sptseg->s_base + sptd->spt_realsize))
2082 size = (sptseg->s_base + sptd->spt_realsize) - sptseg_addr;
2083
2084 npages = btopr(size);
2085
2086 ASSERT(sptseg_addr < (sptseg->s_base + sptseg->s_size));
2087 ASSERT((sptd->spt_flags & SHM_PAGEABLE) == 0);
2088
2089 switch (type) {
2090
2091 case F_SOFTLOCK:
2092
2093 /*
2094 * availrmem is decremented once during anon_swap_adjust()
2095 * and is incremented during the anon_unresv(), which is
2096 * called from shm_rm_amp() when the segment is destroyed.
2097 */
2098 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
2099 /*
2100 * Some platforms assume that ISM pages are SE_SHARED
2101 * locked for the entire life of the segment.
2102 */
2103 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0))
2104 return (0);
2105 /*
2106 * Fall through to the F_INVAL case to load up the hat layer
2107 * entries with the HAT_LOAD_LOCK flag.
2108 */
2109
2110 /* FALLTHRU */
2111 case F_INVAL:
2112
2113 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
2114 return (FC_NOMAP);
2115
2116 /*
2117 * Some platforms that do NOT support DYNAMIC_ISM_UNMAP
2118 * may still rely on this call to hat_share(). That
2119 		 * would imply that those hats can fault on a
2120 * HAT_LOAD_LOCK translation, which would seem
2121 * contradictory.
2122 */
2123 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
2124 if (hat_share(seg->s_as->a_hat, seg->s_base,
2125 curspt->a_hat, sptseg->s_base,
2126 sptseg->s_size, sptseg->s_szc) != 0) {
2127 panic("hat_share error in ISM fault");
2128 /*NOTREACHED*/
2129 }
2130 return (0);
2131 }
2132 ppa = kmem_zalloc(sizeof (page_t *) * npages, KM_SLEEP);
2133
2134 /*
2135 		 * I see no need to lock the real seg here,
2136 		 * because all of our work will be on the underlying
2137 * dummy seg.
2138 *
2139 * sptseg_addr and npages now account for large pages.
2140 */
2141 amp = sptd->spt_amp;
2142 ASSERT(amp != NULL);
2143 anon_index = seg_page(sptseg, sptseg_addr);
2144
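		/*
		 * For a non-pageable ISM segment every anon slot was
		 * populated when the segment was created, so each
		 * anon_get_ptr() below is expected to succeed; the pages are
		 * looked up SE_SHARED so they can be handed to
		 * hat_memload_array() further down.
		 */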
2145 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2146 for (i = 0; i < npages; i++) {
2147 ap = anon_get_ptr(amp->ahp, anon_index++);
2148 ASSERT(ap != NULL);
2149 swap_xlate(ap, &vp, &offset);
2150 pp = page_lookup(vp, offset, SE_SHARED);
2151 ASSERT(pp != NULL);
2152 ppa[i] = pp;
2153 }
2154 		ANON_LOCK_EXIT(&amp->a_rwlock);
2155 ASSERT(i == npages);
2156
2157 /*
2158 * We are already holding the as->a_lock on the user's
2159 * real segment, but we need to hold the a_lock on the
2160 * underlying dummy as. This is mostly to satisfy the
2161 * underlying HAT layer.
2162 */
2163 AS_LOCK_ENTER(sptseg->s_as, RW_READER);
2164 a = sptseg_addr;
2165 pidx = 0;
2166 if (type == F_SOFTLOCK) {
2167 /*
2168 * Load up the translation keeping it
2169 * locked and don't unlock the page.
2170 */
2171 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
2172 sz = MIN(pgsz, ptob(npages - pidx));
2173 hat_memload_array(sptseg->s_as->a_hat, a,
2174 sz, &ppa[pidx], sptd->spt_prot,
2175 HAT_LOAD_LOCK | HAT_LOAD_SHARE);
2176 }
2177 } else {
2178 /*
2179 * Migrate pages marked for migration.
2180 */
2181 if (lgrp_optimizations())
2182 page_migrate(seg, shm_addr, ppa, npages);
2183
2184 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
2185 sz = MIN(pgsz, ptob(npages - pidx));
2186 hat_memload_array(sptseg->s_as->a_hat,
2187 a, sz, &ppa[pidx],
2188 sptd->spt_prot, HAT_LOAD_SHARE);
2189 }
2190
2191 /*
2192 * And now drop the SE_SHARED lock(s).
2193 */
2194 for (i = 0; i < npages; i++)
2195 page_unlock(ppa[i]);
2196 }
2197 AS_LOCK_EXIT(sptseg->s_as);
2198
2199 kmem_free(ppa, sizeof (page_t *) * npages);
2200 return (0);
2201 case F_SOFTUNLOCK:
2202
2203 /*
2204 * This is a bit ugly, we pass in the real seg pointer,
2205 * but the sptseg_addr is the virtual address within the
2206 * dummy seg.
2207 */
2208 segspt_softunlock(seg, sptseg_addr, ptob(npages), rw);
2209 return (0);
2210
2211 case F_PROT:
2212
2213 /*
2214 * This takes care of the unusual case where a user
2215 * allocates a stack in shared memory and a register
2216 * window overflow is written to that stack page before
2217 * it is otherwise modified.
2218 *
2219 * We can get away with this because ISM segments are
2220 * always rw. Other than this unusual case, there
2221 * should be no instances of protection violations.
2222 */
2223 return (0);
2224
2225 default:
2226 #ifdef DEBUG
2227 cmn_err(CE_WARN, "segspt_shmfault default type?");
2228 #endif
2229 return (FC_NOMAP);
2230 }
2231 }
2232
2233 /*ARGSUSED*/
2234 static faultcode_t
2235 segspt_shmfaulta(struct seg *seg, caddr_t addr)
2236 {
2237 return (0);
2238 }
2239
2240 /*ARGSUSED*/
2241 static int
2242 segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta)
2243 {
2244 return (0);
2245 }
2246
2247 /*ARGSUSED*/
2248 static size_t
2249 segspt_shmswapout(struct seg *seg)
2250 {
2251 return (0);
2252 }
2253
2254 /*
2255 * duplicate the shared page tables
2256 */
2257 int
2258 segspt_shmdup(struct seg *seg, struct seg *newseg)
2259 {
2260 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2261 struct anon_map *amp = shmd->shm_amp;
2262 struct shm_data *shmd_new;
2263 struct seg *spt_seg = shmd->shm_sptseg;
2264 struct spt_data *sptd = spt_seg->s_data;
2265 int error = 0;
2266
2267 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
2268
2269 shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP);
2270 newseg->s_data = (void *)shmd_new;
2271 shmd_new->shm_sptas = shmd->shm_sptas;
2272 shmd_new->shm_amp = amp;
2273 shmd_new->shm_sptseg = shmd->shm_sptseg;
2274 newseg->s_ops = &segspt_shmops;
2275 newseg->s_szc = seg->s_szc;
2276 ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc);
2277
2278 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
2279 amp->refcnt++;
2280 	ANON_LOCK_EXIT(&amp->a_rwlock);
2281
2282 if (sptd->spt_flags & SHM_PAGEABLE) {
2283 shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP);
2284 shmd_new->shm_lckpgs = 0;
2285 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
2286 if ((error = hat_share(newseg->s_as->a_hat,
2287 newseg->s_base, shmd->shm_sptas->a_hat, SEGSPTADDR,
2288 seg->s_size, seg->s_szc)) != 0) {
2289 kmem_free(shmd_new->shm_vpage,
2290 btopr(amp->size));
2291 }
2292 }
2293 return (error);
2294 } else {
2295 return (hat_share(newseg->s_as->a_hat, newseg->s_base,
2296 shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size,
2297 seg->s_szc));
2298
2299 }
2300 }
2301
2302 /*ARGSUSED*/
2303 int
2304 segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
2305 {
2306 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2307 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2308
2309 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2310
2311 /*
2312 * ISM segment is always rw.
2313 */
2314 return (((sptd->spt_prot & prot) != prot) ? EACCES : 0);
2315 }
2316
2317 /*
2318 * Return an array of locked large pages, for empty slots allocate
2319 * private zero-filled anon pages.
2320 */
2321 static int
2322 spt_anon_getpages(
2323 struct seg *sptseg,
2324 caddr_t sptaddr,
2325 size_t len,
2326 page_t *ppa[])
2327 {
2328 struct spt_data *sptd = sptseg->s_data;
2329 struct anon_map *amp = sptd->spt_amp;
2330 enum seg_rw rw = sptd->spt_prot;
2331 uint_t szc = sptseg->s_szc;
2332 size_t pg_sz, share_sz = page_get_pagesize(szc);
2333 pgcnt_t lp_npgs;
2334 caddr_t lp_addr, e_sptaddr;
2335 uint_t vpprot, ppa_szc = 0;
2336 struct vpage *vpage = NULL;
2337 ulong_t j, ppa_idx;
2338 int err, ierr = 0;
2339 pgcnt_t an_idx;
2340 anon_sync_obj_t cookie;
2341 int anon_locked = 0;
2342 pgcnt_t amp_pgs;
2343
2344
2345 ASSERT(IS_P2ALIGNED(sptaddr, share_sz) && IS_P2ALIGNED(len, share_sz));
2346 ASSERT(len != 0);
2347
2348 pg_sz = share_sz;
2349 lp_npgs = btop(pg_sz);
2350 lp_addr = sptaddr;
2351 e_sptaddr = sptaddr + len;
2352 an_idx = seg_page(sptseg, sptaddr);
2353 ppa_idx = 0;
2354
2355 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2356
2357 amp_pgs = page_get_pagecnt(amp->a_szc);
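	/*
	 * amp_pgs is the number of base pages per anon map large page; the
	 * loop below drops and re-enters the anon array lock (cookie) each
	 * time an_idx crosses such a boundary.
	 */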
2358
2359 /*CONSTCOND*/
2360 while (1) {
2361 for (; lp_addr < e_sptaddr;
2362 an_idx += lp_npgs, lp_addr += pg_sz, ppa_idx += lp_npgs) {
2363
2364 /*
2365 * If we're currently locked, and we get to a new
2366 * page, unlock our current anon chunk.
2367 */
2368 if (anon_locked && P2PHASE(an_idx, amp_pgs) == 0) {
2369 anon_array_exit(&cookie);
2370 anon_locked = 0;
2371 }
2372 if (!anon_locked) {
2373 anon_array_enter(amp, an_idx, &cookie);
2374 anon_locked = 1;
2375 }
2376 ppa_szc = (uint_t)-1;
2377 ierr = anon_map_getpages(amp, an_idx, szc, sptseg,
2378 lp_addr, sptd->spt_prot, &vpprot, &ppa[ppa_idx],
2379 &ppa_szc, vpage, rw, 0, segvn_anypgsz, 0, kcred);
2380
2381 if (ierr != 0) {
2382 if (ierr > 0) {
2383 err = FC_MAKE_ERR(ierr);
2384 goto lpgs_err;
2385 }
2386 break;
2387 }
2388 }
2389 if (lp_addr == e_sptaddr) {
2390 break;
2391 }
2392 ASSERT(lp_addr < e_sptaddr);
2393
2394 /*
2395 * ierr == -1 means we failed to allocate a large page.
2396 * so do a size down operation.
2397 *
2398 * ierr == -2 means some other process that privately shares
2399 * pages with this process has allocated a larger page and we
2400 * need to retry with larger pages. So do a size up
2401 * operation. This relies on the fact that large pages are
2402 * never partially shared i.e. if we share any constituent
2403 * page of a large page with another process we must share the
2404 * entire large page. Note this cannot happen for SOFTLOCK
2405 * case, unless current address (lpaddr) is at the beginning
2406 * of the next page size boundary because the other process
2407 * couldn't have relocated locked pages.
2408 */
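		/*
		 * Sketch of the retry (page sizes illustrative): with
		 * segvn_anypgsz set, a failed 4M allocation (ierr == -1)
		 * steps szc down one size (e.g. to 512K) and the loop resumes
		 * at lp_addr, while ierr == -2 steps szc back up instead.
		 */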
2409 ASSERT(ierr == -1 || ierr == -2);
2410 if (segvn_anypgsz) {
2411 ASSERT(ierr == -2 || szc != 0);
2412 ASSERT(ierr == -1 || szc < sptseg->s_szc);
2413 szc = (ierr == -1) ? szc - 1 : szc + 1;
2414 } else {
2415 /*
2416 * For faults and segvn_anypgsz == 0
2417 * we need to be careful not to loop forever
2418 * if existing page is found with szc other
2419 * than 0 or seg->s_szc. This could be due
2420 * to page relocations on behalf of DR or
2421 * more likely large page creation. For this
2422 * case simply re-size to existing page's szc
2423 * if returned by anon_map_getpages().
2424 */
2425 if (ppa_szc == (uint_t)-1) {
2426 szc = (ierr == -1) ? 0 : sptseg->s_szc;
2427 } else {
2428 ASSERT(ppa_szc <= sptseg->s_szc);
2429 ASSERT(ierr == -2 || ppa_szc < szc);
2430 ASSERT(ierr == -1 || ppa_szc > szc);
2431 szc = ppa_szc;
2432 }
2433 }
2434 pg_sz = page_get_pagesize(szc);
2435 lp_npgs = btop(pg_sz);
2436 ASSERT(IS_P2ALIGNED(lp_addr, pg_sz));
2437 }
2438 if (anon_locked) {
2439 anon_array_exit(&cookie);
2440 }
2441 	ANON_LOCK_EXIT(&amp->a_rwlock);
2442 return (0);
2443
2444 lpgs_err:
2445 if (anon_locked) {
2446 anon_array_exit(&cookie);
2447 }
2448 	ANON_LOCK_EXIT(&amp->a_rwlock);
2449 for (j = 0; j < ppa_idx; j++)
2450 page_unlock(ppa[j]);
2451 return (err);
2452 }
2453
2454 /*
2455 * count the number of bytes in a set of spt pages that are currently not
2456 * locked
2457 */
2458 static rctl_qty_t
2459 spt_unlockedbytes(pgcnt_t npages, page_t **ppa)
2460 {
2461 ulong_t i;
2462 rctl_qty_t unlocked = 0;
2463
2464 for (i = 0; i < npages; i++) {
2465 if (ppa[i]->p_lckcnt == 0)
2466 unlocked += PAGESIZE;
2467 }
2468 return (unlocked);
2469 }
2470
2471 extern u_longlong_t randtick(void);
2472 /* number of locks to reserve/skip by spt_lockpages() and spt_unlockpages() */
2473 #define NLCK (NCPU_P2)
2474 /* Random number with a range [0, n-1], n must be power of two */
2475 #define RAND_P2(n) \
2476 ((((long)curthread >> PTR24_LSB) ^ (long)randtick()) & ((n) - 1))
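/*
 * With the above, each reservation batch covers between NLCK and
 * 2 * NLCK - 1 pages; for example, on a machine where NCPU_P2 is 64 a
 * thread reserves 64-127 page locks at a time, so concurrent lockers
 * rarely contend on freemem_lock.
 */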
2477
2478 int
2479 spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
2480 page_t **ppa, ulong_t *lockmap, size_t pos,
2481 rctl_qty_t *locked)
2482 {
2483 struct shm_data *shmd = seg->s_data;
2484 struct spt_data *sptd = shmd->shm_sptseg->s_data;
2485 ulong_t i;
2486 int kernel;
2487 pgcnt_t nlck = 0;
2488 int rv = 0;
2489 int use_reserved = 1;
2490
2491 /* return the number of bytes actually locked */
2492 *locked = 0;
2493
2494 /*
2495 * To avoid contention on freemem_lock, availrmem and pages_locked
2496 * global counters are updated only every nlck locked pages instead of
2497 * every time. Reserve nlck locks up front and deduct from this
2498 * reservation for each page that requires a lock. When the reservation
2499 * is consumed, reserve again. nlck is randomized, so the competing
2500 * threads do not fall into a cyclic lock contention pattern. When
2501 * memory is low, the lock ahead is disabled, and instead page_pp_lock()
2502 * is used to lock pages.
2503 */
2504 for (i = 0; i < npages; anon_index++, pos++, i++) {
2505 if (nlck == 0 && use_reserved == 1) {
2506 nlck = NLCK + RAND_P2(NLCK);
2507 /* if fewer loops left, decrease nlck */
2508 nlck = MIN(nlck, npages - i);
2509 /*
2510 * Reserve nlck locks up front and deduct from this
2511 * reservation for each page that requires a lock. When
2512 * the reservation is consumed, reserve again.
2513 */
2514 mutex_enter(&freemem_lock);
2515 if ((availrmem - nlck) < pages_pp_maximum) {
2516 /* Do not do advance memory reserves */
2517 use_reserved = 0;
2518 } else {
2519 availrmem -= nlck;
2520 pages_locked += nlck;
2521 }
2522 mutex_exit(&freemem_lock);
2523 }
2524 if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) {
2525 if (sptd->spt_ppa_lckcnt[anon_index] <
2526 (ushort_t)DISM_LOCK_MAX) {
2527 if (++sptd->spt_ppa_lckcnt[anon_index] ==
2528 (ushort_t)DISM_LOCK_MAX) {
2529 cmn_err(CE_WARN,
2530 "DISM page lock limit "
2531 "reached on DISM offset 0x%lx\n",
2532 anon_index << PAGESHIFT);
2533 }
2534 kernel = (sptd->spt_ppa &&
2535 sptd->spt_ppa[anon_index]);
2536 if (!page_pp_lock(ppa[i], 0, kernel ||
2537 use_reserved)) {
2538 sptd->spt_ppa_lckcnt[anon_index]--;
2539 rv = EAGAIN;
2540 break;
2541 }
2542 /* if this is a newly locked page, count it */
2543 if (ppa[i]->p_lckcnt == 1) {
2544 if (kernel == 0 && use_reserved == 1)
2545 nlck--;
2546 *locked += PAGESIZE;
2547 }
2548 shmd->shm_lckpgs++;
2549 shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED;
2550 if (lockmap != NULL)
2551 BT_SET(lockmap, pos);
2552 }
2553 }
2554 }
2555 /* Return unused lock reservation */
2556 if (nlck != 0 && use_reserved == 1) {
2557 mutex_enter(&freemem_lock);
2558 availrmem += nlck;
2559 pages_locked -= nlck;
2560 mutex_exit(&freemem_lock);
2561 }
2562
2563 return (rv);
2564 }
2565
2566 int
2567 spt_unlockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
2568 rctl_qty_t *unlocked)
2569 {
2570 struct shm_data *shmd = seg->s_data;
2571 struct spt_data *sptd = shmd->shm_sptseg->s_data;
2572 struct anon_map *amp = sptd->spt_amp;
2573 struct anon *ap;
2574 struct vnode *vp;
2575 u_offset_t off;
2576 struct page *pp;
2577 int kernel;
2578 anon_sync_obj_t cookie;
2579 ulong_t i;
2580 pgcnt_t nlck = 0;
2581 pgcnt_t nlck_limit = NLCK;
2582
2583 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2584 for (i = 0; i < npages; i++, anon_index++) {
2585 if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
2586 anon_array_enter(amp, anon_index, &cookie);
2587 ap = anon_get_ptr(amp->ahp, anon_index);
2588 ASSERT(ap);
2589
2590 swap_xlate(ap, &vp, &off);
2591 anon_array_exit(&cookie);
2592 pp = page_lookup(vp, off, SE_SHARED);
2593 ASSERT(pp);
2594 /*
2595 			 * availrmem is decremented only for pages which are
2596 			 * not in the seg pcache; for pages in the seg pcache,
2597 			 * availrmem was decremented in _dismpagelock()
2598 */
2599 kernel = (sptd->spt_ppa && sptd->spt_ppa[anon_index]);
2600 ASSERT(pp->p_lckcnt > 0);
2601
2602 /*
2603 			 * unlock the page but do not change availrmem; we
2604 			 * update it ourselves every nlck loops.
2605 */
2606 page_pp_unlock(pp, 0, 1);
2607 if (pp->p_lckcnt == 0) {
2608 if (kernel == 0)
2609 nlck++;
2610 *unlocked += PAGESIZE;
2611 }
2612 page_unlock(pp);
2613 shmd->shm_vpage[anon_index] &= ~DISM_PG_LOCKED;
2614 sptd->spt_ppa_lckcnt[anon_index]--;
2615 shmd->shm_lckpgs--;
2616 }
2617
2618 /*
2619 * To reduce freemem_lock contention, do not update availrmem
2620 * until at least NLCK pages have been unlocked.
2621 * 1. No need to update if nlck is zero
2622 * 2. Always update if the last iteration
2623 */
2624 if (nlck > 0 && (nlck == nlck_limit || i == npages - 1)) {
2625 mutex_enter(&freemem_lock);
2626 availrmem += nlck;
2627 pages_locked -= nlck;
2628 mutex_exit(&freemem_lock);
2629 nlck = 0;
2630 nlck_limit = NLCK + RAND_P2(NLCK);
2631 }
2632 }
2633 	ANON_LOCK_EXIT(&amp->a_rwlock);
2634
2635 return (0);
2636 }
2637
2638 /*ARGSUSED*/
2639 static int
2640 segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
2641 int attr, int op, ulong_t *lockmap, size_t pos)
2642 {
2643 struct shm_data *shmd = seg->s_data;
2644 struct seg *sptseg = shmd->shm_sptseg;
2645 struct spt_data *sptd = sptseg->s_data;
2646 struct kshmid *sp = sptd->spt_amp->a_sp;
2647 pgcnt_t npages, a_npages;
2648 page_t **ppa;
2649 pgcnt_t an_idx, a_an_idx, ppa_idx;
2650 caddr_t spt_addr, a_addr; /* spt and aligned address */
2651 size_t a_len; /* aligned len */
2652 size_t share_sz;
2653 ulong_t i;
2654 int sts = 0;
2655 rctl_qty_t unlocked = 0;
2656 rctl_qty_t locked = 0;
2657 struct proc *p = curproc;
2658 kproject_t *proj;
2659
2660 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2661 ASSERT(sp != NULL);
2662
2663 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
2664 return (0);
2665 }
2666
2667 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2668 an_idx = seg_page(seg, addr);
2669 npages = btopr(len);
2670
2671 if (an_idx + npages > btopr(shmd->shm_amp->size)) {
2672 return (ENOMEM);
2673 }
2674
2675 /*
2676 * A shm's project never changes, so no lock needed.
2677 * The shm has a hold on the project, so it will not go away.
2678 * Since we have a mapping to shm within this zone, we know
2679 * that the zone will not go away.
2680 */
2681 proj = sp->shm_perm.ipc_proj;
2682
2683 if (op == MC_LOCK) {
2684
2685 /*
2686 		 * Need to align addr and size request if they are not
2687 		 * aligned so we can always allocate large page(s); however,
2688 		 * we only lock what was requested in the initial request.
2689 */
2690 share_sz = page_get_pagesize(sptseg->s_szc);
2691 a_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz);
2692 a_len = P2ROUNDUP((uintptr_t)(((addr + len) - a_addr)),
2693 share_sz);
2694 a_npages = btop(a_len);
2695 a_an_idx = seg_page(seg, a_addr);
2696 spt_addr = sptseg->s_base + ptob(a_an_idx);
2697 ppa_idx = an_idx - a_an_idx;
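		/*
		 * Illustrative layout (4M large page assumed): a request to
		 * lock a single PAGESIZE page in the middle of a large page
		 * expands a_addr/a_len to the whole 4M so spt_anon_getpages()
		 * can operate on full large pages, while ppa_idx records
		 * where the requested range begins inside ppa[] so that only
		 * the original npages are locked below.
		 */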
2698
2699 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * a_npages),
2700 KM_NOSLEEP)) == NULL) {
2701 return (ENOMEM);
2702 }
2703
2704 /*
2705 * Don't cache any new pages for IO and
2706 * flush any cached pages.
2707 */
2708 mutex_enter(&sptd->spt_lock);
2709 if (sptd->spt_ppa != NULL)
2710 sptd->spt_flags |= DISM_PPA_CHANGED;
2711
2712 sts = spt_anon_getpages(sptseg, spt_addr, a_len, ppa);
2713 if (sts != 0) {
2714 mutex_exit(&sptd->spt_lock);
2715 kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
2716 return (sts);
2717 }
2718
2719 mutex_enter(&sp->shm_mlock);
2720 /* enforce locked memory rctl */
2721 unlocked = spt_unlockedbytes(npages, &ppa[ppa_idx]);
2722
2723 mutex_enter(&p->p_lock);
2724 if (rctl_incr_locked_mem(p, proj, unlocked, 0)) {
2725 mutex_exit(&p->p_lock);
2726 sts = EAGAIN;
2727 } else {
2728 mutex_exit(&p->p_lock);
2729 sts = spt_lockpages(seg, an_idx, npages,
2730 &ppa[ppa_idx], lockmap, pos, &locked);
2731
2732 /*
2733 * correct locked count if not all pages could be
2734 * locked
2735 */
2736 if ((unlocked - locked) > 0) {
2737 rctl_decr_locked_mem(NULL, proj,
2738 (unlocked - locked), 0);
2739 }
2740 }
2741 /*
2742 * unlock pages
2743 */
2744 for (i = 0; i < a_npages; i++)
2745 page_unlock(ppa[i]);
2746 if (sptd->spt_ppa != NULL)
2747 sptd->spt_flags |= DISM_PPA_CHANGED;
2748 mutex_exit(&sp->shm_mlock);
2749 mutex_exit(&sptd->spt_lock);
2750
2751 kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
2752
2753 } else if (op == MC_UNLOCK) { /* unlock */
2754 page_t **ppa;
2755
2756 mutex_enter(&sptd->spt_lock);
2757 if (shmd->shm_lckpgs == 0) {
2758 mutex_exit(&sptd->spt_lock);
2759 return (0);
2760 }
2761 /*
2762 * Don't cache new IO pages.
2763 */
2764 if (sptd->spt_ppa != NULL)
2765 sptd->spt_flags |= DISM_PPA_CHANGED;
2766
2767 mutex_enter(&sp->shm_mlock);
2768 sts = spt_unlockpages(seg, an_idx, npages, &unlocked);
2769 if ((ppa = sptd->spt_ppa) != NULL)
2770 sptd->spt_flags |= DISM_PPA_CHANGED;
2771 mutex_exit(&sptd->spt_lock);
2772
2773 rctl_decr_locked_mem(NULL, proj, unlocked, 0);
2774 mutex_exit(&sp->shm_mlock);
2775
2776 if (ppa != NULL)
2777 seg_ppurge_wiredpp(ppa);
2778 }
2779 return (sts);
2780 }
2781
2782 /*ARGSUSED*/
2783 int
2784 segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2785 {
2786 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2787 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2788 spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1;
2789
2790 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2791
2792 /*
2793 * ISM segment is always rw.
2794 */
2795 while (--pgno >= 0)
2796 *protv++ = sptd->spt_prot;
2797 return (0);
2798 }
2799
2800 /*ARGSUSED*/
2801 u_offset_t
2802 segspt_shmgetoffset(struct seg *seg, caddr_t addr)
2803 {
2804 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2805
2806 /* Offset does not matter in ISM memory */
2807
2808 return ((u_offset_t)0);
2809 }
2810
2811 /* ARGSUSED */
2812 int
2813 segspt_shmgettype(struct seg *seg, caddr_t addr)
2814 {
2815 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2816 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2817
2818 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2819
2820 /*
2821 	 * The shared memory mapping is always MAP_SHARED; swap is only
2822 	 * reserved for DISM.
2823 */
2824 return (MAP_SHARED |
2825 ((sptd->spt_flags & SHM_PAGEABLE) ? 0 : MAP_NORESERVE));
2826 }
2827
2828 /*ARGSUSED*/
2829 int
2830 segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
2831 {
2832 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2833 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2834
2835 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2836
2837 *vpp = sptd->spt_vp;
2838 return (0);
2839 }
2840
2841 /*
2842 * We need to wait for pending IO to complete to a DISM segment in order for
2843 * pages to get kicked out of the seg_pcache. 120 seconds should be more
2844 * than enough time to wait.
2845 */
2846 static clock_t spt_pcache_wait = 120;
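/*
 * spt_pcache_wait is expressed in seconds; segspt_shmadvise() scales it by
 * hz when computing the cv_timedwait_sig() deadline below.
 */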
2847
2848 /*ARGSUSED*/
2849 static int
2850 segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2851 {
2852 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2853 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2854 struct anon_map *amp;
2855 pgcnt_t pg_idx;
2856 ushort_t gen;
2857 clock_t end_lbolt;
2858 int writer;
2859 page_t **ppa;
2860
2861 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2862
2863 if (behav == MADV_FREE || behav == MADV_PURGE) {
2864 if ((sptd->spt_flags & SHM_PAGEABLE) == 0)
2865 return (0);
2866
2867 amp = sptd->spt_amp;
2868 pg_idx = seg_page(seg, addr);
2869
2870 mutex_enter(&sptd->spt_lock);
2871 if ((ppa = sptd->spt_ppa) == NULL) {
2872 mutex_exit(&sptd->spt_lock);
2873 			ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2874 (void) anon_disclaim(amp, pg_idx, len, behav, NULL);
2875 			ANON_LOCK_EXIT(&amp->a_rwlock);
2876 return (0);
2877 }
2878
2879 sptd->spt_flags |= DISM_PPA_CHANGED;
2880 gen = sptd->spt_gen;
2881
2882 mutex_exit(&sptd->spt_lock);
2883
2884 /*
2885 * Purge all DISM cached pages
2886 */
2887 seg_ppurge_wiredpp(ppa);
2888
2889 /*
2890 * Drop the AS_LOCK so that other threads can grab it
2891 * in the as_pageunlock path and hopefully get the segment
2892 * kicked out of the seg_pcache. We bump the shm_softlockcnt
2893 * to keep this segment resident.
2894 */
2895 writer = AS_WRITE_HELD(seg->s_as);
2896 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2897 AS_LOCK_EXIT(seg->s_as);
2898
2899 mutex_enter(&sptd->spt_lock);
2900
2901 end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait);
2902
2903 /*
2904 * Try to wait for pages to get kicked out of the seg_pcache.
2905 */
2906 while (sptd->spt_gen == gen &&
2907 (sptd->spt_flags & DISM_PPA_CHANGED) &&
2908 ddi_get_lbolt() < end_lbolt) {
2909 if (!cv_timedwait_sig(&sptd->spt_cv,
2910 &sptd->spt_lock, end_lbolt)) {
2911 break;
2912 }
2913 }
2914
2915 mutex_exit(&sptd->spt_lock);
2916
2917 /* Regrab the AS_LOCK and release our hold on the segment */
2918 AS_LOCK_ENTER(seg->s_as, writer ? RW_WRITER : RW_READER);
2919 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2920 if (shmd->shm_softlockcnt <= 0) {
2921 if (AS_ISUNMAPWAIT(seg->s_as)) {
2922 mutex_enter(&seg->s_as->a_contents);
2923 if (AS_ISUNMAPWAIT(seg->s_as)) {
2924 AS_CLRUNMAPWAIT(seg->s_as);
2925 cv_broadcast(&seg->s_as->a_cv);
2926 }
2927 mutex_exit(&seg->s_as->a_contents);
2928 }
2929 }
2930
2931 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2932 (void) anon_disclaim(amp, pg_idx, len, behav, NULL);
2933 		ANON_LOCK_EXIT(&amp->a_rwlock);
2934 } else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP ||
2935 behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) {
2936 int already_set;
2937 ulong_t anon_index;
2938 lgrp_mem_policy_t policy;
2939 caddr_t shm_addr;
2940 size_t share_size;
2941 size_t size;
2942 struct seg *sptseg = shmd->shm_sptseg;
2943 caddr_t sptseg_addr;
2944
2945 /*
2946 * Align address and length to page size of underlying segment
2947 */
2948 share_size = page_get_pagesize(shmd->shm_sptseg->s_szc);
2949 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size);
2950 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)),
2951 share_size);
2952
2953 amp = shmd->shm_amp;
2954 anon_index = seg_page(seg, shm_addr);
2955
2956 /*
2957 * And now we may have to adjust size downward if we have
2958 * exceeded the realsize of the segment or initial anon
2959 * allocations.
2960 */
2961 sptseg_addr = sptseg->s_base + ptob(anon_index);
2962 if ((sptseg_addr + size) >
2963 (sptseg->s_base + sptd->spt_realsize))
2964 size = (sptseg->s_base + sptd->spt_realsize) -
2965 sptseg_addr;
2966
2967 /*
2968 * Set memory allocation policy for this segment
2969 */
2970 policy = lgrp_madv_to_policy(behav, len, MAP_SHARED);
2971 already_set = lgrp_shm_policy_set(policy, amp, anon_index,
2972 NULL, 0, len);
2973
2974 /*
2975 * If random memory allocation policy set already,
2976 * don't bother reapplying it.
2977 */
2978 if (already_set && !LGRP_MEM_POLICY_REAPPLICABLE(policy))
2979 return (0);
2980
2981 /*
2982 * Mark any existing pages in the given range for
2983 		 * migration, flushing the I/O page cache, and using the
2984 		 * underlying segment to calculate the anon index and to
2985 		 * get the anon map and vnode pointer from.
2986 */
2987 if (shmd->shm_softlockcnt > 0)
2988 segspt_purge(seg);
2989
2990 page_mark_migrate(seg, shm_addr, size, amp, 0, NULL, 0, 0);
2991 }
2992
2993 return (0);
2994 }
2995
2996 /*ARGSUSED*/
2997 void
2998 segspt_shmdump(struct seg *seg)
2999 {
3000 /* no-op for ISM segment */
3001 }
3002
3003 /*ARGSUSED*/
3004 static faultcode_t
3005 segspt_shmsetpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
3006 {
3007 return (ENOTSUP);
3008 }
3009
3010 /*
3011 * get a memory ID for an addr in a given segment
3012 */
3013 static int
3014 segspt_shmgetmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
3015 {
3016 struct shm_data *shmd = (struct shm_data *)seg->s_data;
3017 struct anon *ap;
3018 size_t anon_index;
3019 struct anon_map *amp = shmd->shm_amp;
3020 struct spt_data *sptd = shmd->shm_sptseg->s_data;
3021 struct seg *sptseg = shmd->shm_sptseg;
3022 anon_sync_obj_t cookie;
3023
3024 anon_index = seg_page(seg, addr);
3025
3026 if (addr > (seg->s_base + sptd->spt_realsize)) {
3027 return (EFAULT);
3028 }
3029
3030 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
3031 anon_array_enter(amp, anon_index, &cookie);
3032 ap = anon_get_ptr(amp->ahp, anon_index);
3033 if (ap == NULL) {
3034 struct page *pp;
3035 caddr_t spt_addr = sptseg->s_base + ptob(anon_index);
3036
3037 pp = anon_zero(sptseg, spt_addr, &ap, kcred);
3038 if (pp == NULL) {
3039 anon_array_exit(&cookie);
3040 			ANON_LOCK_EXIT(&amp->a_rwlock);
3041 return (ENOMEM);
3042 }
3043 (void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP);
3044 page_unlock(pp);
3045 }
3046 anon_array_exit(&cookie);
3047 	ANON_LOCK_EXIT(&amp->a_rwlock);
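	/*
	 * The memory ID for an ISM/DISM address is the anon slot pointer for
	 * the backing page plus the byte offset of addr within that page.
	 */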
3048 memidp->val[0] = (uintptr_t)ap;
3049 memidp->val[1] = (uintptr_t)addr & PAGEOFFSET;
3050 return (0);
3051 }
3052
3053 /*
3054 * Get memory allocation policy info for specified address in given segment
3055 */
3056 static lgrp_mem_policy_info_t *
3057 segspt_shmgetpolicy(struct seg *seg, caddr_t addr)
3058 {
3059 struct anon_map *amp;
3060 ulong_t anon_index;
3061 lgrp_mem_policy_info_t *policy_info;
3062 struct shm_data *shm_data;
3063
3064 ASSERT(seg != NULL);
3065
3066 /*
3067 * Get anon_map from segshm
3068 *
3069 * Assume that no lock needs to be held on anon_map, since
3070 * it should be protected by its reference count which must be
3071 * nonzero for an existing segment
3072 * Need to grab readers lock on policy tree though
3073 */
3074 shm_data = (struct shm_data *)seg->s_data;
3075 if (shm_data == NULL)
3076 return (NULL);
3077 amp = shm_data->shm_amp;
3078 ASSERT(amp->refcnt != 0);
3079
3080 /*
3081 * Get policy info
3082 *
3083 * Assume starting anon index of 0
3084 */
3085 anon_index = seg_page(seg, addr);
3086 policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);
3087
3088 return (policy_info);
3089 }
3090
3091 /*ARGSUSED*/
3092 static int
3093 segspt_shmcapable(struct seg *seg, segcapability_t capability)
3094 {
3095 return (0);
3096 }
3097