1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
24 */
25
26 #include <sys/param.h>
27 #include <sys/user.h>
28 #include <sys/mman.h>
29 #include <sys/kmem.h>
30 #include <sys/sysmacros.h>
31 #include <sys/cmn_err.h>
32 #include <sys/systm.h>
33 #include <sys/tuneable.h>
34 #include <vm/hat.h>
35 #include <vm/seg.h>
36 #include <vm/as.h>
37 #include <vm/anon.h>
38 #include <vm/page.h>
39 #include <sys/buf.h>
40 #include <sys/swap.h>
41 #include <sys/atomic.h>
42 #include <vm/seg_spt.h>
43 #include <sys/debug.h>
44 #include <sys/vtrace.h>
45 #include <sys/shm.h>
46 #include <sys/shm_impl.h>
47 #include <sys/lgrp.h>
48 #include <sys/vmsystm.h>
49 #include <sys/policy.h>
50 #include <sys/project.h>
51 #include <sys/tnf_probe.h>
52 #include <sys/zone.h>
53
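/*
 * The spt segment lives in its own dummy address space and is always
 * mapped at virtual address 0 within that as (see sptcreate() below).
 */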
54 #define SEGSPTADDR (caddr_t)0x0
55
56 /*
57 * # pages used for spt
58 */
59 size_t spt_used;
60
61 /*
62  * segspt_minfree is the memory left for the system after ISM
63  * has locked its pages; it is set to 5% of availrmem in
64  * sptcreate when ISM is created. ISM should not use more
65  * than ~90% of availrmem; if it does, then the performance
66  * of the system may decrease. Machines with large memories may
67  * be able to use more memory for ISM, so we set the default
68  * segspt_minfree to 5% (which gives ISM at most 95% of availrmem).
69  * If somebody wants even more memory for ISM (risking hanging
70  * the system) they can patch segspt_minfree to a smaller number.
71 */
72 pgcnt_t segspt_minfree = 0;
73
74 static int segspt_create(struct seg *seg, caddr_t argsp);
75 static int segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize);
76 static void segspt_free(struct seg *seg);
77 static void segspt_free_pages(struct seg *seg, caddr_t addr, size_t len);
78 static lgrp_mem_policy_info_t *segspt_getpolicy(struct seg *seg, caddr_t addr);
79
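/*
 * Apart from unmap, free and getpolicy, the operations on the dummy spt
 * segment are never expected to be called; they are routed to
 * segspt_badop(), which panics. The per-process view of the shared
 * memory is handled by segspt_shmops further below.
 */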
80 static void
81 segspt_badop()
82 {
83 panic("segspt_badop called");
84 /*NOTREACHED*/
85 }
86
87 #define SEGSPT_BADOP(t) (t(*)())segspt_badop
88
89 struct seg_ops segspt_ops = {
90 SEGSPT_BADOP(int), /* dup */
91 segspt_unmap,
92 segspt_free,
93 SEGSPT_BADOP(int), /* fault */
94 SEGSPT_BADOP(faultcode_t), /* faulta */
95 SEGSPT_BADOP(int), /* setprot */
96 SEGSPT_BADOP(int), /* checkprot */
97 SEGSPT_BADOP(int), /* kluster */
98 SEGSPT_BADOP(size_t), /* swapout */
99 SEGSPT_BADOP(int), /* sync */
100 SEGSPT_BADOP(size_t), /* incore */
101 SEGSPT_BADOP(int), /* lockop */
102 SEGSPT_BADOP(int), /* getprot */
103 SEGSPT_BADOP(u_offset_t), /* getoffset */
104 SEGSPT_BADOP(int), /* gettype */
105 SEGSPT_BADOP(int), /* getvp */
106 SEGSPT_BADOP(int), /* advise */
107 SEGSPT_BADOP(void), /* dump */
108 SEGSPT_BADOP(int), /* pagelock */
109 SEGSPT_BADOP(int), /* setpgsz */
110 SEGSPT_BADOP(int), /* getmemid */
111 segspt_getpolicy, /* getpolicy */
112 SEGSPT_BADOP(int), /* capable */
113 seg_inherit_notsup /* inherit */
114 };
115
116 static int segspt_shmdup(struct seg *seg, struct seg *newseg);
117 static int segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize);
118 static void segspt_shmfree(struct seg *seg);
119 static faultcode_t segspt_shmfault(struct hat *hat, struct seg *seg,
120 caddr_t addr, size_t len, enum fault_type type, enum seg_rw rw);
121 static faultcode_t segspt_shmfaulta(struct seg *seg, caddr_t addr);
122 static int segspt_shmsetprot(register struct seg *seg, register caddr_t addr,
123 register size_t len, register uint_t prot);
124 static int segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size,
125 uint_t prot);
126 static int segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta);
127 static size_t segspt_shmswapout(struct seg *seg);
128 static size_t segspt_shmincore(struct seg *seg, caddr_t addr, size_t len,
129 register char *vec);
130 static int segspt_shmsync(struct seg *seg, register caddr_t addr, size_t len,
131 int attr, uint_t flags);
132 static int segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
133 int attr, int op, ulong_t *lockmap, size_t pos);
134 static int segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len,
135 uint_t *protv);
136 static u_offset_t segspt_shmgetoffset(struct seg *seg, caddr_t addr);
137 static int segspt_shmgettype(struct seg *seg, caddr_t addr);
138 static int segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
139 static int segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len,
140 uint_t behav);
141 static void segspt_shmdump(struct seg *seg);
142 static int segspt_shmpagelock(struct seg *, caddr_t, size_t,
143 struct page ***, enum lock_type, enum seg_rw);
144 static int segspt_shmsetpgsz(struct seg *, caddr_t, size_t, uint_t);
145 static int segspt_shmgetmemid(struct seg *, caddr_t, memid_t *);
146 static lgrp_mem_policy_info_t *segspt_shmgetpolicy(struct seg *, caddr_t);
147 static int segspt_shmcapable(struct seg *, segcapability_t);
148
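/*
 * seg_ops vector for the segments each process creates when it attaches
 * to the shared memory (shmat()); these routines do the real work and
 * reach the underlying spt segment through the shm_data hung off s_data.
 */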
149 struct seg_ops segspt_shmops = {
150 segspt_shmdup,
151 segspt_shmunmap,
152 segspt_shmfree,
153 segspt_shmfault,
154 segspt_shmfaulta,
155 segspt_shmsetprot,
156 segspt_shmcheckprot,
157 segspt_shmkluster,
158 segspt_shmswapout,
159 segspt_shmsync,
160 segspt_shmincore,
161 segspt_shmlockop,
162 segspt_shmgetprot,
163 segspt_shmgetoffset,
164 segspt_shmgettype,
165 segspt_shmgetvp,
166 segspt_shmadvise, /* advise */
167 segspt_shmdump,
168 segspt_shmpagelock,
169 segspt_shmsetpgsz,
170 segspt_shmgetmemid,
171 segspt_shmgetpolicy,
172 segspt_shmcapable,
173 seg_inherit_notsup
174 };
175
176 static void segspt_purge(struct seg *seg);
177 static int segspt_reclaim(void *, caddr_t, size_t, struct page **,
178 enum seg_rw, int);
179 static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len,
180 page_t **ppa);
181
182
183
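/*
 * Create the shared page table (spt) segment: allocate a dummy address
 * space and as_map() an spt segment of the given size backed by amp,
 * returning the new segment through *sptseg. A rough sketch of the
 * expected pairing with sptdestroy() (illustrative only):
 *
 *	struct seg *sptseg;
 *	if (sptcreate(size, &sptseg, amp, prot, flags, share_szc) == 0) {
 *		... processes attach/detach via segspt_shmattach() ...
 *		sptdestroy(sptseg->s_as, amp);
 *	}
 */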
184 /*ARGSUSED*/
185 int
186 sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
187 uint_t prot, uint_t flags, uint_t share_szc)
188 {
189 int err;
190 struct as *newas;
191 struct segspt_crargs sptcargs;
192
193 #ifdef DEBUG
194 TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */,
195 tnf_ulong, size, size );
196 #endif
197 	if (segspt_minfree == 0)	/* leave min. 5% of availrmem */
198 		segspt_minfree = availrmem/20; /* for the system */
199
200 if (!hat_supported(HAT_SHARED_PT, (void *)0))
201 return (EINVAL);
202
203 /*
204 * get a new as for this shared memory segment
205 */
206 newas = as_alloc();
207 newas->a_proc = NULL;
208 sptcargs.amp = amp;
209 sptcargs.prot = prot;
210 sptcargs.flags = flags;
211 sptcargs.szc = share_szc;
212 /*
213 * create a shared page table (spt) segment
214 */
215
216 if (err = as_map(newas, SEGSPTADDR, size, segspt_create, &sptcargs)) {
217 as_free(newas);
218 return (err);
219 }
220 *sptseg = sptcargs.seg_spt;
221 return (0);
222 }
223
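/*
 * Tear down the dummy address space set up by sptcreate(); unmapping the
 * spt segment releases its pages via segspt_free().
 */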
224 void
225 sptdestroy(struct as *as, struct anon_map *amp)
226 {
227
228 #ifdef DEBUG
229 TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */);
230 #endif
231 (void) as_unmap(as, SEGSPTADDR, amp->size);
232 as_free(as);
233 }
234
235 /*
236 * called from seg_free().
237 * free (i.e., unlock, unmap, return to free list)
238 * all the pages in the given seg.
239 */
240 void
241 segspt_free(struct seg *seg)
242 {
243 struct spt_data *sptd = (struct spt_data *)seg->s_data;
244
245 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
246
247 if (sptd != NULL) {
248 if (sptd->spt_realsize)
249 segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);
250
251 if (sptd->spt_ppa_lckcnt)
252 kmem_free(sptd->spt_ppa_lckcnt,
253 sizeof (*sptd->spt_ppa_lckcnt)
254 * btopr(sptd->spt_amp->size));
255 kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
256 cv_destroy(&sptd->spt_cv);
257 mutex_destroy(&sptd->spt_lock);
258 kmem_free(sptd, sizeof (*sptd));
259 }
260 }
261
262 /*ARGSUSED*/
263 static int
264 segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
265 uint_t flags)
266 {
267 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
268
269 return (0);
270 }
271
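/*
 * For ISM (non-pageable) segments every page is resident and locked, so
 * the entire range is reported in core. For DISM we walk the anon map
 * and report residency and lock state per page.
 */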
272 /*ARGSUSED*/
273 static size_t
274 segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
275 {
276 caddr_t eo_seg;
277 pgcnt_t npages;
278 struct shm_data *shmd = (struct shm_data *)seg->s_data;
279 struct seg *sptseg;
280 struct spt_data *sptd;
281
282 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
283 #ifdef lint
284 seg = seg;
285 #endif
286 sptseg = shmd->shm_sptseg;
287 sptd = sptseg->s_data;
288
289 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
290 eo_seg = addr + len;
291 while (addr < eo_seg) {
292 /* page exists, and it's locked. */
293 *vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
294 SEG_PAGE_ANON;
295 addr += PAGESIZE;
296 }
297 return (len);
298 } else {
299 struct anon_map *amp = shmd->shm_amp;
300 struct anon *ap;
301 page_t *pp;
302 pgcnt_t anon_index;
303 struct vnode *vp;
304 u_offset_t off;
305 ulong_t i;
306 int ret;
307 anon_sync_obj_t cookie;
308
309 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
310 anon_index = seg_page(seg, addr);
311 npages = btopr(len);
312 if (anon_index + npages > btopr(shmd->shm_amp->size)) {
313 return (EINVAL);
314 }
315 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
316 for (i = 0; i < npages; i++, anon_index++) {
317 ret = 0;
318 anon_array_enter(amp, anon_index, &cookie);
319 ap = anon_get_ptr(amp->ahp, anon_index);
320 if (ap != NULL) {
321 swap_xlate(ap, &vp, &off);
322 anon_array_exit(&cookie);
323 pp = page_lookup_nowait(vp, off, SE_SHARED);
324 if (pp != NULL) {
325 ret |= SEG_PAGE_INCORE | SEG_PAGE_ANON;
326 page_unlock(pp);
327 }
328 } else {
329 anon_array_exit(&cookie);
330 }
331 if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
332 ret |= SEG_PAGE_LOCKED;
333 }
334 *vec++ = (char)ret;
335 }
336 		ANON_LOCK_EXIT(&amp->a_rwlock);
337 return (len);
338 }
339 }
340
341 static int
342 segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
343 {
344 size_t share_size;
345
346 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
347
348 /*
349 * seg.s_size may have been rounded up to the largest page size
350 * in shmat().
351 	 * XXX This should be cleaned up. sptdestroy should take a length
352 * argument which should be the same as sptcreate. Then
353 * this rounding would not be needed (or is done in shm.c)
354 * Only the check for full segment will be needed.
355 *
356 * XXX -- shouldn't raddr == 0 always? These tests don't seem
357 * to be useful at all.
358 */
359 share_size = page_get_pagesize(seg->s_szc);
360 ssize = P2ROUNDUP(ssize, share_size);
361
362 if (raddr == seg->s_base && ssize == seg->s_size) {
363 seg_free(seg);
364 return (0);
365 } else
366 return (EINVAL);
367 }
368
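/*
 * Called back from as_map() (see sptcreate() above). For ISM, all pages
 * are created up front, charged against the locked-memory rctl, locked
 * and loaded into the HAT. For DISM (SHM_PAGEABLE), only the per-page
 * lock-count array is set up (and the anon array is grown to a
 * large-page boundary if needed); pages are created lazily at fault time.
 */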
369 int
370 segspt_create(struct seg *seg, caddr_t argsp)
371 {
372 int err;
373 caddr_t addr = seg->s_base;
374 struct spt_data *sptd;
375 struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp;
376 struct anon_map *amp = sptcargs->amp;
377 struct kshmid *sp = amp->a_sp;
378 struct cred *cred = CRED();
379 ulong_t i, j, anon_index = 0;
380 pgcnt_t npages = btopr(amp->size);
381 struct vnode *vp;
382 page_t **ppa;
383 uint_t hat_flags;
384 size_t pgsz;
385 pgcnt_t pgcnt;
386 caddr_t a;
387 pgcnt_t pidx;
388 size_t sz;
389 proc_t *procp = curproc;
390 rctl_qty_t lockedbytes = 0;
391 kproject_t *proj;
392
393 /*
394 * We are holding the a_lock on the underlying dummy as,
395 * so we can make calls to the HAT layer.
396 */
397 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
398 ASSERT(sp != NULL);
399
400 #ifdef DEBUG
401 TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
402 tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size);
403 #endif
404 if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
405 if (err = anon_swap_adjust(npages))
406 return (err);
407 }
408 err = ENOMEM;
409
410 if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
411 goto out1;
412
413 if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
414 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
415 KM_NOSLEEP)) == NULL)
416 goto out2;
417 }
418
419 mutex_init(&sptd->spt_lock, NULL, MUTEX_DEFAULT, NULL);
420
421 if ((vp = kmem_zalloc(sizeof (*vp), KM_NOSLEEP)) == NULL)
422 goto out3;
423
424 seg->s_ops = &segspt_ops;
425 sptd->spt_vp = vp;
426 sptd->spt_amp = amp;
427 sptd->spt_prot = sptcargs->prot;
428 sptd->spt_flags = sptcargs->flags;
429 seg->s_data = (caddr_t)sptd;
430 sptd->spt_ppa = NULL;
431 sptd->spt_ppa_lckcnt = NULL;
432 seg->s_szc = sptcargs->szc;
433 cv_init(&sptd->spt_cv, NULL, CV_DEFAULT, NULL);
434 sptd->spt_gen = 0;
435
436 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
437 if (seg->s_szc > amp->a_szc) {
438 amp->a_szc = seg->s_szc;
439 }
440 	ANON_LOCK_EXIT(&amp->a_rwlock);
441
442 /*
443 * Set policy to affect initial allocation of pages in
444 * anon_map_createpages()
445 */
446 (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, amp, anon_index,
447 NULL, 0, ptob(npages));
448
449 if (sptcargs->flags & SHM_PAGEABLE) {
450 size_t share_sz;
451 pgcnt_t new_npgs, more_pgs;
452 struct anon_hdr *nahp;
453 zone_t *zone;
454
455 share_sz = page_get_pagesize(seg->s_szc);
456 if (!IS_P2ALIGNED(amp->size, share_sz)) {
457 /*
458 			 * We round the size of the anon array up to a
459 			 * 4 M boundary because we always create 4 M worth
460 			 * of pages when locking and faulting them in; that
461 			 * way we don't have to check for corner cases, e.g.
462 			 * whether there is enough space left to allocate a
463 			 * full 4 M page.
464 */
465 new_npgs = btop(P2ROUNDUP(amp->size, share_sz));
466 more_pgs = new_npgs - npages;
467
468 /*
469 * The zone will never be NULL, as a fully created
470 * shm always has an owning zone.
471 */
472 zone = sp->shm_perm.ipc_zone_ref.zref_zone;
473 ASSERT(zone != NULL);
474 if (anon_resv_zone(ptob(more_pgs), zone) == 0) {
475 err = ENOMEM;
476 goto out4;
477 }
478
479 nahp = anon_create(new_npgs, ANON_SLEEP);
480 			ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
481 (void) anon_copy_ptr(amp->ahp, 0, nahp, 0, npages,
482 ANON_SLEEP);
483 anon_release(amp->ahp, npages);
484 amp->ahp = nahp;
485 ASSERT(amp->swresv == ptob(npages));
486 amp->swresv = amp->size = ptob(new_npgs);
487 			ANON_LOCK_EXIT(&amp->a_rwlock);
488 npages = new_npgs;
489 }
490
491 sptd->spt_ppa_lckcnt = kmem_zalloc(npages *
492 sizeof (*sptd->spt_ppa_lckcnt), KM_SLEEP);
493 sptd->spt_pcachecnt = 0;
494 sptd->spt_realsize = ptob(npages);
495 sptcargs->seg_spt = seg;
496 return (0);
497 }
498
499 /*
500 * get array of pages for each anon slot in amp
501 */
502 if ((err = anon_map_createpages(amp, anon_index, ptob(npages), ppa,
503 seg, addr, S_CREATE, cred)) != 0)
504 goto out4;
505
506 mutex_enter(&sp->shm_mlock);
507
508 	/* May be partially locked, so count bytes to charge for locking */
509 for (i = 0; i < npages; i++)
510 if (ppa[i]->p_lckcnt == 0)
511 lockedbytes += PAGESIZE;
512
513 proj = sp->shm_perm.ipc_proj;
514
515 if (lockedbytes > 0) {
516 mutex_enter(&procp->p_lock);
517 if (rctl_incr_locked_mem(procp, proj, lockedbytes, 0)) {
518 mutex_exit(&procp->p_lock);
519 mutex_exit(&sp->shm_mlock);
520 for (i = 0; i < npages; i++)
521 page_unlock(ppa[i]);
522 err = ENOMEM;
523 goto out4;
524 }
525 mutex_exit(&procp->p_lock);
526 }
527
528 /*
529 * addr is initial address corresponding to the first page on ppa list
530 */
531 for (i = 0; i < npages; i++) {
532 /* attempt to lock all pages */
533 if (page_pp_lock(ppa[i], 0, 1) == 0) {
534 /*
535 * if unable to lock any page, unlock all
536 * of them and return error
537 */
538 for (j = 0; j < i; j++)
539 page_pp_unlock(ppa[j], 0, 1);
540 for (i = 0; i < npages; i++)
541 page_unlock(ppa[i]);
542 rctl_decr_locked_mem(NULL, proj, lockedbytes, 0);
543 mutex_exit(&sp->shm_mlock);
544 err = ENOMEM;
545 goto out4;
546 }
547 }
548 mutex_exit(&sp->shm_mlock);
549
550 /*
551 	 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
552 	 * for the entire life of the segment, for example platforms
553 	 * that do not support Dynamic Reconfiguration.
554 */
555 hat_flags = HAT_LOAD_SHARE;
556 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL))
557 hat_flags |= HAT_LOAD_LOCK;
558
559 /*
560 	 * Load translations one large page at a time
561 * to make sure we don't create mappings bigger than
562 * segment's size code in case underlying pages
563 * are shared with segvn's segment that uses bigger
564 * size code than we do.
565 */
566 pgsz = page_get_pagesize(seg->s_szc);
567 pgcnt = page_get_pagecnt(seg->s_szc);
568 for (a = addr, pidx = 0; pidx < npages; a += pgsz, pidx += pgcnt) {
569 sz = MIN(pgsz, ptob(npages - pidx));
570 hat_memload_array(seg->s_as->a_hat, a, sz,
571 &ppa[pidx], sptd->spt_prot, hat_flags);
572 }
573
574 /*
575 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
576 * we will leave the pages locked SE_SHARED for the life
577 * of the ISM segment. This will prevent any calls to
578 * hat_pageunload() on this ISM segment for those platforms.
579 */
580 if (!(hat_flags & HAT_LOAD_LOCK)) {
581 /*
582 * On platforms that support HAT_DYNAMIC_ISM_UNMAP,
583 * we no longer need to hold the SE_SHARED lock on the pages,
584 * since L_PAGELOCK and F_SOFTLOCK calls will grab the
585 * SE_SHARED lock on the pages as necessary.
586 */
587 for (i = 0; i < npages; i++)
588 page_unlock(ppa[i]);
589 }
590 sptd->spt_pcachecnt = 0;
591 kmem_free(ppa, ((sizeof (page_t *)) * npages));
592 sptd->spt_realsize = ptob(npages);
593 atomic_add_long(&spt_used, npages);
594 sptcargs->seg_spt = seg;
595 return (0);
596
597 out4:
598 seg->s_data = NULL;
599 kmem_free(vp, sizeof (*vp));
600 cv_destroy(&sptd->spt_cv);
601 out3:
602 mutex_destroy(&sptd->spt_lock);
603 if ((sptcargs->flags & SHM_PAGEABLE) == 0)
604 kmem_free(ppa, (sizeof (*ppa) * npages));
605 out2:
606 kmem_free(sptd, sizeof (*sptd));
607 out1:
608 if ((sptcargs->flags & SHM_PAGEABLE) == 0)
609 anon_swap_restore(npages);
610 return (err);
611 }
612
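/*
 * Unload the HAT translations, drop the per-page locks and the
 * locked-memory rctl charge (ISM only), and destroy the anon pages.
 */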
613 /*ARGSUSED*/
614 void
615 segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
616 {
617 struct page *pp;
618 struct spt_data *sptd = (struct spt_data *)seg->s_data;
619 pgcnt_t npages;
620 ulong_t anon_idx;
621 struct anon_map *amp;
622 struct anon *ap;
623 struct vnode *vp;
624 u_offset_t off;
625 uint_t hat_flags;
626 int root = 0;
627 pgcnt_t pgs, curnpgs = 0;
628 page_t *rootpp;
629 rctl_qty_t unlocked_bytes = 0;
630 kproject_t *proj;
631 kshmid_t *sp;
632
633 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
634
635 len = P2ROUNDUP(len, PAGESIZE);
636
637 npages = btop(len);
638
639 hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP;
640 if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
641 (sptd->spt_flags & SHM_PAGEABLE)) {
642 hat_flags = HAT_UNLOAD_UNMAP;
643 }
644
645 hat_unload(seg->s_as->a_hat, addr, len, hat_flags);
646
647 amp = sptd->spt_amp;
648 if (sptd->spt_flags & SHM_PAGEABLE)
649 npages = btop(amp->size);
650
651 ASSERT(amp != NULL);
652
653 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
654 sp = amp->a_sp;
655 proj = sp->shm_perm.ipc_proj;
656 mutex_enter(&sp->shm_mlock);
657 }
658 for (anon_idx = 0; anon_idx < npages; anon_idx++) {
659 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
660 if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
661 panic("segspt_free_pages: null app");
662 /*NOTREACHED*/
663 }
664 } else {
665 if ((ap = anon_get_next_ptr(amp->ahp, &anon_idx))
666 == NULL)
667 continue;
668 }
669 ASSERT(ANON_ISBUSY(anon_get_slot(amp->ahp, anon_idx)) == 0);
670 swap_xlate(ap, &vp, &off);
671
672 /*
673 * If this platform supports HAT_DYNAMIC_ISM_UNMAP,
674 * the pages won't be having SE_SHARED lock at this
675 * point.
676 *
677 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
678 * the pages are still held SE_SHARED locked from the
679 * original segspt_create()
680 *
681 * Our goal is to get SE_EXCL lock on each page, remove
682 * permanent lock on it and invalidate the page.
683 */
684 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
685 if (hat_flags == HAT_UNLOAD_UNMAP)
686 pp = page_lookup(vp, off, SE_EXCL);
687 else {
688 if ((pp = page_find(vp, off)) == NULL) {
689 panic("segspt_free_pages: "
690 "page not locked");
691 /*NOTREACHED*/
692 }
693 if (!page_tryupgrade(pp)) {
694 page_unlock(pp);
695 pp = page_lookup(vp, off, SE_EXCL);
696 }
697 }
698 if (pp == NULL) {
699 panic("segspt_free_pages: "
700 "page not in the system");
701 /*NOTREACHED*/
702 }
703 ASSERT(pp->p_lckcnt > 0);
704 page_pp_unlock(pp, 0, 1);
705 if (pp->p_lckcnt == 0)
706 unlocked_bytes += PAGESIZE;
707 } else {
708 if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL)
709 continue;
710 }
711 /*
712 * It's logical to invalidate the pages here as in most cases
713 * these were created by segspt.
714 */
715 if (pp->p_szc != 0) {
716 if (root == 0) {
717 ASSERT(curnpgs == 0);
718 root = 1;
719 rootpp = pp;
720 pgs = curnpgs = page_get_pagecnt(pp->p_szc);
721 ASSERT(pgs > 1);
722 ASSERT(IS_P2ALIGNED(pgs, pgs));
723 ASSERT(!(page_pptonum(pp) & (pgs - 1)));
724 curnpgs--;
725 } else if ((page_pptonum(pp) & (pgs - 1)) == pgs - 1) {
726 ASSERT(curnpgs == 1);
727 ASSERT(page_pptonum(pp) ==
728 page_pptonum(rootpp) + (pgs - 1));
729 page_destroy_pages(rootpp);
730 root = 0;
731 curnpgs = 0;
732 } else {
733 ASSERT(curnpgs > 1);
734 ASSERT(page_pptonum(pp) ==
735 page_pptonum(rootpp) + (pgs - curnpgs));
736 curnpgs--;
737 }
738 } else {
739 if (root != 0 || curnpgs != 0) {
740 panic("segspt_free_pages: bad large page");
741 /*NOTREACHED*/
742 }
743 /*
744 * Before destroying the pages, we need to take care
745 * of the rctl locked memory accounting. For that
746 			 * we need to calculate the unlocked_bytes.
747 */
748 if (pp->p_lckcnt > 0)
749 unlocked_bytes += PAGESIZE;
750 /*LINTED: constant in conditional context */
751 VN_DISPOSE(pp, B_INVAL, 0, kcred);
752 }
753 }
754 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
755 if (unlocked_bytes > 0)
756 rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
757 mutex_exit(&sp->shm_mlock);
758 }
759 if (root != 0 || curnpgs != 0) {
760 panic("segspt_free_pages: bad large page");
761 /*NOTREACHED*/
762 }
763
764 /*
765 * mark that pages have been released
766 */
767 sptd->spt_realsize = 0;
768
769 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
770 atomic_add_long(&spt_used, -npages);
771 anon_swap_restore(npages);
772 }
773 }
774
775 /*
776 * Get memory allocation policy info for specified address in given segment
777 */
778 static lgrp_mem_policy_info_t *
779 segspt_getpolicy(struct seg *seg, caddr_t addr)
780 {
781 struct anon_map *amp;
782 ulong_t anon_index;
783 lgrp_mem_policy_info_t *policy_info;
784 struct spt_data *spt_data;
785
786 ASSERT(seg != NULL);
787
788 /*
789 * Get anon_map from segspt
790 *
791 * Assume that no lock needs to be held on anon_map, since
792 * it should be protected by its reference count which must be
793 * nonzero for an existing segment
794 * Need to grab readers lock on policy tree though
795 */
796 spt_data = (struct spt_data *)seg->s_data;
797 if (spt_data == NULL)
798 return (NULL);
799 amp = spt_data->spt_amp;
800 ASSERT(amp->refcnt != 0);
801
802 /*
803 * Get policy info
804 *
805 * Assume starting anon index of 0
806 */
807 anon_index = seg_page(seg, addr);
808 policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);
809
810 return (policy_info);
811 }
812
813 /*
814 * DISM only.
815 * Return locked pages over a given range.
816 *
817 * We will cache all DISM locked pages and save the pplist for the
818 * entire segment in the ppa field of the underlying DISM segment structure.
819 * Later, during a call to segspt_reclaim() we will use this ppa array
820 * to page_unlock() all of the pages and then we will free this ppa list.
821 */
822 /*ARGSUSED*/
823 static int
824 segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len,
825 struct page ***ppp, enum lock_type type, enum seg_rw rw)
826 {
827 struct shm_data *shmd = (struct shm_data *)seg->s_data;
828 struct seg *sptseg = shmd->shm_sptseg;
829 struct spt_data *sptd = sptseg->s_data;
830 pgcnt_t pg_idx, npages, tot_npages, npgs;
831 struct page **pplist, **pl, **ppa, *pp;
832 struct anon_map *amp;
833 spgcnt_t an_idx;
834 int ret = ENOTSUP;
835 uint_t pl_built = 0;
836 struct anon *ap;
837 struct vnode *vp;
838 u_offset_t off;
839 pgcnt_t claim_availrmem = 0;
840 uint_t szc;
841
842 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
843 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
844
845 /*
846 * We want to lock/unlock the entire ISM segment. Therefore,
847 	 * we will be using the underlying sptseg and its base address
848 * and length for the caching arguments.
849 */
850 ASSERT(sptseg);
851 ASSERT(sptd);
852
853 pg_idx = seg_page(seg, addr);
854 npages = btopr(len);
855
856 /*
857 * check if the request is larger than number of pages covered
858 * by amp
859 */
860 if (pg_idx + npages > btopr(sptd->spt_amp->size)) {
861 *ppp = NULL;
862 return (ENOTSUP);
863 }
864
865 if (type == L_PAGEUNLOCK) {
866 ASSERT(sptd->spt_ppa != NULL);
867
868 seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size,
869 sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
870
871 /*
872 * If someone is blocked while unmapping, we purge
873 * segment page cache and thus reclaim pplist synchronously
874 * without waiting for seg_pasync_thread. This speeds up
875 * unmapping in cases where munmap(2) is called, while
876 * raw async i/o is still in progress or where a thread
877 * exits on data fault in a multithreaded application.
878 */
879 if ((sptd->spt_flags & DISM_PPA_CHANGED) ||
880 (AS_ISUNMAPWAIT(seg->s_as) &&
881 shmd->shm_softlockcnt > 0)) {
882 segspt_purge(seg);
883 }
884 return (0);
885 }
886
887 /* The L_PAGELOCK case ... */
888
889 if (sptd->spt_flags & DISM_PPA_CHANGED) {
890 segspt_purge(seg);
891 /*
892 		 * for DISM the ppa[] needs to be rebuilt since the
893 		 * number of locked pages could have changed
894 */
895 *ppp = NULL;
896 return (ENOTSUP);
897 }
898
899 /*
900 * First try to find pages in segment page cache, without
901 * holding the segment lock.
902 */
903 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
904 S_WRITE, SEGP_FORCE_WIRED);
905 if (pplist != NULL) {
906 ASSERT(sptd->spt_ppa != NULL);
907 ASSERT(sptd->spt_ppa == pplist);
908 ppa = sptd->spt_ppa;
909 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
910 if (ppa[an_idx] == NULL) {
911 seg_pinactive(seg, NULL, seg->s_base,
912 sptd->spt_amp->size, ppa,
913 S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
914 *ppp = NULL;
915 return (ENOTSUP);
916 }
917 if ((szc = ppa[an_idx]->p_szc) != 0) {
918 npgs = page_get_pagecnt(szc);
919 an_idx = P2ROUNDUP(an_idx + 1, npgs);
920 } else {
921 an_idx++;
922 }
923 }
924 /*
925 * Since we cache the entire DISM segment, we want to
926 * set ppp to point to the first slot that corresponds
927 * to the requested addr, i.e. pg_idx.
928 */
929 *ppp = &(sptd->spt_ppa[pg_idx]);
930 return (0);
931 }
932
933 mutex_enter(&sptd->spt_lock);
934 /*
935 * try to find pages in segment page cache with mutex
936 */
937 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
938 S_WRITE, SEGP_FORCE_WIRED);
939 if (pplist != NULL) {
940 ASSERT(sptd->spt_ppa != NULL);
941 ASSERT(sptd->spt_ppa == pplist);
942 ppa = sptd->spt_ppa;
943 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
944 if (ppa[an_idx] == NULL) {
945 mutex_exit(&sptd->spt_lock);
946 seg_pinactive(seg, NULL, seg->s_base,
947 sptd->spt_amp->size, ppa,
948 S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
949 *ppp = NULL;
950 return (ENOTSUP);
951 }
952 if ((szc = ppa[an_idx]->p_szc) != 0) {
953 npgs = page_get_pagecnt(szc);
954 an_idx = P2ROUNDUP(an_idx + 1, npgs);
955 } else {
956 an_idx++;
957 }
958 }
959 /*
960 * Since we cache the entire DISM segment, we want to
961 * set ppp to point to the first slot that corresponds
962 * to the requested addr, i.e. pg_idx.
963 */
964 mutex_exit(&sptd->spt_lock);
965 *ppp = &(sptd->spt_ppa[pg_idx]);
966 return (0);
967 }
968 if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size,
969 SEGP_FORCE_WIRED) == SEGP_FAIL) {
970 mutex_exit(&sptd->spt_lock);
971 *ppp = NULL;
972 return (ENOTSUP);
973 }
974
975 /*
976 * No need to worry about protections because DISM pages are always rw.
977 */
978 pl = pplist = NULL;
979 amp = sptd->spt_amp;
980
981 /*
982 * Do we need to build the ppa array?
983 */
984 if (sptd->spt_ppa == NULL) {
985 pgcnt_t lpg_cnt = 0;
986
987 pl_built = 1;
988 tot_npages = btopr(sptd->spt_amp->size);
989
990 ASSERT(sptd->spt_pcachecnt == 0);
991 pplist = kmem_zalloc(sizeof (page_t *) * tot_npages, KM_SLEEP);
992 pl = pplist;
993
994 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
995 for (an_idx = 0; an_idx < tot_npages; ) {
996 ap = anon_get_ptr(amp->ahp, an_idx);
997 /*
998 			 * Cache only mlocked pages. For large pages,
999 			 * if one constituent page is mlocked then all
1000 			 * pages of that large page are cached as well.
1001 			 * This allows quick lookups of the
1002 			 * ppa array.
1003 */
1004 if ((ap != NULL) && (lpg_cnt != 0 ||
1005 (sptd->spt_ppa_lckcnt[an_idx] != 0))) {
1006
1007 swap_xlate(ap, &vp, &off);
1008 pp = page_lookup(vp, off, SE_SHARED);
1009 ASSERT(pp != NULL);
1010 if (lpg_cnt == 0) {
1011 lpg_cnt++;
1012 /*
1013 * For a small page, we are done --
1014 					 * lpg_cnt is reset to 0 below.
1015 *
1016 * For a large page, we are guaranteed
1017 * to find the anon structures of all
1018 * constituent pages and a non-zero
1019 * lpg_cnt ensures that we don't test
1020 * for mlock for these. We are done
1021 					 * when lpg_cnt reaches (npgs + 1).
1022 * If we are not the first constituent
1023 * page, restart at the first one.
1024 */
1025 npgs = page_get_pagecnt(pp->p_szc);
1026 if (!IS_P2ALIGNED(an_idx, npgs)) {
1027 an_idx = P2ALIGN(an_idx, npgs);
1028 page_unlock(pp);
1029 continue;
1030 }
1031 }
1032 if (++lpg_cnt > npgs)
1033 lpg_cnt = 0;
1034
1035 /*
1036 * availrmem is decremented only
1037 * for unlocked pages
1038 */
1039 if (sptd->spt_ppa_lckcnt[an_idx] == 0)
1040 claim_availrmem++;
1041 pplist[an_idx] = pp;
1042 }
1043 an_idx++;
1044 }
1045 		ANON_LOCK_EXIT(&amp->a_rwlock);
1046
1047 if (claim_availrmem) {
1048 mutex_enter(&freemem_lock);
1049 if (availrmem < tune.t_minarmem + claim_availrmem) {
1050 mutex_exit(&freemem_lock);
1051 ret = ENOTSUP;
1052 claim_availrmem = 0;
1053 goto insert_fail;
1054 } else {
1055 availrmem -= claim_availrmem;
1056 }
1057 mutex_exit(&freemem_lock);
1058 }
1059
1060 sptd->spt_ppa = pl;
1061 } else {
1062 /*
1063 * We already have a valid ppa[].
1064 */
1065 pl = sptd->spt_ppa;
1066 }
1067
1068 ASSERT(pl != NULL);
1069
1070 ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size,
1071 sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED,
1072 segspt_reclaim);
1073 if (ret == SEGP_FAIL) {
1074 /*
1075 * seg_pinsert failed. We return
1076 * ENOTSUP, so that the as_pagelock() code will
1077 * then try the slower F_SOFTLOCK path.
1078 */
1079 if (pl_built) {
1080 /*
1081 * No one else has referenced the ppa[].
1082 * We created it and we need to destroy it.
1083 */
1084 sptd->spt_ppa = NULL;
1085 }
1086 ret = ENOTSUP;
1087 goto insert_fail;
1088 }
1089
1090 /*
1091 * In either case, we increment softlockcnt on the 'real' segment.
1092 */
1093 sptd->spt_pcachecnt++;
1094 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
1095
1096 ppa = sptd->spt_ppa;
1097 for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
1098 if (ppa[an_idx] == NULL) {
1099 mutex_exit(&sptd->spt_lock);
1100 seg_pinactive(seg, NULL, seg->s_base,
1101 sptd->spt_amp->size,
1102 pl, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
1103 *ppp = NULL;
1104 return (ENOTSUP);
1105 }
1106 if ((szc = ppa[an_idx]->p_szc) != 0) {
1107 npgs = page_get_pagecnt(szc);
1108 an_idx = P2ROUNDUP(an_idx + 1, npgs);
1109 } else {
1110 an_idx++;
1111 }
1112 }
1113 /*
1114 * We can now drop the sptd->spt_lock since the ppa[]
1115 	 * exists and we have incremented pcachecnt.
1116 */
1117 mutex_exit(&sptd->spt_lock);
1118
1119 /*
1120 * Since we cache the entire segment, we want to
1121 * set ppp to point to the first slot that corresponds
1122 * to the requested addr, i.e. pg_idx.
1123 */
1124 *ppp = &(sptd->spt_ppa[pg_idx]);
1125 return (0);
1126
1127 insert_fail:
1128 /*
1129 * We will only reach this code if we tried and failed.
1130 *
1131 * And we can drop the lock on the dummy seg, once we've failed
1132 * to set up a new ppa[].
1133 */
1134 mutex_exit(&sptd->spt_lock);
1135
1136 if (pl_built) {
1137 if (claim_availrmem) {
1138 mutex_enter(&freemem_lock);
1139 availrmem += claim_availrmem;
1140 mutex_exit(&freemem_lock);
1141 }
1142
1143 /*
1144 * We created pl and we need to destroy it.
1145 */
1146 pplist = pl;
1147 for (an_idx = 0; an_idx < tot_npages; an_idx++) {
1148 if (pplist[an_idx] != NULL)
1149 page_unlock(pplist[an_idx]);
1150 }
1151 kmem_free(pl, sizeof (page_t *) * tot_npages);
1152 }
1153
1154 if (shmd->shm_softlockcnt <= 0) {
1155 if (AS_ISUNMAPWAIT(seg->s_as)) {
1156 mutex_enter(&seg->s_as->a_contents);
1157 if (AS_ISUNMAPWAIT(seg->s_as)) {
1158 AS_CLRUNMAPWAIT(seg->s_as);
1159 cv_broadcast(&seg->s_as->a_cv);
1160 }
1161 mutex_exit(&seg->s_as->a_contents);
1162 }
1163 }
1164 *ppp = NULL;
1165 return (ret);
1166 }
1167
1168
1169
1170 /*
1171 * return locked pages over a given range.
1172 *
1173 * We will cache the entire ISM segment and save the pplist for the
1174 * entire segment in the ppa field of the underlying ISM segment structure.
1175 * Later, during a call to segspt_reclaim() we will use this ppa array
1176 * to page_unlock() all of the pages and then we will free this ppa list.
1177 */
1178 /*ARGSUSED*/
1179 static int
1180 segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
1181 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1182 {
1183 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1184 struct seg *sptseg = shmd->shm_sptseg;
1185 struct spt_data *sptd = sptseg->s_data;
1186 pgcnt_t np, page_index, npages;
1187 caddr_t a, spt_base;
1188 struct page **pplist, **pl, *pp;
1189 struct anon_map *amp;
1190 ulong_t anon_index;
1191 int ret = ENOTSUP;
1192 uint_t pl_built = 0;
1193 struct anon *ap;
1194 struct vnode *vp;
1195 u_offset_t off;
1196
1197 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1198 ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
1199
1200
1201 /*
1202 * We want to lock/unlock the entire ISM segment. Therefore,
1203 	 * we will be using the underlying sptseg and its base address
1204 * and length for the caching arguments.
1205 */
1206 ASSERT(sptseg);
1207 ASSERT(sptd);
1208
1209 if (sptd->spt_flags & SHM_PAGEABLE) {
1210 return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
1211 }
1212
1213 page_index = seg_page(seg, addr);
1214 npages = btopr(len);
1215
1216 /*
1217 * check if the request is larger than number of pages covered
1218 * by amp
1219 */
1220 if (page_index + npages > btopr(sptd->spt_amp->size)) {
1221 *ppp = NULL;
1222 return (ENOTSUP);
1223 }
1224
1225 if (type == L_PAGEUNLOCK) {
1226
1227 ASSERT(sptd->spt_ppa != NULL);
1228
1229 seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size,
1230 sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
1231
1232 /*
1233 * If someone is blocked while unmapping, we purge
1234 * segment page cache and thus reclaim pplist synchronously
1235 * without waiting for seg_pasync_thread. This speeds up
1236 * unmapping in cases where munmap(2) is called, while
1237 * raw async i/o is still in progress or where a thread
1238 * exits on data fault in a multithreaded application.
1239 */
1240 if (AS_ISUNMAPWAIT(seg->s_as) && (shmd->shm_softlockcnt > 0)) {
1241 segspt_purge(seg);
1242 }
1243 return (0);
1244 }
1245
1246 /* The L_PAGELOCK case... */
1247
1248 /*
1249 * First try to find pages in segment page cache, without
1250 * holding the segment lock.
1251 */
1252 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
1253 S_WRITE, SEGP_FORCE_WIRED);
1254 if (pplist != NULL) {
1255 ASSERT(sptd->spt_ppa == pplist);
1256 ASSERT(sptd->spt_ppa[page_index]);
1257 /*
1258 * Since we cache the entire ISM segment, we want to
1259 * set ppp to point to the first slot that corresponds
1260 * to the requested addr, i.e. page_index.
1261 */
1262 *ppp = &(sptd->spt_ppa[page_index]);
1263 return (0);
1264 }
1265
1266 mutex_enter(&sptd->spt_lock);
1267
1268 /*
1269 * try to find pages in segment page cache
1270 */
1271 pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
1272 S_WRITE, SEGP_FORCE_WIRED);
1273 if (pplist != NULL) {
1274 ASSERT(sptd->spt_ppa == pplist);
1275 /*
1276 * Since we cache the entire segment, we want to
1277 * set ppp to point to the first slot that corresponds
1278 * to the requested addr, i.e. page_index.
1279 */
1280 mutex_exit(&sptd->spt_lock);
1281 *ppp = &(sptd->spt_ppa[page_index]);
1282 return (0);
1283 }
1284
1285 if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size,
1286 SEGP_FORCE_WIRED) == SEGP_FAIL) {
1287 mutex_exit(&sptd->spt_lock);
1288 *ppp = NULL;
1289 return (ENOTSUP);
1290 }
1291
1292 /*
1293 * No need to worry about protections because ISM pages
1294 * are always rw.
1295 */
1296 pl = pplist = NULL;
1297
1298 /*
1299 * Do we need to build the ppa array?
1300 */
1301 if (sptd->spt_ppa == NULL) {
1302 ASSERT(sptd->spt_ppa == pplist);
1303
1304 spt_base = sptseg->s_base;
1305 pl_built = 1;
1306
1307 /*
1308 * availrmem is decremented once during anon_swap_adjust()
1309 * and is incremented during the anon_unresv(), which is
1310 * called from shm_rm_amp() when the segment is destroyed.
1311 */
1312 amp = sptd->spt_amp;
1313 ASSERT(amp != NULL);
1314
1315 /* pcachecnt is protected by sptd->spt_lock */
1316 ASSERT(sptd->spt_pcachecnt == 0);
1317 pplist = kmem_zalloc(sizeof (page_t *)
1318 * btopr(sptd->spt_amp->size), KM_SLEEP);
1319 pl = pplist;
1320
1321 anon_index = seg_page(sptseg, spt_base);
1322
1323 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
1324 for (a = spt_base; a < (spt_base + sptd->spt_amp->size);
1325 a += PAGESIZE, anon_index++, pplist++) {
1326 ap = anon_get_ptr(amp->ahp, anon_index);
1327 ASSERT(ap != NULL);
1328 swap_xlate(ap, &vp, &off);
1329 pp = page_lookup(vp, off, SE_SHARED);
1330 ASSERT(pp != NULL);
1331 *pplist = pp;
1332 }
1333 		ANON_LOCK_EXIT(&amp->a_rwlock);
1334
1335 if (a < (spt_base + sptd->spt_amp->size)) {
1336 ret = ENOTSUP;
1337 goto insert_fail;
1338 }
1339 sptd->spt_ppa = pl;
1340 } else {
1341 /*
1342 * We already have a valid ppa[].
1343 */
1344 pl = sptd->spt_ppa;
1345 }
1346
1347 ASSERT(pl != NULL);
1348
1349 ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size,
1350 sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED,
1351 segspt_reclaim);
1352 if (ret == SEGP_FAIL) {
1353 /*
1354 * seg_pinsert failed. We return
1355 * ENOTSUP, so that the as_pagelock() code will
1356 * then try the slower F_SOFTLOCK path.
1357 */
1358 if (pl_built) {
1359 /*
1360 * No one else has referenced the ppa[].
1361 * We created it and we need to destroy it.
1362 */
1363 sptd->spt_ppa = NULL;
1364 }
1365 ret = ENOTSUP;
1366 goto insert_fail;
1367 }
1368
1369 /*
1370 * In either case, we increment softlockcnt on the 'real' segment.
1371 */
1372 sptd->spt_pcachecnt++;
1373 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
1374
1375 /*
1376 * We can now drop the sptd->spt_lock since the ppa[]
1377 	 * exists and we have incremented pcachecnt.
1378 */
1379 mutex_exit(&sptd->spt_lock);
1380
1381 /*
1382 * Since we cache the entire segment, we want to
1383 * set ppp to point to the first slot that corresponds
1384 * to the requested addr, i.e. page_index.
1385 */
1386 *ppp = &(sptd->spt_ppa[page_index]);
1387 return (0);
1388
1389 insert_fail:
1390 /*
1391 * We will only reach this code if we tried and failed.
1392 *
1393 * And we can drop the lock on the dummy seg, once we've failed
1394 * to set up a new ppa[].
1395 */
1396 mutex_exit(&sptd->spt_lock);
1397
1398 if (pl_built) {
1399 /*
1400 * We created pl and we need to destroy it.
1401 */
1402 pplist = pl;
1403 np = (((uintptr_t)(a - spt_base)) >> PAGESHIFT);
1404 while (np) {
1405 page_unlock(*pplist);
1406 np--;
1407 pplist++;
1408 }
1409 kmem_free(pl, sizeof (page_t *) * btopr(sptd->spt_amp->size));
1410 }
1411 if (shmd->shm_softlockcnt <= 0) {
1412 if (AS_ISUNMAPWAIT(seg->s_as)) {
1413 mutex_enter(&seg->s_as->a_contents);
1414 if (AS_ISUNMAPWAIT(seg->s_as)) {
1415 AS_CLRUNMAPWAIT(seg->s_as);
1416 cv_broadcast(&seg->s_as->a_cv);
1417 }
1418 mutex_exit(&seg->s_as->a_contents);
1419 }
1420 }
1421 *ppp = NULL;
1422 return (ret);
1423 }
1424
1425 /*
1426 * purge any cached pages in the I/O page cache
1427 */
1428 static void
1429 segspt_purge(struct seg *seg)
1430 {
1431 seg_ppurge(seg, NULL, SEGP_FORCE_WIRED);
1432 }
1433
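/*
 * Pagecache reclaim callback. When the last pcache reference goes away,
 * drop the SE_SHARED locks on the cached ppa[] pages, return any availrmem
 * claimed for unlocked DISM pages, free the ppa[] and wake unmap waiters.
 */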
1434 static int
1435 segspt_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist,
1436 enum seg_rw rw, int async)
1437 {
1438 struct seg *seg = (struct seg *)ptag;
1439 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1440 struct seg *sptseg;
1441 struct spt_data *sptd;
1442 pgcnt_t npages, i, free_availrmem = 0;
1443 int done = 0;
1444
1445 #ifdef lint
1446 addr = addr;
1447 #endif
1448 sptseg = shmd->shm_sptseg;
1449 sptd = sptseg->s_data;
1450 npages = (len >> PAGESHIFT);
1451 ASSERT(npages);
1452 ASSERT(sptd->spt_pcachecnt != 0);
1453 ASSERT(sptd->spt_ppa == pplist);
1454 ASSERT(npages == btopr(sptd->spt_amp->size));
1455 ASSERT(async || AS_LOCK_HELD(seg->s_as));
1456
1457 /*
1458 * Acquire the lock on the dummy seg and destroy the
1459 * ppa array IF this is the last pcachecnt.
1460 */
1461 mutex_enter(&sptd->spt_lock);
1462 if (--sptd->spt_pcachecnt == 0) {
1463 for (i = 0; i < npages; i++) {
1464 if (pplist[i] == NULL) {
1465 continue;
1466 }
1467 if (rw == S_WRITE) {
1468 hat_setrefmod(pplist[i]);
1469 } else {
1470 hat_setref(pplist[i]);
1471 }
1472 if ((sptd->spt_flags & SHM_PAGEABLE) &&
1473 (sptd->spt_ppa_lckcnt[i] == 0))
1474 free_availrmem++;
1475 page_unlock(pplist[i]);
1476 }
1477 if ((sptd->spt_flags & SHM_PAGEABLE) && free_availrmem) {
1478 mutex_enter(&freemem_lock);
1479 availrmem += free_availrmem;
1480 mutex_exit(&freemem_lock);
1481 }
1482 /*
1483 		 * Since we want to cache/uncache the entire ISM segment,
1484 		 * we will track the pplist in a segspt-specific field,
1485 		 * ppa, that is initialized at the time we add an entry to
1486 * the cache.
1487 */
1488 ASSERT(sptd->spt_pcachecnt == 0);
1489 kmem_free(pplist, sizeof (page_t *) * npages);
1490 sptd->spt_ppa = NULL;
1491 sptd->spt_flags &= ~DISM_PPA_CHANGED;
1492 sptd->spt_gen++;
1493 cv_broadcast(&sptd->spt_cv);
1494 done = 1;
1495 }
1496 mutex_exit(&sptd->spt_lock);
1497
1498 /*
1499 * If we are pcache async thread or called via seg_ppurge_wiredpp() we
1500 * may not hold AS lock (in this case async argument is not 0). This
1501 * means if softlockcnt drops to 0 after the decrement below address
1502 * space may get freed. We can't allow it since after softlock
1503 	 * decrement to 0 we still need to access the as structure for possible
1504 * wakeup of unmap waiters. To prevent the disappearance of as we take
1505 * this segment's shm_segfree_syncmtx. segspt_shmfree() also takes
1506 * this mutex as a barrier to make sure this routine completes before
1507 * segment is freed.
1508 *
1509 * The second complication we have to deal with in async case is a
1510 * possibility of missed wake up of unmap wait thread. When we don't
1511 * hold as lock here we may take a_contents lock before unmap wait
1512 * thread that was first to see softlockcnt was still not 0. As a
1513 * result we'll fail to wake up an unmap wait thread. To avoid this
1514 * race we set nounmapwait flag in as structure if we drop softlockcnt
1515 * to 0 if async is not 0. unmapwait thread
1516 * will not block if this flag is set.
1517 */
1518 if (async)
1519 mutex_enter(&shmd->shm_segfree_syncmtx);
1520
1521 /*
1522 * Now decrement softlockcnt.
1523 */
1524 ASSERT(shmd->shm_softlockcnt > 0);
1525 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
1526
1527 if (shmd->shm_softlockcnt <= 0) {
1528 if (async || AS_ISUNMAPWAIT(seg->s_as)) {
1529 mutex_enter(&seg->s_as->a_contents);
1530 if (async)
1531 AS_SETNOUNMAPWAIT(seg->s_as);
1532 if (AS_ISUNMAPWAIT(seg->s_as)) {
1533 AS_CLRUNMAPWAIT(seg->s_as);
1534 cv_broadcast(&seg->s_as->a_cv);
1535 }
1536 mutex_exit(&seg->s_as->a_contents);
1537 }
1538 }
1539
1540 if (async)
1541 mutex_exit(&shmd->shm_segfree_syncmtx);
1542
1543 return (done);
1544 }
1545
1546 /*
1547 * Do a F_SOFTUNLOCK call over the range requested.
1548 * The range must have already been F_SOFTLOCK'ed.
1549 *
1550 * The calls to acquire and release the anon map lock mutex were
1551 * removed in order to avoid a deadly embrace during a DR
1552 	 * memory delete operation. (E.g., DR blocks while waiting for an
1553 	 * exclusive lock on a page that is being used for kaio; the
1554 * thread that will complete the kaio and call segspt_softunlock
1555 * blocks on the anon map lock; another thread holding the anon
1556 * map lock blocks on another page lock via the segspt_shmfault
1557 * -> page_lookup -> page_lookup_create -> page_lock_es code flow.)
1558 *
1559 * The appropriateness of the removal is based upon the following:
1560 * 1. If we are holding a segment's reader lock and the page is held
1561 * shared, then the corresponding element in anonmap which points to
1562 * anon struct cannot change and there is no need to acquire the
1563 * anonymous map lock.
1564 * 2. Threads in segspt_softunlock have a reader lock on the segment
1565 * and already have the shared page lock, so we are guaranteed that
1566 * the anon map slot cannot change and therefore can call anon_get_ptr()
1567 * without grabbing the anonymous map lock.
1568 * 3. Threads that softlock a shared page break copy-on-write, even if
1569 	 *    it's a read. Thus cow faults can be ignored with respect to soft
1570 * unlocking, since the breaking of cow means that the anon slot(s) will
1571 * not be shared.
1572 */
1573 static void
1574 segspt_softunlock(struct seg *seg, caddr_t sptseg_addr,
1575 size_t len, enum seg_rw rw)
1576 {
1577 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1578 struct seg *sptseg;
1579 struct spt_data *sptd;
1580 page_t *pp;
1581 caddr_t adr;
1582 struct vnode *vp;
1583 u_offset_t offset;
1584 ulong_t anon_index;
1585 struct anon_map *amp; /* XXX - for locknest */
1586 struct anon *ap = NULL;
1587 pgcnt_t npages;
1588
1589 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1590
1591 sptseg = shmd->shm_sptseg;
1592 sptd = sptseg->s_data;
1593
1594 /*
1595 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
1596 * and therefore their pages are SE_SHARED locked
1597 * for the entire life of the segment.
1598 */
1599 if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
1600 ((sptd->spt_flags & SHM_PAGEABLE) == 0)) {
1601 goto softlock_decrement;
1602 }
1603
1604 /*
1605 * Any thread is free to do a page_find and
1606 * page_unlock() on the pages within this seg.
1607 *
1608 * We are already holding the as->a_lock on the user's
1609 * real segment, but we need to hold the a_lock on the
1610 * underlying dummy as. This is mostly to satisfy the
1611 * underlying HAT layer.
1612 */
1613 AS_LOCK_ENTER(sptseg->s_as, RW_READER);
1614 hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len);
1615 AS_LOCK_EXIT(sptseg->s_as);
1616
1617 amp = sptd->spt_amp;
1618 ASSERT(amp != NULL);
1619 anon_index = seg_page(sptseg, sptseg_addr);
1620
1621 for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) {
1622 ap = anon_get_ptr(amp->ahp, anon_index++);
1623 ASSERT(ap != NULL);
1624 swap_xlate(ap, &vp, &offset);
1625
1626 /*
1627 * Use page_find() instead of page_lookup() to
1628 * find the page since we know that it has a
1629 * "shared" lock.
1630 */
1631 pp = page_find(vp, offset);
1632 ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1));
1633 if (pp == NULL) {
1634 panic("segspt_softunlock: "
1635 "addr %p, ap %p, vp %p, off %llx",
1636 (void *)adr, (void *)ap, (void *)vp, offset);
1637 /*NOTREACHED*/
1638 }
1639
1640 if (rw == S_WRITE) {
1641 hat_setrefmod(pp);
1642 } else if (rw != S_OTHER) {
1643 hat_setref(pp);
1644 }
1645 page_unlock(pp);
1646 }
1647
1648 softlock_decrement:
1649 npages = btopr(len);
1650 ASSERT(shmd->shm_softlockcnt >= npages);
1651 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -npages);
1652 if (shmd->shm_softlockcnt == 0) {
1653 /*
1654 * All SOFTLOCKS are gone. Wakeup any waiting
1655 * unmappers so they can try again to unmap.
1656 * Check for waiters first without the mutex
1657 * held so we don't always grab the mutex on
1658 * softunlocks.
1659 */
1660 if (AS_ISUNMAPWAIT(seg->s_as)) {
1661 mutex_enter(&seg->s_as->a_contents);
1662 if (AS_ISUNMAPWAIT(seg->s_as)) {
1663 AS_CLRUNMAPWAIT(seg->s_as);
1664 cv_broadcast(&seg->s_as->a_cv);
1665 }
1666 mutex_exit(&seg->s_as->a_contents);
1667 }
1668 }
1669 }
1670
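/*
 * Attach a process segment to the shared spt segment: set up shm_data,
 * share the HAT mappings with the dummy as (for DISM only if the platform
 * supports HAT_DYNAMIC_ISM_UNMAP), and take a reference on the anon map.
 */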
1671 int
1672 segspt_shmattach(struct seg *seg, caddr_t *argsp)
1673 {
1674 struct shm_data *shmd_arg = (struct shm_data *)argsp;
1675 struct shm_data *shmd;
1676 struct anon_map *shm_amp = shmd_arg->shm_amp;
1677 struct spt_data *sptd;
1678 int error = 0;
1679
1680 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1681
1682 shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP);
1683 if (shmd == NULL)
1684 return (ENOMEM);
1685
1686 shmd->shm_sptas = shmd_arg->shm_sptas;
1687 shmd->shm_amp = shm_amp;
1688 shmd->shm_sptseg = shmd_arg->shm_sptseg;
1689
1690 (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0,
1691 NULL, 0, seg->s_size);
1692
1693 mutex_init(&shmd->shm_segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL);
1694
1695 seg->s_data = (void *)shmd;
1696 seg->s_ops = &segspt_shmops;
1697 seg->s_szc = shmd->shm_sptseg->s_szc;
1698 sptd = shmd->shm_sptseg->s_data;
1699
1700 if (sptd->spt_flags & SHM_PAGEABLE) {
1701 if ((shmd->shm_vpage = kmem_zalloc(btopr(shm_amp->size),
1702 KM_NOSLEEP)) == NULL) {
1703 seg->s_data = (void *)NULL;
1704 kmem_free(shmd, (sizeof (*shmd)));
1705 return (ENOMEM);
1706 }
1707 shmd->shm_lckpgs = 0;
1708 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
1709 if ((error = hat_share(seg->s_as->a_hat, seg->s_base,
1710 shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
1711 seg->s_size, seg->s_szc)) != 0) {
1712 kmem_free(shmd->shm_vpage,
1713 btopr(shm_amp->size));
1714 }
1715 }
1716 } else {
1717 error = hat_share(seg->s_as->a_hat, seg->s_base,
1718 shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
1719 seg->s_size, seg->s_szc);
1720 }
1721 if (error) {
1722 seg->s_szc = 0;
1723 seg->s_data = (void *)NULL;
1724 kmem_free(shmd, (sizeof (*shmd)));
1725 } else {
1726 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1727 shm_amp->refcnt++;
1728 ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1729 }
1730 return (error);
1731 }
1732
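/*
 * Detach from the shared memory: refuse (EAGAIN) while softlocks are
 * outstanding (after purging the pcache once and retrying), then unlock,
 * unshare the HAT mappings and free the attaching segment.
 */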
1733 int
1734 segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize)
1735 {
1736 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1737 int reclaim = 1;
1738
1739 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1740 retry:
1741 if (shmd->shm_softlockcnt > 0) {
1742 if (reclaim == 1) {
1743 segspt_purge(seg);
1744 reclaim = 0;
1745 goto retry;
1746 }
1747 return (EAGAIN);
1748 }
1749
1750 if (ssize != seg->s_size) {
1751 #ifdef DEBUG
1752 cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n",
1753 ssize, seg->s_size);
1754 #endif
1755 return (EINVAL);
1756 }
1757
1758 (void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK,
1759 NULL, 0);
1760 hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc);
1761
1762 seg_free(seg);
1763
1764 return (0);
1765 }
1766
1767 void
1768 segspt_shmfree(struct seg *seg)
1769 {
1770 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1771 struct anon_map *shm_amp = shmd->shm_amp;
1772
1773 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1774
1775 (void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0,
1776 MC_UNLOCK, NULL, 0);
1777
1778 /*
1779 * Need to increment refcnt when attaching
1780 * and decrement when detaching because of dup().
1781 */
1782 ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1783 shm_amp->refcnt--;
1784 ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1785
1786 if (shmd->shm_vpage) { /* only for DISM */
1787 kmem_free(shmd->shm_vpage, btopr(shm_amp->size));
1788 shmd->shm_vpage = NULL;
1789 }
1790
1791 /*
1792 * Take shm_segfree_syncmtx lock to let segspt_reclaim() finish if it's
1793 * still working with this segment without holding as lock.
1794 */
1795 ASSERT(shmd->shm_softlockcnt == 0);
1796 mutex_enter(&shmd->shm_segfree_syncmtx);
1797 mutex_destroy(&shmd->shm_segfree_syncmtx);
1798
1799 kmem_free(shmd, sizeof (*shmd));
1800 }
1801
1802 /*ARGSUSED*/
1803 int
1804 segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
1805 {
1806 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1807
1808 /*
1809 * Shared page table is more than shared mapping.
1810 	 * Individual processes sharing the page tables can't change prot
1811 * because there is only one set of page tables.
1812 * This will be allowed after private page table is
1813 * supported.
1814 */
1815 /* need to return correct status error? */
1816 return (0);
1817 }
1818
1819
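/*
 * Fault handler for DISM attach segments. The fault range is expanded to
 * the underlying large page size, the anon pages are brought in via
 * spt_anon_getpages(), and shared (and, for F_SOFTLOCK, locked) HAT
 * translations are loaded. F_SOFTUNLOCK is forwarded to
 * segspt_softunlock() and F_PROT is a no-op.
 */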
1820 faultcode_t
1821 segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
1822 size_t len, enum fault_type type, enum seg_rw rw)
1823 {
1824 struct shm_data *shmd = (struct shm_data *)seg->s_data;
1825 struct seg *sptseg = shmd->shm_sptseg;
1826 struct as *curspt = shmd->shm_sptas;
1827 struct spt_data *sptd = sptseg->s_data;
1828 pgcnt_t npages;
1829 size_t size;
1830 caddr_t segspt_addr, shm_addr;
1831 page_t **ppa;
1832 int i;
1833 ulong_t an_idx = 0;
1834 int err = 0;
1835 int dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0);
1836 size_t pgsz;
1837 pgcnt_t pgcnt;
1838 caddr_t a;
1839 pgcnt_t pidx;
1840
1841 #ifdef lint
1842 hat = hat;
1843 #endif
1844 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1845
1846 /*
1847 * Because of the way spt is implemented
1848 * the realsize of the segment does not have to be
1849 * equal to the segment size itself. The segment size is
1850 * often in multiples of a page size larger than PAGESIZE.
1851 * The realsize is rounded up to the nearest PAGESIZE
1852 * based on what the user requested. This is a bit of
1853 	 * ugliness that is historical but not easily fixed
1854 * without re-designing the higher levels of ISM.
1855 */
1856 ASSERT(addr >= seg->s_base);
1857 if (((addr + len) - seg->s_base) > sptd->spt_realsize)
1858 return (FC_NOMAP);
1859 /*
1860 * For all of the following cases except F_PROT, we need to
1861 * make any necessary adjustments to addr and len
1862 * and get all of the necessary page_t's into an array called ppa[].
1863 *
1864 * The code in shmat() forces base addr and len of ISM segment
1865 * to be aligned to largest page size supported. Therefore,
1866 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
1867 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
1868 * in large pagesize chunks, or else we will screw up the HAT
1869 * layer by calling hat_memload_array() with differing page sizes
1870 * over a given virtual range.
1871 */
1872 pgsz = page_get_pagesize(sptseg->s_szc);
1873 pgcnt = page_get_pagecnt(sptseg->s_szc);
1874 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
1875 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
1876 npages = btopr(size);
1877
1878 /*
1879 * Now we need to convert from addr in segshm to addr in segspt.
1880 */
1881 an_idx = seg_page(seg, shm_addr);
1882 segspt_addr = sptseg->s_base + ptob(an_idx);
1883
1884 ASSERT((segspt_addr + ptob(npages)) <=
1885 (sptseg->s_base + sptd->spt_realsize));
1886 ASSERT(segspt_addr < (sptseg->s_base + sptseg->s_size));
1887
1888 switch (type) {
1889
1890 case F_SOFTLOCK:
1891
1892 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
1893 /*
1894 * Fall through to the F_INVAL case to load up the hat layer
1895 * entries with the HAT_LOAD_LOCK flag.
1896 */
1897 /* FALLTHRU */
1898 case F_INVAL:
1899
1900 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
1901 return (FC_NOMAP);
1902
1903 ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
1904
1905 err = spt_anon_getpages(sptseg, segspt_addr, size, ppa);
1906 if (err != 0) {
1907 if (type == F_SOFTLOCK) {
1908 atomic_add_long((ulong_t *)(
1909 &(shmd->shm_softlockcnt)), -npages);
1910 }
1911 goto dism_err;
1912 }
1913 AS_LOCK_ENTER(sptseg->s_as, RW_READER);
1914 a = segspt_addr;
1915 pidx = 0;
1916 if (type == F_SOFTLOCK) {
1917
1918 /*
1919 * Load up the translation keeping it
1920 * locked and don't unlock the page.
1921 */
1922 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
1923 hat_memload_array(sptseg->s_as->a_hat,
1924 a, pgsz, &ppa[pidx], sptd->spt_prot,
1925 HAT_LOAD_LOCK | HAT_LOAD_SHARE);
1926 }
1927 } else {
1928 if (hat == seg->s_as->a_hat) {
1929
1930 /*
1931 * Migrate pages marked for migration
1932 */
1933 if (lgrp_optimizations())
1934 page_migrate(seg, shm_addr, ppa,
1935 npages);
1936
1937 /* CPU HAT */
1938 for (; pidx < npages;
1939 a += pgsz, pidx += pgcnt) {
1940 hat_memload_array(sptseg->s_as->a_hat,
1941 a, pgsz, &ppa[pidx],
1942 sptd->spt_prot,
1943 HAT_LOAD_SHARE);
1944 }
1945 } else {
1946 /* XHAT. Pass real address */
1947 hat_memload_array(hat, shm_addr,
1948 size, ppa, sptd->spt_prot, HAT_LOAD_SHARE);
1949 }
1950
1951 /*
1952 * And now drop the SE_SHARED lock(s).
1953 */
1954 if (dyn_ism_unmap) {
1955 for (i = 0; i < npages; i++) {
1956 page_unlock(ppa[i]);
1957 }
1958 }
1959 }
1960
1961 if (!dyn_ism_unmap) {
1962 if (hat_share(seg->s_as->a_hat, shm_addr,
1963 curspt->a_hat, segspt_addr, ptob(npages),
1964 seg->s_szc) != 0) {
1965 panic("hat_share err in DISM fault");
1966 /* NOTREACHED */
1967 }
1968 if (type == F_INVAL) {
1969 for (i = 0; i < npages; i++) {
1970 page_unlock(ppa[i]);
1971 }
1972 }
1973 }
1974 AS_LOCK_EXIT(sptseg->s_as);
1975 dism_err:
1976 kmem_free(ppa, npages * sizeof (page_t *));
1977 return (err);
1978
1979 case F_SOFTUNLOCK:
1980
1981 /*
1982 		 * This is a bit ugly: we pass in the real seg pointer,
1983 * but the segspt_addr is the virtual address within the
1984 * dummy seg.
1985 */
1986 segspt_softunlock(seg, segspt_addr, size, rw);
1987 return (0);
1988
1989 case F_PROT:
1990
1991 /*
1992 * This takes care of the unusual case where a user
1993 * allocates a stack in shared memory and a register
1994 * window overflow is written to that stack page before
1995 * it is otherwise modified.
1996 *
1997 * We can get away with this because ISM segments are
1998 * always rw. Other than this unusual case, there
1999 * should be no instances of protection violations.
2000 */
2001 return (0);
2002
2003 default:
2004 #ifdef DEBUG
2005 panic("segspt_dismfault default type?");
2006 #else
2007 return (FC_NOMAP);
2008 #endif
2009 }
2010 }
2011
2012
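/*
 * Fault handler for ISM shared memory segments; DISM (SHM_PAGEABLE)
 * segments are handed off to segspt_dismfault().  ISM anon pages already
 * exist and are locked, so they are simply looked up and their shared
 * translations (re)loaded, either page-by-page with hat_memload_array() or
 * in one shot with hat_share() on platforms without HAT_DYNAMIC_ISM_UNMAP.
 */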
2013 faultcode_t
2014 segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
2015 size_t len, enum fault_type type, enum seg_rw rw)
2016 {
2017 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2018 struct seg *sptseg = shmd->shm_sptseg;
2019 struct as *curspt = shmd->shm_sptas;
2020 struct spt_data *sptd = sptseg->s_data;
2021 pgcnt_t npages;
2022 size_t size;
2023 caddr_t sptseg_addr, shm_addr;
2024 page_t *pp, **ppa;
2025 int i;
2026 u_offset_t offset;
2027 ulong_t anon_index = 0;
2028 struct vnode *vp;
2029 struct anon_map *amp; /* XXX - for locknest */
2030 struct anon *ap = NULL;
2031 size_t pgsz;
2032 pgcnt_t pgcnt;
2033 caddr_t a;
2034 pgcnt_t pidx;
2035 size_t sz;
2036
2037 #ifdef lint
2038 hat = hat;
2039 #endif
2040
2041 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2042
2043 if (sptd->spt_flags & SHM_PAGEABLE) {
2044 return (segspt_dismfault(hat, seg, addr, len, type, rw));
2045 }
2046
2047 /*
2048 * Because of the way spt is implemented
2049 * the realsize of the segment does not have to be
2050 * equal to the segment size itself. The segment size is
2051 * often in multiples of a page size larger than PAGESIZE.
2052 * The realsize is rounded up to the nearest PAGESIZE
2053 * based on what the user requested. This is a bit of
2054 	 * ugliness that is historical but not easily fixed
2055 * without re-designing the higher levels of ISM.
2056 */
2057 ASSERT(addr >= seg->s_base);
2058 if (((addr + len) - seg->s_base) > sptd->spt_realsize)
2059 return (FC_NOMAP);
2060 /*
2061 * For all of the following cases except F_PROT, we need to
2062 * make any necessary adjustments to addr and len
2063 * and get all of the necessary page_t's into an array called ppa[].
2064 *
2065 * The code in shmat() forces base addr and len of ISM segment
2066 * to be aligned to largest page size supported. Therefore,
2067 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
2068 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
2069 * in large pagesize chunks, or else we will screw up the HAT
2070 * layer by calling hat_memload_array() with differing page sizes
2071 * over a given virtual range.
2072 */
2073 pgsz = page_get_pagesize(sptseg->s_szc);
2074 pgcnt = page_get_pagecnt(sptseg->s_szc);
2075 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
2076 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
2077 npages = btopr(size);
2078
2079 /*
2080 * Now we need to convert from addr in segshm to addr in segspt.
2081 */
2082 anon_index = seg_page(seg, shm_addr);
2083 sptseg_addr = sptseg->s_base + ptob(anon_index);
2084
2085 /*
2086 * And now we may have to adjust npages downward if we have
2087 * exceeded the realsize of the segment or initial anon
2088 * allocations.
2089 */
2090 if ((sptseg_addr + ptob(npages)) >
2091 (sptseg->s_base + sptd->spt_realsize))
2092 size = (sptseg->s_base + sptd->spt_realsize) - sptseg_addr;
2093
2094 npages = btopr(size);
2095
2096 ASSERT(sptseg_addr < (sptseg->s_base + sptseg->s_size));
2097 ASSERT((sptd->spt_flags & SHM_PAGEABLE) == 0);
2098
2099 switch (type) {
2100
2101 case F_SOFTLOCK:
2102
2103 /*
2104 * availrmem is decremented once during anon_swap_adjust()
2105 * and is incremented during the anon_unresv(), which is
2106 * called from shm_rm_amp() when the segment is destroyed.
2107 */
2108 atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
2109 /*
2110 * Some platforms assume that ISM pages are SE_SHARED
2111 * locked for the entire life of the segment.
2112 */
2113 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0))
2114 return (0);
2115 /*
2116 * Fall through to the F_INVAL case to load up the hat layer
2117 * entries with the HAT_LOAD_LOCK flag.
2118 */
2119
2120 /* FALLTHRU */
2121 case F_INVAL:
2122
2123 if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
2124 return (FC_NOMAP);
2125
2126 /*
2127 * Some platforms that do NOT support DYNAMIC_ISM_UNMAP
2128 * may still rely on this call to hat_share(). That
2129 * would imply that those hat's can fault on a
2130 * HAT_LOAD_LOCK translation, which would seem
2131 * contradictory.
2132 */
2133 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
2134 if (hat_share(seg->s_as->a_hat, seg->s_base,
2135 curspt->a_hat, sptseg->s_base,
2136 sptseg->s_size, sptseg->s_szc) != 0) {
2137 panic("hat_share error in ISM fault");
2138 /*NOTREACHED*/
2139 }
2140 return (0);
2141 }
2142 ppa = kmem_zalloc(sizeof (page_t *) * npages, KM_SLEEP);
2143
2144 /*
2145 		 * I see no need to lock the real seg
2146 * here, because all of our work will be on the underlying
2147 * dummy seg.
2148 *
2149 * sptseg_addr and npages now account for large pages.
2150 */
2151 amp = sptd->spt_amp;
2152 ASSERT(amp != NULL);
2153 anon_index = seg_page(sptseg, sptseg_addr);
2154
2155 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2156 for (i = 0; i < npages; i++) {
2157 ap = anon_get_ptr(amp->ahp, anon_index++);
2158 ASSERT(ap != NULL);
2159 swap_xlate(ap, &vp, &offset);
2160 pp = page_lookup(vp, offset, SE_SHARED);
2161 ASSERT(pp != NULL);
2162 ppa[i] = pp;
2163 }
2164 		ANON_LOCK_EXIT(&amp->a_rwlock);
2165 ASSERT(i == npages);
2166
2167 /*
2168 * We are already holding the as->a_lock on the user's
2169 * real segment, but we need to hold the a_lock on the
2170 * underlying dummy as. This is mostly to satisfy the
2171 * underlying HAT layer.
2172 */
2173 AS_LOCK_ENTER(sptseg->s_as, RW_READER);
2174 a = sptseg_addr;
2175 pidx = 0;
2176 if (type == F_SOFTLOCK) {
2177 /*
2178 * Load up the translation keeping it
2179 * locked and don't unlock the page.
2180 */
2181 for (; pidx < npages; a += pgsz, pidx += pgcnt) {
2182 sz = MIN(pgsz, ptob(npages - pidx));
2183 hat_memload_array(sptseg->s_as->a_hat, a,
2184 sz, &ppa[pidx], sptd->spt_prot,
2185 HAT_LOAD_LOCK | HAT_LOAD_SHARE);
2186 }
2187 } else {
2188 if (hat == seg->s_as->a_hat) {
2189
2190 /*
2191 * Migrate pages marked for migration.
2192 */
2193 if (lgrp_optimizations())
2194 page_migrate(seg, shm_addr, ppa,
2195 npages);
2196
2197 /* CPU HAT */
2198 for (; pidx < npages;
2199 a += pgsz, pidx += pgcnt) {
2200 sz = MIN(pgsz, ptob(npages - pidx));
2201 hat_memload_array(sptseg->s_as->a_hat,
2202 a, sz, &ppa[pidx],
2203 sptd->spt_prot, HAT_LOAD_SHARE);
2204 }
2205 } else {
2206 /* XHAT. Pass real address */
2207 hat_memload_array(hat, shm_addr,
2208 ptob(npages), ppa, sptd->spt_prot,
2209 HAT_LOAD_SHARE);
2210 }
2211
2212 /*
2213 * And now drop the SE_SHARED lock(s).
2214 */
2215 for (i = 0; i < npages; i++)
2216 page_unlock(ppa[i]);
2217 }
2218 AS_LOCK_EXIT(sptseg->s_as);
2219
2220 kmem_free(ppa, sizeof (page_t *) * npages);
2221 return (0);
2222 case F_SOFTUNLOCK:
2223
2224 /*
2225 		 * This is a bit ugly: we pass in the real seg pointer,
2226 * but the sptseg_addr is the virtual address within the
2227 * dummy seg.
2228 */
2229 segspt_softunlock(seg, sptseg_addr, ptob(npages), rw);
2230 return (0);
2231
2232 case F_PROT:
2233
2234 /*
2235 * This takes care of the unusual case where a user
2236 * allocates a stack in shared memory and a register
2237 * window overflow is written to that stack page before
2238 * it is otherwise modified.
2239 *
2240 * We can get away with this because ISM segments are
2241 * always rw. Other than this unusual case, there
2242 * should be no instances of protection violations.
2243 */
2244 return (0);
2245
2246 default:
2247 #ifdef DEBUG
2248 cmn_err(CE_WARN, "segspt_shmfault default type?");
2249 #endif
2250 return (FC_NOMAP);
2251 }
2252 }
2253
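/*
 * Fault-ahead, klustering and swapout are not meaningful for spt-backed
 * shared memory, so the following entry points are trivial.
 */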
2254 /*ARGSUSED*/
2255 static faultcode_t
2256 segspt_shmfaulta(struct seg *seg, caddr_t addr)
2257 {
2258 return (0);
2259 }
2260
2261 /*ARGSUSED*/
2262 static int
2263 segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta)
2264 {
2265 return (0);
2266 }
2267
2268 /*ARGSUSED*/
2269 static size_t
2270 segspt_shmswapout(struct seg *seg)
2271 {
2272 return (0);
2273 }
2274
2275 /*
2276 * duplicate the shared page tables
2277 */
2278 int
2279 segspt_shmdup(struct seg *seg, struct seg *newseg)
2280 {
2281 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2282 struct anon_map *amp = shmd->shm_amp;
2283 struct shm_data *shmd_new;
2284 struct seg *spt_seg = shmd->shm_sptseg;
2285 struct spt_data *sptd = spt_seg->s_data;
2286 int error = 0;
2287
2288 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
2289
2290 shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP);
2291 newseg->s_data = (void *)shmd_new;
2292 shmd_new->shm_sptas = shmd->shm_sptas;
2293 shmd_new->shm_amp = amp;
2294 shmd_new->shm_sptseg = shmd->shm_sptseg;
2295 newseg->s_ops = &segspt_shmops;
2296 newseg->s_szc = seg->s_szc;
2297 ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc);
2298
2299 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
2300 	amp->refcnt++;
2301 	ANON_LOCK_EXIT(&amp->a_rwlock);
2302
2303 if (sptd->spt_flags & SHM_PAGEABLE) {
2304 shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP);
2305 shmd_new->shm_lckpgs = 0;
2306 if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
2307 if ((error = hat_share(newseg->s_as->a_hat,
2308 newseg->s_base, shmd->shm_sptas->a_hat, SEGSPTADDR,
2309 seg->s_size, seg->s_szc)) != 0) {
2310 kmem_free(shmd_new->shm_vpage,
2311 btopr(amp->size));
2312 }
2313 }
2314 return (error);
2315 } else {
2316 return (hat_share(newseg->s_as->a_hat, newseg->s_base,
2317 shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size,
2318 seg->s_szc));
2319
2320 }
2321 }
2322
2323 /*ARGSUSED*/
2324 int
2325 segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
2326 {
2327 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2328 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2329
2330 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2331
2332 /*
2333 * ISM segment is always rw.
2334 */
2335 return (((sptd->spt_prot & prot) != prot) ? EACCES : 0);
2336 }
2337
2338 /*
2339  * Return an array of locked large pages; for empty slots, allocate
2340 * private zero-filled anon pages.
2341 */
2342 static int
2343 spt_anon_getpages(
2344 struct seg *sptseg,
2345 caddr_t sptaddr,
2346 size_t len,
2347 page_t *ppa[])
2348 {
2349 struct spt_data *sptd = sptseg->s_data;
2350 struct anon_map *amp = sptd->spt_amp;
2351 enum seg_rw rw = sptd->spt_prot;
2352 uint_t szc = sptseg->s_szc;
2353 size_t pg_sz, share_sz = page_get_pagesize(szc);
2354 pgcnt_t lp_npgs;
2355 caddr_t lp_addr, e_sptaddr;
2356 uint_t vpprot, ppa_szc = 0;
2357 struct vpage *vpage = NULL;
2358 ulong_t j, ppa_idx;
2359 int err, ierr = 0;
2360 pgcnt_t an_idx;
2361 anon_sync_obj_t cookie;
2362 int anon_locked = 0;
2363 pgcnt_t amp_pgs;
2364
2365
2366 ASSERT(IS_P2ALIGNED(sptaddr, share_sz) && IS_P2ALIGNED(len, share_sz));
2367 ASSERT(len != 0);
2368
2369 pg_sz = share_sz;
2370 lp_npgs = btop(pg_sz);
2371 lp_addr = sptaddr;
2372 e_sptaddr = sptaddr + len;
2373 an_idx = seg_page(sptseg, sptaddr);
2374 ppa_idx = 0;
2375
2376 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2377
2378 amp_pgs = page_get_pagecnt(amp->a_szc);
2379
2380 /*CONSTCOND*/
2381 while (1) {
2382 for (; lp_addr < e_sptaddr;
2383 an_idx += lp_npgs, lp_addr += pg_sz, ppa_idx += lp_npgs) {
2384
2385 /*
2386 * If we're currently locked, and we get to a new
2387 * page, unlock our current anon chunk.
2388 */
2389 if (anon_locked && P2PHASE(an_idx, amp_pgs) == 0) {
2390 anon_array_exit(&cookie);
2391 anon_locked = 0;
2392 }
2393 if (!anon_locked) {
2394 anon_array_enter(amp, an_idx, &cookie);
2395 anon_locked = 1;
2396 }
2397 ppa_szc = (uint_t)-1;
2398 ierr = anon_map_getpages(amp, an_idx, szc, sptseg,
2399 lp_addr, sptd->spt_prot, &vpprot, &ppa[ppa_idx],
2400 &ppa_szc, vpage, rw, 0, segvn_anypgsz, 0, kcred);
2401
2402 if (ierr != 0) {
2403 if (ierr > 0) {
2404 err = FC_MAKE_ERR(ierr);
2405 goto lpgs_err;
2406 }
2407 break;
2408 }
2409 }
2410 if (lp_addr == e_sptaddr) {
2411 break;
2412 }
2413 ASSERT(lp_addr < e_sptaddr);
2414
2415 /*
2416 * ierr == -1 means we failed to allocate a large page.
2417 * so do a size down operation.
2418 *
2419 * ierr == -2 means some other process that privately shares
2420 * pages with this process has allocated a larger page and we
2421 * need to retry with larger pages. So do a size up
2422 * operation. This relies on the fact that large pages are
2423 * never partially shared i.e. if we share any constituent
2424 * page of a large page with another process we must share the
2425 * entire large page. Note this cannot happen for SOFTLOCK
2426 		 * case, unless the current address (lp_addr) is at the beginning
2427 * of the next page size boundary because the other process
2428 * couldn't have relocated locked pages.
2429 */
2430 ASSERT(ierr == -1 || ierr == -2);
2431 if (segvn_anypgsz) {
2432 ASSERT(ierr == -2 || szc != 0);
2433 ASSERT(ierr == -1 || szc < sptseg->s_szc);
2434 szc = (ierr == -1) ? szc - 1 : szc + 1;
2435 } else {
2436 /*
2437 * For faults and segvn_anypgsz == 0
2438 * we need to be careful not to loop forever
2439 * if existing page is found with szc other
2440 * than 0 or seg->s_szc. This could be due
2441 * to page relocations on behalf of DR or
2442 * more likely large page creation. For this
2443 * case simply re-size to existing page's szc
2444 * if returned by anon_map_getpages().
2445 */
2446 if (ppa_szc == (uint_t)-1) {
2447 szc = (ierr == -1) ? 0 : sptseg->s_szc;
2448 } else {
2449 ASSERT(ppa_szc <= sptseg->s_szc);
2450 ASSERT(ierr == -2 || ppa_szc < szc);
2451 ASSERT(ierr == -1 || ppa_szc > szc);
2452 szc = ppa_szc;
2453 }
2454 }
2455 pg_sz = page_get_pagesize(szc);
2456 lp_npgs = btop(pg_sz);
2457 ASSERT(IS_P2ALIGNED(lp_addr, pg_sz));
2458 }
2459 if (anon_locked) {
2460 anon_array_exit(&cookie);
2461 }
2462 	ANON_LOCK_EXIT(&amp->a_rwlock);
2463 return (0);
2464
2465 lpgs_err:
2466 if (anon_locked) {
2467 anon_array_exit(&cookie);
2468 }
2469 	ANON_LOCK_EXIT(&amp->a_rwlock);
2470 for (j = 0; j < ppa_idx; j++)
2471 page_unlock(ppa[j]);
2472 return (err);
2473 }
2474
2475 /*
2476 * count the number of bytes in a set of spt pages that are currently not
2477 * locked
2478 */
2479 static rctl_qty_t
2480 spt_unlockedbytes(pgcnt_t npages, page_t **ppa)
2481 {
2482 ulong_t i;
2483 rctl_qty_t unlocked = 0;
2484
2485 for (i = 0; i < npages; i++) {
2486 if (ppa[i]->p_lckcnt == 0)
2487 unlocked += PAGESIZE;
2488 }
2489 return (unlocked);
2490 }
2491
2492 extern u_longlong_t randtick(void);
2493 /* number of locks to reserve/skip by spt_lockpages() and spt_unlockpages() */
2494 #define NLCK (NCPU_P2)
2495 /* Random number with a range [0, n-1], n must be power of two */
2496 #define RAND_P2(n) \
2497 ((((long)curthread >> PTR24_LSB) ^ (long)randtick()) & ((n) - 1))
2498
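/*
 * Lock npages DISM pages starting at anon_index on behalf of MC_LOCK.
 * Per-page lock counts are tracked in spt_ppa_lckcnt[] and DISM_PG_LOCKED
 * is set in shm_vpage[]; *locked returns the number of newly locked bytes
 * so the caller can settle the locked-memory resource control.
 */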
2499 int
2500 spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
2501 page_t **ppa, ulong_t *lockmap, size_t pos,
2502 rctl_qty_t *locked)
2503 {
2504 struct shm_data *shmd = seg->s_data;
2505 struct spt_data *sptd = shmd->shm_sptseg->s_data;
2506 ulong_t i;
2507 int kernel;
2508 pgcnt_t nlck = 0;
2509 int rv = 0;
2510 int use_reserved = 1;
2511
2512 /* return the number of bytes actually locked */
2513 *locked = 0;
2514
2515 /*
2516 * To avoid contention on freemem_lock, availrmem and pages_locked
2517 * global counters are updated only every nlck locked pages instead of
2518 * every time. Reserve nlck locks up front and deduct from this
2519 * reservation for each page that requires a lock. When the reservation
2520 * is consumed, reserve again. nlck is randomized, so the competing
2521 * threads do not fall into a cyclic lock contention pattern. When
2522 * memory is low, the lock ahead is disabled, and instead page_pp_lock()
2523 * is used to lock pages.
2524 */
2525 for (i = 0; i < npages; anon_index++, pos++, i++) {
2526 if (nlck == 0 && use_reserved == 1) {
2527 nlck = NLCK + RAND_P2(NLCK);
2528 /* if fewer loops left, decrease nlck */
2529 nlck = MIN(nlck, npages - i);
2530 /*
2531 * Reserve nlck locks up front and deduct from this
2532 * reservation for each page that requires a lock. When
2533 * the reservation is consumed, reserve again.
2534 */
2535 mutex_enter(&freemem_lock);
2536 if ((availrmem - nlck) < pages_pp_maximum) {
2537 /* Do not do advance memory reserves */
2538 use_reserved = 0;
2539 } else {
2540 availrmem -= nlck;
2541 pages_locked += nlck;
2542 }
2543 mutex_exit(&freemem_lock);
2544 }
2545 if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) {
2546 if (sptd->spt_ppa_lckcnt[anon_index] <
2547 (ushort_t)DISM_LOCK_MAX) {
2548 if (++sptd->spt_ppa_lckcnt[anon_index] ==
2549 (ushort_t)DISM_LOCK_MAX) {
2550 cmn_err(CE_WARN,
2551 "DISM page lock limit "
2552 "reached on DISM offset 0x%lx\n",
2553 anon_index << PAGESHIFT);
2554 }
2555 kernel = (sptd->spt_ppa &&
2556 sptd->spt_ppa[anon_index]);
2557 if (!page_pp_lock(ppa[i], 0, kernel ||
2558 use_reserved)) {
2559 sptd->spt_ppa_lckcnt[anon_index]--;
2560 rv = EAGAIN;
2561 break;
2562 }
2563 /* if this is a newly locked page, count it */
2564 if (ppa[i]->p_lckcnt == 1) {
2565 if (kernel == 0 && use_reserved == 1)
2566 nlck--;
2567 *locked += PAGESIZE;
2568 }
2569 shmd->shm_lckpgs++;
2570 shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED;
2571 if (lockmap != NULL)
2572 BT_SET(lockmap, pos);
2573 }
2574 }
2575 }
2576 /* Return unused lock reservation */
2577 if (nlck != 0 && use_reserved == 1) {
2578 mutex_enter(&freemem_lock);
2579 availrmem += nlck;
2580 pages_locked -= nlck;
2581 mutex_exit(&freemem_lock);
2582 }
2583
2584 return (rv);
2585 }
2586
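/*
 * Undo spt_lockpages() on behalf of MC_UNLOCK: drop the per-page lock
 * counts, clear DISM_PG_LOCKED and return the number of bytes actually
 * unlocked in *unlocked.  availrmem and pages_locked are updated in
 * batches to limit freemem_lock contention.
 */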
2587 int
2588 spt_unlockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
2589 rctl_qty_t *unlocked)
2590 {
2591 struct shm_data *shmd = seg->s_data;
2592 struct spt_data *sptd = shmd->shm_sptseg->s_data;
2593 struct anon_map *amp = sptd->spt_amp;
2594 struct anon *ap;
2595 struct vnode *vp;
2596 u_offset_t off;
2597 struct page *pp;
2598 int kernel;
2599 anon_sync_obj_t cookie;
2600 ulong_t i;
2601 pgcnt_t nlck = 0;
2602 pgcnt_t nlck_limit = NLCK;
2603
2604 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2605 for (i = 0; i < npages; i++, anon_index++) {
2606 if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
2607 anon_array_enter(amp, anon_index, &cookie);
2608 ap = anon_get_ptr(amp->ahp, anon_index);
2609 ASSERT(ap);
2610
2611 swap_xlate(ap, &vp, &off);
2612 anon_array_exit(&cookie);
2613 pp = page_lookup(vp, off, SE_SHARED);
2614 ASSERT(pp);
2615 /*
2616 * availrmem is decremented only for pages which are not
2617 * in seg pcache, for pages in seg pcache availrmem was
2618 * decremented in _dismpagelock()
2619 */
2620 kernel = (sptd->spt_ppa && sptd->spt_ppa[anon_index]);
2621 ASSERT(pp->p_lckcnt > 0);
2622
2623 /*
2624 			 * unlock page but do not change availrmem; we do it
2625 * ourselves every nlck loops.
2626 */
2627 page_pp_unlock(pp, 0, 1);
2628 if (pp->p_lckcnt == 0) {
2629 if (kernel == 0)
2630 nlck++;
2631 *unlocked += PAGESIZE;
2632 }
2633 page_unlock(pp);
2634 shmd->shm_vpage[anon_index] &= ~DISM_PG_LOCKED;
2635 sptd->spt_ppa_lckcnt[anon_index]--;
2636 shmd->shm_lckpgs--;
2637 }
2638
2639 /*
2640 * To reduce freemem_lock contention, do not update availrmem
2641 * until at least NLCK pages have been unlocked.
2642 * 1. No need to update if nlck is zero
2643 * 2. Always update if the last iteration
2644 */
2645 if (nlck > 0 && (nlck == nlck_limit || i == npages - 1)) {
2646 mutex_enter(&freemem_lock);
2647 availrmem += nlck;
2648 pages_locked -= nlck;
2649 mutex_exit(&freemem_lock);
2650 nlck = 0;
2651 nlck_limit = NLCK + RAND_P2(NLCK);
2652 }
2653 }
2654 	ANON_LOCK_EXIT(&amp->a_rwlock);
2655
2656 return (0);
2657 }
2658
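/*
 * MC_LOCK/MC_UNLOCK handler.  Plain ISM segments are always locked, so
 * only DISM (SHM_PAGEABLE) segments need work here: the request is aligned
 * to the underlying large page size, the pages are brought in with
 * spt_anon_getpages(), and spt_lockpages()/spt_unlockpages() do the
 * per-page bookkeeping while the project's locked-memory resource control
 * is charged or credited.
 */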
2659 /*ARGSUSED*/
2660 static int
2661 segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
2662 int attr, int op, ulong_t *lockmap, size_t pos)
2663 {
2664 struct shm_data *shmd = seg->s_data;
2665 struct seg *sptseg = shmd->shm_sptseg;
2666 struct spt_data *sptd = sptseg->s_data;
2667 struct kshmid *sp = sptd->spt_amp->a_sp;
2668 pgcnt_t npages, a_npages;
2669 page_t **ppa;
2670 pgcnt_t an_idx, a_an_idx, ppa_idx;
2671 caddr_t spt_addr, a_addr; /* spt and aligned address */
2672 size_t a_len; /* aligned len */
2673 size_t share_sz;
2674 ulong_t i;
2675 int sts = 0;
2676 rctl_qty_t unlocked = 0;
2677 rctl_qty_t locked = 0;
2678 struct proc *p = curproc;
2679 kproject_t *proj;
2680
2681 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2682 ASSERT(sp != NULL);
2683
2684 if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
2685 return (0);
2686 }
2687
2688 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2689 an_idx = seg_page(seg, addr);
2690 npages = btopr(len);
2691
2692 if (an_idx + npages > btopr(shmd->shm_amp->size)) {
2693 return (ENOMEM);
2694 }
2695
2696 /*
2697 * A shm's project never changes, so no lock needed.
2698 * The shm has a hold on the project, so it will not go away.
2699 * Since we have a mapping to shm within this zone, we know
2700 * that the zone will not go away.
2701 */
2702 proj = sp->shm_perm.ipc_proj;
2703
2704 if (op == MC_LOCK) {
2705
2706 /*
2707 		 * Need to align the addr and size request if they are not
2708 		 * aligned, so we can always allocate large page(s); however,
2709 		 * we only lock what was requested in the initial request.
2710 */
2711 share_sz = page_get_pagesize(sptseg->s_szc);
2712 a_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz);
2713 a_len = P2ROUNDUP((uintptr_t)(((addr + len) - a_addr)),
2714 share_sz);
2715 a_npages = btop(a_len);
2716 a_an_idx = seg_page(seg, a_addr);
2717 spt_addr = sptseg->s_base + ptob(a_an_idx);
2718 ppa_idx = an_idx - a_an_idx;
2719
2720 if ((ppa = kmem_zalloc(((sizeof (page_t *)) * a_npages),
2721 KM_NOSLEEP)) == NULL) {
2722 return (ENOMEM);
2723 }
2724
2725 /*
2726 * Don't cache any new pages for IO and
2727 * flush any cached pages.
2728 */
2729 mutex_enter(&sptd->spt_lock);
2730 if (sptd->spt_ppa != NULL)
2731 sptd->spt_flags |= DISM_PPA_CHANGED;
2732
2733 sts = spt_anon_getpages(sptseg, spt_addr, a_len, ppa);
2734 if (sts != 0) {
2735 mutex_exit(&sptd->spt_lock);
2736 kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
2737 return (sts);
2738 }
2739
2740 mutex_enter(&sp->shm_mlock);
2741 /* enforce locked memory rctl */
2742 unlocked = spt_unlockedbytes(npages, &ppa[ppa_idx]);
2743
2744 mutex_enter(&p->p_lock);
2745 if (rctl_incr_locked_mem(p, proj, unlocked, 0)) {
2746 mutex_exit(&p->p_lock);
2747 sts = EAGAIN;
2748 } else {
2749 mutex_exit(&p->p_lock);
2750 sts = spt_lockpages(seg, an_idx, npages,
2751 &ppa[ppa_idx], lockmap, pos, &locked);
2752
2753 /*
2754 * correct locked count if not all pages could be
2755 * locked
2756 */
2757 if ((unlocked - locked) > 0) {
2758 rctl_decr_locked_mem(NULL, proj,
2759 (unlocked - locked), 0);
2760 }
2761 }
2762 /*
2763 * unlock pages
2764 */
2765 for (i = 0; i < a_npages; i++)
2766 page_unlock(ppa[i]);
2767 if (sptd->spt_ppa != NULL)
2768 sptd->spt_flags |= DISM_PPA_CHANGED;
2769 mutex_exit(&sp->shm_mlock);
2770 mutex_exit(&sptd->spt_lock);
2771
2772 kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
2773
2774 } else if (op == MC_UNLOCK) { /* unlock */
2775 page_t **ppa;
2776
2777 mutex_enter(&sptd->spt_lock);
2778 if (shmd->shm_lckpgs == 0) {
2779 mutex_exit(&sptd->spt_lock);
2780 return (0);
2781 }
2782 /*
2783 * Don't cache new IO pages.
2784 */
2785 if (sptd->spt_ppa != NULL)
2786 sptd->spt_flags |= DISM_PPA_CHANGED;
2787
2788 mutex_enter(&sp->shm_mlock);
2789 sts = spt_unlockpages(seg, an_idx, npages, &unlocked);
2790 if ((ppa = sptd->spt_ppa) != NULL)
2791 sptd->spt_flags |= DISM_PPA_CHANGED;
2792 mutex_exit(&sptd->spt_lock);
2793
2794 rctl_decr_locked_mem(NULL, proj, unlocked, 0);
2795 mutex_exit(&sp->shm_mlock);
2796
2797 if (ppa != NULL)
2798 seg_ppurge_wiredpp(ppa);
2799 }
2800 return (sts);
2801 }
2802
2803 /*ARGSUSED*/
2804 int
2805 segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2806 {
2807 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2808 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2809 spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1;
2810
2811 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2812
2813 /*
2814 * ISM segment is always rw.
2815 */
2816 while (--pgno >= 0)
2817 *protv++ = sptd->spt_prot;
2818 return (0);
2819 }
2820
2821 /*ARGSUSED*/
2822 u_offset_t
2823 segspt_shmgetoffset(struct seg *seg, caddr_t addr)
2824 {
2825 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2826
2827 /* Offset does not matter in ISM memory */
2828
2829 return ((u_offset_t)0);
2830 }
2831
2832 /* ARGSUSED */
2833 int
2834 segspt_shmgettype(struct seg *seg, caddr_t addr)
2835 {
2836 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2837 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2838
2839 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2840
2841 /*
2842 	 * The shared memory mapping is always MAP_SHARED; SWAP is only
2843 	 * reserved for DISM.
2844 */
2845 return (MAP_SHARED |
2846 ((sptd->spt_flags & SHM_PAGEABLE) ? 0 : MAP_NORESERVE));
2847 }
2848
2849 /*ARGSUSED*/
2850 int
2851 segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
2852 {
2853 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2854 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2855
2856 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2857
2858 *vpp = sptd->spt_vp;
2859 return (0);
2860 }
2861
2862 /*
2863 * We need to wait for pending IO to complete to a DISM segment in order for
2864 * pages to get kicked out of the seg_pcache. 120 seconds should be more
2865 * than enough time to wait.
2866 */
2867 static clock_t spt_pcache_wait = 120;
2868
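/*
 * madvise() handler for shared memory segments.  MADV_FREE and MADV_PURGE
 * on a DISM segment purge any pages cached in seg_pcache (waiting up to
 * spt_pcache_wait seconds for pending I/O) and then disclaim the anon
 * pages.  The MADV_ACCESS_* variants set the lgroup memory allocation
 * policy for the range and mark existing pages for migration.
 */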
2869 /*ARGSUSED*/
2870 static int
2871 segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2872 {
2873 struct shm_data *shmd = (struct shm_data *)seg->s_data;
2874 struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2875 struct anon_map *amp;
2876 pgcnt_t pg_idx;
2877 ushort_t gen;
2878 clock_t end_lbolt;
2879 int writer;
2880 page_t **ppa;
2881
2882 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2883
2884 if (behav == MADV_FREE || behav == MADV_PURGE) {
2885 if ((sptd->spt_flags & SHM_PAGEABLE) == 0)
2886 return (0);
2887
2888 amp = sptd->spt_amp;
2889 pg_idx = seg_page(seg, addr);
2890
2891 mutex_enter(&sptd->spt_lock);
2892 if ((ppa = sptd->spt_ppa) == NULL) {
2893 mutex_exit(&sptd->spt_lock);
2894 			ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2895 			(void) anon_disclaim(amp, pg_idx, len, behav, NULL);
2896 			ANON_LOCK_EXIT(&amp->a_rwlock);
2897 return (0);
2898 }
2899
2900 sptd->spt_flags |= DISM_PPA_CHANGED;
2901 gen = sptd->spt_gen;
2902
2903 mutex_exit(&sptd->spt_lock);
2904
2905 /*
2906 * Purge all DISM cached pages
2907 */
2908 seg_ppurge_wiredpp(ppa);
2909
2910 /*
2911 * Drop the AS_LOCK so that other threads can grab it
2912 * in the as_pageunlock path and hopefully get the segment
2913 * kicked out of the seg_pcache. We bump the shm_softlockcnt
2914 * to keep this segment resident.
2915 */
2916 writer = AS_WRITE_HELD(seg->s_as);
2917 atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2918 AS_LOCK_EXIT(seg->s_as);
2919
2920 mutex_enter(&sptd->spt_lock);
2921
2922 end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait);
2923
2924 /*
2925 * Try to wait for pages to get kicked out of the seg_pcache.
2926 */
2927 while (sptd->spt_gen == gen &&
2928 (sptd->spt_flags & DISM_PPA_CHANGED) &&
2929 ddi_get_lbolt() < end_lbolt) {
2930 if (!cv_timedwait_sig(&sptd->spt_cv,
2931 &sptd->spt_lock, end_lbolt)) {
2932 break;
2933 }
2934 }
2935
2936 mutex_exit(&sptd->spt_lock);
2937
2938 /* Regrab the AS_LOCK and release our hold on the segment */
2939 AS_LOCK_ENTER(seg->s_as, writer ? RW_WRITER : RW_READER);
2940 atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
2941 if (shmd->shm_softlockcnt <= 0) {
2942 if (AS_ISUNMAPWAIT(seg->s_as)) {
2943 mutex_enter(&seg->s_as->a_contents);
2944 if (AS_ISUNMAPWAIT(seg->s_as)) {
2945 AS_CLRUNMAPWAIT(seg->s_as);
2946 cv_broadcast(&seg->s_as->a_cv);
2947 }
2948 mutex_exit(&seg->s_as->a_contents);
2949 }
2950 }
2951
2952 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2953 		(void) anon_disclaim(amp, pg_idx, len, behav, NULL);
2954 		ANON_LOCK_EXIT(&amp->a_rwlock);
2955 } else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP ||
2956 behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) {
2957 int already_set;
2958 ulong_t anon_index;
2959 lgrp_mem_policy_t policy;
2960 caddr_t shm_addr;
2961 size_t share_size;
2962 size_t size;
2963 struct seg *sptseg = shmd->shm_sptseg;
2964 caddr_t sptseg_addr;
2965
2966 /*
2967 * Align address and length to page size of underlying segment
2968 */
2969 share_size = page_get_pagesize(shmd->shm_sptseg->s_szc);
2970 shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size);
2971 size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)),
2972 share_size);
2973
2974 amp = shmd->shm_amp;
2975 anon_index = seg_page(seg, shm_addr);
2976
2977 /*
2978 * And now we may have to adjust size downward if we have
2979 * exceeded the realsize of the segment or initial anon
2980 * allocations.
2981 */
2982 sptseg_addr = sptseg->s_base + ptob(anon_index);
2983 if ((sptseg_addr + size) >
2984 (sptseg->s_base + sptd->spt_realsize))
2985 size = (sptseg->s_base + sptd->spt_realsize) -
2986 sptseg_addr;
2987
2988 /*
2989 * Set memory allocation policy for this segment
2990 */
2991 policy = lgrp_madv_to_policy(behav, len, MAP_SHARED);
2992 already_set = lgrp_shm_policy_set(policy, amp, anon_index,
2993 NULL, 0, len);
2994
2995 /*
2996 * If random memory allocation policy set already,
2997 		 * If the memory allocation policy is already set and does not
2998 		 * need to be reapplied, don't bother doing so.
2999 if (already_set && !LGRP_MEM_POLICY_REAPPLICABLE(policy))
3000 return (0);
3001
3002 /*
3003 * Mark any existing pages in the given range for
3004 		 * migration, flush the I/O page cache, and use the
3005 		 * underlying segment to calculate the anon index and to
3006 		 * look up the anon map and vnode pointer.
3007 */
3008 if (shmd->shm_softlockcnt > 0)
3009 segspt_purge(seg);
3010
3011 page_mark_migrate(seg, shm_addr, size, amp, 0, NULL, 0, 0);
3012 }
3013
3014 return (0);
3015 }
3016
3017 /*ARGSUSED*/
3018 void
3019 segspt_shmdump(struct seg *seg)
3020 {
3021 /* no-op for ISM segment */
3022 }
3023
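/*
 * The page size of an spt segment is fixed when the segment is created,
 * so changing it later is not supported.
 */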
3024 /*ARGSUSED*/
3025 static faultcode_t
3026 segspt_shmsetpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
3027 {
3028 return (ENOTSUP);
3029 }
3030
3031 /*
3032 * get a memory ID for an addr in a given segment
3033 */
3034 static int
3035 segspt_shmgetmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
3036 {
3037 struct shm_data *shmd = (struct shm_data *)seg->s_data;
3038 struct anon *ap;
3039 size_t anon_index;
3040 struct anon_map *amp = shmd->shm_amp;
3041 struct spt_data *sptd = shmd->shm_sptseg->s_data;
3042 struct seg *sptseg = shmd->shm_sptseg;
3043 anon_sync_obj_t cookie;
3044
3045 anon_index = seg_page(seg, addr);
3046
3047 if (addr > (seg->s_base + sptd->spt_realsize)) {
3048 return (EFAULT);
3049 }
3050
3051 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
3052 anon_array_enter(amp, anon_index, &cookie);
3053 ap = anon_get_ptr(amp->ahp, anon_index);
3054 if (ap == NULL) {
3055 struct page *pp;
3056 caddr_t spt_addr = sptseg->s_base + ptob(anon_index);
3057
3058 pp = anon_zero(sptseg, spt_addr, &ap, kcred);
3059 if (pp == NULL) {
3060 anon_array_exit(&cookie);
3061 			ANON_LOCK_EXIT(&amp->a_rwlock);
3062 return (ENOMEM);
3063 }
3064 (void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP);
3065 page_unlock(pp);
3066 }
3067 anon_array_exit(&cookie);
3068 	ANON_LOCK_EXIT(&amp->a_rwlock);
3069 memidp->val[0] = (uintptr_t)ap;
3070 memidp->val[1] = (uintptr_t)addr & PAGEOFFSET;
3071 return (0);
3072 }
3073
3074 /*
3075 * Get memory allocation policy info for specified address in given segment
3076 */
3077 static lgrp_mem_policy_info_t *
3078 segspt_shmgetpolicy(struct seg *seg, caddr_t addr)
3079 {
3080 struct anon_map *amp;
3081 ulong_t anon_index;
3082 lgrp_mem_policy_info_t *policy_info;
3083 struct shm_data *shm_data;
3084
3085 ASSERT(seg != NULL);
3086
3087 /*
3088 * Get anon_map from segshm
3089 *
3090 * Assume that no lock needs to be held on anon_map, since
3091 * it should be protected by its reference count which must be
3092 * nonzero for an existing segment
3093 * Need to grab readers lock on policy tree though
3094 */
3095 shm_data = (struct shm_data *)seg->s_data;
3096 if (shm_data == NULL)
3097 return (NULL);
3098 amp = shm_data->shm_amp;
3099 ASSERT(amp->refcnt != 0);
3100
3101 /*
3102 * Get policy info
3103 *
3104 * Assume starting anon index of 0
3105 */
3106 anon_index = seg_page(seg, addr);
3107 policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);
3108
3109 return (policy_info);
3110 }
3111
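/*
 * No optional segment capabilities are provided for spt segments.
 */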
3112 /*ARGSUSED*/
3113 static int
3114 segspt_shmcapable(struct seg *seg, segcapability_t capability)
3115 {
3116 return (0);
3117 }
3118