17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5e67882ffSbs21162 * Common Development and Distribution License (the "License"). 6e67882ffSbs21162 * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22b52a336eSPavel Tatashin * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved. 
237c478bd9Sstevel@tonic-gate */ 247c478bd9Sstevel@tonic-gate 257c478bd9Sstevel@tonic-gate #include <sys/param.h> 267c478bd9Sstevel@tonic-gate #include <sys/user.h> 277c478bd9Sstevel@tonic-gate #include <sys/mman.h> 287c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 297c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 307c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 317c478bd9Sstevel@tonic-gate #include <sys/systm.h> 327c478bd9Sstevel@tonic-gate #include <sys/tuneable.h> 337c478bd9Sstevel@tonic-gate #include <vm/hat.h> 347c478bd9Sstevel@tonic-gate #include <vm/seg.h> 357c478bd9Sstevel@tonic-gate #include <vm/as.h> 367c478bd9Sstevel@tonic-gate #include <vm/anon.h> 377c478bd9Sstevel@tonic-gate #include <vm/page.h> 387c478bd9Sstevel@tonic-gate #include <sys/buf.h> 397c478bd9Sstevel@tonic-gate #include <sys/swap.h> 407c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 417c478bd9Sstevel@tonic-gate #include <vm/seg_spt.h> 427c478bd9Sstevel@tonic-gate #include <sys/debug.h> 437c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 447c478bd9Sstevel@tonic-gate #include <sys/shm.h> 45c6939658Ssl108498 #include <sys/shm_impl.h> 467c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 477c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h> 48c6939658Ssl108498 #include <sys/policy.h> 49c6939658Ssl108498 #include <sys/project.h> 507c478bd9Sstevel@tonic-gate #include <sys/tnf_probe.h> 51c6939658Ssl108498 #include <sys/zone.h> 527c478bd9Sstevel@tonic-gate 537c478bd9Sstevel@tonic-gate #define SEGSPTADDR (caddr_t)0x0 547c478bd9Sstevel@tonic-gate 557c478bd9Sstevel@tonic-gate /* 567c478bd9Sstevel@tonic-gate * # pages used for spt 577c478bd9Sstevel@tonic-gate */ 58cee1d74bSjfrank size_t spt_used; 597c478bd9Sstevel@tonic-gate 607c478bd9Sstevel@tonic-gate /* 617c478bd9Sstevel@tonic-gate * segspt_minfree is the memory left for system after ISM 627c478bd9Sstevel@tonic-gate * locked its pages; it is set up to 5% of availrmem in 637c478bd9Sstevel@tonic-gate * sptcreate when ISM is created. 
 * ISM should not use more
 * than ~90% of availrmem; if it does, then the performance
 * of the system may decrease. Machines with large memories may
 * be able to use up more memory for ISM so we set the default
 * segspt_minfree to 5% (which gives ISM max 95% of availrmem).
 * If somebody wants even more memory for ISM (risking hanging
 * the system) they can patch the segspt_minfree to smaller number.
 */
pgcnt_t segspt_minfree = 0;

/* seg_ops entry points implemented for the dummy SPT segment itself */
static int segspt_create(struct seg *seg, caddr_t argsp);
static int segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize);
static void segspt_free(struct seg *seg);
static void segspt_free_pages(struct seg *seg, caddr_t addr, size_t len);
static lgrp_mem_policy_info_t *segspt_getpolicy(struct seg *seg, caddr_t addr);

/*
 * Catch-all for seg_ops entry points that must never be called on the
 * dummy SPT segment (it lives in its own kernel-only address space and
 * is only created, unmapped, and freed); panics to flag the bug.
 */
static void
segspt_badop()
{
	panic("segspt_badop called");
	/*NOTREACHED*/
}

#define	SEGSPT_BADOP(t)	(t(*)())segspt_badop

/*
 * seg_ops vector for the dummy SPT segment.  Only unmap, free and
 * getpolicy are legal; everything else panics via segspt_badop().
 */
struct seg_ops segspt_ops = {
	SEGSPT_BADOP(int),		/* dup */
	segspt_unmap,
	segspt_free,
	SEGSPT_BADOP(int),		/* fault */
	SEGSPT_BADOP(faultcode_t),	/* faulta */
	SEGSPT_BADOP(int),		/* setprot */
	SEGSPT_BADOP(int),		/* checkprot */
	SEGSPT_BADOP(int),		/* kluster */
	SEGSPT_BADOP(size_t),		/* swapout */
	SEGSPT_BADOP(int),		/* sync */
	SEGSPT_BADOP(size_t),		/* incore */
	SEGSPT_BADOP(int),		/* lockop */
	SEGSPT_BADOP(int),		/* getprot */
	SEGSPT_BADOP(u_offset_t),	/* getoffset */
	SEGSPT_BADOP(int),		/* gettype */
	SEGSPT_BADOP(int),		/* getvp */
	SEGSPT_BADOP(int),		/* advise */
	SEGSPT_BADOP(void),		/* dump */
	SEGSPT_BADOP(int),		/* pagelock */
	SEGSPT_BADOP(int),		/* setpgsz */
	SEGSPT_BADOP(int),		/* getmemid */
	segspt_getpolicy,		/* getpolicy */
	SEGSPT_BADOP(int),		/* capable */
	seg_inherit_notsup		/* inherit */
};

/* seg_ops entry points for the per-process shm attach segments */
static int segspt_shmdup(struct seg *seg, struct seg *newseg);
static int segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize);
static void segspt_shmfree(struct seg *seg);
static faultcode_t segspt_shmfault(struct hat *hat, struct seg *seg,
		caddr_t addr, size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segspt_shmfaulta(struct seg *seg, caddr_t addr);
static int segspt_shmsetprot(register struct seg *seg, register caddr_t addr,
		register size_t len, register uint_t prot);
static int segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size,
		uint_t prot);
static int segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta);
static size_t segspt_shmswapout(struct seg *seg);
static size_t segspt_shmincore(struct seg *seg, caddr_t addr, size_t len,
		register char *vec);
static int segspt_shmsync(struct seg *seg, register caddr_t addr, size_t len,
		int attr, uint_t flags);
static int segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
		int attr, int op, ulong_t *lockmap, size_t pos);
static int segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t *protv);
static u_offset_t segspt_shmgetoffset(struct seg *seg, caddr_t addr);
static int segspt_shmgettype(struct seg *seg, caddr_t addr);
static int segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static int segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len,
		uint_t behav);
static void segspt_shmdump(struct seg *seg);
static int segspt_shmpagelock(struct seg *, caddr_t, size_t,
		struct page ***, enum lock_type, enum seg_rw);
static int segspt_shmsetpgsz(struct seg *, caddr_t, size_t, uint_t);
static int segspt_shmgetmemid(struct seg *, caddr_t, memid_t *);
static lgrp_mem_policy_info_t *segspt_shmgetpolicy(struct seg *, caddr_t);
static int segspt_shmcapable(struct seg *, segcapability_t);

/*
 * seg_ops vector for the per-process segments that attach to the
 * shared SPT segment; these are the operations callers actually use.
 */
struct seg_ops segspt_shmops = {
	segspt_shmdup,
	segspt_shmunmap,
	segspt_shmfree,
	segspt_shmfault,
	segspt_shmfaulta,
	segspt_shmsetprot,
	segspt_shmcheckprot,
	segspt_shmkluster,
	segspt_shmswapout,
	segspt_shmsync,
	segspt_shmincore,
	segspt_shmlockop,
	segspt_shmgetprot,
	segspt_shmgetoffset,
	segspt_shmgettype,
	segspt_shmgetvp,
	segspt_shmadvise,	/* advise */
	segspt_shmdump,
	segspt_shmpagelock,
	segspt_shmsetpgsz,
	segspt_shmgetmemid,
	segspt_shmgetpolicy,
	segspt_shmcapable,
	seg_inherit_notsup
};

static void segspt_purge(struct seg *seg);
static int segspt_reclaim(void *, caddr_t, size_t, struct page **,
		enum seg_rw, int);
static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len,
		page_t **ppa);



/*
 * Create the dummy address space and SPT segment backing a shared memory
 * identifier.  On success *sptseg points at the new SPT segment; returns
 * 0, EINVAL if the HAT cannot share page tables, or an as_map() error.
 */
/*ARGSUSED*/
int
sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
    uint_t prot, uint_t flags, uint_t share_szc)
{
	int	err;
	struct	as	*newas;
	struct	segspt_crargs sptcargs;

#ifdef DEBUG
	TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */,
	    tnf_ulong, size, size );
#endif
	if (segspt_minfree == 0)	/* leave min 5% of availrmem for */
		segspt_minfree = availrmem/20;	/* for the system */

	if (!hat_supported(HAT_SHARED_PT, (void *)0))
		return (EINVAL);

	/*
	 * get a new as for this shared memory segment
	 */
	newas = as_alloc();
	newas->a_proc = NULL;	/* kernel-only as; no owning process */
	sptcargs.amp = amp;
	sptcargs.prot = prot;
	sptcargs.flags = flags;
	sptcargs.szc = share_szc;
	/*
	 * create a shared page table (spt) segment
	 */

	if (err = as_map(newas, SEGSPTADDR, size, segspt_create, &sptcargs)) {
		as_free(newas);
		return (err);
	}
	*sptseg = sptcargs.seg_spt;
	return (0);
}

/*
 * Tear down the dummy SPT address space created by sptcreate(); unmapping
 * the segment frees its pages via segspt_free()/segspt_free_pages().
 */
void
sptdestroy(struct as *as, struct anon_map *amp)
{

#ifdef DEBUG
	TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */);
#endif
	(void) as_unmap(as, SEGSPTADDR, amp->size);
	as_free(as);
}

/*
 * called from seg_free().
 * free (i.e., unlock, unmap, return to free list)
 * all the pages in the given seg.
 */
void
segspt_free(struct seg *seg)
{
	struct spt_data *sptd = (struct spt_data *)seg->s_data;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	if (sptd != NULL) {
		/* spt_realsize is 0 when segspt_create() failed early */
		if (sptd->spt_realsize)
			segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);

		/* spt_ppa_lckcnt is only allocated for DISM (SHM_PAGEABLE) */
		if (sptd->spt_ppa_lckcnt)
			kmem_free(sptd->spt_ppa_lckcnt,
			    sizeof (*sptd->spt_ppa_lckcnt)
			    * btopr(sptd->spt_amp->size));
		kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
		cv_destroy(&sptd->spt_cv);
		mutex_destroy(&sptd->spt_lock);
		kmem_free(sptd, sizeof (*sptd));
	}
}

/*
 * msync() on shared memory is a no-op: ISM/DISM pages are anonymous and
 * either locked or managed by the pageout daemon, so there is nothing
 * to flush.  Always succeeds.
 */
/*ARGSUSED*/
static int
segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
    uint_t flags)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	return (0);
}

/*
 * mincore() support.  For ISM (not SHM_PAGEABLE) every page is resident
 * and locked by construction, so the vector is filled without any lookup.
 * For DISM each anon slot is checked individually: residency via
 * page_lookup_nowait() and lock state via the shm_vpage[] DISM_PG_LOCKED
 * bit.  Returns len on success, EINVAL if the range exceeds the amp.
 */
/*ARGSUSED*/
static size_t
segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	caddr_t	eo_seg;
	pgcnt_t	npages;
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg;
	struct spt_data *sptd;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
#ifdef lint
	seg = seg;
#endif
	sptseg = shmd->shm_sptseg;
	sptd = sptseg->s_data;

	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		eo_seg = addr + len;
		while (addr < eo_seg) {
			/* page exists, and it's locked. */
			*vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
			    SEG_PAGE_ANON;
			addr += PAGESIZE;
		}
		return (len);
	} else {
		struct  anon_map *amp = shmd->shm_amp;
		struct  anon	*ap;
		page_t		*pp;
		pgcnt_t		anon_index;
		struct vnode	*vp;
		u_offset_t	off;
		ulong_t		i;
		int		ret;
		anon_sync_obj_t	cookie;

		addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
		anon_index = seg_page(seg, addr);
		npages = btopr(len);
		if (anon_index + npages > btopr(shmd->shm_amp->size)) {
			return (EINVAL);
		}
		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
		for (i = 0; i < npages; i++, anon_index++) {
			ret = 0;
			anon_array_enter(amp, anon_index, &cookie);
			ap = anon_get_ptr(amp->ahp, anon_index);
			if (ap != NULL) {
				swap_xlate(ap, &vp, &off);
				anon_array_exit(&cookie);
				pp = page_lookup_nowait(vp, off, SE_SHARED);
				if (pp != NULL) {
					ret |= SEG_PAGE_INCORE | SEG_PAGE_ANON;
					page_unlock(pp);
				}
			} else {
				anon_array_exit(&cookie);
			}
			if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
				ret |= SEG_PAGE_LOCKED;
			}
			*vec++ = (char)ret;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);
		return (len);
	}
}

/*
 * Unmap the dummy SPT segment.  Only a full-segment unmap is legal;
 * partial ranges return EINVAL.
 */
static int
segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
{
	size_t share_size;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	/*
	 * seg.s_size may have been rounded up to the largest page size
	 * in shmat().
	 * XXX This should be cleanedup. sptdestroy should take a length
	 * argument which should be the same as sptcreate. Then
	 * this rounding would not be needed (or is done in shm.c)
	 * Only the check for full segment will be needed.
	 *
	 * XXX -- shouldn't raddr == 0 always? These tests don't seem
	 * to be useful at all.
	 */
	share_size = page_get_pagesize(seg->s_szc);
	ssize = P2ROUNDUP(ssize, share_size);

	if (raddr == seg->s_base && ssize == seg->s_size) {
		seg_free(seg);
		return (0);
	} else
		return (EINVAL);
}

/*
 * as_map() callback that builds the SPT segment proper.
 *
 * For DISM (SHM_PAGEABLE) only the bookkeeping is set up: the anon array
 * is grown to a large-page multiple, swap is reserved against the owning
 * zone, and the per-page lock-count array is allocated; pages fault in
 * lazily later.  For ISM all pages are created up front via
 * anon_map_createpages(), charged to the project locked-memory rctl,
 * locked with page_pp_lock(), and preloaded into the HAT one large page
 * at a time (HAT_LOAD_LOCK where dynamic ISM unmap is unsupported).
 *
 * Returns 0 on success or an errno (typically ENOMEM), unwinding all
 * partially-acquired resources via the out1..out4 labels.
 */
int
segspt_create(struct seg *seg, caddr_t argsp)
{
	int		err;
	caddr_t		addr = seg->s_base;
	struct spt_data *sptd;
	struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp;
	struct anon_map *amp = sptcargs->amp;
	struct kshmid	*sp = amp->a_sp;
	struct	cred	*cred = CRED();
	ulong_t		i, j, anon_index = 0;
	pgcnt_t		npages = btopr(amp->size);
	struct vnode	*vp;
	page_t		**ppa;
	uint_t		hat_flags;
	size_t		pgsz;
	pgcnt_t		pgcnt;
	caddr_t		a;
	pgcnt_t		pidx;
	size_t		sz;
	proc_t		*procp = curproc;
	rctl_qty_t	lockedbytes = 0;
	kproject_t	*proj;

	/*
	 * We are holding the a_lock on the underlying dummy as,
	 * so we can make calls to the HAT layer.
	 */
	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
	ASSERT(sp != NULL);

#ifdef DEBUG
	TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
	    tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size);
#endif
	if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
		if (err = anon_swap_adjust(npages))
			return (err);
	}
	err = ENOMEM;

	if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
		goto out1;

	if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
		if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
		    KM_NOSLEEP)) == NULL)
			goto out2;
	}

	mutex_init(&sptd->spt_lock, NULL, MUTEX_DEFAULT, NULL);

	if ((vp = kmem_zalloc(sizeof (*vp), KM_NOSLEEP)) == NULL)
		goto out3;

	seg->s_ops = &segspt_ops;
	sptd->spt_vp = vp;
	sptd->spt_amp = amp;
	sptd->spt_prot = sptcargs->prot;
	sptd->spt_flags = sptcargs->flags;
	seg->s_data = (caddr_t)sptd;
	sptd->spt_ppa = NULL;
	sptd->spt_ppa_lckcnt = NULL;
	seg->s_szc = sptcargs->szc;
	cv_init(&sptd->spt_cv, NULL, CV_DEFAULT, NULL);
	sptd->spt_gen = 0;

	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
	if (seg->s_szc > amp->a_szc) {
		amp->a_szc = seg->s_szc;
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);

	/*
	 * Set policy to affect initial allocation of pages in
	 * anon_map_createpages()
	 */
	(void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, amp, anon_index,
	    NULL, 0, ptob(npages));

	if (sptcargs->flags & SHM_PAGEABLE) {
		size_t  share_sz;
		pgcnt_t new_npgs, more_pgs;
		struct anon_hdr *nahp;
		zone_t *zone;

		share_sz = page_get_pagesize(seg->s_szc);
		if (!IS_P2ALIGNED(amp->size, share_sz)) {
			/*
			 * We are rounding up the size of the anon array
			 * on 4 M boundary because we always create 4 M
			 * of page(s) when locking, faulting pages and we
			 * don't have to check for all corner cases e.g.
			 * if there is enough space to allocate 4 M
			 * page.
			 */
			new_npgs = btop(P2ROUNDUP(amp->size, share_sz));
			more_pgs = new_npgs - npages;

			/*
			 * The zone will never be NULL, as a fully created
			 * shm always has an owning zone.
			 */
			zone = sp->shm_perm.ipc_zone_ref.zref_zone;
			ASSERT(zone != NULL);
			if (anon_resv_zone(ptob(more_pgs), zone) == 0) {
				err = ENOMEM;
				goto out4;
			}

			nahp = anon_create(new_npgs, ANON_SLEEP);
			ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
			(void) anon_copy_ptr(amp->ahp, 0, nahp, 0, npages,
			    ANON_SLEEP);
			anon_release(amp->ahp, npages);
			amp->ahp = nahp;
			ASSERT(amp->swresv == ptob(npages));
			amp->swresv = amp->size = ptob(new_npgs);
			ANON_LOCK_EXIT(&amp->a_rwlock);
			npages = new_npgs;
		}

		sptd->spt_ppa_lckcnt = kmem_zalloc(npages *
		    sizeof (*sptd->spt_ppa_lckcnt), KM_SLEEP);
		sptd->spt_pcachecnt = 0;
		sptd->spt_realsize = ptob(npages);
		sptcargs->seg_spt = seg;
		return (0);
	}

	/*
	 * get array of pages for each anon slot in amp
	 */
	if ((err = anon_map_createpages(amp, anon_index, ptob(npages), ppa,
	    seg, addr, S_CREATE, cred)) != 0)
		goto out4;

	mutex_enter(&sp->shm_mlock);

	/* May be partially locked, so, count bytes to charge for locking */
	for (i = 0; i < npages; i++)
		if (ppa[i]->p_lckcnt == 0)
			lockedbytes += PAGESIZE;

	proj = sp->shm_perm.ipc_proj;

	if (lockedbytes > 0) {
		mutex_enter(&procp->p_lock);
		if (rctl_incr_locked_mem(procp, proj, lockedbytes, 0)) {
			mutex_exit(&procp->p_lock);
			mutex_exit(&sp->shm_mlock);
			for (i = 0; i < npages; i++)
				page_unlock(ppa[i]);
			err = ENOMEM;
			goto out4;
		}
		mutex_exit(&procp->p_lock);
	}

	/*
	 * addr is initial address corresponding to the first page on ppa list
	 */
	for (i = 0; i < npages; i++) {
		/* attempt to lock all pages */
		if (page_pp_lock(ppa[i], 0, 1) == 0) {
			/*
			 * if unable to lock any page, unlock all
			 * of them and return error
			 */
			for (j = 0; j < i; j++)
				page_pp_unlock(ppa[j], 0, 1);
			for (i = 0; i < npages; i++)
				page_unlock(ppa[i]);
			rctl_decr_locked_mem(NULL, proj, lockedbytes, 0);
			mutex_exit(&sp->shm_mlock);
			err = ENOMEM;
			goto out4;
		}
	}
	mutex_exit(&sp->shm_mlock);

	/*
	 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
	 * for the entire life of the segment. For example platforms
	 * that do not support Dynamic Reconfiguration.
	 */
	hat_flags = HAT_LOAD_SHARE;
	if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL))
		hat_flags |= HAT_LOAD_LOCK;

	/*
	 * Load translations one large page at a time
	 * to make sure we don't create mappings bigger than
	 * segment's size code in case underlying pages
	 * are shared with segvn's segment that uses bigger
	 * size code than we do.
	 */
	pgsz = page_get_pagesize(seg->s_szc);
	pgcnt = page_get_pagecnt(seg->s_szc);
	for (a = addr, pidx = 0; pidx < npages; a += pgsz, pidx += pgcnt) {
		sz = MIN(pgsz, ptob(npages - pidx));
		hat_memload_array(seg->s_as->a_hat, a, sz,
		    &ppa[pidx], sptd->spt_prot, hat_flags);
	}

	/*
	 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
	 * we will leave the pages locked SE_SHARED for the life
	 * of the ISM segment. This will prevent any calls to
	 * hat_pageunload() on this ISM segment for those platforms.
	 */
	if (!(hat_flags & HAT_LOAD_LOCK)) {
		/*
		 * On platforms that support HAT_DYNAMIC_ISM_UNMAP,
		 * we no longer need to hold the SE_SHARED lock on the pages,
		 * since L_PAGELOCK and F_SOFTLOCK calls will grab the
		 * SE_SHARED lock on the pages as necessary.
		 */
		for (i = 0; i < npages; i++)
			page_unlock(ppa[i]);
	}
	sptd->spt_pcachecnt = 0;
	kmem_free(ppa, ((sizeof (page_t *)) * npages));
	sptd->spt_realsize = ptob(npages);
	atomic_add_long(&spt_used, npages);
	sptcargs->seg_spt = seg;
	return (0);

out4:
	seg->s_data = NULL;
	kmem_free(vp, sizeof (*vp));
	cv_destroy(&sptd->spt_cv);
out3:
	mutex_destroy(&sptd->spt_lock);
	if ((sptcargs->flags & SHM_PAGEABLE) == 0)
		kmem_free(ppa, (sizeof (*ppa) * npages));
out2:
	kmem_free(sptd, sizeof (*sptd));
out1:
	if ((sptcargs->flags & SHM_PAGEABLE) == 0)
		anon_swap_restore(npages);
	return (err);
}

/*ARGSUSED*/
void
segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
{
	struct page	*pp;
	struct spt_data *sptd = (struct spt_data *)seg->s_data;
	pgcnt_t		npages;
	ulong_t		anon_idx;
	struct anon_map *amp;
	struct anon	*ap;
	struct vnode	*vp;
	u_offset_t	off;
	uint_t		hat_flags;
	int		root = 0;	/* nonzero while walking a large page */
	pgcnt_t		pgs, curnpgs = 0; /* size / remaining constituents */
	page_t		*rootpp;
	rctl_qty_t	unlocked_bytes = 0; /* for rctl locked-mem accounting */
	kproject_t	*proj;
	kshmid_t	*sp;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	len = P2ROUNDUP(len, PAGESIZE);

	npages = btop(len);

	/*
	 * Choose how to tear down the HAT mappings. The default assumes
	 * the translations were loaded locked; on platforms supporting
	 * HAT_DYNAMIC_ISM_UNMAP, or for DISM (SHM_PAGEABLE) segments,
	 * the mappings are presumably not held locked (matches the
	 * corresponding page_unlock() path in segspt_create() — confirm
	 * against that function), so a plain unmap suffices.
	 */
	hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP;
	if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
	    (sptd->spt_flags & SHM_PAGEABLE)) {
		hat_flags = HAT_UNLOAD_UNMAP;
	}

	hat_unload(seg->s_as->a_hat, addr, len, hat_flags);

	/*
	 * For DISM the whole anon map is torn down regardless of the
	 * requested length.
	 */
	amp = sptd->spt_amp;
	if (sptd->spt_flags & SHM_PAGEABLE)
		npages = btop(amp->size);

	/* NOTE(review): amp was already dereferenced above when pageable. */
	ASSERT(amp != NULL);

	/*
	 * ISM pages are permanently locked; take the shmid's mlock mutex
	 * so the rctl locked-memory decrement below is consistent.
	 */
	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		sp = amp->a_sp;
		proj = sp->shm_perm.ipc_proj;
		mutex_enter(&sp->shm_mlock);
	}
	for (anon_idx = 0; anon_idx < npages; anon_idx++) {
		if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
			/* ISM: every slot must be populated. */
			if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
				panic("segspt_free_pages: null app");
				/*NOTREACHED*/
			}
		} else {
			/* DISM: slots may be sparse; skip the holes. */
			if ((ap = anon_get_next_ptr(amp->ahp, &anon_idx))
			    == NULL)
				continue;
		}
		ASSERT(ANON_ISBUSY(anon_get_slot(amp->ahp, anon_idx)) == 0);
		swap_xlate(ap, &vp, &off);

		/*
		 * If this platform supports HAT_DYNAMIC_ISM_UNMAP,
		 * the pages won't be having SE_SHARED lock at this
		 * point.
		 *
		 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
		 * the pages are still held SE_SHARED locked from the
		 * original segspt_create()
		 *
		 * Our goal is to get SE_EXCL lock on each page, remove
		 * permanent lock on it and invalidate the page.
		 */
		if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
			if (hat_flags == HAT_UNLOAD_UNMAP)
				pp = page_lookup(vp, off, SE_EXCL);
			else {
				if ((pp = page_find(vp, off)) == NULL) {
					panic("segspt_free_pages: "
					    "page not locked");
					/*NOTREACHED*/
				}
				/*
				 * Try a cheap SE_SHARED -> SE_EXCL upgrade;
				 * fall back to drop-and-relock on failure.
				 */
				if (!page_tryupgrade(pp)) {
					page_unlock(pp);
					pp = page_lookup(vp, off, SE_EXCL);
				}
			}
			if (pp == NULL) {
				panic("segspt_free_pages: "
				    "page not in the system");
				/*NOTREACHED*/
			}
			ASSERT(pp->p_lckcnt > 0);
			page_pp_unlock(pp, 0, 1);
			/* Count only pages that became fully unlocked. */
			if (pp->p_lckcnt == 0)
				unlocked_bytes += PAGESIZE;
		} else {
			if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL)
				continue;
		}
		/*
		 * It's logical to invalidate the pages here as in most cases
		 * these were created by segspt.
		 */
		if (pp->p_szc != 0) {
			/*
			 * Large page: remember the root constituent and
			 * destroy the whole set once the last constituent
			 * has been visited.
			 */
			if (root == 0) {
				ASSERT(curnpgs == 0);
				root = 1;
				rootpp = pp;
				pgs = curnpgs = page_get_pagecnt(pp->p_szc);
				ASSERT(pgs > 1);
				ASSERT(IS_P2ALIGNED(pgs, pgs));
				ASSERT(!(page_pptonum(pp) & (pgs - 1)));
				curnpgs--;
			} else if ((page_pptonum(pp) & (pgs - 1)) == pgs - 1) {
				/* Last constituent of the run. */
				ASSERT(curnpgs == 1);
				ASSERT(page_pptonum(pp) ==
				    page_pptonum(rootpp) + (pgs - 1));
				page_destroy_pages(rootpp);
				root = 0;
				curnpgs = 0;
			} else {
				/* Interior constituent. */
				ASSERT(curnpgs > 1);
				ASSERT(page_pptonum(pp) ==
				    page_pptonum(rootpp) + (pgs - curnpgs));
				curnpgs--;
			}
		} else {
			if (root != 0 || curnpgs != 0) {
				panic("segspt_free_pages: bad large page");
				/*NOTREACHED*/
			}
			/*
			 * Before destroying the pages, we need to take care
			 * of the rctl locked memory accounting. For that
			 * we need to calculate the unlocked_bytes.
			 */
			if (pp->p_lckcnt > 0)
				unlocked_bytes += PAGESIZE;
			/*LINTED: constant in conditional context */
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
		}
	}
	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		if (unlocked_bytes > 0)
			rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
		mutex_exit(&sp->shm_mlock);
	}
	/* A large page run must never end mid-way through its pages. */
	if (root != 0 || curnpgs != 0) {
		panic("segspt_free_pages: bad large page");
		/*NOTREACHED*/
	}

	/*
	 * mark that pages have been released
	 */
	sptd->spt_realsize = 0;

	/* Give the ISM pages back to the global swap/spt accounting. */
	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		atomic_add_long(&spt_used, -npages);
		anon_swap_restore(npages);
	}
}

/*
 * Get memory allocation policy info for specified address in given segment
 */
static lgrp_mem_policy_info_t *
segspt_getpolicy(struct seg *seg, caddr_t addr)
{
	struct anon_map		*amp;
	ulong_t			anon_index;
	lgrp_mem_policy_info_t	*policy_info;
	struct spt_data		*spt_data;

	ASSERT(seg != NULL);

	/*
	 * Get anon_map from segspt
	 *
	 * Assume that no lock needs to be held on anon_map, since
	 * it should be protected by its reference count which must be
	 * nonzero for an existing segment
	 * Need to grab readers lock on policy tree though
	 */
	spt_data = (struct spt_data *)seg->s_data;
	if (spt_data == NULL)	/* no private data: no policy to report */
		return (NULL);
	amp = spt_data->spt_amp;
	ASSERT(amp->refcnt != 0);

	/*
	 * Get policy info
	 *
	 * Assume starting anon index of 0
	 */
	anon_index = seg_page(seg, addr);
	policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);

	/* May be NULL if no policy is set for this range. */
	return (policy_info);
}

/*
 * DISM only.
 * Return locked pages over a given range.
 *
 * We will cache all DISM locked pages and save the pplist for the
 * entire segment in the ppa field of the underlying DISM segment structure.
 * Later, during a call to segspt_reclaim() we will use this ppa array
 * to page_unlock() all of the pages and then we will free this ppa list.
8207c478bd9Sstevel@tonic-gate */ 8217c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 8227c478bd9Sstevel@tonic-gate static int 8237c478bd9Sstevel@tonic-gate segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len, 8247c478bd9Sstevel@tonic-gate struct page ***ppp, enum lock_type type, enum seg_rw rw) 8257c478bd9Sstevel@tonic-gate { 8267c478bd9Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 8277c478bd9Sstevel@tonic-gate struct seg *sptseg = shmd->shm_sptseg; 8287c478bd9Sstevel@tonic-gate struct spt_data *sptd = sptseg->s_data; 8297c478bd9Sstevel@tonic-gate pgcnt_t pg_idx, npages, tot_npages, npgs; 8307c478bd9Sstevel@tonic-gate struct page **pplist, **pl, **ppa, *pp; 8317c478bd9Sstevel@tonic-gate struct anon_map *amp; 8327c478bd9Sstevel@tonic-gate spgcnt_t an_idx; 8337c478bd9Sstevel@tonic-gate int ret = ENOTSUP; 8347c478bd9Sstevel@tonic-gate uint_t pl_built = 0; 8357c478bd9Sstevel@tonic-gate struct anon *ap; 8367c478bd9Sstevel@tonic-gate struct vnode *vp; 8377c478bd9Sstevel@tonic-gate u_offset_t off; 8387c478bd9Sstevel@tonic-gate pgcnt_t claim_availrmem = 0; 8397c478bd9Sstevel@tonic-gate uint_t szc; 8407c478bd9Sstevel@tonic-gate 841*dc32d872SJosef 'Jeff' Sipek ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 842a98e9dbfSaguzovsk ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK); 8437c478bd9Sstevel@tonic-gate 8447c478bd9Sstevel@tonic-gate /* 8457c478bd9Sstevel@tonic-gate * We want to lock/unlock the entire ISM segment. Therefore, 8467c478bd9Sstevel@tonic-gate * we will be using the underlying sptseg and it's base address 8477c478bd9Sstevel@tonic-gate * and length for the caching arguments. 
8487c478bd9Sstevel@tonic-gate */ 8497c478bd9Sstevel@tonic-gate ASSERT(sptseg); 8507c478bd9Sstevel@tonic-gate ASSERT(sptd); 8517c478bd9Sstevel@tonic-gate 8527c478bd9Sstevel@tonic-gate pg_idx = seg_page(seg, addr); 8537c478bd9Sstevel@tonic-gate npages = btopr(len); 8547c478bd9Sstevel@tonic-gate 8557c478bd9Sstevel@tonic-gate /* 8567c478bd9Sstevel@tonic-gate * check if the request is larger than number of pages covered 8577c478bd9Sstevel@tonic-gate * by amp 8587c478bd9Sstevel@tonic-gate */ 8597c478bd9Sstevel@tonic-gate if (pg_idx + npages > btopr(sptd->spt_amp->size)) { 8607c478bd9Sstevel@tonic-gate *ppp = NULL; 8617c478bd9Sstevel@tonic-gate return (ENOTSUP); 8627c478bd9Sstevel@tonic-gate } 8637c478bd9Sstevel@tonic-gate 8647c478bd9Sstevel@tonic-gate if (type == L_PAGEUNLOCK) { 8657c478bd9Sstevel@tonic-gate ASSERT(sptd->spt_ppa != NULL); 8667c478bd9Sstevel@tonic-gate 867a98e9dbfSaguzovsk seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size, 868a98e9dbfSaguzovsk sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim); 8697c478bd9Sstevel@tonic-gate 8707c478bd9Sstevel@tonic-gate /* 8717c478bd9Sstevel@tonic-gate * If someone is blocked while unmapping, we purge 8727c478bd9Sstevel@tonic-gate * segment page cache and thus reclaim pplist synchronously 8737c478bd9Sstevel@tonic-gate * without waiting for seg_pasync_thread. This speeds up 8747c478bd9Sstevel@tonic-gate * unmapping in cases where munmap(2) is called, while 8757c478bd9Sstevel@tonic-gate * raw async i/o is still in progress or where a thread 8767c478bd9Sstevel@tonic-gate * exits on data fault in a multithreaded application. 
8777c478bd9Sstevel@tonic-gate */ 878a98e9dbfSaguzovsk if ((sptd->spt_flags & DISM_PPA_CHANGED) || 879a98e9dbfSaguzovsk (AS_ISUNMAPWAIT(seg->s_as) && 880a98e9dbfSaguzovsk shmd->shm_softlockcnt > 0)) { 8817c478bd9Sstevel@tonic-gate segspt_purge(seg); 8827c478bd9Sstevel@tonic-gate } 8837c478bd9Sstevel@tonic-gate return (0); 8847c478bd9Sstevel@tonic-gate } 8857c478bd9Sstevel@tonic-gate 886a98e9dbfSaguzovsk /* The L_PAGELOCK case ... */ 887a98e9dbfSaguzovsk 8887c478bd9Sstevel@tonic-gate if (sptd->spt_flags & DISM_PPA_CHANGED) { 8897c478bd9Sstevel@tonic-gate segspt_purge(seg); 8907c478bd9Sstevel@tonic-gate /* 8917c478bd9Sstevel@tonic-gate * for DISM ppa needs to be rebuild since 8927c478bd9Sstevel@tonic-gate * number of locked pages could be changed 8937c478bd9Sstevel@tonic-gate */ 8947c478bd9Sstevel@tonic-gate *ppp = NULL; 8957c478bd9Sstevel@tonic-gate return (ENOTSUP); 8967c478bd9Sstevel@tonic-gate } 8977c478bd9Sstevel@tonic-gate 8987c478bd9Sstevel@tonic-gate /* 8997c478bd9Sstevel@tonic-gate * First try to find pages in segment page cache, without 9007c478bd9Sstevel@tonic-gate * holding the segment lock. 
9017c478bd9Sstevel@tonic-gate */ 902a98e9dbfSaguzovsk pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size, 903a98e9dbfSaguzovsk S_WRITE, SEGP_FORCE_WIRED); 9047c478bd9Sstevel@tonic-gate if (pplist != NULL) { 9057c478bd9Sstevel@tonic-gate ASSERT(sptd->spt_ppa != NULL); 9067c478bd9Sstevel@tonic-gate ASSERT(sptd->spt_ppa == pplist); 9077c478bd9Sstevel@tonic-gate ppa = sptd->spt_ppa; 9087c478bd9Sstevel@tonic-gate for (an_idx = pg_idx; an_idx < pg_idx + npages; ) { 9097c478bd9Sstevel@tonic-gate if (ppa[an_idx] == NULL) { 910a98e9dbfSaguzovsk seg_pinactive(seg, NULL, seg->s_base, 9117c478bd9Sstevel@tonic-gate sptd->spt_amp->size, ppa, 912a98e9dbfSaguzovsk S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim); 9137c478bd9Sstevel@tonic-gate *ppp = NULL; 9147c478bd9Sstevel@tonic-gate return (ENOTSUP); 9157c478bd9Sstevel@tonic-gate } 9167c478bd9Sstevel@tonic-gate if ((szc = ppa[an_idx]->p_szc) != 0) { 9177c478bd9Sstevel@tonic-gate npgs = page_get_pagecnt(szc); 9187c478bd9Sstevel@tonic-gate an_idx = P2ROUNDUP(an_idx + 1, npgs); 9197c478bd9Sstevel@tonic-gate } else { 9207c478bd9Sstevel@tonic-gate an_idx++; 9217c478bd9Sstevel@tonic-gate } 9227c478bd9Sstevel@tonic-gate } 9237c478bd9Sstevel@tonic-gate /* 9247c478bd9Sstevel@tonic-gate * Since we cache the entire DISM segment, we want to 9257c478bd9Sstevel@tonic-gate * set ppp to point to the first slot that corresponds 9267c478bd9Sstevel@tonic-gate * to the requested addr, i.e. pg_idx. 
9277c478bd9Sstevel@tonic-gate */ 9287c478bd9Sstevel@tonic-gate *ppp = &(sptd->spt_ppa[pg_idx]); 9297c478bd9Sstevel@tonic-gate return (0); 9307c478bd9Sstevel@tonic-gate } 9317c478bd9Sstevel@tonic-gate 9327c478bd9Sstevel@tonic-gate mutex_enter(&sptd->spt_lock); 9337c478bd9Sstevel@tonic-gate /* 9347c478bd9Sstevel@tonic-gate * try to find pages in segment page cache with mutex 9357c478bd9Sstevel@tonic-gate */ 936a98e9dbfSaguzovsk pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size, 937a98e9dbfSaguzovsk S_WRITE, SEGP_FORCE_WIRED); 9387c478bd9Sstevel@tonic-gate if (pplist != NULL) { 9397c478bd9Sstevel@tonic-gate ASSERT(sptd->spt_ppa != NULL); 9407c478bd9Sstevel@tonic-gate ASSERT(sptd->spt_ppa == pplist); 9417c478bd9Sstevel@tonic-gate ppa = sptd->spt_ppa; 9427c478bd9Sstevel@tonic-gate for (an_idx = pg_idx; an_idx < pg_idx + npages; ) { 9437c478bd9Sstevel@tonic-gate if (ppa[an_idx] == NULL) { 9447c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 945a98e9dbfSaguzovsk seg_pinactive(seg, NULL, seg->s_base, 9467c478bd9Sstevel@tonic-gate sptd->spt_amp->size, ppa, 947a98e9dbfSaguzovsk S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim); 9487c478bd9Sstevel@tonic-gate *ppp = NULL; 9497c478bd9Sstevel@tonic-gate return (ENOTSUP); 9507c478bd9Sstevel@tonic-gate } 9517c478bd9Sstevel@tonic-gate if ((szc = ppa[an_idx]->p_szc) != 0) { 9527c478bd9Sstevel@tonic-gate npgs = page_get_pagecnt(szc); 9537c478bd9Sstevel@tonic-gate an_idx = P2ROUNDUP(an_idx + 1, npgs); 9547c478bd9Sstevel@tonic-gate } else { 9557c478bd9Sstevel@tonic-gate an_idx++; 9567c478bd9Sstevel@tonic-gate } 9577c478bd9Sstevel@tonic-gate } 9587c478bd9Sstevel@tonic-gate /* 9597c478bd9Sstevel@tonic-gate * Since we cache the entire DISM segment, we want to 9607c478bd9Sstevel@tonic-gate * set ppp to point to the first slot that corresponds 9617c478bd9Sstevel@tonic-gate * to the requested addr, i.e. pg_idx. 
9627c478bd9Sstevel@tonic-gate */ 9637c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 9647c478bd9Sstevel@tonic-gate *ppp = &(sptd->spt_ppa[pg_idx]); 9657c478bd9Sstevel@tonic-gate return (0); 9667c478bd9Sstevel@tonic-gate } 967a98e9dbfSaguzovsk if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size, 968a98e9dbfSaguzovsk SEGP_FORCE_WIRED) == SEGP_FAIL) { 9697c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 9707c478bd9Sstevel@tonic-gate *ppp = NULL; 9717c478bd9Sstevel@tonic-gate return (ENOTSUP); 9727c478bd9Sstevel@tonic-gate } 9737c478bd9Sstevel@tonic-gate 9747c478bd9Sstevel@tonic-gate /* 9757c478bd9Sstevel@tonic-gate * No need to worry about protections because DISM pages are always rw. 9767c478bd9Sstevel@tonic-gate */ 9777c478bd9Sstevel@tonic-gate pl = pplist = NULL; 9787c478bd9Sstevel@tonic-gate amp = sptd->spt_amp; 9797c478bd9Sstevel@tonic-gate 9807c478bd9Sstevel@tonic-gate /* 9817c478bd9Sstevel@tonic-gate * Do we need to build the ppa array? 9827c478bd9Sstevel@tonic-gate */ 9837c478bd9Sstevel@tonic-gate if (sptd->spt_ppa == NULL) { 9847c478bd9Sstevel@tonic-gate pgcnt_t lpg_cnt = 0; 9857c478bd9Sstevel@tonic-gate 9867c478bd9Sstevel@tonic-gate pl_built = 1; 9877c478bd9Sstevel@tonic-gate tot_npages = btopr(sptd->spt_amp->size); 9887c478bd9Sstevel@tonic-gate 9897c478bd9Sstevel@tonic-gate ASSERT(sptd->spt_pcachecnt == 0); 9907c478bd9Sstevel@tonic-gate pplist = kmem_zalloc(sizeof (page_t *) * tot_npages, KM_SLEEP); 9917c478bd9Sstevel@tonic-gate pl = pplist; 9927c478bd9Sstevel@tonic-gate 9937c478bd9Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); 9947c478bd9Sstevel@tonic-gate for (an_idx = 0; an_idx < tot_npages; ) { 9957c478bd9Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, an_idx); 9967c478bd9Sstevel@tonic-gate /* 9977c478bd9Sstevel@tonic-gate * Cache only mlocked pages. 
For large pages 9987c478bd9Sstevel@tonic-gate * if one (constituent) page is mlocked 9997c478bd9Sstevel@tonic-gate * all pages for that large page 10007c478bd9Sstevel@tonic-gate * are cached also. This is for quick 10017c478bd9Sstevel@tonic-gate * lookups of ppa array; 10027c478bd9Sstevel@tonic-gate */ 10037c478bd9Sstevel@tonic-gate if ((ap != NULL) && (lpg_cnt != 0 || 10047c478bd9Sstevel@tonic-gate (sptd->spt_ppa_lckcnt[an_idx] != 0))) { 10057c478bd9Sstevel@tonic-gate 10067c478bd9Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 10077c478bd9Sstevel@tonic-gate pp = page_lookup(vp, off, SE_SHARED); 10087c478bd9Sstevel@tonic-gate ASSERT(pp != NULL); 10097c478bd9Sstevel@tonic-gate if (lpg_cnt == 0) { 1010e67882ffSbs21162 lpg_cnt++; 1011e67882ffSbs21162 /* 1012e67882ffSbs21162 * For a small page, we are done -- 1013e67882ffSbs21162 * lpg_count is reset to 0 below. 1014e67882ffSbs21162 * 1015e67882ffSbs21162 * For a large page, we are guaranteed 1016e67882ffSbs21162 * to find the anon structures of all 1017e67882ffSbs21162 * constituent pages and a non-zero 1018e67882ffSbs21162 * lpg_cnt ensures that we don't test 1019e67882ffSbs21162 * for mlock for these. We are done 1020e67882ffSbs21162 * when lpg_count reaches (npgs + 1). 1021e67882ffSbs21162 * If we are not the first constituent 1022e67882ffSbs21162 * page, restart at the first one. 
1023e67882ffSbs21162 */ 10247c478bd9Sstevel@tonic-gate npgs = page_get_pagecnt(pp->p_szc); 10257c478bd9Sstevel@tonic-gate if (!IS_P2ALIGNED(an_idx, npgs)) { 10267c478bd9Sstevel@tonic-gate an_idx = P2ALIGN(an_idx, npgs); 10277c478bd9Sstevel@tonic-gate page_unlock(pp); 10287c478bd9Sstevel@tonic-gate continue; 10297c478bd9Sstevel@tonic-gate } 10307c478bd9Sstevel@tonic-gate } 1031e67882ffSbs21162 if (++lpg_cnt > npgs) 10327c478bd9Sstevel@tonic-gate lpg_cnt = 0; 10337c478bd9Sstevel@tonic-gate 10347c478bd9Sstevel@tonic-gate /* 10357c478bd9Sstevel@tonic-gate * availrmem is decremented only 10367c478bd9Sstevel@tonic-gate * for unlocked pages 10377c478bd9Sstevel@tonic-gate */ 10387c478bd9Sstevel@tonic-gate if (sptd->spt_ppa_lckcnt[an_idx] == 0) 10397c478bd9Sstevel@tonic-gate claim_availrmem++; 10407c478bd9Sstevel@tonic-gate pplist[an_idx] = pp; 10417c478bd9Sstevel@tonic-gate } 10427c478bd9Sstevel@tonic-gate an_idx++; 10437c478bd9Sstevel@tonic-gate } 10447c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 10457c478bd9Sstevel@tonic-gate 1046a98e9dbfSaguzovsk if (claim_availrmem) { 10477c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock); 10487c478bd9Sstevel@tonic-gate if (availrmem < tune.t_minarmem + claim_availrmem) { 10497c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock); 1050a98e9dbfSaguzovsk ret = ENOTSUP; 10517c478bd9Sstevel@tonic-gate claim_availrmem = 0; 10527c478bd9Sstevel@tonic-gate goto insert_fail; 10537c478bd9Sstevel@tonic-gate } else { 10547c478bd9Sstevel@tonic-gate availrmem -= claim_availrmem; 10557c478bd9Sstevel@tonic-gate } 10567c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock); 1057a98e9dbfSaguzovsk } 10587c478bd9Sstevel@tonic-gate 10597c478bd9Sstevel@tonic-gate sptd->spt_ppa = pl; 10607c478bd9Sstevel@tonic-gate } else { 10617c478bd9Sstevel@tonic-gate /* 10627c478bd9Sstevel@tonic-gate * We already have a valid ppa[]. 
10637c478bd9Sstevel@tonic-gate */ 10647c478bd9Sstevel@tonic-gate pl = sptd->spt_ppa; 10657c478bd9Sstevel@tonic-gate } 10667c478bd9Sstevel@tonic-gate 10677c478bd9Sstevel@tonic-gate ASSERT(pl != NULL); 10687c478bd9Sstevel@tonic-gate 1069a98e9dbfSaguzovsk ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size, 1070a98e9dbfSaguzovsk sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED, 10717c478bd9Sstevel@tonic-gate segspt_reclaim); 10727c478bd9Sstevel@tonic-gate if (ret == SEGP_FAIL) { 10737c478bd9Sstevel@tonic-gate /* 10747c478bd9Sstevel@tonic-gate * seg_pinsert failed. We return 10757c478bd9Sstevel@tonic-gate * ENOTSUP, so that the as_pagelock() code will 10767c478bd9Sstevel@tonic-gate * then try the slower F_SOFTLOCK path. 10777c478bd9Sstevel@tonic-gate */ 10780da3d2a8Srd117015 if (pl_built) { 10790da3d2a8Srd117015 /* 10800da3d2a8Srd117015 * No one else has referenced the ppa[]. 10810da3d2a8Srd117015 * We created it and we need to destroy it. 10820da3d2a8Srd117015 */ 10837c478bd9Sstevel@tonic-gate sptd->spt_ppa = NULL; 10840da3d2a8Srd117015 } 10857c478bd9Sstevel@tonic-gate ret = ENOTSUP; 10867c478bd9Sstevel@tonic-gate goto insert_fail; 10877c478bd9Sstevel@tonic-gate } 10887c478bd9Sstevel@tonic-gate 10897c478bd9Sstevel@tonic-gate /* 10907c478bd9Sstevel@tonic-gate * In either case, we increment softlockcnt on the 'real' segment. 
10917c478bd9Sstevel@tonic-gate */ 10927c478bd9Sstevel@tonic-gate sptd->spt_pcachecnt++; 10931a5e258fSJosef 'Jeff' Sipek atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt))); 10947c478bd9Sstevel@tonic-gate 10957c478bd9Sstevel@tonic-gate ppa = sptd->spt_ppa; 10967c478bd9Sstevel@tonic-gate for (an_idx = pg_idx; an_idx < pg_idx + npages; ) { 10977c478bd9Sstevel@tonic-gate if (ppa[an_idx] == NULL) { 10987c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 1099a98e9dbfSaguzovsk seg_pinactive(seg, NULL, seg->s_base, 1100a98e9dbfSaguzovsk sptd->spt_amp->size, 1101a98e9dbfSaguzovsk pl, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim); 11027c478bd9Sstevel@tonic-gate *ppp = NULL; 11037c478bd9Sstevel@tonic-gate return (ENOTSUP); 11047c478bd9Sstevel@tonic-gate } 11057c478bd9Sstevel@tonic-gate if ((szc = ppa[an_idx]->p_szc) != 0) { 11067c478bd9Sstevel@tonic-gate npgs = page_get_pagecnt(szc); 11077c478bd9Sstevel@tonic-gate an_idx = P2ROUNDUP(an_idx + 1, npgs); 11087c478bd9Sstevel@tonic-gate } else { 11097c478bd9Sstevel@tonic-gate an_idx++; 11107c478bd9Sstevel@tonic-gate } 11117c478bd9Sstevel@tonic-gate } 11127c478bd9Sstevel@tonic-gate /* 11137c478bd9Sstevel@tonic-gate * We can now drop the sptd->spt_lock since the ppa[] 11147c478bd9Sstevel@tonic-gate * exists and he have incremented pacachecnt. 11157c478bd9Sstevel@tonic-gate */ 11167c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 11177c478bd9Sstevel@tonic-gate 11187c478bd9Sstevel@tonic-gate /* 11197c478bd9Sstevel@tonic-gate * Since we cache the entire segment, we want to 11207c478bd9Sstevel@tonic-gate * set ppp to point to the first slot that corresponds 11217c478bd9Sstevel@tonic-gate * to the requested addr, i.e. pg_idx. 
11227c478bd9Sstevel@tonic-gate */ 11237c478bd9Sstevel@tonic-gate *ppp = &(sptd->spt_ppa[pg_idx]); 1124a98e9dbfSaguzovsk return (0); 11257c478bd9Sstevel@tonic-gate 11267c478bd9Sstevel@tonic-gate insert_fail: 11277c478bd9Sstevel@tonic-gate /* 11287c478bd9Sstevel@tonic-gate * We will only reach this code if we tried and failed. 11297c478bd9Sstevel@tonic-gate * 11307c478bd9Sstevel@tonic-gate * And we can drop the lock on the dummy seg, once we've failed 11317c478bd9Sstevel@tonic-gate * to set up a new ppa[]. 11327c478bd9Sstevel@tonic-gate */ 11337c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 11347c478bd9Sstevel@tonic-gate 11357c478bd9Sstevel@tonic-gate if (pl_built) { 1136a98e9dbfSaguzovsk if (claim_availrmem) { 11377c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock); 11387c478bd9Sstevel@tonic-gate availrmem += claim_availrmem; 11397c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock); 1140a98e9dbfSaguzovsk } 11417c478bd9Sstevel@tonic-gate 11427c478bd9Sstevel@tonic-gate /* 11437c478bd9Sstevel@tonic-gate * We created pl and we need to destroy it. 
11447c478bd9Sstevel@tonic-gate */ 11457c478bd9Sstevel@tonic-gate pplist = pl; 11467c478bd9Sstevel@tonic-gate for (an_idx = 0; an_idx < tot_npages; an_idx++) { 11477c478bd9Sstevel@tonic-gate if (pplist[an_idx] != NULL) 11487c478bd9Sstevel@tonic-gate page_unlock(pplist[an_idx]); 11497c478bd9Sstevel@tonic-gate } 11507c478bd9Sstevel@tonic-gate kmem_free(pl, sizeof (page_t *) * tot_npages); 11517c478bd9Sstevel@tonic-gate } 11527c478bd9Sstevel@tonic-gate 11537c478bd9Sstevel@tonic-gate if (shmd->shm_softlockcnt <= 0) { 11547c478bd9Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as)) { 11557c478bd9Sstevel@tonic-gate mutex_enter(&seg->s_as->a_contents); 11567c478bd9Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as)) { 11577c478bd9Sstevel@tonic-gate AS_CLRUNMAPWAIT(seg->s_as); 11587c478bd9Sstevel@tonic-gate cv_broadcast(&seg->s_as->a_cv); 11597c478bd9Sstevel@tonic-gate } 11607c478bd9Sstevel@tonic-gate mutex_exit(&seg->s_as->a_contents); 11617c478bd9Sstevel@tonic-gate } 11627c478bd9Sstevel@tonic-gate } 11637c478bd9Sstevel@tonic-gate *ppp = NULL; 11647c478bd9Sstevel@tonic-gate return (ret); 11657c478bd9Sstevel@tonic-gate } 11667c478bd9Sstevel@tonic-gate 11677c478bd9Sstevel@tonic-gate 11687c478bd9Sstevel@tonic-gate 11697c478bd9Sstevel@tonic-gate /* 11707c478bd9Sstevel@tonic-gate * return locked pages over a given range. 11717c478bd9Sstevel@tonic-gate * 11727c478bd9Sstevel@tonic-gate * We will cache the entire ISM segment and save the pplist for the 11737c478bd9Sstevel@tonic-gate * entire segment in the ppa field of the underlying ISM segment structure. 11747c478bd9Sstevel@tonic-gate * Later, during a call to segspt_reclaim() we will use this ppa array 11757c478bd9Sstevel@tonic-gate * to page_unlock() all of the pages and then we will free this ppa list. 
 */
/*
 * Pagelock/pageunlock entry point for (non-pageable) ISM segments.
 * The entire underlying spt segment is locked and cached as a single
 * unit in the pagelock cache; *ppp is set to the slot of the cached
 * page array that corresponds to the requested addr.
 */
/*ARGSUSED*/
static int
segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg *sptseg = shmd->shm_sptseg;
	struct spt_data *sptd = sptseg->s_data;
	pgcnt_t np, page_index, npages;
	caddr_t a, spt_base;
	struct page **pplist, **pl, *pp;
	struct anon_map *amp;
	ulong_t anon_index;
	int ret = ENOTSUP;
	uint_t pl_built = 0;
	struct anon *ap;
	struct vnode *vp;
	u_offset_t off;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
	ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);


	/*
	 * We want to lock/unlock the entire ISM segment. Therefore,
	 * we will be using the underlying sptseg and its base address
	 * and length for the caching arguments.
	 */
	ASSERT(sptseg);
	ASSERT(sptd);

	/* DISM segments are handled by the pageable variant. */
	if (sptd->spt_flags & SHM_PAGEABLE) {
		return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
	}

	page_index = seg_page(seg, addr);
	npages = btopr(len);

	/*
	 * check if the request is larger than number of pages covered
	 * by amp
	 */
	if (page_index + npages > btopr(sptd->spt_amp->size)) {
		*ppp = NULL;
		return (ENOTSUP);
	}

	if (type == L_PAGEUNLOCK) {

		ASSERT(sptd->spt_ppa != NULL);

		seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size,
		    sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);

		/*
		 * If someone is blocked while unmapping, we purge
		 * segment page cache and thus reclaim pplist synchronously
		 * without waiting for seg_pasync_thread. This speeds up
		 * unmapping in cases where munmap(2) is called, while
		 * raw async i/o is still in progress or where a thread
		 * exits on data fault in a multithreaded application.
		 */
		if (AS_ISUNMAPWAIT(seg->s_as) && (shmd->shm_softlockcnt > 0)) {
			segspt_purge(seg);
		}
		return (0);
	}

	/* The L_PAGELOCK case... */

	/*
	 * First try to find pages in segment page cache, without
	 * holding the segment lock.
	 */
	pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    S_WRITE, SEGP_FORCE_WIRED);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa == pplist);
		ASSERT(sptd->spt_ppa[page_index]);
		/*
		 * Since we cache the entire ISM segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. page_index.
		 */
		*ppp = &(sptd->spt_ppa[page_index]);
		return (0);
	}

	mutex_enter(&sptd->spt_lock);

	/*
	 * try to find pages in segment page cache
	 * (again, now that we hold spt_lock — another thread may have
	 * inserted the array between the lockless lookup and here)
	 */
	pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    S_WRITE, SEGP_FORCE_WIRED);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa == pplist);
		/*
		 * Since we cache the entire segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. page_index.
		 */
		mutex_exit(&sptd->spt_lock);
		*ppp = &(sptd->spt_ppa[page_index]);
		return (0);
	}

	if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    SEGP_FORCE_WIRED) == SEGP_FAIL) {
		mutex_exit(&sptd->spt_lock);
		*ppp = NULL;
		return (ENOTSUP);
	}

	/*
	 * No need to worry about protections because ISM pages
	 * are always rw.
	 */
	pl = pplist = NULL;

	/*
	 * Do we need to build the ppa array?
	 */
	if (sptd->spt_ppa == NULL) {
		ASSERT(sptd->spt_ppa == pplist);

		spt_base = sptseg->s_base;
		pl_built = 1;

		/*
		 * availrmem is decremented once during anon_swap_adjust()
		 * and is incremented during the anon_unresv(), which is
		 * called from shm_rm_amp() when the segment is destroyed.
		 */
		amp = sptd->spt_amp;
		ASSERT(amp != NULL);

		/* pcachecnt is protected by sptd->spt_lock */
		ASSERT(sptd->spt_pcachecnt == 0);
		pplist = kmem_zalloc(sizeof (page_t *)
		    * btopr(sptd->spt_amp->size), KM_SLEEP);
		pl = pplist;

		anon_index = seg_page(sptseg, spt_base);

		/*
		 * Walk every anon slot of the segment, translating each
		 * to its swap vnode/offset and collecting the (already
		 * SE_SHARED locked) page into the new array.
		 */
		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
		for (a = spt_base; a < (spt_base + sptd->spt_amp->size);
		    a += PAGESIZE, anon_index++, pplist++) {
			ap = anon_get_ptr(amp->ahp, anon_index);
			ASSERT(ap != NULL);
			swap_xlate(ap, &vp, &off);
			pp = page_lookup(vp, off, SE_SHARED);
			ASSERT(pp != NULL);
			*pplist = pp;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);

		if (a < (spt_base + sptd->spt_amp->size)) {
			ret = ENOTSUP;
			goto insert_fail;
		}
		sptd->spt_ppa = pl;
	} else {
		/*
		 * We already have a valid ppa[].
		 */
		pl = sptd->spt_ppa;
	}

	ASSERT(pl != NULL);

	ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED,
	    segspt_reclaim);
	if (ret == SEGP_FAIL) {
		/*
		 * seg_pinsert failed. We return
		 * ENOTSUP, so that the as_pagelock() code will
		 * then try the slower F_SOFTLOCK path.
		 */
		if (pl_built) {
			/*
			 * No one else has referenced the ppa[].
			 * We created it and we need to destroy it.
			 */
			sptd->spt_ppa = NULL;
		}
		ret = ENOTSUP;
		goto insert_fail;
	}

	/*
	 * In either case, we increment softlockcnt on the 'real' segment.
	 */
	sptd->spt_pcachecnt++;
	atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));

	/*
	 * We can now drop the sptd->spt_lock since the ppa[]
	 * exists and we have incremented pcachecnt.
	 */
	mutex_exit(&sptd->spt_lock);

	/*
	 * Since we cache the entire segment, we want to
	 * set ppp to point to the first slot that corresponds
	 * to the requested addr, i.e. page_index.
	 */
	*ppp = &(sptd->spt_ppa[page_index]);
	return (0);

insert_fail:
	/*
	 * We will only reach this code if we tried and failed.
	 *
	 * And we can drop the lock on the dummy seg, once we've failed
	 * to set up a new ppa[].
	 */
	mutex_exit(&sptd->spt_lock);

	if (pl_built) {
		/*
		 * We created pl and we need to destroy it.
		 * Unlock only the pages actually collected (a marks how
		 * far the build loop got) before freeing the array.
		 */
		pplist = pl;
		np = (((uintptr_t)(a - spt_base)) >> PAGESHIFT);
		while (np) {
			page_unlock(*pplist);
			np--;
			pplist++;
		}
		kmem_free(pl, sizeof (page_t *) * btopr(sptd->spt_amp->size));
	}
	if (shmd->shm_softlockcnt <= 0) {
		if (AS_ISUNMAPWAIT(seg->s_as)) {
			mutex_enter(&seg->s_as->a_contents);
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				AS_CLRUNMAPWAIT(seg->s_as);
				cv_broadcast(&seg->s_as->a_cv);
			}
			mutex_exit(&seg->s_as->a_contents);
		}
	}
	*ppp = NULL;
	return (ret);
}

/*
 * purge any cached pages in the I/O page cache
 */
static void
segspt_purge(struct seg *seg)
{
	seg_ppurge(seg, NULL, SEGP_FORCE_WIRED);
}

/*
 * Pagelock-cache reclaim callback for ISM segments (registered via
 * seg_pinsert()/seg_pinactive() above).  Drops one pcachecnt reference;
 * when the last reference goes away it unlocks all cached pages,
 * returns availrmem for unlocked DISM pages, and destroys the ppa[].
 * Returns 1 if the ppa[] was destroyed, 0 otherwise.
 */
static int
segspt_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist,
    enum seg_rw rw, int async)
{
	struct seg *seg = (struct seg *)ptag;
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg *sptseg;
	struct spt_data *sptd;
	pgcnt_t npages, i, free_availrmem = 0;
	int done = 0;

#ifdef lint
	addr = addr;
#endif
	sptseg = shmd->shm_sptseg;
	sptd = sptseg->s_data;
	npages = (len >> PAGESHIFT);
	ASSERT(npages);
	ASSERT(sptd->spt_pcachecnt != 0);
	ASSERT(sptd->spt_ppa == pplist);
	ASSERT(npages == btopr(sptd->spt_amp->size));
	ASSERT(async || AS_LOCK_HELD(seg->s_as));

	/*
	 * Acquire the lock on the dummy seg and destroy the
	 * ppa array IF this is the last pcachecnt.
	 */
	mutex_enter(&sptd->spt_lock);
	if (--sptd->spt_pcachecnt == 0) {
		for (i = 0; i < npages; i++) {
			if (pplist[i] == NULL) {
				continue;
			}
			/* propagate ref/mod state to the hat before unlock */
			if (rw == S_WRITE) {
				hat_setrefmod(pplist[i]);
			} else {
				hat_setref(pplist[i]);
			}
			if ((sptd->spt_flags & SHM_PAGEABLE) &&
			    (sptd->spt_ppa_lckcnt[i] == 0))
				free_availrmem++;
			page_unlock(pplist[i]);
		}
		if ((sptd->spt_flags & SHM_PAGEABLE) && free_availrmem) {
			mutex_enter(&freemem_lock);
			availrmem += free_availrmem;
			mutex_exit(&freemem_lock);
		}
		/*
		 * Since we want to cache/uncache the entire ISM segment,
		 * we will track the pplist in a segspt specific field
		 * ppa, that is initialized at the time we add an entry to
		 * the cache.
		 */
		ASSERT(sptd->spt_pcachecnt == 0);
		kmem_free(pplist, sizeof (page_t *) * npages);
		sptd->spt_ppa = NULL;
		sptd->spt_flags &= ~DISM_PPA_CHANGED;
		sptd->spt_gen++;
		cv_broadcast(&sptd->spt_cv);
		done = 1;
	}
	mutex_exit(&sptd->spt_lock);

	/*
	 * If we are pcache async thread or called via seg_ppurge_wiredpp() we
	 * may not hold AS lock (in this case async argument is not 0). This
	 * means if softlockcnt drops to 0 after the decrement below address
	 * space may get freed. We can't allow it since after softlock
	 * decrement to 0 we still need to access as structure for possible
	 * wakeup of unmap waiters. To prevent the disappearance of as we take
	 * this segment's shm_segfree_syncmtx. segspt_shmfree() also takes
	 * this mutex as a barrier to make sure this routine completes before
	 * segment is freed.
	 *
	 * The second complication we have to deal with in async case is a
	 * possibility of missed wake up of unmap wait thread. When we don't
	 * hold as lock here we may take a_contents lock before unmap wait
	 * thread that was first to see softlockcnt was still not 0. As a
	 * result we'll fail to wake up an unmap wait thread. To avoid this
	 * race we set nounmapwait flag in as structure if we drop softlockcnt
	 * to 0 if async is not 0. unmapwait thread
	 * will not block if this flag is set.
	 */
	if (async)
		mutex_enter(&shmd->shm_segfree_syncmtx);

	/*
	 * Now decrement softlockcnt.
	 */
	ASSERT(shmd->shm_softlockcnt > 0);
	atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));

	if (shmd->shm_softlockcnt <= 0) {
		if (async || AS_ISUNMAPWAIT(seg->s_as)) {
			mutex_enter(&seg->s_as->a_contents);
			if (async)
				AS_SETNOUNMAPWAIT(seg->s_as);
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				AS_CLRUNMAPWAIT(seg->s_as);
				cv_broadcast(&seg->s_as->a_cv);
			}
			mutex_exit(&seg->s_as->a_contents);
		}
	}

	if (async)
		mutex_exit(&shmd->shm_segfree_syncmtx);

	return (done);
}

/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 *
 * The calls to acquire and release the anon map lock mutex were
 * removed in order to avoid a deadly embrace during a DR
 * memory delete operation. (Eg.
 * DR blocks while waiting for a
 * exclusive lock on a page that is being used for kaio; the
 * thread that will complete the kaio and call segspt_softunlock
 * blocks on the anon map lock; another thread holding the anon
 * map lock blocks on another page lock via the segspt_shmfault
 * -> page_lookup -> page_lookup_create -> page_lock_es code flow.)
 *
 * The appropriateness of the removal is based upon the following:
 * 1. If we are holding a segment's reader lock and the page is held
 * shared, then the corresponding element in anonmap which points to
 * anon struct cannot change and there is no need to acquire the
 * anonymous map lock.
 * 2. Threads in segspt_softunlock have a reader lock on the segment
 * and already have the shared page lock, so we are guaranteed that
 * the anon map slot cannot change and therefore can call anon_get_ptr()
 * without grabbing the anonymous map lock.
 * 3. Threads that softlock a shared page break copy-on-write, even if
 * it's a read. Thus cow faults can be ignored with respect to soft
 * unlocking, since the breaking of cow means that the anon slot(s) will
 * not be shared.
 */
static void
segspt_softunlock(struct seg *seg, caddr_t sptseg_addr,
    size_t len, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg *sptseg;
	struct spt_data *sptd;
	page_t *pp;
	caddr_t adr;
	struct vnode *vp;
	u_offset_t offset;
	ulong_t anon_index;
	struct anon_map *amp;		/* XXX - for locknest */
	struct anon *ap = NULL;
	pgcnt_t npages;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	sptseg = shmd->shm_sptseg;
	sptd = sptseg->s_data;

	/*
	 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
	 * and therefore their pages are SE_SHARED locked
	 * for the entire life of the segment.  In that case there is
	 * nothing to unlock here beyond the softlock accounting.
	 */
	if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
	    ((sptd->spt_flags & SHM_PAGEABLE) == 0)) {
		goto softlock_decrement;
	}

	/*
	 * Any thread is free to do a page_find and
	 * page_unlock() on the pages within this seg.
	 *
	 * We are already holding the as->a_lock on the user's
	 * real segment, but we need to hold the a_lock on the
	 * underlying dummy as. This is mostly to satisfy the
	 * underlying HAT layer.
	 */
	AS_LOCK_ENTER(sptseg->s_as, RW_READER);
	hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len);
	AS_LOCK_EXIT(sptseg->s_as);

	amp = sptd->spt_amp;
	ASSERT(amp != NULL);
	anon_index = seg_page(sptseg, sptseg_addr);

	for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) {
		ap = anon_get_ptr(amp->ahp, anon_index++);
		ASSERT(ap != NULL);
		swap_xlate(ap, &vp, &offset);

		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has a
		 * "shared" lock.
		 */
		pp = page_find(vp, offset);
		ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1));
		if (pp == NULL) {
			panic("segspt_softunlock: "
			    "addr %p, ap %p, vp %p, off %llx",
			    (void *)adr, (void *)ap, (void *)vp, offset);
			/*NOTREACHED*/
		}

		/* propagate ref/mod state to the hat before dropping lock */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			hat_setref(pp);
		}
		page_unlock(pp);
	}

softlock_decrement:
	npages = btopr(len);
	ASSERT(shmd->shm_softlockcnt >= npages);
	atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -npages);
	if (shmd->shm_softlockcnt == 0) {
		/*
		 * All SOFTLOCKS are gone. Wakeup any waiting
		 * unmappers so they can try again to unmap.
		 * Check for waiters first without the mutex
		 * held so we don't always grab the mutex on
		 * softunlocks.
		 */
		if (AS_ISUNMAPWAIT(seg->s_as)) {
			mutex_enter(&seg->s_as->a_contents);
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				AS_CLRUNMAPWAIT(seg->s_as);
				cv_broadcast(&seg->s_as->a_cv);
			}
			mutex_exit(&seg->s_as->a_contents);
		}
	}
}

/*
 * Attach a process to the shared page-table segment: allocate the
 * per-attach shm_data, set a default lgroup memory policy, share the
 * spt HAT mappings into this address space via hat_share(), and take a
 * reference on the anon map.  Returns 0 or an errno (ENOMEM, or the
 * hat_share() failure code).
 */
int
segspt_shmattach(struct seg *seg, caddr_t *argsp)
{
	struct shm_data *shmd_arg = (struct shm_data *)argsp;
	struct shm_data *shmd;
	struct anon_map *shm_amp = shmd_arg->shm_amp;
	struct spt_data *sptd;
	int error = 0;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP);
	if (shmd == NULL)
		return (ENOMEM);

	shmd->shm_sptas = shmd_arg->shm_sptas;
	shmd->shm_amp = shm_amp;
	shmd->shm_sptseg = shmd_arg->shm_sptseg;

	(void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0,
	    NULL, 0, seg->s_size);

	mutex_init(&shmd->shm_segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL);

	seg->s_data = (void *)shmd;
	seg->s_ops = &segspt_shmops;
	seg->s_szc = shmd->shm_sptseg->s_szc;
	sptd = shmd->shm_sptseg->s_data;

	if (sptd->spt_flags & SHM_PAGEABLE) {
		/* DISM needs a per-page vpage array for lock accounting */
		if ((shmd->shm_vpage = kmem_zalloc(btopr(shm_amp->size),
		    KM_NOSLEEP)) == NULL) {
			seg->s_data = (void *)NULL;
			kmem_free(shmd, (sizeof (*shmd)));
			return (ENOMEM);
		}
		shmd->shm_lckpgs = 0;
		if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
			if ((error = hat_share(seg->s_as->a_hat, seg->s_base,
			    shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
			    seg->s_size, seg->s_szc)) != 0) {
				kmem_free(shmd->shm_vpage,
				    btopr(shm_amp->size));
			}
		}
	} else {
		error = hat_share(seg->s_as->a_hat, seg->s_base,
		    shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
		    seg->s_size, seg->s_szc);
	}
	if (error) {
		seg->s_szc = 0;
		seg->s_data = (void *)NULL;
		kmem_free(shmd, (sizeof (*shmd)));
	} else {
		ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
		shm_amp->refcnt++;
		ANON_LOCK_EXIT(&shm_amp->a_rwlock);
	}
	return (error);
}

/*
 * Unmap (detach) the whole segment from this address space.  Only a
 * full-segment unmap is supported (ssize must equal s_size); partial
 * requests return EINVAL.  Returns EAGAIN if softlocked pages remain
 * after one synchronous purge of the pagelock cache.
 */
int
segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	int reclaim = 1;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
retry:
	if (shmd->shm_softlockcnt > 0) {
		/*
		 * Purge the pagelock cache once to reclaim cached page
		 * lists synchronously, then re-check before giving up.
		 */
		if (reclaim == 1) {
			segspt_purge(seg);
			reclaim = 0;
			goto retry;
		}
		return (EAGAIN);
	}

	if (ssize != seg->s_size) {
#ifdef DEBUG
		cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n",
		    ssize, seg->s_size);
#endif
		return (EINVAL);
	}

	(void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK,
	    NULL, 0);
	hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc);

	seg_free(seg);

	return (0);
}

/*
 * Free the per-attach state of a shared page-table segment: drop the
 * anon map reference taken in segspt_shmattach() and release shm_data.
 */
void
segspt_shmfree(struct seg *seg)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
struct anon_map *shm_amp = shmd->shm_amp; 17717c478bd9Sstevel@tonic-gate 1772*dc32d872SJosef 'Jeff' Sipek ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 17737c478bd9Sstevel@tonic-gate 17747c478bd9Sstevel@tonic-gate (void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0, 17757c478bd9Sstevel@tonic-gate MC_UNLOCK, NULL, 0); 17767c478bd9Sstevel@tonic-gate 17777c478bd9Sstevel@tonic-gate /* 17787c478bd9Sstevel@tonic-gate * Need to increment refcnt when attaching 17797c478bd9Sstevel@tonic-gate * and decrement when detaching because of dup(). 17807c478bd9Sstevel@tonic-gate */ 17817c478bd9Sstevel@tonic-gate ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER); 17827c478bd9Sstevel@tonic-gate shm_amp->refcnt--; 17837c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&shm_amp->a_rwlock); 17847c478bd9Sstevel@tonic-gate 17857c478bd9Sstevel@tonic-gate if (shmd->shm_vpage) { /* only for DISM */ 17867c478bd9Sstevel@tonic-gate kmem_free(shmd->shm_vpage, btopr(shm_amp->size)); 17877c478bd9Sstevel@tonic-gate shmd->shm_vpage = NULL; 17887c478bd9Sstevel@tonic-gate } 1789a98e9dbfSaguzovsk 1790a98e9dbfSaguzovsk /* 1791a98e9dbfSaguzovsk * Take shm_segfree_syncmtx lock to let segspt_reclaim() finish if it's 1792a98e9dbfSaguzovsk * still working with this segment without holding as lock. 
1793a98e9dbfSaguzovsk */ 1794a98e9dbfSaguzovsk ASSERT(shmd->shm_softlockcnt == 0); 1795a98e9dbfSaguzovsk mutex_enter(&shmd->shm_segfree_syncmtx); 1796a98e9dbfSaguzovsk mutex_destroy(&shmd->shm_segfree_syncmtx); 1797a98e9dbfSaguzovsk 17987c478bd9Sstevel@tonic-gate kmem_free(shmd, sizeof (*shmd)); 17997c478bd9Sstevel@tonic-gate } 18007c478bd9Sstevel@tonic-gate 18017c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 18027c478bd9Sstevel@tonic-gate int 18037c478bd9Sstevel@tonic-gate segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 18047c478bd9Sstevel@tonic-gate { 1805*dc32d872SJosef 'Jeff' Sipek ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 18067c478bd9Sstevel@tonic-gate 18077c478bd9Sstevel@tonic-gate /* 18087c478bd9Sstevel@tonic-gate * Shared page table is more than shared mapping. 18097c478bd9Sstevel@tonic-gate * Individual process sharing page tables can't change prot 18107c478bd9Sstevel@tonic-gate * because there is only one set of page tables. 18117c478bd9Sstevel@tonic-gate * This will be allowed after private page table is 18127c478bd9Sstevel@tonic-gate * supported. 18137c478bd9Sstevel@tonic-gate */ 18147c478bd9Sstevel@tonic-gate /* need to return correct status error? 
	 */
	return (0);
}


/*
 * Fault handler for DISM (pageable, SHM_PAGEABLE) shared segments.
 * F_SOFTLOCK/F_INVAL fault the needed anon pages into the underlying
 * spt ("dummy") segment via spt_anon_getpages() and load translations
 * in large-page-aligned chunks; F_SOFTUNLOCK and F_PROT are trivial.
 */
faultcode_t
segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
    size_t len, enum fault_type type, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg *sptseg = shmd->shm_sptseg;	/* underlying dummy seg */
	struct as *curspt = shmd->shm_sptas;
	struct spt_data *sptd = sptseg->s_data;
	pgcnt_t npages;
	size_t size;
	caddr_t segspt_addr, shm_addr;
	page_t **ppa;
	int i;
	ulong_t an_idx = 0;
	int err = 0;
	int dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0);
	size_t pgsz;
	pgcnt_t pgcnt;
	caddr_t a;
	pgcnt_t pidx;

#ifdef lint
	hat = hat;
#endif
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/*
	 * Because of the way spt is implemented
	 * the realsize of the segment does not have to be
	 * equal to the segment size itself. The segment size is
	 * often in multiples of a page size larger than PAGESIZE.
	 * The realsize is rounded up to the nearest PAGESIZE
	 * based on what the user requested. This is a bit of
	 * ugliness that is historical but not easily fixed
	 * without re-designing the higher levels of ISM.
	 */
	ASSERT(addr >= seg->s_base);
	if (((addr + len) - seg->s_base) > sptd->spt_realsize)
		return (FC_NOMAP);
	/*
	 * For all of the following cases except F_PROT, we need to
	 * make any necessary adjustments to addr and len
	 * and get all of the necessary page_t's into an array called ppa[].
	 *
	 * The code in shmat() forces base addr and len of ISM segment
	 * to be aligned to largest page size supported. Therefore,
	 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
	 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
	 * in large pagesize chunks, or else we will screw up the HAT
	 * layer by calling hat_memload_array() with differing page sizes
	 * over a given virtual range.
	 */
	pgsz = page_get_pagesize(sptseg->s_szc);
	pgcnt = page_get_pagecnt(sptseg->s_szc);
	shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
	size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
	npages = btopr(size);

	/*
	 * Now we need to convert from addr in segshm to addr in segspt.
	 */
	an_idx = seg_page(seg, shm_addr);
	segspt_addr = sptseg->s_base + ptob(an_idx);

	ASSERT((segspt_addr + ptob(npages)) <=
	    (sptseg->s_base + sptd->spt_realsize));
	ASSERT(segspt_addr < (sptseg->s_base + sptseg->s_size));

	switch (type) {

	case F_SOFTLOCK:

		atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
		/*
		 * Fall through to the F_INVAL case to load up the hat layer
		 * entries with the HAT_LOAD_LOCK flag.
		 */
		/* FALLTHRU */
	case F_INVAL:

		if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
			return (FC_NOMAP);

		ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);

		err = spt_anon_getpages(sptseg, segspt_addr, size, ppa);
		if (err != 0) {
			/* Undo the SOFTLOCK accounting done above. */
			if (type == F_SOFTLOCK) {
				atomic_add_long((ulong_t *)(
				    &(shmd->shm_softlockcnt)), -npages);
			}
			goto dism_err;
		}
		AS_LOCK_ENTER(sptseg->s_as, RW_READER);
		a = segspt_addr;
		pidx = 0;
		if (type == F_SOFTLOCK) {

			/*
			 * Load up the translation keeping it
			 * locked and don't unlock the page.
			 */
			for (; pidx < npages; a += pgsz, pidx += pgcnt) {
				hat_memload_array(sptseg->s_as->a_hat,
				    a, pgsz, &ppa[pidx], sptd->spt_prot,
				    HAT_LOAD_LOCK | HAT_LOAD_SHARE);
			}
		} else {
			if (hat == seg->s_as->a_hat) {

				/*
				 * Migrate pages marked for migration
				 */
				if (lgrp_optimizations())
					page_migrate(seg, shm_addr, ppa,
					    npages);

				/* CPU HAT */
				for (; pidx < npages;
				    a += pgsz, pidx += pgcnt) {
					hat_memload_array(sptseg->s_as->a_hat,
					    a, pgsz, &ppa[pidx],
					    sptd->spt_prot,
					    HAT_LOAD_SHARE);
				}
			} else {
				/* XHAT. Pass real address */
				hat_memload_array(hat, shm_addr,
				    size, ppa, sptd->spt_prot, HAT_LOAD_SHARE);
			}

			/*
			 * And now drop the SE_SHARED lock(s).
			 */
			if (dyn_ism_unmap) {
				for (i = 0; i < npages; i++) {
					page_unlock(ppa[i]);
				}
			}
		}

		if (!dyn_ism_unmap) {
			if (hat_share(seg->s_as->a_hat, shm_addr,
			    curspt->a_hat, segspt_addr, ptob(npages),
			    seg->s_szc) != 0) {
				panic("hat_share err in DISM fault");
				/* NOTREACHED */
			}
			if (type == F_INVAL) {
				for (i = 0; i < npages; i++) {
					page_unlock(ppa[i]);
				}
			}
		}
		AS_LOCK_EXIT(sptseg->s_as);
dism_err:
		kmem_free(ppa, npages * sizeof (page_t *));
		return (err);

	case F_SOFTUNLOCK:

		/*
		 * This is a bit ugly, we pass in the real seg pointer,
		 * but the segspt_addr is the virtual address within the
		 * dummy seg.
		 */
		segspt_softunlock(seg, segspt_addr, size, rw);
		return (0);

	case F_PROT:

		/*
		 * This takes care of the unusual case where a user
		 * allocates a stack in shared memory and a register
		 * window overflow is written to that stack page before
		 * it is otherwise modified.
		 *
		 * We can get away with this because ISM segments are
		 * always rw. Other than this unusual case, there
		 * should be no instances of protection violations.
		 */
		return (0);

	default:
#ifdef DEBUG
		panic("segspt_dismfault default type?");
#else
		return (FC_NOMAP);
#endif
	}
}


/*
 * Fault handler for ISM shared segments; DISM (SHM_PAGEABLE) faults are
 * redirected to segspt_dismfault().  F_SOFTLOCK/F_INVAL look up the
 * already-allocated anon pages of the underlying spt segment and load
 * translations in large-page chunks.
 */
faultcode_t
segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
    size_t len, enum fault_type type, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg *sptseg = shmd->shm_sptseg;	/* underlying dummy seg */
	struct as *curspt = shmd->shm_sptas;
	struct spt_data *sptd = sptseg->s_data;
	pgcnt_t npages;
	size_t size;
	caddr_t sptseg_addr, shm_addr;
	page_t *pp, **ppa;
	int i;
	u_offset_t offset;
	ulong_t anon_index = 0;
	struct vnode *vp;
	struct anon_map *amp;		/* XXX - for locknest */
	struct anon *ap = NULL;
	size_t pgsz;
	pgcnt_t pgcnt;
	caddr_t a;
	pgcnt_t pidx;
	size_t sz;

#ifdef lint
	hat = hat;
#endif

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	if (sptd->spt_flags & SHM_PAGEABLE) {
		return (segspt_dismfault(hat, seg, addr, len, type, rw));
	}

	/*
	 * Because of the way spt is implemented
	 * the realsize of the segment does not have to be
	 * equal to the segment size itself. The segment size is
	 * often in multiples of a page size larger than PAGESIZE.
	 * The realsize is rounded up to the nearest PAGESIZE
	 * based on what the user requested. This is a bit of
	 * ugliness that is historical but not easily fixed
	 * without re-designing the higher levels of ISM.
	 */
	ASSERT(addr >= seg->s_base);
	if (((addr + len) - seg->s_base) > sptd->spt_realsize)
		return (FC_NOMAP);
	/*
	 * For all of the following cases except F_PROT, we need to
	 * make any necessary adjustments to addr and len
	 * and get all of the necessary page_t's into an array called ppa[].
	 *
	 * The code in shmat() forces base addr and len of ISM segment
	 * to be aligned to largest page size supported. Therefore,
	 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
	 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
	 * in large pagesize chunks, or else we will screw up the HAT
	 * layer by calling hat_memload_array() with differing page sizes
	 * over a given virtual range.
	 */
	pgsz = page_get_pagesize(sptseg->s_szc);
	pgcnt = page_get_pagecnt(sptseg->s_szc);
	shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
	size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
	npages = btopr(size);

	/*
	 * Now we need to convert from addr in segshm to addr in segspt.
	 */
	anon_index = seg_page(seg, shm_addr);
	sptseg_addr = sptseg->s_base + ptob(anon_index);

	/*
	 * And now we may have to adjust npages downward if we have
	 * exceeded the realsize of the segment or initial anon
	 * allocations.
	 */
	if ((sptseg_addr + ptob(npages)) >
	    (sptseg->s_base + sptd->spt_realsize))
		size = (sptseg->s_base + sptd->spt_realsize) - sptseg_addr;

	npages = btopr(size);

	ASSERT(sptseg_addr < (sptseg->s_base + sptseg->s_size));
	ASSERT((sptd->spt_flags & SHM_PAGEABLE) == 0);

	switch (type) {

	case F_SOFTLOCK:

		/*
		 * availrmem is decremented once during anon_swap_adjust()
		 * and is incremented during the anon_unresv(), which is
		 * called from shm_rm_amp() when the segment is destroyed.
		 */
		atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
		/*
		 * Some platforms assume that ISM pages are SE_SHARED
		 * locked for the entire life of the segment.
		 */
		if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0))
			return (0);
		/*
		 * Fall through to the F_INVAL case to load up the hat layer
		 * entries with the HAT_LOAD_LOCK flag.
		 */

		/* FALLTHRU */
	case F_INVAL:

		if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
			return (FC_NOMAP);

		/*
		 * Some platforms that do NOT support DYNAMIC_ISM_UNMAP
		 * may still rely on this call to hat_share(). That
		 * would imply that those hat's can fault on a
		 * HAT_LOAD_LOCK translation, which would seem
		 * contradictory.
		 */
		if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
			if (hat_share(seg->s_as->a_hat, seg->s_base,
			    curspt->a_hat, sptseg->s_base,
			    sptseg->s_size, sptseg->s_szc) != 0) {
				panic("hat_share error in ISM fault");
				/*NOTREACHED*/
			}
			return (0);
		}
		ppa = kmem_zalloc(sizeof (page_t *) * npages, KM_SLEEP);

		/*
		 * I see no need to lock the real seg,
		 * here, because all of our work will be on the underlying
		 * dummy seg.
		 *
		 * sptseg_addr and npages now account for large pages.
		 */
		amp = sptd->spt_amp;
		ASSERT(amp != NULL);
		anon_index = seg_page(sptseg, sptseg_addr);

		/* Look up and SE_SHARED-lock each constituent page. */
		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
		for (i = 0; i < npages; i++) {
			ap = anon_get_ptr(amp->ahp, anon_index++);
			ASSERT(ap != NULL);
			swap_xlate(ap, &vp, &offset);
			pp = page_lookup(vp, offset, SE_SHARED);
			ASSERT(pp != NULL);
			ppa[i] = pp;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);
		ASSERT(i == npages);

		/*
		 * We are already holding the as->a_lock on the user's
		 * real segment, but we need to hold the a_lock on the
		 * underlying dummy as. This is mostly to satisfy the
		 * underlying HAT layer.
		 */
		AS_LOCK_ENTER(sptseg->s_as, RW_READER);
		a = sptseg_addr;
		pidx = 0;
		if (type == F_SOFTLOCK) {
			/*
			 * Load up the translation keeping it
			 * locked and don't unlock the page.
			 */
			for (; pidx < npages; a += pgsz, pidx += pgcnt) {
				sz = MIN(pgsz, ptob(npages - pidx));
				hat_memload_array(sptseg->s_as->a_hat, a,
				    sz, &ppa[pidx], sptd->spt_prot,
				    HAT_LOAD_LOCK | HAT_LOAD_SHARE);
			}
		} else {
			if (hat == seg->s_as->a_hat) {

				/*
				 * Migrate pages marked for migration.
				 */
				if (lgrp_optimizations())
					page_migrate(seg, shm_addr, ppa,
					    npages);

				/* CPU HAT */
				for (; pidx < npages;
				    a += pgsz, pidx += pgcnt) {
					sz = MIN(pgsz, ptob(npages - pidx));
					hat_memload_array(sptseg->s_as->a_hat,
					    a, sz, &ppa[pidx],
					    sptd->spt_prot, HAT_LOAD_SHARE);
				}
			} else {
				/* XHAT. Pass real address */
				hat_memload_array(hat, shm_addr,
				    ptob(npages), ppa, sptd->spt_prot,
				    HAT_LOAD_SHARE);
			}

			/*
			 * And now drop the SE_SHARED lock(s).
			 */
			for (i = 0; i < npages; i++)
				page_unlock(ppa[i]);
		}
		AS_LOCK_EXIT(sptseg->s_as);

		kmem_free(ppa, sizeof (page_t *) * npages);
		return (0);
	case F_SOFTUNLOCK:

		/*
		 * This is a bit ugly, we pass in the real seg pointer,
		 * but the sptseg_addr is the virtual address within the
		 * dummy seg.
		 */
		segspt_softunlock(seg, sptseg_addr, ptob(npages), rw);
		return (0);

	case F_PROT:

		/*
		 * This takes care of the unusual case where a user
		 * allocates a stack in shared memory and a register
		 * window overflow is written to that stack page before
		 * it is otherwise modified.
		 *
		 * We can get away with this because ISM segments are
		 * always rw. Other than this unusual case, there
		 * should be no instances of protection violations.
		 */
		return (0);

	default:
#ifdef DEBUG
		cmn_err(CE_WARN, "segspt_shmfault default type?");
#endif
		return (FC_NOMAP);
	}
}

/* Fault-ahead is a no-op for shared page table segments. */
/*ARGSUSED*/
static faultcode_t
segspt_shmfaulta(struct seg *seg, caddr_t addr)
{
	return (0);
}

/* Read-ahead clustering is not used for shared page table segments. */
/*ARGSUSED*/
static int
segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

/* Swapout is a no-op here; report zero bytes released. */
/*ARGSUSED*/
static size_t
segspt_shmswapout(struct seg *seg)
{
	return (0);
}

/*
 * duplicate the shared page tables
 *
 * fork() support: give newseg its own shm_data referencing the same
 * underlying spt segment and anon_map, take an additional anon_map
 * reference (dropped in segspt_shmfree()), and re-establish HAT sharing
 * for the child where the platform requires it.
 */
int
segspt_shmdup(struct seg *seg, struct seg *newseg)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct anon_map *amp = shmd->shm_amp;
	struct shm_data *shmd_new;
	struct seg *spt_seg = shmd->shm_sptseg;
	struct spt_data *sptd = spt_seg->s_data;
	int error = 0;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP);
	newseg->s_data = (void *)shmd_new;
	shmd_new->shm_sptas = shmd->shm_sptas;
	shmd_new->shm_amp = amp;
	shmd_new->shm_sptseg = shmd->shm_sptseg;
	newseg->s_ops = &segspt_shmops;
	newseg->s_szc = seg->s_szc;
	ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc);

	/* Matches the refcnt-- in segspt_shmfree() on detach. */
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
	amp->refcnt++;
	ANON_LOCK_EXIT(&amp->a_rwlock);

	if (sptd->spt_flags & SHM_PAGEABLE) {
		/* DISM: child needs its own per-page lock-state array. */
		shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP);
		shmd_new->shm_lckpgs = 0;
		if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
			if ((error = hat_share(newseg->s_as->a_hat,
			    newseg->s_base, shmd->shm_sptas->a_hat, SEGSPTADDR,
			    seg->s_size, seg->s_szc)) != 0) {
				kmem_free(shmd_new->shm_vpage,
				    btopr(amp->size));
			}
		}
		return (error);
	} else {
		return (hat_share(newseg->s_as->a_hat, newseg->s_base,
		    shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size,
		    seg->s_szc));

	}
}

/*ARGSUSED*/
int
segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/*
	 * ISM segment is always rw.
	 * Succeed only if every requested prot bit is present in spt_prot.
	 */
	return (((sptd->spt_prot & prot) != prot) ? EACCES : 0);
}

/*
 * Return an array of locked large pages, for empty slots allocate
 * private zero-filled anon pages.
 */
static int
spt_anon_getpages(
	struct seg *sptseg,
	caddr_t sptaddr,
	size_t len,
	page_t *ppa[])
{
	struct spt_data *sptd = sptseg->s_data;
	struct anon_map *amp = sptd->spt_amp;
	/*
	 * NOTE(review): spt_prot (a protection mask) is stored into an
	 * enum seg_rw here — longstanding quirk; confirm intent before
	 * changing.
	 */
	enum seg_rw rw = sptd->spt_prot;
	uint_t szc = sptseg->s_szc;
	size_t pg_sz, share_sz = page_get_pagesize(szc);
	pgcnt_t lp_npgs;
	caddr_t lp_addr, e_sptaddr;
	uint_t vpprot, ppa_szc = 0;
	struct vpage *vpage = NULL;
	ulong_t j, ppa_idx;
	int err, ierr = 0;
23597c478bd9Sstevel@tonic-gate pgcnt_t an_idx; 23607c478bd9Sstevel@tonic-gate anon_sync_obj_t cookie; 23612ba723d8Smec int anon_locked = 0; 23622ba723d8Smec pgcnt_t amp_pgs; 23632ba723d8Smec 23647c478bd9Sstevel@tonic-gate 23657c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(sptaddr, share_sz) && IS_P2ALIGNED(len, share_sz)); 23667c478bd9Sstevel@tonic-gate ASSERT(len != 0); 23677c478bd9Sstevel@tonic-gate 23687c478bd9Sstevel@tonic-gate pg_sz = share_sz; 23697c478bd9Sstevel@tonic-gate lp_npgs = btop(pg_sz); 23707c478bd9Sstevel@tonic-gate lp_addr = sptaddr; 23717c478bd9Sstevel@tonic-gate e_sptaddr = sptaddr + len; 23727c478bd9Sstevel@tonic-gate an_idx = seg_page(sptseg, sptaddr); 23737c478bd9Sstevel@tonic-gate ppa_idx = 0; 23747c478bd9Sstevel@tonic-gate 23757c478bd9Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 23762ba723d8Smec 23772ba723d8Smec amp_pgs = page_get_pagecnt(amp->a_szc); 23782ba723d8Smec 23797c478bd9Sstevel@tonic-gate /*CONSTCOND*/ 23807c478bd9Sstevel@tonic-gate while (1) { 23817c478bd9Sstevel@tonic-gate for (; lp_addr < e_sptaddr; 23822ba723d8Smec an_idx += lp_npgs, lp_addr += pg_sz, ppa_idx += lp_npgs) { 23837c478bd9Sstevel@tonic-gate 23842ba723d8Smec /* 23852ba723d8Smec * If we're currently locked, and we get to a new 23862ba723d8Smec * page, unlock our current anon chunk. 
23872ba723d8Smec */ 23882ba723d8Smec if (anon_locked && P2PHASE(an_idx, amp_pgs) == 0) { 23892ba723d8Smec anon_array_exit(&cookie); 23902ba723d8Smec anon_locked = 0; 23912ba723d8Smec } 23922ba723d8Smec if (!anon_locked) { 23937c478bd9Sstevel@tonic-gate anon_array_enter(amp, an_idx, &cookie); 23942ba723d8Smec anon_locked = 1; 23952ba723d8Smec } 23967c478bd9Sstevel@tonic-gate ppa_szc = (uint_t)-1; 23977c478bd9Sstevel@tonic-gate ierr = anon_map_getpages(amp, an_idx, szc, sptseg, 23987c478bd9Sstevel@tonic-gate lp_addr, sptd->spt_prot, &vpprot, &ppa[ppa_idx], 23992cb27123Saguzovsk &ppa_szc, vpage, rw, 0, segvn_anypgsz, 0, kcred); 24007c478bd9Sstevel@tonic-gate 24017c478bd9Sstevel@tonic-gate if (ierr != 0) { 24027c478bd9Sstevel@tonic-gate if (ierr > 0) { 24037c478bd9Sstevel@tonic-gate err = FC_MAKE_ERR(ierr); 24047c478bd9Sstevel@tonic-gate goto lpgs_err; 24057c478bd9Sstevel@tonic-gate } 24067c478bd9Sstevel@tonic-gate break; 24077c478bd9Sstevel@tonic-gate } 24087c478bd9Sstevel@tonic-gate } 24097c478bd9Sstevel@tonic-gate if (lp_addr == e_sptaddr) { 24107c478bd9Sstevel@tonic-gate break; 24117c478bd9Sstevel@tonic-gate } 24127c478bd9Sstevel@tonic-gate ASSERT(lp_addr < e_sptaddr); 24137c478bd9Sstevel@tonic-gate 24147c478bd9Sstevel@tonic-gate /* 24157c478bd9Sstevel@tonic-gate * ierr == -1 means we failed to allocate a large page. 24167c478bd9Sstevel@tonic-gate * so do a size down operation. 24177c478bd9Sstevel@tonic-gate * 24187c478bd9Sstevel@tonic-gate * ierr == -2 means some other process that privately shares 24197c478bd9Sstevel@tonic-gate * pages with this process has allocated a larger page and we 24207c478bd9Sstevel@tonic-gate * need to retry with larger pages. So do a size up 24217c478bd9Sstevel@tonic-gate * operation. This relies on the fact that large pages are 24227c478bd9Sstevel@tonic-gate * never partially shared i.e. 
if we share any constituent 24237c478bd9Sstevel@tonic-gate * page of a large page with another process we must share the 24247c478bd9Sstevel@tonic-gate * entire large page. Note this cannot happen for SOFTLOCK 24257c478bd9Sstevel@tonic-gate * case, unless current address (lpaddr) is at the beginning 24267c478bd9Sstevel@tonic-gate * of the next page size boundary because the other process 24277c478bd9Sstevel@tonic-gate * couldn't have relocated locked pages. 24287c478bd9Sstevel@tonic-gate */ 24297c478bd9Sstevel@tonic-gate ASSERT(ierr == -1 || ierr == -2); 24307c478bd9Sstevel@tonic-gate if (segvn_anypgsz) { 24317c478bd9Sstevel@tonic-gate ASSERT(ierr == -2 || szc != 0); 24327c478bd9Sstevel@tonic-gate ASSERT(ierr == -1 || szc < sptseg->s_szc); 24337c478bd9Sstevel@tonic-gate szc = (ierr == -1) ? szc - 1 : szc + 1; 24347c478bd9Sstevel@tonic-gate } else { 24357c478bd9Sstevel@tonic-gate /* 24367c478bd9Sstevel@tonic-gate * For faults and segvn_anypgsz == 0 24377c478bd9Sstevel@tonic-gate * we need to be careful not to loop forever 24387c478bd9Sstevel@tonic-gate * if existing page is found with szc other 24397c478bd9Sstevel@tonic-gate * than 0 or seg->s_szc. This could be due 24407c478bd9Sstevel@tonic-gate * to page relocations on behalf of DR or 24417c478bd9Sstevel@tonic-gate * more likely large page creation. For this 24427c478bd9Sstevel@tonic-gate * case simply re-size to existing page's szc 24437c478bd9Sstevel@tonic-gate * if returned by anon_map_getpages(). 24447c478bd9Sstevel@tonic-gate */ 24457c478bd9Sstevel@tonic-gate if (ppa_szc == (uint_t)-1) { 24467c478bd9Sstevel@tonic-gate szc = (ierr == -1) ? 
0 : sptseg->s_szc; 24477c478bd9Sstevel@tonic-gate } else { 24487c478bd9Sstevel@tonic-gate ASSERT(ppa_szc <= sptseg->s_szc); 24497c478bd9Sstevel@tonic-gate ASSERT(ierr == -2 || ppa_szc < szc); 24507c478bd9Sstevel@tonic-gate ASSERT(ierr == -1 || ppa_szc > szc); 24517c478bd9Sstevel@tonic-gate szc = ppa_szc; 24527c478bd9Sstevel@tonic-gate } 24537c478bd9Sstevel@tonic-gate } 24547c478bd9Sstevel@tonic-gate pg_sz = page_get_pagesize(szc); 24557c478bd9Sstevel@tonic-gate lp_npgs = btop(pg_sz); 24567c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(lp_addr, pg_sz)); 24577c478bd9Sstevel@tonic-gate } 24582ba723d8Smec if (anon_locked) { 24592ba723d8Smec anon_array_exit(&cookie); 24602ba723d8Smec } 24617c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 24627c478bd9Sstevel@tonic-gate return (0); 24637c478bd9Sstevel@tonic-gate 24647c478bd9Sstevel@tonic-gate lpgs_err: 24652ba723d8Smec if (anon_locked) { 24662ba723d8Smec anon_array_exit(&cookie); 24672ba723d8Smec } 24687c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 24697c478bd9Sstevel@tonic-gate for (j = 0; j < ppa_idx; j++) 24707c478bd9Sstevel@tonic-gate page_unlock(ppa[j]); 24717c478bd9Sstevel@tonic-gate return (err); 24727c478bd9Sstevel@tonic-gate } 24737c478bd9Sstevel@tonic-gate 2474c6939658Ssl108498 /* 2475c6939658Ssl108498 * count the number of bytes in a set of spt pages that are currently not 2476c6939658Ssl108498 * locked 2477c6939658Ssl108498 */ 2478c6939658Ssl108498 static rctl_qty_t 2479c6939658Ssl108498 spt_unlockedbytes(pgcnt_t npages, page_t **ppa) 2480c6939658Ssl108498 { 2481c6939658Ssl108498 ulong_t i; 2482c6939658Ssl108498 rctl_qty_t unlocked = 0; 2483c6939658Ssl108498 2484c6939658Ssl108498 for (i = 0; i < npages; i++) { 2485c6939658Ssl108498 if (ppa[i]->p_lckcnt == 0) 2486c6939658Ssl108498 unlocked += PAGESIZE; 2487c6939658Ssl108498 } 2488c6939658Ssl108498 return (unlocked); 2489c6939658Ssl108498 } 2490c6939658Ssl108498 2491b52a336eSPavel Tatashin extern u_longlong_t randtick(void); 2492b52a336eSPavel 
Tatashin /* number of locks to reserve/skip by spt_lockpages() and spt_unlockpages() */ 2493b52a336eSPavel Tatashin #define NLCK (NCPU_P2) 2494b52a336eSPavel Tatashin /* Random number with a range [0, n-1], n must be power of two */ 2495b52a336eSPavel Tatashin #define RAND_P2(n) \ 2496b52a336eSPavel Tatashin ((((long)curthread >> PTR24_LSB) ^ (long)randtick()) & ((n) - 1)) 2497b52a336eSPavel Tatashin 24987c478bd9Sstevel@tonic-gate int 24997c478bd9Sstevel@tonic-gate spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages, 2500c6939658Ssl108498 page_t **ppa, ulong_t *lockmap, size_t pos, 2501c6939658Ssl108498 rctl_qty_t *locked) 25027c478bd9Sstevel@tonic-gate { 25037c478bd9Sstevel@tonic-gate struct shm_data *shmd = seg->s_data; 25047c478bd9Sstevel@tonic-gate struct spt_data *sptd = shmd->shm_sptseg->s_data; 25057c478bd9Sstevel@tonic-gate ulong_t i; 25067c478bd9Sstevel@tonic-gate int kernel; 2507b52a336eSPavel Tatashin pgcnt_t nlck = 0; 2508b52a336eSPavel Tatashin int rv = 0; 2509b52a336eSPavel Tatashin int use_reserved = 1; 25107c478bd9Sstevel@tonic-gate 2511c6939658Ssl108498 /* return the number of bytes actually locked */ 2512c6939658Ssl108498 *locked = 0; 2513b52a336eSPavel Tatashin 2514b52a336eSPavel Tatashin /* 2515b52a336eSPavel Tatashin * To avoid contention on freemem_lock, availrmem and pages_locked 2516b52a336eSPavel Tatashin * global counters are updated only every nlck locked pages instead of 2517b52a336eSPavel Tatashin * every time. Reserve nlck locks up front and deduct from this 2518b52a336eSPavel Tatashin * reservation for each page that requires a lock. When the reservation 2519b52a336eSPavel Tatashin * is consumed, reserve again. nlck is randomized, so the competing 2520b52a336eSPavel Tatashin * threads do not fall into a cyclic lock contention pattern. When 2521b52a336eSPavel Tatashin * memory is low, the lock ahead is disabled, and instead page_pp_lock() 2522b52a336eSPavel Tatashin * is used to lock pages. 
2523b52a336eSPavel Tatashin */ 25247c478bd9Sstevel@tonic-gate for (i = 0; i < npages; anon_index++, pos++, i++) { 2525b52a336eSPavel Tatashin if (nlck == 0 && use_reserved == 1) { 2526b52a336eSPavel Tatashin nlck = NLCK + RAND_P2(NLCK); 2527b52a336eSPavel Tatashin /* if fewer loops left, decrease nlck */ 2528b52a336eSPavel Tatashin nlck = MIN(nlck, npages - i); 2529b52a336eSPavel Tatashin /* 2530b52a336eSPavel Tatashin * Reserve nlck locks up front and deduct from this 2531b52a336eSPavel Tatashin * reservation for each page that requires a lock. When 2532b52a336eSPavel Tatashin * the reservation is consumed, reserve again. 2533b52a336eSPavel Tatashin */ 2534b52a336eSPavel Tatashin mutex_enter(&freemem_lock); 2535b52a336eSPavel Tatashin if ((availrmem - nlck) < pages_pp_maximum) { 2536b52a336eSPavel Tatashin /* Do not do advance memory reserves */ 2537b52a336eSPavel Tatashin use_reserved = 0; 2538b52a336eSPavel Tatashin } else { 2539b52a336eSPavel Tatashin availrmem -= nlck; 2540b52a336eSPavel Tatashin pages_locked += nlck; 2541b52a336eSPavel Tatashin } 2542b52a336eSPavel Tatashin mutex_exit(&freemem_lock); 2543b52a336eSPavel Tatashin } 25447c478bd9Sstevel@tonic-gate if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) { 25457c478bd9Sstevel@tonic-gate if (sptd->spt_ppa_lckcnt[anon_index] < 25467c478bd9Sstevel@tonic-gate (ushort_t)DISM_LOCK_MAX) { 25477c478bd9Sstevel@tonic-gate if (++sptd->spt_ppa_lckcnt[anon_index] == 25487c478bd9Sstevel@tonic-gate (ushort_t)DISM_LOCK_MAX) { 25497c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 25507c478bd9Sstevel@tonic-gate "DISM page lock limit " 25517c478bd9Sstevel@tonic-gate "reached on DISM offset 0x%lx\n", 25527c478bd9Sstevel@tonic-gate anon_index << PAGESHIFT); 25537c478bd9Sstevel@tonic-gate } 25547c478bd9Sstevel@tonic-gate kernel = (sptd->spt_ppa && 2555b52a336eSPavel Tatashin sptd->spt_ppa[anon_index]); 2556b52a336eSPavel Tatashin if (!page_pp_lock(ppa[i], 0, kernel || 2557b52a336eSPavel Tatashin use_reserved)) { 
25587c478bd9Sstevel@tonic-gate sptd->spt_ppa_lckcnt[anon_index]--; 2559b52a336eSPavel Tatashin rv = EAGAIN; 2560b52a336eSPavel Tatashin break; 25617c478bd9Sstevel@tonic-gate } 2562c6939658Ssl108498 /* if this is a newly locked page, count it */ 2563c6939658Ssl108498 if (ppa[i]->p_lckcnt == 1) { 2564b52a336eSPavel Tatashin if (kernel == 0 && use_reserved == 1) 2565b52a336eSPavel Tatashin nlck--; 2566c6939658Ssl108498 *locked += PAGESIZE; 2567c6939658Ssl108498 } 25687c478bd9Sstevel@tonic-gate shmd->shm_lckpgs++; 25697c478bd9Sstevel@tonic-gate shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED; 25707c478bd9Sstevel@tonic-gate if (lockmap != NULL) 25717c478bd9Sstevel@tonic-gate BT_SET(lockmap, pos); 25727c478bd9Sstevel@tonic-gate } 25737c478bd9Sstevel@tonic-gate } 25747c478bd9Sstevel@tonic-gate } 2575b52a336eSPavel Tatashin /* Return unused lock reservation */ 2576b52a336eSPavel Tatashin if (nlck != 0 && use_reserved == 1) { 2577b52a336eSPavel Tatashin mutex_enter(&freemem_lock); 2578b52a336eSPavel Tatashin availrmem += nlck; 2579b52a336eSPavel Tatashin pages_locked -= nlck; 2580b52a336eSPavel Tatashin mutex_exit(&freemem_lock); 2581b52a336eSPavel Tatashin } 2582b52a336eSPavel Tatashin 2583b52a336eSPavel Tatashin return (rv); 2584b52a336eSPavel Tatashin } 2585b52a336eSPavel Tatashin 2586b52a336eSPavel Tatashin int 2587b52a336eSPavel Tatashin spt_unlockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages, 2588b52a336eSPavel Tatashin rctl_qty_t *unlocked) 2589b52a336eSPavel Tatashin { 2590b52a336eSPavel Tatashin struct shm_data *shmd = seg->s_data; 2591b52a336eSPavel Tatashin struct spt_data *sptd = shmd->shm_sptseg->s_data; 2592b52a336eSPavel Tatashin struct anon_map *amp = sptd->spt_amp; 2593b52a336eSPavel Tatashin struct anon *ap; 2594b52a336eSPavel Tatashin struct vnode *vp; 2595b52a336eSPavel Tatashin u_offset_t off; 2596b52a336eSPavel Tatashin struct page *pp; 2597b52a336eSPavel Tatashin int kernel; 2598b52a336eSPavel Tatashin anon_sync_obj_t cookie; 
2599b52a336eSPavel Tatashin ulong_t i; 2600b52a336eSPavel Tatashin pgcnt_t nlck = 0; 2601b52a336eSPavel Tatashin pgcnt_t nlck_limit = NLCK; 2602b52a336eSPavel Tatashin 2603b52a336eSPavel Tatashin ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 2604b52a336eSPavel Tatashin for (i = 0; i < npages; i++, anon_index++) { 2605b52a336eSPavel Tatashin if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) { 2606b52a336eSPavel Tatashin anon_array_enter(amp, anon_index, &cookie); 2607b52a336eSPavel Tatashin ap = anon_get_ptr(amp->ahp, anon_index); 2608b52a336eSPavel Tatashin ASSERT(ap); 2609b52a336eSPavel Tatashin 2610b52a336eSPavel Tatashin swap_xlate(ap, &vp, &off); 2611b52a336eSPavel Tatashin anon_array_exit(&cookie); 2612b52a336eSPavel Tatashin pp = page_lookup(vp, off, SE_SHARED); 2613b52a336eSPavel Tatashin ASSERT(pp); 2614b52a336eSPavel Tatashin /* 2615b52a336eSPavel Tatashin * availrmem is decremented only for pages which are not 2616b52a336eSPavel Tatashin * in seg pcache, for pages in seg pcache availrmem was 2617b52a336eSPavel Tatashin * decremented in _dismpagelock() 2618b52a336eSPavel Tatashin */ 2619b52a336eSPavel Tatashin kernel = (sptd->spt_ppa && sptd->spt_ppa[anon_index]); 2620b52a336eSPavel Tatashin ASSERT(pp->p_lckcnt > 0); 2621b52a336eSPavel Tatashin 2622b52a336eSPavel Tatashin /* 2623b52a336eSPavel Tatashin * lock page but do not change availrmem, we do it 2624b52a336eSPavel Tatashin * ourselves every nlck loops. 
2625b52a336eSPavel Tatashin */ 2626b52a336eSPavel Tatashin page_pp_unlock(pp, 0, 1); 2627b52a336eSPavel Tatashin if (pp->p_lckcnt == 0) { 2628b52a336eSPavel Tatashin if (kernel == 0) 2629b52a336eSPavel Tatashin nlck++; 2630b52a336eSPavel Tatashin *unlocked += PAGESIZE; 2631b52a336eSPavel Tatashin } 2632b52a336eSPavel Tatashin page_unlock(pp); 2633b52a336eSPavel Tatashin shmd->shm_vpage[anon_index] &= ~DISM_PG_LOCKED; 2634b52a336eSPavel Tatashin sptd->spt_ppa_lckcnt[anon_index]--; 2635b52a336eSPavel Tatashin shmd->shm_lckpgs--; 2636b52a336eSPavel Tatashin } 2637b52a336eSPavel Tatashin 2638b52a336eSPavel Tatashin /* 2639b52a336eSPavel Tatashin * To reduce freemem_lock contention, do not update availrmem 2640b52a336eSPavel Tatashin * until at least NLCK pages have been unlocked. 2641b52a336eSPavel Tatashin * 1. No need to update if nlck is zero 2642b52a336eSPavel Tatashin * 2. Always update if the last iteration 2643b52a336eSPavel Tatashin */ 2644b52a336eSPavel Tatashin if (nlck > 0 && (nlck == nlck_limit || i == npages - 1)) { 2645b52a336eSPavel Tatashin mutex_enter(&freemem_lock); 2646b52a336eSPavel Tatashin availrmem += nlck; 2647b52a336eSPavel Tatashin pages_locked -= nlck; 2648b52a336eSPavel Tatashin mutex_exit(&freemem_lock); 2649b52a336eSPavel Tatashin nlck = 0; 2650b52a336eSPavel Tatashin nlck_limit = NLCK + RAND_P2(NLCK); 2651b52a336eSPavel Tatashin } 2652b52a336eSPavel Tatashin } 2653b52a336eSPavel Tatashin ANON_LOCK_EXIT(&->a_rwlock); 2654b52a336eSPavel Tatashin 26557c478bd9Sstevel@tonic-gate return (0); 26567c478bd9Sstevel@tonic-gate } 26577c478bd9Sstevel@tonic-gate 26587c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 26597c478bd9Sstevel@tonic-gate static int 26607c478bd9Sstevel@tonic-gate segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, 26617c478bd9Sstevel@tonic-gate int attr, int op, ulong_t *lockmap, size_t pos) 26627c478bd9Sstevel@tonic-gate { 26637c478bd9Sstevel@tonic-gate struct shm_data *shmd = seg->s_data; 26647c478bd9Sstevel@tonic-gate struct 
seg *sptseg = shmd->shm_sptseg; 26657c478bd9Sstevel@tonic-gate struct spt_data *sptd = sptseg->s_data; 2666c6939658Ssl108498 struct kshmid *sp = sptd->spt_amp->a_sp; 26677c478bd9Sstevel@tonic-gate pgcnt_t npages, a_npages; 26687c478bd9Sstevel@tonic-gate page_t **ppa; 26697c478bd9Sstevel@tonic-gate pgcnt_t an_idx, a_an_idx, ppa_idx; 26707c478bd9Sstevel@tonic-gate caddr_t spt_addr, a_addr; /* spt and aligned address */ 26717c478bd9Sstevel@tonic-gate size_t a_len; /* aligned len */ 26727c478bd9Sstevel@tonic-gate size_t share_sz; 26737c478bd9Sstevel@tonic-gate ulong_t i; 26747c478bd9Sstevel@tonic-gate int sts = 0; 2675c6939658Ssl108498 rctl_qty_t unlocked = 0; 2676c6939658Ssl108498 rctl_qty_t locked = 0; 2677c6939658Ssl108498 struct proc *p = curproc; 2678c6939658Ssl108498 kproject_t *proj; 26797c478bd9Sstevel@tonic-gate 2680*dc32d872SJosef 'Jeff' Sipek ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 2681c6939658Ssl108498 ASSERT(sp != NULL); 26827c478bd9Sstevel@tonic-gate 26837c478bd9Sstevel@tonic-gate if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { 26847c478bd9Sstevel@tonic-gate return (0); 26857c478bd9Sstevel@tonic-gate } 26867c478bd9Sstevel@tonic-gate 26877c478bd9Sstevel@tonic-gate addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 26887c478bd9Sstevel@tonic-gate an_idx = seg_page(seg, addr); 26897c478bd9Sstevel@tonic-gate npages = btopr(len); 26907c478bd9Sstevel@tonic-gate 26917c478bd9Sstevel@tonic-gate if (an_idx + npages > btopr(shmd->shm_amp->size)) { 26927c478bd9Sstevel@tonic-gate return (ENOMEM); 26937c478bd9Sstevel@tonic-gate } 26947c478bd9Sstevel@tonic-gate 2695c6939658Ssl108498 /* 2696c6939658Ssl108498 * A shm's project never changes, so no lock needed. 2697c6939658Ssl108498 * The shm has a hold on the project, so it will not go away. 2698c6939658Ssl108498 * Since we have a mapping to shm within this zone, we know 2699c6939658Ssl108498 * that the zone will not go away. 
2700c6939658Ssl108498 */ 2701c6939658Ssl108498 proj = sp->shm_perm.ipc_proj; 2702c6939658Ssl108498 27037c478bd9Sstevel@tonic-gate if (op == MC_LOCK) { 2704c6939658Ssl108498 27057c478bd9Sstevel@tonic-gate /* 27067c478bd9Sstevel@tonic-gate * Need to align addr and size request if they are not 27077c478bd9Sstevel@tonic-gate * aligned so we can always allocate large page(s) however 27087c478bd9Sstevel@tonic-gate * we only lock what was requested in initial request. 27097c478bd9Sstevel@tonic-gate */ 27107c478bd9Sstevel@tonic-gate share_sz = page_get_pagesize(sptseg->s_szc); 27117c478bd9Sstevel@tonic-gate a_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz); 27127c478bd9Sstevel@tonic-gate a_len = P2ROUNDUP((uintptr_t)(((addr + len) - a_addr)), 27137c478bd9Sstevel@tonic-gate share_sz); 27147c478bd9Sstevel@tonic-gate a_npages = btop(a_len); 27157c478bd9Sstevel@tonic-gate a_an_idx = seg_page(seg, a_addr); 27167c478bd9Sstevel@tonic-gate spt_addr = sptseg->s_base + ptob(a_an_idx); 27177c478bd9Sstevel@tonic-gate ppa_idx = an_idx - a_an_idx; 27187c478bd9Sstevel@tonic-gate 27197c478bd9Sstevel@tonic-gate if ((ppa = kmem_zalloc(((sizeof (page_t *)) * a_npages), 27207c478bd9Sstevel@tonic-gate KM_NOSLEEP)) == NULL) { 27217c478bd9Sstevel@tonic-gate return (ENOMEM); 27227c478bd9Sstevel@tonic-gate } 27237c478bd9Sstevel@tonic-gate 27247c478bd9Sstevel@tonic-gate /* 27257c478bd9Sstevel@tonic-gate * Don't cache any new pages for IO and 27267c478bd9Sstevel@tonic-gate * flush any cached pages. 
27277c478bd9Sstevel@tonic-gate */ 27287c478bd9Sstevel@tonic-gate mutex_enter(&sptd->spt_lock); 27297c478bd9Sstevel@tonic-gate if (sptd->spt_ppa != NULL) 27307c478bd9Sstevel@tonic-gate sptd->spt_flags |= DISM_PPA_CHANGED; 27317c478bd9Sstevel@tonic-gate 27327c478bd9Sstevel@tonic-gate sts = spt_anon_getpages(sptseg, spt_addr, a_len, ppa); 27337c478bd9Sstevel@tonic-gate if (sts != 0) { 27347c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 27357c478bd9Sstevel@tonic-gate kmem_free(ppa, ((sizeof (page_t *)) * a_npages)); 27367c478bd9Sstevel@tonic-gate return (sts); 27377c478bd9Sstevel@tonic-gate } 27387c478bd9Sstevel@tonic-gate 2739c6939658Ssl108498 mutex_enter(&sp->shm_mlock); 2740c6939658Ssl108498 /* enforce locked memory rctl */ 2741c6939658Ssl108498 unlocked = spt_unlockedbytes(npages, &ppa[ppa_idx]); 2742c6939658Ssl108498 2743c6939658Ssl108498 mutex_enter(&p->p_lock); 2744c6939658Ssl108498 if (rctl_incr_locked_mem(p, proj, unlocked, 0)) { 2745c6939658Ssl108498 mutex_exit(&p->p_lock); 2746c6939658Ssl108498 sts = EAGAIN; 2747c6939658Ssl108498 } else { 2748c6939658Ssl108498 mutex_exit(&p->p_lock); 27497c478bd9Sstevel@tonic-gate sts = spt_lockpages(seg, an_idx, npages, 2750c6939658Ssl108498 &ppa[ppa_idx], lockmap, pos, &locked); 2751c6939658Ssl108498 27527c478bd9Sstevel@tonic-gate /* 2753c6939658Ssl108498 * correct locked count if not all pages could be 2754c6939658Ssl108498 * locked 27557c478bd9Sstevel@tonic-gate */ 2756c6939658Ssl108498 if ((unlocked - locked) > 0) { 2757c6939658Ssl108498 rctl_decr_locked_mem(NULL, proj, 2758c6939658Ssl108498 (unlocked - locked), 0); 2759c6939658Ssl108498 } 2760c6939658Ssl108498 } 2761c6939658Ssl108498 /* 2762c6939658Ssl108498 * unlock pages 2763c6939658Ssl108498 */ 2764c6939658Ssl108498 for (i = 0; i < a_npages; i++) 27657c478bd9Sstevel@tonic-gate page_unlock(ppa[i]); 27667c478bd9Sstevel@tonic-gate if (sptd->spt_ppa != NULL) 27677c478bd9Sstevel@tonic-gate sptd->spt_flags |= DISM_PPA_CHANGED; 2768c6939658Ssl108498 
mutex_exit(&sp->shm_mlock); 27697c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 27707c478bd9Sstevel@tonic-gate 27717c478bd9Sstevel@tonic-gate kmem_free(ppa, ((sizeof (page_t *)) * a_npages)); 27727c478bd9Sstevel@tonic-gate 27737c478bd9Sstevel@tonic-gate } else if (op == MC_UNLOCK) { /* unlock */ 2774a98e9dbfSaguzovsk page_t **ppa; 27757c478bd9Sstevel@tonic-gate 27767c478bd9Sstevel@tonic-gate mutex_enter(&sptd->spt_lock); 27777c478bd9Sstevel@tonic-gate if (shmd->shm_lckpgs == 0) { 27787c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 27797c478bd9Sstevel@tonic-gate return (0); 27807c478bd9Sstevel@tonic-gate } 27817c478bd9Sstevel@tonic-gate /* 27827c478bd9Sstevel@tonic-gate * Don't cache new IO pages. 27837c478bd9Sstevel@tonic-gate */ 27847c478bd9Sstevel@tonic-gate if (sptd->spt_ppa != NULL) 27857c478bd9Sstevel@tonic-gate sptd->spt_flags |= DISM_PPA_CHANGED; 27867c478bd9Sstevel@tonic-gate 2787c6939658Ssl108498 mutex_enter(&sp->shm_mlock); 2788b52a336eSPavel Tatashin sts = spt_unlockpages(seg, an_idx, npages, &unlocked); 2789a98e9dbfSaguzovsk if ((ppa = sptd->spt_ppa) != NULL) 27907c478bd9Sstevel@tonic-gate sptd->spt_flags |= DISM_PPA_CHANGED; 27917c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 2792c6939658Ssl108498 2793c6939658Ssl108498 rctl_decr_locked_mem(NULL, proj, unlocked, 0); 2794c6939658Ssl108498 mutex_exit(&sp->shm_mlock); 2795a98e9dbfSaguzovsk 2796a98e9dbfSaguzovsk if (ppa != NULL) 2797a98e9dbfSaguzovsk seg_ppurge_wiredpp(ppa); 27987c478bd9Sstevel@tonic-gate } 27997c478bd9Sstevel@tonic-gate return (sts); 28007c478bd9Sstevel@tonic-gate } 28017c478bd9Sstevel@tonic-gate 28027c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 28037c478bd9Sstevel@tonic-gate int 28047c478bd9Sstevel@tonic-gate segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 28057c478bd9Sstevel@tonic-gate { 28067c478bd9Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 28077c478bd9Sstevel@tonic-gate struct spt_data *sptd = (struct 
spt_data *)shmd->shm_sptseg->s_data; 28087c478bd9Sstevel@tonic-gate spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1; 28097c478bd9Sstevel@tonic-gate 2810*dc32d872SJosef 'Jeff' Sipek ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 28117c478bd9Sstevel@tonic-gate 28127c478bd9Sstevel@tonic-gate /* 28137c478bd9Sstevel@tonic-gate * ISM segment is always rw. 28147c478bd9Sstevel@tonic-gate */ 28157c478bd9Sstevel@tonic-gate while (--pgno >= 0) 28167c478bd9Sstevel@tonic-gate *protv++ = sptd->spt_prot; 28177c478bd9Sstevel@tonic-gate return (0); 28187c478bd9Sstevel@tonic-gate } 28197c478bd9Sstevel@tonic-gate 28207c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 28217c478bd9Sstevel@tonic-gate u_offset_t 28227c478bd9Sstevel@tonic-gate segspt_shmgetoffset(struct seg *seg, caddr_t addr) 28237c478bd9Sstevel@tonic-gate { 2824*dc32d872SJosef 'Jeff' Sipek ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 28257c478bd9Sstevel@tonic-gate 28267c478bd9Sstevel@tonic-gate /* Offset does not matter in ISM memory */ 28277c478bd9Sstevel@tonic-gate 28287c478bd9Sstevel@tonic-gate return ((u_offset_t)0); 28297c478bd9Sstevel@tonic-gate } 28307c478bd9Sstevel@tonic-gate 28317c478bd9Sstevel@tonic-gate /* ARGSUSED */ 28327c478bd9Sstevel@tonic-gate int 28337c478bd9Sstevel@tonic-gate segspt_shmgettype(struct seg *seg, caddr_t addr) 28347c478bd9Sstevel@tonic-gate { 28357c478bd9Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 28367c478bd9Sstevel@tonic-gate struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 28377c478bd9Sstevel@tonic-gate 2838*dc32d872SJosef 'Jeff' Sipek ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 28397c478bd9Sstevel@tonic-gate 28407c478bd9Sstevel@tonic-gate /* 28417c478bd9Sstevel@tonic-gate * The shared memory mapping is always MAP_SHARED, SWAP is only 28427c478bd9Sstevel@tonic-gate * reserved for DISM 28437c478bd9Sstevel@tonic-gate */ 28447c478bd9Sstevel@tonic-gate return (MAP_SHARED | 28457c478bd9Sstevel@tonic-gate ((sptd->spt_flags & 
SHM_PAGEABLE) ? 0 : MAP_NORESERVE)); 28467c478bd9Sstevel@tonic-gate } 28477c478bd9Sstevel@tonic-gate 28487c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 28497c478bd9Sstevel@tonic-gate int 28507c478bd9Sstevel@tonic-gate segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 28517c478bd9Sstevel@tonic-gate { 28527c478bd9Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 28537c478bd9Sstevel@tonic-gate struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 28547c478bd9Sstevel@tonic-gate 2855*dc32d872SJosef 'Jeff' Sipek ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 28567c478bd9Sstevel@tonic-gate 28577c478bd9Sstevel@tonic-gate *vpp = sptd->spt_vp; 28587c478bd9Sstevel@tonic-gate return (0); 28597c478bd9Sstevel@tonic-gate } 28607c478bd9Sstevel@tonic-gate 28612ba723d8Smec /* 28622ba723d8Smec * We need to wait for pending IO to complete to a DISM segment in order for 28632ba723d8Smec * pages to get kicked out of the seg_pcache. 120 seconds should be more 28642ba723d8Smec * than enough time to wait. 
28652ba723d8Smec */ 28662ba723d8Smec static clock_t spt_pcache_wait = 120; 28672ba723d8Smec 28687c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 28697c478bd9Sstevel@tonic-gate static int 28707c478bd9Sstevel@tonic-gate segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) 28717c478bd9Sstevel@tonic-gate { 28727c478bd9Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 28737c478bd9Sstevel@tonic-gate struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 28747c478bd9Sstevel@tonic-gate struct anon_map *amp; 28757c478bd9Sstevel@tonic-gate pgcnt_t pg_idx; 28762ba723d8Smec ushort_t gen; 28772ba723d8Smec clock_t end_lbolt; 28782ba723d8Smec int writer; 2879a98e9dbfSaguzovsk page_t **ppa; 28807c478bd9Sstevel@tonic-gate 2881*dc32d872SJosef 'Jeff' Sipek ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 28827c478bd9Sstevel@tonic-gate 28837c478bd9Sstevel@tonic-gate if (behav == MADV_FREE) { 28847c478bd9Sstevel@tonic-gate if ((sptd->spt_flags & SHM_PAGEABLE) == 0) 28857c478bd9Sstevel@tonic-gate return (0); 28867c478bd9Sstevel@tonic-gate 28877c478bd9Sstevel@tonic-gate amp = sptd->spt_amp; 28887c478bd9Sstevel@tonic-gate pg_idx = seg_page(seg, addr); 28897c478bd9Sstevel@tonic-gate 28907c478bd9Sstevel@tonic-gate mutex_enter(&sptd->spt_lock); 2891a98e9dbfSaguzovsk if ((ppa = sptd->spt_ppa) == NULL) { 28922ba723d8Smec mutex_exit(&sptd->spt_lock); 28932ba723d8Smec ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 28942ba723d8Smec anon_disclaim(amp, pg_idx, len); 28952ba723d8Smec ANON_LOCK_EXIT(&->a_rwlock); 28962ba723d8Smec return (0); 28972ba723d8Smec } 28982ba723d8Smec 28997c478bd9Sstevel@tonic-gate sptd->spt_flags |= DISM_PPA_CHANGED; 29002ba723d8Smec gen = sptd->spt_gen; 29012ba723d8Smec 29027c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 29037c478bd9Sstevel@tonic-gate 29047c478bd9Sstevel@tonic-gate /* 29057c478bd9Sstevel@tonic-gate * Purge all DISM cached pages 29067c478bd9Sstevel@tonic-gate */ 2907a98e9dbfSaguzovsk seg_ppurge_wiredpp(ppa); 
29087c478bd9Sstevel@tonic-gate 29092ba723d8Smec /* 29102ba723d8Smec * Drop the AS_LOCK so that other threads can grab it 29112ba723d8Smec * in the as_pageunlock path and hopefully get the segment 29122ba723d8Smec * kicked out of the seg_pcache. We bump the shm_softlockcnt 29132ba723d8Smec * to keep this segment resident. 29142ba723d8Smec */ 2915*dc32d872SJosef 'Jeff' Sipek writer = AS_WRITE_HELD(seg->s_as); 29161a5e258fSJosef 'Jeff' Sipek atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt))); 2917*dc32d872SJosef 'Jeff' Sipek AS_LOCK_EXIT(seg->s_as); 29182ba723d8Smec 29197c478bd9Sstevel@tonic-gate mutex_enter(&sptd->spt_lock); 29202ba723d8Smec 2921d3d50737SRafael Vanoni end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait); 29222ba723d8Smec 29232ba723d8Smec /* 29242ba723d8Smec * Try to wait for pages to get kicked out of the seg_pcache. 29252ba723d8Smec */ 29262ba723d8Smec while (sptd->spt_gen == gen && 29272ba723d8Smec (sptd->spt_flags & DISM_PPA_CHANGED) && 2928d3d50737SRafael Vanoni ddi_get_lbolt() < end_lbolt) { 29292ba723d8Smec if (!cv_timedwait_sig(&sptd->spt_cv, 29302ba723d8Smec &sptd->spt_lock, end_lbolt)) { 29312ba723d8Smec break; 29322ba723d8Smec } 29332ba723d8Smec } 29342ba723d8Smec 29357c478bd9Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 29362ba723d8Smec 29372ba723d8Smec /* Regrab the AS_LOCK and release our hold on the segment */ 2938*dc32d872SJosef 'Jeff' Sipek AS_LOCK_ENTER(seg->s_as, writer ? 
RW_WRITER : RW_READER); 29391a5e258fSJosef 'Jeff' Sipek atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt))); 29402ba723d8Smec if (shmd->shm_softlockcnt <= 0) { 29412ba723d8Smec if (AS_ISUNMAPWAIT(seg->s_as)) { 29422ba723d8Smec mutex_enter(&seg->s_as->a_contents); 29432ba723d8Smec if (AS_ISUNMAPWAIT(seg->s_as)) { 29442ba723d8Smec AS_CLRUNMAPWAIT(seg->s_as); 29452ba723d8Smec cv_broadcast(&seg->s_as->a_cv); 29462ba723d8Smec } 29472ba723d8Smec mutex_exit(&seg->s_as->a_contents); 29482ba723d8Smec } 29492ba723d8Smec } 29502ba723d8Smec 29512ba723d8Smec ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 29522ba723d8Smec anon_disclaim(amp, pg_idx, len); 29532ba723d8Smec ANON_LOCK_EXIT(&->a_rwlock); 29547c478bd9Sstevel@tonic-gate } else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP || 29557c478bd9Sstevel@tonic-gate behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) { 29567c478bd9Sstevel@tonic-gate int already_set; 29577c478bd9Sstevel@tonic-gate ulong_t anon_index; 29587c478bd9Sstevel@tonic-gate lgrp_mem_policy_t policy; 29597c478bd9Sstevel@tonic-gate caddr_t shm_addr; 29607c478bd9Sstevel@tonic-gate size_t share_size; 29617c478bd9Sstevel@tonic-gate size_t size; 29627c478bd9Sstevel@tonic-gate struct seg *sptseg = shmd->shm_sptseg; 29637c478bd9Sstevel@tonic-gate caddr_t sptseg_addr; 29647c478bd9Sstevel@tonic-gate 29657c478bd9Sstevel@tonic-gate /* 29667c478bd9Sstevel@tonic-gate * Align address and length to page size of underlying segment 29677c478bd9Sstevel@tonic-gate */ 29687c478bd9Sstevel@tonic-gate share_size = page_get_pagesize(shmd->shm_sptseg->s_szc); 29697c478bd9Sstevel@tonic-gate shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size); 29707c478bd9Sstevel@tonic-gate size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), 29717c478bd9Sstevel@tonic-gate share_size); 29727c478bd9Sstevel@tonic-gate 29737c478bd9Sstevel@tonic-gate amp = shmd->shm_amp; 29747c478bd9Sstevel@tonic-gate anon_index = seg_page(seg, shm_addr); 29757c478bd9Sstevel@tonic-gate 
29767c478bd9Sstevel@tonic-gate /* 29777c478bd9Sstevel@tonic-gate * And now we may have to adjust size downward if we have 29787c478bd9Sstevel@tonic-gate * exceeded the realsize of the segment or initial anon 29797c478bd9Sstevel@tonic-gate * allocations. 29807c478bd9Sstevel@tonic-gate */ 29817c478bd9Sstevel@tonic-gate sptseg_addr = sptseg->s_base + ptob(anon_index); 29827c478bd9Sstevel@tonic-gate if ((sptseg_addr + size) > 29837c478bd9Sstevel@tonic-gate (sptseg->s_base + sptd->spt_realsize)) 29847c478bd9Sstevel@tonic-gate size = (sptseg->s_base + sptd->spt_realsize) - 29857c478bd9Sstevel@tonic-gate sptseg_addr; 29867c478bd9Sstevel@tonic-gate 29877c478bd9Sstevel@tonic-gate /* 29887c478bd9Sstevel@tonic-gate * Set memory allocation policy for this segment 29897c478bd9Sstevel@tonic-gate */ 29907c478bd9Sstevel@tonic-gate policy = lgrp_madv_to_policy(behav, len, MAP_SHARED); 29917c478bd9Sstevel@tonic-gate already_set = lgrp_shm_policy_set(policy, amp, anon_index, 29927c478bd9Sstevel@tonic-gate NULL, 0, len); 29937c478bd9Sstevel@tonic-gate 29947c478bd9Sstevel@tonic-gate /* 29957c478bd9Sstevel@tonic-gate * If random memory allocation policy set already, 29967c478bd9Sstevel@tonic-gate * don't bother reapplying it. 
29977c478bd9Sstevel@tonic-gate */ 29987c478bd9Sstevel@tonic-gate if (already_set && !LGRP_MEM_POLICY_REAPPLICABLE(policy)) 29997c478bd9Sstevel@tonic-gate return (0); 30007c478bd9Sstevel@tonic-gate 30017c478bd9Sstevel@tonic-gate /* 30027c478bd9Sstevel@tonic-gate * Mark any existing pages in the given range for 30037c478bd9Sstevel@tonic-gate * migration, flushing the I/O page cache, and using 30047c478bd9Sstevel@tonic-gate * underlying segment to calculate anon index and get 30057c478bd9Sstevel@tonic-gate * anonmap and vnode pointer from 30067c478bd9Sstevel@tonic-gate */ 30077c478bd9Sstevel@tonic-gate if (shmd->shm_softlockcnt > 0) 30087c478bd9Sstevel@tonic-gate segspt_purge(seg); 30097c478bd9Sstevel@tonic-gate 30107c478bd9Sstevel@tonic-gate page_mark_migrate(seg, shm_addr, size, amp, 0, NULL, 0, 0); 30117c478bd9Sstevel@tonic-gate } 30127c478bd9Sstevel@tonic-gate 30137c478bd9Sstevel@tonic-gate return (0); 30147c478bd9Sstevel@tonic-gate } 30157c478bd9Sstevel@tonic-gate 30167c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 30177c478bd9Sstevel@tonic-gate void 30187c478bd9Sstevel@tonic-gate segspt_shmdump(struct seg *seg) 30197c478bd9Sstevel@tonic-gate { 30207c478bd9Sstevel@tonic-gate /* no-op for ISM segment */ 30217c478bd9Sstevel@tonic-gate } 30227c478bd9Sstevel@tonic-gate 30237c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 30247c478bd9Sstevel@tonic-gate static faultcode_t 30257c478bd9Sstevel@tonic-gate segspt_shmsetpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc) 30267c478bd9Sstevel@tonic-gate { 30277c478bd9Sstevel@tonic-gate return (ENOTSUP); 30287c478bd9Sstevel@tonic-gate } 30297c478bd9Sstevel@tonic-gate 30307c478bd9Sstevel@tonic-gate /* 30317c478bd9Sstevel@tonic-gate * get a memory ID for an addr in a given segment 30327c478bd9Sstevel@tonic-gate */ 30337c478bd9Sstevel@tonic-gate static int 30347c478bd9Sstevel@tonic-gate segspt_shmgetmemid(struct seg *seg, caddr_t addr, memid_t *memidp) 30357c478bd9Sstevel@tonic-gate { 30367c478bd9Sstevel@tonic-gate struct shm_data *shmd = 
(struct shm_data *)seg->s_data; 30377c478bd9Sstevel@tonic-gate struct anon *ap; 30387c478bd9Sstevel@tonic-gate size_t anon_index; 30397c478bd9Sstevel@tonic-gate struct anon_map *amp = shmd->shm_amp; 30407c478bd9Sstevel@tonic-gate struct spt_data *sptd = shmd->shm_sptseg->s_data; 30417c478bd9Sstevel@tonic-gate struct seg *sptseg = shmd->shm_sptseg; 30427c478bd9Sstevel@tonic-gate anon_sync_obj_t cookie; 30437c478bd9Sstevel@tonic-gate 30447c478bd9Sstevel@tonic-gate anon_index = seg_page(seg, addr); 30457c478bd9Sstevel@tonic-gate 30467c478bd9Sstevel@tonic-gate if (addr > (seg->s_base + sptd->spt_realsize)) { 30477c478bd9Sstevel@tonic-gate return (EFAULT); 30487c478bd9Sstevel@tonic-gate } 30497c478bd9Sstevel@tonic-gate 30507c478bd9Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 30517c478bd9Sstevel@tonic-gate anon_array_enter(amp, anon_index, &cookie); 30527c478bd9Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index); 30537c478bd9Sstevel@tonic-gate if (ap == NULL) { 30547c478bd9Sstevel@tonic-gate struct page *pp; 30557c478bd9Sstevel@tonic-gate caddr_t spt_addr = sptseg->s_base + ptob(anon_index); 30567c478bd9Sstevel@tonic-gate 30577c478bd9Sstevel@tonic-gate pp = anon_zero(sptseg, spt_addr, &ap, kcred); 30587c478bd9Sstevel@tonic-gate if (pp == NULL) { 30597c478bd9Sstevel@tonic-gate anon_array_exit(&cookie); 30607c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 30617c478bd9Sstevel@tonic-gate return (ENOMEM); 30627c478bd9Sstevel@tonic-gate } 30637c478bd9Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP); 30647c478bd9Sstevel@tonic-gate page_unlock(pp); 30657c478bd9Sstevel@tonic-gate } 30667c478bd9Sstevel@tonic-gate anon_array_exit(&cookie); 30677c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 30687c478bd9Sstevel@tonic-gate memidp->val[0] = (uintptr_t)ap; 30697c478bd9Sstevel@tonic-gate memidp->val[1] = (uintptr_t)addr & PAGEOFFSET; 30707c478bd9Sstevel@tonic-gate return (0); 30717c478bd9Sstevel@tonic-gate } 
30727c478bd9Sstevel@tonic-gate 30737c478bd9Sstevel@tonic-gate /* 30747c478bd9Sstevel@tonic-gate * Get memory allocation policy info for specified address in given segment 30757c478bd9Sstevel@tonic-gate */ 30767c478bd9Sstevel@tonic-gate static lgrp_mem_policy_info_t * 30777c478bd9Sstevel@tonic-gate segspt_shmgetpolicy(struct seg *seg, caddr_t addr) 30787c478bd9Sstevel@tonic-gate { 30797c478bd9Sstevel@tonic-gate struct anon_map *amp; 30807c478bd9Sstevel@tonic-gate ulong_t anon_index; 30817c478bd9Sstevel@tonic-gate lgrp_mem_policy_info_t *policy_info; 30827c478bd9Sstevel@tonic-gate struct shm_data *shm_data; 30837c478bd9Sstevel@tonic-gate 30847c478bd9Sstevel@tonic-gate ASSERT(seg != NULL); 30857c478bd9Sstevel@tonic-gate 30867c478bd9Sstevel@tonic-gate /* 30877c478bd9Sstevel@tonic-gate * Get anon_map from segshm 30887c478bd9Sstevel@tonic-gate * 30897c478bd9Sstevel@tonic-gate * Assume that no lock needs to be held on anon_map, since 30907c478bd9Sstevel@tonic-gate * it should be protected by its reference count which must be 30917c478bd9Sstevel@tonic-gate * nonzero for an existing segment 30927c478bd9Sstevel@tonic-gate * Need to grab readers lock on policy tree though 30937c478bd9Sstevel@tonic-gate */ 30947c478bd9Sstevel@tonic-gate shm_data = (struct shm_data *)seg->s_data; 30957c478bd9Sstevel@tonic-gate if (shm_data == NULL) 30967c478bd9Sstevel@tonic-gate return (NULL); 30977c478bd9Sstevel@tonic-gate amp = shm_data->shm_amp; 30987c478bd9Sstevel@tonic-gate ASSERT(amp->refcnt != 0); 30997c478bd9Sstevel@tonic-gate 31007c478bd9Sstevel@tonic-gate /* 31017c478bd9Sstevel@tonic-gate * Get policy info 31027c478bd9Sstevel@tonic-gate * 31037c478bd9Sstevel@tonic-gate * Assume starting anon index of 0 31047c478bd9Sstevel@tonic-gate */ 31057c478bd9Sstevel@tonic-gate anon_index = seg_page(seg, addr); 31067c478bd9Sstevel@tonic-gate policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0); 31077c478bd9Sstevel@tonic-gate 31087c478bd9Sstevel@tonic-gate return (policy_info); 
31097c478bd9Sstevel@tonic-gate } 31101bd5c35fSelowe 31111bd5c35fSelowe /*ARGSUSED*/ 31121bd5c35fSelowe static int 31131bd5c35fSelowe segspt_shmcapable(struct seg *seg, segcapability_t capability) 31141bd5c35fSelowe { 31151bd5c35fSelowe return (0); 31161bd5c35fSelowe } 3117