/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

/*
 * VM - anonymous pages.
 *
 * This layer sits immediately above the vm_swap layer.  It manages
 * physical pages that have no permanent identity in the file system
 * name space, using the services of the vm_swap layer to allocate
 * backing storage for these pages.  Since these pages have no external
 * identity, they are discarded when the last reference is removed.
 *
 * An important function of this layer is to manage low-level sharing
 * of pages that are logically distinct but that happen to be
 * physically identical (e.g., the corresponding pages of the processes
 * resulting from a fork before one process or the other changes their
 * contents).  This pseudo-sharing is present only as an optimization
 * and is not to be confused with true sharing in which multiple
 * address spaces deliberately contain references to the same object;
 * such sharing is managed at a higher level.
 *
 * The key data structure here is the anon struct, which contains a
 * reference count for its associated physical page and a hint about
 * the identity of that page.  Anon structs typically live in arrays,
 * with an instance's position in its array determining where the
 * corresponding backing storage is allocated; however, the swap_xlate()
 * routine abstracts away this representation information so that the
 * rest of the anon layer need not know it.  (See the swap layer for
 * more details on anon struct layout.)
 *
 * In future versions of the system, the association between an
 * anon struct and its position on backing store will change so that
 * we don't require backing store for all anonymous pages in the system.
 * This is an important consideration for large memory systems.
 * We can also use this technique to delay binding physical locations
 * to anonymous pages until pageout/swapout time, where we can make
 * smarter allocation decisions to improve anonymous klustering.
 *
 * Many of the routines defined here take a (struct anon **) argument,
 * which allows the code at this level to manage anon pages directly,
 * so that callers can regard anon structs as opaque objects and not be
 * concerned with assigning or inspecting their contents.
 *
 * Clients of this layer refer to anon pages indirectly.  That is, they
 * maintain arrays of pointers to anon structs rather than maintaining
 * anon structs themselves.  The (struct anon **) arguments mentioned
 * above are pointers to entries in these arrays.  It is these arrays
 * that capture the mapping between offsets within a given segment and
 * the corresponding anonymous backing storage address.
 */
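
/*
 * Illustrative sketch (comment only, not part of the build): a client
 * such as a segment driver resolves a page index within its anon map
 * to an anon slot through the array interfaces defined below; amp,
 * anon_index, vp and off are hypothetical locals:
 *
 *	struct vnode *vp;
 *	anoff_t off;
 *	struct anon *ap = anon_get_ptr(amp->ahp, anon_index);
 *
 *	if (ap != NULL)
 *		swap_xlate(ap, &vp, &off);
 */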

#ifdef DEBUG
#define	ANON_DEBUG
#endif

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/cred.h>
#include <sys/thread.h>
#include <sys/vnode.h>
#include <sys/cpuvar.h>
#include <sys/swap.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/bitmap.h>
#include <sys/vmsystm.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/fs/swapnode.h>
#include <sys/tnf_probe.h>
#include <sys/lgrp.h>
#include <sys/policy.h>
#include <sys/condvar_impl.h>
#include <sys/mutex_impl.h>
#include <sys/rctl.h>

#include <vm/as.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <vm/vpage.h>
#include <vm/seg.h>
#include <vm/rm.h>

#include <fs/fs_subr.h>

struct vnode *anon_vp;

int anon_debug;

kmutex_t	anoninfo_lock;
struct		k_anoninfo k_anoninfo;
ani_free_t	*ani_free_pool;
pad_mutex_t	anon_array_lock[ANON_LOCKSIZE];
kcondvar_t	anon_array_cv[ANON_LOCKSIZE];

/*
 * Global hash table for (vp, off) -> anon slot
 */
extern	int swap_maxcontig;
size_t	anon_hash_size;
unsigned int anon_hash_shift;
struct anon	**anon_hash;

static struct kmem_cache *anon_cache;
static struct kmem_cache *anonmap_cache;

pad_mutex_t	*anonhash_lock;

/*
 * Used to make the increment of all refcnts of all anon slots of a large
 * page appear to be atomic.  The lock is grabbed for the first anon slot of
 * a large page.
 */
pad_mutex_t	*anonpages_hash_lock;

#define	APH_MUTEX(vp, off)				\
	(&anonpages_hash_lock[(ANON_HASH((vp), (off)) &	\
	    (AH_LOCK_SIZE - 1))].pad_mutex)
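
/*
 * Striping sketch (comment only): APH_MUTEX() above, like its
 * AH_MUTEX() counterpart, hashes a (vp, off) pair with ANON_HASH()
 * and uses the low bits to select one of AH_LOCK_SIZE padded mutexes,
 * so unrelated anon slots rarely contend on the same lock or cache
 * line.  A hypothetical slot ap is locked as:
 *
 *	kmutex_t *ahm = APH_MUTEX(ap->an_vp, ap->an_off);
 *
 *	mutex_enter(ahm);
 *	(... examine or adjust the shared refcnts of the large page ...)
 *	mutex_exit(ahm);
 */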

#ifdef VM_STATS
static struct anonvmstats_str {
	ulong_t getpages[30];
	ulong_t privatepages[10];
	ulong_t demotepages[9];
	ulong_t decrefpages[9];
	ulong_t	dupfillholes[4];
	ulong_t freepages[1];
} anonvmstats;
#endif /* VM_STATS */

/*ARGSUSED*/
static int
anonmap_cache_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct anon_map *amp = buf;

	rw_init(&amp->a_rwlock, NULL, RW_DEFAULT, NULL);
	cv_init(&amp->a_purgecv, NULL, CV_DEFAULT, NULL);
	mutex_init(&amp->a_pmtx, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&amp->a_purgemtx, NULL, MUTEX_DEFAULT, NULL);
	return (0);
}

/*ARGSUSED1*/
static void
anonmap_cache_destructor(void *buf, void *cdrarg)
{
	struct anon_map *amp = buf;

	rw_destroy(&amp->a_rwlock);
	cv_destroy(&amp->a_purgecv);
	mutex_destroy(&amp->a_pmtx);
	mutex_destroy(&amp->a_purgemtx);
}

void
anon_init(void)
{
	int i;
	pad_mutex_t *tmp;

	/* These both need to be powers of 2 so round up to the next power */
	anon_hash_shift = highbit((physmem / ANON_HASHAVELEN) - 1);
	anon_hash_size = 1L << anon_hash_shift;

	/*
	 * We need to align the anonhash_lock and anonpages_hash_lock arrays
	 * to a 64B boundary to avoid false sharing.  We add 63B to our
	 * allocation so that we can get a 64B aligned address to use.
	 * We allocate both of these together to avoid wasting an additional
	 * 63B.
	 */
	tmp = kmem_zalloc((2 * AH_LOCK_SIZE * sizeof (pad_mutex_t)) + 63,
	    KM_SLEEP);
	anonhash_lock = (pad_mutex_t *)P2ROUNDUP((uintptr_t)tmp, 64);
	anonpages_hash_lock = anonhash_lock + AH_LOCK_SIZE;

	for (i = 0; i < AH_LOCK_SIZE; i++) {
		mutex_init(&anonhash_lock[i].pad_mutex, NULL, MUTEX_DEFAULT,
		    NULL);
		mutex_init(&anonpages_hash_lock[i].pad_mutex, NULL,
		    MUTEX_DEFAULT, NULL);
	}

	for (i = 0; i < ANON_LOCKSIZE; i++) {
		mutex_init(&anon_array_lock[i].pad_mutex, NULL,
		    MUTEX_DEFAULT, NULL);
		cv_init(&anon_array_cv[i], NULL, CV_DEFAULT, NULL);
	}

	anon_hash = (struct anon **)
	    kmem_zalloc(sizeof (struct anon *) * anon_hash_size, KM_SLEEP);
	anon_cache = kmem_cache_create("anon_cache", sizeof (struct anon),
	    AN_CACHE_ALIGN, NULL, NULL, NULL, NULL, NULL, KMC_PREFILL);
	anonmap_cache = kmem_cache_create("anonmap_cache",
	    sizeof (struct anon_map), 0,
	    anonmap_cache_constructor, anonmap_cache_destructor, NULL,
	    NULL, NULL, 0);
	swap_maxcontig = (1024 * 1024) >> PAGESHIFT;	/* 1MB of pages */

	tmp = kmem_zalloc((ANI_MAX_POOL * sizeof (ani_free_t)) + 63, KM_SLEEP);
	/* Round ani_free_pool to cacheline boundary to avoid false sharing. */
	ani_free_pool = (ani_free_t *)P2ROUNDUP((uintptr_t)tmp, 64);

	anon_vp = vn_alloc(KM_SLEEP);
	vn_setops(anon_vp, swap_vnodeops);
	anon_vp->v_type = VREG;
	anon_vp->v_flag |= (VISSWAP|VISSWAPFS);
}
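
/*
 * A worked example of the hash sizing above (comment only), assuming
 * ANON_HASHAVELEN is 4: with 4GB of 4K pages, physmem is 0x100000, so
 * highbit((0x100000 / 4) - 1) yields anon_hash_shift = 18 and
 * anon_hash_size = 1 << 18 = 262144 buckets, i.e. one hash bucket per
 * four physical pages on average.
 */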

/*
 * Global anon slot hash table manipulation.
 */

static void
anon_addhash(struct anon *ap)
{
	int index;

	ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off)));
	index = ANON_HASH(ap->an_vp, ap->an_off);
	ap->an_hash = anon_hash[index];
	anon_hash[index] = ap;
}

static void
anon_rmhash(struct anon *ap)
{
	struct anon **app;

	ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off)));

	for (app = &anon_hash[ANON_HASH(ap->an_vp, ap->an_off)];
	    *app; app = &((*app)->an_hash)) {
		if (*app == ap) {
			*app = ap->an_hash;
			break;
		}
	}
}

/*
 * The anon array interfaces.  Functions for allocating and freeing
 * arrays of pointers, and for returning/setting entries in an array
 * of pointers for a given offset.
 *
 * Create the list of pointers
 */
struct anon_hdr *
anon_create(pgcnt_t npages, int flags)
{
	struct anon_hdr *ahp;
	ulong_t nchunks;
	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;

	if ((ahp = kmem_zalloc(sizeof (struct anon_hdr), kmemflags)) == NULL) {
		return (NULL);
	}

	mutex_init(&ahp->serial_lock, NULL, MUTEX_DEFAULT, NULL);
	/*
	 * Single level case.
	 */
	ahp->size = npages;
	if (npages <= ANON_CHUNK_SIZE || (flags & ANON_ALLOC_FORCE)) {

		if (flags & ANON_ALLOC_FORCE)
			ahp->flags |= ANON_ALLOC_FORCE;

		ahp->array_chunk = kmem_zalloc(
		    ahp->size * sizeof (struct anon *), kmemflags);

		if (ahp->array_chunk == NULL) {
			kmem_free(ahp, sizeof (struct anon_hdr));
			return (NULL);
		}
	} else {
		/*
		 * 2 Level case.
		 * anon hdr size needs to be rounded off to be a multiple
		 * of ANON_CHUNK_SIZE. This is important as various anon
		 * related functions depend on this.
		 * NOTE -
		 * anon_grow() makes anon hdr size a multiple of
		 * ANON_CHUNK_SIZE.
		 * amp size is <= anon hdr size.
		 * anon_index + seg_pgs <= anon hdr size.
		 */
		ahp->size = P2ROUNDUP(npages, ANON_CHUNK_SIZE);
		nchunks = ahp->size >> ANON_CHUNK_SHIFT;

		ahp->array_chunk = kmem_zalloc(nchunks * sizeof (ulong_t *),
		    kmemflags);

		if (ahp->array_chunk == NULL) {
			kmem_free(ahp, sizeof (struct anon_hdr));
			return (NULL);
		}
	}
	return (ahp);
}

/*
 * Free the array of pointers
 */
void
anon_release(struct anon_hdr *ahp, pgcnt_t npages)
{
	ulong_t i;
	void **ppp;
	ulong_t nchunks;

	ASSERT(npages <= ahp->size);

	/*
	 * Single level case.
	 */
	if (npages <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
		kmem_free(ahp->array_chunk, ahp->size * sizeof (struct anon *));
	} else {
		/*
		 * 2 level case.
		 */
		nchunks = ahp->size >> ANON_CHUNK_SHIFT;
		for (i = 0; i < nchunks; i++) {
			ppp = &ahp->array_chunk[i];
			if (*ppp != NULL)
				kmem_free(*ppp, PAGESIZE);
		}
		kmem_free(ahp->array_chunk, nchunks * sizeof (ulong_t *));
	}
	mutex_destroy(&ahp->serial_lock);
	kmem_free(ahp, sizeof (struct anon_hdr));
}

/*
 * Return the pointer from the list for a
 * specified anon index.
 */
struct anon *
anon_get_ptr(struct anon_hdr *ahp, ulong_t an_idx)
{
	struct anon **app;

	ASSERT(an_idx < ahp->size);

	/*
	 * Single level case.
	 */
	if ((ahp->size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) {
		return ((struct anon *)
		    ((uintptr_t)ahp->array_chunk[an_idx] & ANON_PTRMASK));
	} else {

		/*
		 * 2 level case.
		 */
		app = ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
		if (app) {
			return ((struct anon *)
			    ((uintptr_t)app[an_idx & ANON_CHUNK_OFF] &
			    ANON_PTRMASK));
		} else {
			return (NULL);
		}
	}
}

/*
 * Return the anon pointer for the first valid entry in the anon list,
 * starting from the given index.
 */
struct anon *
anon_get_next_ptr(struct anon_hdr *ahp, ulong_t *index)
{
	struct anon *ap;
	struct anon **app;
	ulong_t chunkoff;
	ulong_t i;
	ulong_t j;
	pgcnt_t size;

	i = *index;
	size = ahp->size;

	ASSERT(i < size);

	if ((size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) {
		/*
		 * 1 level case
		 */
		while (i < size) {
			ap = (struct anon *)
			    ((uintptr_t)ahp->array_chunk[i] & ANON_PTRMASK);
			if (ap) {
				*index = i;
				return (ap);
			}
			i++;
		}
	} else {
		/*
		 * 2 level case
		 */
		chunkoff = i & ANON_CHUNK_OFF;
		while (i < size) {
			app = ahp->array_chunk[i >> ANON_CHUNK_SHIFT];
			if (app)
				for (j = chunkoff; j < ANON_CHUNK_SIZE; j++) {
					ap = (struct anon *)
					    ((uintptr_t)app[j] & ANON_PTRMASK);
					if (ap) {
						*index = i + (j - chunkoff);
						return (ap);
					}
				}
			chunkoff = 0;
			i = (i + ANON_CHUNK_SIZE) & ~ANON_CHUNK_OFF;
		}
	}
	*index = size;
	return (NULL);
}
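
/*
 * Index decomposition sketch (comment only) for the 2 level get/set
 * routines here: the upper bits of an anon index select a chunk and
 * the lower bits select a slot within it, while the entry bits
 * excluded by ANON_PTRMASK are reserved for flags such as the busy
 * bit:
 *
 *	struct anon **chunk = ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
 *	struct anon *ap = (struct anon *)
 *	    ((uintptr_t)chunk[an_idx & ANON_CHUNK_OFF] & ANON_PTRMASK);
 */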

/*
 * Set list entry with a given pointer for a specified offset
 */
int
anon_set_ptr(struct anon_hdr *ahp, ulong_t an_idx, struct anon *ap, int flags)
{
	void **ppp;
	struct anon **app;
	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
	uintptr_t *ap_addr;

	ASSERT(an_idx < ahp->size);

	/*
	 * Single level case.
	 */
	if (ahp->size <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
		ap_addr = (uintptr_t *)&ahp->array_chunk[an_idx];
	} else {

		/*
		 * 2 level case.
		 */
		ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];

		ASSERT(ppp != NULL);
		if (*ppp == NULL) {
			mutex_enter(&ahp->serial_lock);
			ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
			if (*ppp == NULL) {
				*ppp = kmem_zalloc(PAGESIZE, kmemflags);
				if (*ppp == NULL) {
					mutex_exit(&ahp->serial_lock);
					return (ENOMEM);
				}
			}
			mutex_exit(&ahp->serial_lock);
		}
		app = *ppp;
		ap_addr = (uintptr_t *)&app[an_idx & ANON_CHUNK_OFF];
	}
	*ap_addr = (*ap_addr & ~ANON_PTRMASK) | (uintptr_t)ap;
	return (0);
}

/*
 * Copy anon array into a given new anon array
 */
int
anon_copy_ptr(struct anon_hdr *sahp, ulong_t s_idx,
	struct anon_hdr *dahp, ulong_t d_idx,
	pgcnt_t npages, int flags)
{
	void **sapp, **dapp;
	void *ap;
	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;

	ASSERT((s_idx < sahp->size) && (d_idx < dahp->size));
	ASSERT((npages <= sahp->size) && (npages <= dahp->size));

	/*
	 * Both arrays are 1 level.
	 */
	if (((sahp->size <= ANON_CHUNK_SIZE) &&
	    (dahp->size <= ANON_CHUNK_SIZE)) ||
	    ((sahp->flags & ANON_ALLOC_FORCE) &&
	    (dahp->flags & ANON_ALLOC_FORCE))) {

		bcopy(&sahp->array_chunk[s_idx], &dahp->array_chunk[d_idx],
		    npages * sizeof (struct anon *));
		return (0);
	}

	/*
	 * Both arrays are 2 levels.
	 */
	if (sahp->size > ANON_CHUNK_SIZE &&
	    dahp->size > ANON_CHUNK_SIZE &&
	    ((sahp->flags & ANON_ALLOC_FORCE) == 0) &&
	    ((dahp->flags & ANON_ALLOC_FORCE) == 0)) {

		ulong_t sapidx, dapidx;
		ulong_t *sap, *dap;
		ulong_t chknp;

		while (npages != 0) {

			sapidx = s_idx & ANON_CHUNK_OFF;
			dapidx = d_idx & ANON_CHUNK_OFF;
			chknp = ANON_CHUNK_SIZE - MAX(sapidx, dapidx);
			if (chknp > npages)
				chknp = npages;

			sapp = &sahp->array_chunk[s_idx >> ANON_CHUNK_SHIFT];
			if ((sap = *sapp) != NULL) {
				dapp = &dahp->array_chunk[d_idx
				    >> ANON_CHUNK_SHIFT];
				if ((dap = *dapp) == NULL) {
					*dapp = kmem_zalloc(PAGESIZE,
					    kmemflags);
					if ((dap = *dapp) == NULL)
						return (ENOMEM);
				}
				bcopy((sap + sapidx), (dap + dapidx),
				    chknp << ANON_PTRSHIFT);
			}
			s_idx += chknp;
			d_idx += chknp;
			npages -= chknp;
		}
		return (0);
	}

	/*
	 * At least one of the arrays is 2 level.
	 */
	while (npages--) {
		if ((ap = anon_get_ptr(sahp, s_idx)) != NULL) {
			ASSERT(!ANON_ISBUSY(anon_get_slot(sahp, s_idx)));
			if (anon_set_ptr(dahp, d_idx, ap, flags) == ENOMEM)
				return (ENOMEM);
		}
		s_idx++;
		d_idx++;
	}
	return (0);
}
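
/*
 * Note on the 2 level copy loop above (comment only): chknp is the
 * largest run that stays inside a single chunk of both arrays; e.g.
 * with a source offset of 10 and a destination offset of 30 within
 * their current chunks, at most ANON_CHUNK_SIZE - 30 entries can be
 * copied before the destination crosses a chunk boundary.
 */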

/*
 * ANON_INITBUF is a convenience macro for anon_grow() below.  It
 * takes a buffer dst, which is at least as large as buffer src.  It
 * does a bcopy from src into dst, and then bzeros the extra bytes
 * of dst.  If tail is set, the data in src is tail aligned within
 * dst instead of head aligned.
 */

#define	ANON_INITBUF(src, srclen, dst, dstsize, tail)			      \
	if (tail) {							      \
		bzero((dst), (dstsize) - (srclen));			      \
		bcopy((src), (char *)(dst) + (dstsize) - (srclen), (srclen)); \
	} else {							      \
		bcopy((src), (dst), (srclen));				      \
		bzero((char *)(dst) + (srclen), (dstsize) - (srclen));	      \
	}

#define	ANON_1_LEVEL_INC	(ANON_CHUNK_SIZE / 8)
#define	ANON_2_LEVEL_INC	(ANON_1_LEVEL_INC * ANON_CHUNK_SIZE)

/*
 * anon_grow() is used to efficiently extend an existing anon array.
 * startidx_p points to the index into the anon array of the first page
 * that is in use.  oldseg_pgs is the number of pages in use, starting at
 * *startidx_p.  newseg_pgs is the number of additional pages desired.
 *
 * If startidx_p == NULL, startidx is taken to be 0 and cannot be changed.
 *
 * The growth is done by creating a new top level of the anon array,
 * and (if the array is 2-level) reusing the existing second level arrays.
 *
 * flags can be used to specify ANON_NOSLEEP and ANON_GROWDOWN.
 *
 * Returns the new number of pages in the anon array.
 */
pgcnt_t
anon_grow(struct anon_hdr *ahp, ulong_t *startidx_p, pgcnt_t oldseg_pgs,
    pgcnt_t newseg_pgs, int flags)
{
	ulong_t startidx = startidx_p ? *startidx_p : 0;
	pgcnt_t oldamp_pgs = ahp->size, newamp_pgs;
	pgcnt_t oelems, nelems, totpages;
	void **level1;
	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
	int growdown = (flags & ANON_GROWDOWN);
	size_t newarrsz, oldarrsz;
	void *level2;

	ASSERT(!(startidx_p == NULL && growdown));
	ASSERT(startidx + oldseg_pgs <= ahp->size);

	/*
	 * Determine the total number of pages needed in the new
	 * anon array.  If growing down, totpages is all pages from
	 * startidx through the end of the array, plus <newseg_pgs>
	 * pages.  If growing up, keep all pages from page 0 through
	 * the last page currently in use, plus <newseg_pgs> pages.
	 */
	if (growdown)
		totpages = oldamp_pgs - startidx + newseg_pgs;
	else
		totpages = startidx + oldseg_pgs + newseg_pgs;

	/* If the array is already large enough, just return. */

	if (oldamp_pgs >= totpages) {
		if (growdown)
			*startidx_p = oldamp_pgs - totpages;
		return (oldamp_pgs);
	}

	/*
	 * oldamp_pgs/newamp_pgs are the total numbers of pages represented
	 * by the corresponding arrays.
	 * oelems/nelems are the number of pointers in the top level arrays
	 * which may be either level 1 or level 2.
	 * Will the new anon array be one level or two levels?
	 */
	if (totpages <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
		newamp_pgs = P2ROUNDUP(totpages, ANON_1_LEVEL_INC);
		oelems = oldamp_pgs;
		nelems = newamp_pgs;
	} else {
		newamp_pgs = P2ROUNDUP(totpages, ANON_2_LEVEL_INC);
		oelems = (oldamp_pgs + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
		nelems = newamp_pgs >> ANON_CHUNK_SHIFT;
	}

	newarrsz = nelems * sizeof (void *);
	level1 = kmem_alloc(newarrsz, kmemflags);
	if (level1 == NULL)
		return (0);

	/* Are we converting from a one level to a two level anon array? */

	if (newamp_pgs > ANON_CHUNK_SIZE && oldamp_pgs <= ANON_CHUNK_SIZE &&
	    !(ahp->flags & ANON_ALLOC_FORCE)) {

		/*
		 * Yes, we're converting to a two level.  Reuse old level 1
		 * as new level 2 if it is exactly PAGESIZE.  Otherwise
		 * alloc a new level 2 and copy the old level 1 data into it.
		 */
		if (oldamp_pgs == ANON_CHUNK_SIZE) {
			level2 = (void *)ahp->array_chunk;
		} else {
			level2 = kmem_alloc(PAGESIZE, kmemflags);
			if (level2 == NULL) {
				kmem_free(level1, newarrsz);
				return (0);
			}
			oldarrsz = oldamp_pgs * sizeof (void *);

			ANON_INITBUF(ahp->array_chunk, oldarrsz,
			    level2, PAGESIZE, growdown);
			kmem_free(ahp->array_chunk, oldarrsz);
		}
		bzero(level1, newarrsz);
		if (growdown)
			level1[nelems - 1] = level2;
		else
			level1[0] = level2;
	} else {
		oldarrsz = oelems * sizeof (void *);

		ANON_INITBUF(ahp->array_chunk, oldarrsz,
		    level1, newarrsz, growdown);
		kmem_free(ahp->array_chunk, oldarrsz);
	}

	ahp->array_chunk = level1;
	ahp->size = newamp_pgs;
	if (growdown)
		*startidx_p = newamp_pgs - totpages;

	return (newamp_pgs);
}
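
/*
 * A worked example of the computation above (comment only): growing
 * down with oldamp_pgs 512, startidx 384 and newseg_pgs 256 gives
 * totpages = 512 - 384 + 256 = 384; since oldamp_pgs >= totpages the
 * array is already large enough, *startidx_p becomes 512 - 384 = 128
 * and 512 is returned.
 */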

/*
 * Called to sync the ani_free value.
 */

void
set_anoninfo(void)
{
	processorid_t	ix, max_seqid;
	pgcnt_t		total = 0;
	static clock_t	last_time;
	clock_t		new_time;

	if (ani_free_pool == NULL)
		return;

	/*
	 * Recompute ani_free at most once per tick.  Use max_cpu_seqid_ever
	 * to identify the maximum number of CPUs that were ever online.
	 */
	new_time = ddi_get_lbolt();
	if (new_time > last_time) {

		max_seqid = max_cpu_seqid_ever;
		ASSERT(ANI_MAX_POOL > max_seqid);
		for (ix = 0; ix <= max_seqid; ix++)
			total += ani_free_pool[ix].ani_count;

		last_time = new_time;
		k_anoninfo.ani_free = total;
	}
}
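
/*
 * The free-slot count is maintained as a per-CPU array (ani_free_pool,
 * indexed by CPU sequence id) so that the frequent ANI_ADD() updates
 * do not all contend on a single counter or cache line; set_anoninfo()
 * above is the slow path that folds the per-CPU counts back into
 * k_anoninfo.ani_free at most once per tick.
 */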

/*
 * Reserve anon space.
 *
 * It's no longer simply a matter of incrementing ani_resv to
 * reserve swap space; we need to check memory-based as well
 * as disk-backed (physical) swap.  The following algorithm
 * is used:
 *	Check the space on physical swap
 *		i.e. amount needed < ani_max - ani_phys_resv
 *	If we are swapping on swapfs check
 *		amount needed < (availrmem - swapfs_minfree)
 * Since the algorithm to check for the quantity of swap space is
 * almost the same as that for reserving it, we'll just use anon_resvmem
 * with a flag to decrement availrmem.
 *
 * Return non-zero on success.
 */
int
anon_resvmem(size_t size, boolean_t takemem, zone_t *zone, int tryhard)
{
	pgcnt_t npages = btopr(size);
	pgcnt_t mswap_pages = 0;
	pgcnt_t pswap_pages = 0;
	proc_t *p = curproc;

	if (zone != NULL && takemem) {
		/* test zone.max-swap resource control */
		mutex_enter(&p->p_lock);
		if (rctl_incr_swap(p, zone, ptob(npages)) != 0) {
			mutex_exit(&p->p_lock);
			return (0);
		}
		mutex_exit(&p->p_lock);
	}
	mutex_enter(&anoninfo_lock);

	/*
	 * pswap_pages is the number of pages we can take from
	 * physical (i.e. disk-backed) swap.
	 */
	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
	pswap_pages = k_anoninfo.ani_max - k_anoninfo.ani_phys_resv;

	ANON_PRINT(A_RESV,
	    ("anon_resvmem: npages %lu takemem %u pswap %lu caller %p\n",
	    npages, takemem, pswap_pages, (void *)caller()));

	if (npages <= pswap_pages) {
		/*
		 * we have enough space on a physical swap
		 */
		if (takemem)
			k_anoninfo.ani_phys_resv += npages;
		mutex_exit(&anoninfo_lock);
		return (1);
	} else if (pswap_pages != 0) {
		/*
		 * we have some space on a physical swap
		 */
		if (takemem) {
			/*
			 * use up remainder of phys swap
			 */
			k_anoninfo.ani_phys_resv += pswap_pages;
			ASSERT(k_anoninfo.ani_phys_resv == k_anoninfo.ani_max);
		}
	}
	/*
	 * since (npages > pswap_pages) we need mem swap
	 * mswap_pages is the number of pages needed from availrmem
	 */
	ASSERT(npages > pswap_pages);
	mswap_pages = npages - pswap_pages;

	ANON_PRINT(A_RESV, ("anon_resvmem: need %ld pages from memory\n",
	    mswap_pages));

	/*
	 * priv processes can reserve memory as swap as long as availrmem
	 * remains greater than swapfs_minfree; in the case of non-priv
	 * processes, memory can be reserved as swap only if availrmem
	 * doesn't fall below (swapfs_minfree + swapfs_reserve).  Thus,
	 * swapfs_reserve amount of memswap is not available to non-priv
	 * processes.  This protects daemons such as automounter from dying
	 * as a result of application processes eating away almost the
	 * entire memory-based swap.  This safeguard becomes useless if
	 * apps are run with root access.
	 *
	 * swapfs_reserve is minimum of 4Mb or 1/16 of physmem.
	 *
	 */
	if (tryhard) {
		pgcnt_t floor_pages;

		if (secpolicy_resource_anon_mem(CRED())) {
			floor_pages = swapfs_minfree;
		} else {
			floor_pages = swapfs_minfree + swapfs_reserve;
		}

		mutex_exit(&anoninfo_lock);
		(void) page_reclaim_mem(mswap_pages, floor_pages, 0);
		mutex_enter(&anoninfo_lock);
	}

	mutex_enter(&freemem_lock);
	if (availrmem > (swapfs_minfree + swapfs_reserve + mswap_pages) ||
	    (availrmem > (swapfs_minfree + mswap_pages) &&
	    secpolicy_resource(CRED()) == 0)) {

		if (takemem) {
			/*
			 * Take the memory from the rest of the system.
			 */
			availrmem -= mswap_pages;
			mutex_exit(&freemem_lock);
			k_anoninfo.ani_mem_resv += mswap_pages;
			ANI_ADD(mswap_pages);
			ANON_PRINT((A_RESV | A_MRESV),
			    ("anon_resvmem: took %ld pages of availrmem\n",
			    mswap_pages));
		} else {
			mutex_exit(&freemem_lock);
		}

		ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
		mutex_exit(&anoninfo_lock);
		return (1);
	} else {
		/*
		 * Fail if not enough memory
		 */
		if (takemem) {
			k_anoninfo.ani_phys_resv -= pswap_pages;
		}

		mutex_exit(&freemem_lock);
		mutex_exit(&anoninfo_lock);
		ANON_PRINT(A_RESV,
		    ("anon_resvmem: not enough space from swapfs\n"));
		if (zone != NULL && takemem)
			rctl_decr_swap(zone, ptob(npages));
		return (0);
	}
}
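
/*
 * A worked example of the split above (comment only): reserving 100
 * pages when only 40 unreserved slots remain on physical swap takes
 * pswap_pages = 40 from physical swap and must find the remaining
 * mswap_pages = 60 in availrmem, subject to the swapfs_minfree and
 * swapfs_reserve floors checked above.
 */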

/*
 * Give back an anon reservation.
 */
void
anon_unresvmem(size_t size, zone_t *zone)
{
	pgcnt_t npages = btopr(size);
	spgcnt_t mem_free_pages = 0;
	pgcnt_t phys_free_slots;
#ifdef	ANON_DEBUG
	pgcnt_t mem_resv;
#endif
	if (zone != NULL)
		rctl_decr_swap(zone, ptob(npages));

	mutex_enter(&anoninfo_lock);

	ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);

	/*
	 * If some of this reservation belonged to swapfs
	 * give it back to availrmem.
	 * ani_mem_resv is the amount of availrmem swapfs has reserved,
	 * but some of that memory could be locked by segspt, so we can
	 * only return the non-locked part of ani_mem_resv to availrmem.
	 */
	if (k_anoninfo.ani_mem_resv > k_anoninfo.ani_locked_swap) {
		ANON_PRINT((A_RESV | A_MRESV),
		    ("anon_unresv: growing availrmem by %ld pages\n",
		    MIN(k_anoninfo.ani_mem_resv, npages)));

		mem_free_pages = MIN((spgcnt_t)(k_anoninfo.ani_mem_resv -
		    k_anoninfo.ani_locked_swap), npages);
		mutex_enter(&freemem_lock);
		availrmem += mem_free_pages;
		mutex_exit(&freemem_lock);
		k_anoninfo.ani_mem_resv -= mem_free_pages;

		ANI_ADD(-mem_free_pages);
	}
	/*
	 * The remainder of the pages is returned to phys swap
	 */
	ASSERT(npages >= mem_free_pages);
	phys_free_slots = npages - mem_free_pages;

	if (phys_free_slots) {
		k_anoninfo.ani_phys_resv -= phys_free_slots;
	}

#ifdef	ANON_DEBUG
	mem_resv = k_anoninfo.ani_mem_resv;
#endif

	ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);

	mutex_exit(&anoninfo_lock);

	ANON_PRINT(A_RESV, ("anon_unresv: %lu, tot %lu, caller %p\n",
	    npages, mem_resv, (void *)caller()));
}

/*
 * Allocate an anon slot and return it with the lock held.
 */
struct anon *
anon_alloc(struct vnode *vp, anoff_t off)
{
	struct anon	*ap;
	kmutex_t	*ahm;

	ap = kmem_cache_alloc(anon_cache, KM_SLEEP);
	if (vp == NULL) {
		swap_alloc(ap);
	} else {
		ap->an_vp = vp;
		ap->an_off = off;
	}
	ap->an_refcnt = 1;
	ap->an_pvp = NULL;
	ap->an_poff = 0;
	ahm = AH_MUTEX(ap->an_vp, ap->an_off);
	mutex_enter(ahm);
	anon_addhash(ap);
	mutex_exit(ahm);
	ANI_ADD(-1);
	ANON_PRINT(A_ANON, ("anon_alloc: returning ap %p, vp %p\n",
	    (void *)ap, (ap ? (void *)ap->an_vp : NULL)));
	return (ap);
}

/*
 * Called for pages locked in memory via softlock/pagelock/mlock to make sure
 * such pages don't consume any physical swap resources needed for swapping
 * unlocked pages.
 */
void
anon_swap_free(struct anon *ap, page_t *pp)
{
	kmutex_t *ahm;

	ASSERT(ap != NULL);
	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(IS_SWAPFSVP(pp->p_vnode));
	ASSERT(ap->an_refcnt != 0);
	ASSERT(pp->p_vnode == ap->an_vp);
	ASSERT(pp->p_offset == ap->an_off);

	if (ap->an_pvp == NULL)
		return;

	page_io_lock(pp);
	ahm = AH_MUTEX(ap->an_vp, ap->an_off);
	mutex_enter(ahm);

	ASSERT(ap->an_refcnt != 0);
	ASSERT(pp->p_vnode == ap->an_vp);
	ASSERT(pp->p_offset == ap->an_off);

	if (ap->an_pvp != NULL) {
		swap_phys_free(ap->an_pvp, ap->an_poff, PAGESIZE);
		ap->an_pvp = NULL;
		ap->an_poff = 0;
		mutex_exit(ahm);
		hat_setmod(pp);
	} else {
		mutex_exit(ahm);
	}
	page_io_unlock(pp);
}

/*
 * Decrement the reference count of an anon page.
 * If reference count goes to zero, free it and
 * its associated page (if any).
 */
void
anon_decref(struct anon *ap)
{
	page_t *pp;
	struct vnode *vp;
	anoff_t off;
	kmutex_t *ahm;

	ahm = AH_MUTEX(ap->an_vp, ap->an_off);
	mutex_enter(ahm);
	ASSERT(ap->an_refcnt != 0);
	if (ap->an_refcnt == 0)
		panic("anon_decref: slot count 0");
	if (--ap->an_refcnt == 0) {
		swap_xlate(ap, &vp, &off);
		anon_rmhash(ap);
		if (ap->an_pvp != NULL)
			swap_phys_free(ap->an_pvp, ap->an_poff, PAGESIZE);
		mutex_exit(ahm);

		/*
		 * If there is a page for this anon slot we will need to
		 * call VN_DISPOSE to get rid of the vp association and
		 * put the page back on the free list as really free.
		 * Acquire the "exclusive" lock to ensure that any
		 * pending i/o always completes before the swap slot
		 * is freed.
		 */
		pp = page_lookup(vp, (u_offset_t)off, SE_EXCL);
		if (pp != NULL) {
			/*LINTED: constant in conditional context */
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
		}
		ANON_PRINT(A_ANON, ("anon_decref: free ap %p, vp %p\n",
		    (void *)ap, (void *)ap->an_vp));

		kmem_cache_free(anon_cache, ap);

		ANI_ADD(1);
	} else {
		mutex_exit(ahm);
	}
}
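
/*
 * Usage sketch (comment only): a slot is created with a reference
 * count of one and released symmetrically, so a typical private page
 * lifecycle is:
 *
 *	struct anon *ap = anon_alloc(NULL, 0);
 *	...
 *	anon_decref(ap);
 *
 * anon_alloc() picks the backing (vp, off) via swap_alloc() when vp is
 * NULL; anon_decref() frees the slot, its physical swap slot and any
 * associated page once the count drops to zero.
 */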
110478b03d3aSkchow */ 110578b03d3aSkchow int 110678b03d3aSkchow anon_szcshare(struct anon_hdr *ahp, ulong_t anon_index) 110778b03d3aSkchow { 110878b03d3aSkchow struct anon *ap; 110978b03d3aSkchow kmutex_t *ahmpages = NULL; 111078b03d3aSkchow 111178b03d3aSkchow ap = anon_get_ptr(ahp, anon_index); 111278b03d3aSkchow if (ap == NULL) 111378b03d3aSkchow return (0); 111478b03d3aSkchow 111523d9e5acSMichael Corcoran ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); 111678b03d3aSkchow mutex_enter(ahmpages); 111778b03d3aSkchow ASSERT(ap->an_refcnt >= 1); 111878b03d3aSkchow if (ap->an_refcnt == 1) { 111978b03d3aSkchow mutex_exit(ahmpages); 112078b03d3aSkchow return (0); 112178b03d3aSkchow } 112278b03d3aSkchow mutex_exit(ahmpages); 112378b03d3aSkchow return (1); 112478b03d3aSkchow } 112578b03d3aSkchow /* 112678b03d3aSkchow * Check 'nslots' anon slots for refcnt > 1. 112778b03d3aSkchow * 112878b03d3aSkchow * returns 1 if any of the 'nslots' anon slots has a refcnt > 1 otherwise 112978b03d3aSkchow * returns 0. 113078b03d3aSkchow */ 11317c478bd9Sstevel@tonic-gate static int 11327c478bd9Sstevel@tonic-gate anon_share(struct anon_hdr *ahp, ulong_t anon_index, pgcnt_t nslots) 11337c478bd9Sstevel@tonic-gate { 11347c478bd9Sstevel@tonic-gate struct anon *ap; 11357c478bd9Sstevel@tonic-gate 11367c478bd9Sstevel@tonic-gate while (nslots-- > 0) { 11377c478bd9Sstevel@tonic-gate if ((ap = anon_get_ptr(ahp, anon_index)) != NULL && 11387c478bd9Sstevel@tonic-gate ap->an_refcnt > 1) 11397c478bd9Sstevel@tonic-gate return (1); 11407c478bd9Sstevel@tonic-gate anon_index++; 11417c478bd9Sstevel@tonic-gate } 11427c478bd9Sstevel@tonic-gate 11437c478bd9Sstevel@tonic-gate return (0); 11447c478bd9Sstevel@tonic-gate } 11457c478bd9Sstevel@tonic-gate 11467c478bd9Sstevel@tonic-gate static void 11477c478bd9Sstevel@tonic-gate anon_decref_pages( 11487c478bd9Sstevel@tonic-gate struct anon_hdr *ahp, 11497c478bd9Sstevel@tonic-gate ulong_t an_idx, 11507c478bd9Sstevel@tonic-gate uint_t szc) 11517c478bd9Sstevel@tonic-gate { 11527c478bd9Sstevel@tonic-gate struct anon *ap = anon_get_ptr(ahp, an_idx); 11537c478bd9Sstevel@tonic-gate kmutex_t *ahmpages = NULL; 11547c478bd9Sstevel@tonic-gate page_t *pp; 11557c478bd9Sstevel@tonic-gate pgcnt_t pgcnt = page_get_pagecnt(szc); 11567c478bd9Sstevel@tonic-gate pgcnt_t i; 11577c478bd9Sstevel@tonic-gate struct vnode *vp; 11587c478bd9Sstevel@tonic-gate anoff_t off; 11597c478bd9Sstevel@tonic-gate kmutex_t *ahm; 11607c478bd9Sstevel@tonic-gate #ifdef DEBUG 11617c478bd9Sstevel@tonic-gate int refcnt = 1; 11627c478bd9Sstevel@tonic-gate #endif 11637c478bd9Sstevel@tonic-gate 11647c478bd9Sstevel@tonic-gate ASSERT(szc != 0); 11657c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 11667c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(an_idx, pgcnt)); 116707b65a64Saguzovsk ASSERT(an_idx < ahp->size); 116807b65a64Saguzovsk 116907b65a64Saguzovsk if (ahp->size - an_idx < pgcnt) { 117007b65a64Saguzovsk /* 117107b65a64Saguzovsk * In case of shared mappings total anon map size may not be 117207b65a64Saguzovsk * the largest page size aligned. 
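 *
 * Worked example (added): with szc giving pgcnt == 8, ahp->size == 21 and
 * an_idx == 16, only 21 - 16 == 5 trailing slots exist, so pgcnt is
 * clamped to 5 here.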
117307b65a64Saguzovsk */ 117407b65a64Saguzovsk pgcnt = ahp->size - an_idx; 117507b65a64Saguzovsk } 11767c478bd9Sstevel@tonic-gate 11777c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.decrefpages[0]); 11787c478bd9Sstevel@tonic-gate 11797c478bd9Sstevel@tonic-gate if (ap != NULL) { 118023d9e5acSMichael Corcoran ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); 11817c478bd9Sstevel@tonic-gate mutex_enter(ahmpages); 11827c478bd9Sstevel@tonic-gate ASSERT((refcnt = ap->an_refcnt) != 0); 11837c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.decrefpages[1]); 11847c478bd9Sstevel@tonic-gate if (ap->an_refcnt == 1) { 11857c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.decrefpages[2]); 11867c478bd9Sstevel@tonic-gate ASSERT(!anon_share(ahp, an_idx, pgcnt)); 11877c478bd9Sstevel@tonic-gate mutex_exit(ahmpages); 11887c478bd9Sstevel@tonic-gate ahmpages = NULL; 11897c478bd9Sstevel@tonic-gate } 11907c478bd9Sstevel@tonic-gate } 11917c478bd9Sstevel@tonic-gate 11927c478bd9Sstevel@tonic-gate i = 0; 11937c478bd9Sstevel@tonic-gate while (i < pgcnt) { 11947c478bd9Sstevel@tonic-gate if ((ap = anon_get_ptr(ahp, an_idx + i)) == NULL) { 11957c478bd9Sstevel@tonic-gate ASSERT(refcnt == 1 && ahmpages == NULL); 11967c478bd9Sstevel@tonic-gate i++; 11977c478bd9Sstevel@tonic-gate continue; 11987c478bd9Sstevel@tonic-gate } 11997c478bd9Sstevel@tonic-gate ASSERT(ap->an_refcnt == refcnt); 12007c478bd9Sstevel@tonic-gate ASSERT(ahmpages != NULL || ap->an_refcnt == 1); 12017c478bd9Sstevel@tonic-gate ASSERT(ahmpages == NULL || ap->an_refcnt > 1); 12027c478bd9Sstevel@tonic-gate 12037c478bd9Sstevel@tonic-gate if (ahmpages == NULL) { 12047c478bd9Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 12057c478bd9Sstevel@tonic-gate pp = page_lookup(vp, (u_offset_t)off, SE_EXCL); 12067c478bd9Sstevel@tonic-gate if (pp == NULL || pp->p_szc == 0) { 12077c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.decrefpages[3]); 120823d9e5acSMichael Corcoran ahm = AH_MUTEX(ap->an_vp, ap->an_off); 12097c478bd9Sstevel@tonic-gate (void) anon_set_ptr(ahp, an_idx + i, NULL, 12107c478bd9Sstevel@tonic-gate ANON_SLEEP); 12117c478bd9Sstevel@tonic-gate mutex_enter(ahm); 12127c478bd9Sstevel@tonic-gate ap->an_refcnt--; 12137c478bd9Sstevel@tonic-gate ASSERT(ap->an_refcnt == 0); 12147c478bd9Sstevel@tonic-gate anon_rmhash(ap); 12157c478bd9Sstevel@tonic-gate if (ap->an_pvp) 12167c478bd9Sstevel@tonic-gate swap_phys_free(ap->an_pvp, ap->an_poff, 12177c478bd9Sstevel@tonic-gate PAGESIZE); 12187c478bd9Sstevel@tonic-gate mutex_exit(ahm); 121920a2d3f6Sstans if (pp == NULL) { 122020a2d3f6Sstans pp = page_lookup(vp, (u_offset_t)off, 122120a2d3f6Sstans SE_EXCL); 122220a2d3f6Sstans ASSERT(pp == NULL || pp->p_szc == 0); 122320a2d3f6Sstans } 12247c478bd9Sstevel@tonic-gate if (pp != NULL) { 12257c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.decrefpages[4]); 12267c478bd9Sstevel@tonic-gate /*LINTED*/ 12277c478bd9Sstevel@tonic-gate VN_DISPOSE(pp, B_INVAL, 0, kcred); 12287c478bd9Sstevel@tonic-gate } 12297c478bd9Sstevel@tonic-gate kmem_cache_free(anon_cache, ap); 12307c478bd9Sstevel@tonic-gate ANI_ADD(1); 12317c478bd9Sstevel@tonic-gate i++; 12327c478bd9Sstevel@tonic-gate } else { 12337c478bd9Sstevel@tonic-gate pgcnt_t j; 12347c478bd9Sstevel@tonic-gate pgcnt_t curpgcnt = 12357c478bd9Sstevel@tonic-gate page_get_pagecnt(pp->p_szc); 12367c478bd9Sstevel@tonic-gate size_t ppasize = curpgcnt * sizeof (page_t *); 12377c478bd9Sstevel@tonic-gate page_t **ppa = kmem_alloc(ppasize, KM_SLEEP); 12387c478bd9Sstevel@tonic-gate int dispose = 0; 12397c478bd9Sstevel@tonic-gate 12407c478bd9Sstevel@tonic-gate 
VM_STAT_ADD(anonvmstats.decrefpages[5]); 12417c478bd9Sstevel@tonic-gate 12427c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc <= szc); 12437c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(curpgcnt, curpgcnt)); 12447c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(i, curpgcnt)); 12457c478bd9Sstevel@tonic-gate ASSERT(i + curpgcnt <= pgcnt); 12467c478bd9Sstevel@tonic-gate ASSERT(!(page_pptonum(pp) & (curpgcnt - 1))); 12477c478bd9Sstevel@tonic-gate ppa[0] = pp; 12487c478bd9Sstevel@tonic-gate for (j = i + 1; j < i + curpgcnt; j++) { 12497c478bd9Sstevel@tonic-gate ap = anon_get_ptr(ahp, an_idx + j); 12507c478bd9Sstevel@tonic-gate ASSERT(ap != NULL && 12517c478bd9Sstevel@tonic-gate ap->an_refcnt == 1); 12527c478bd9Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 12537c478bd9Sstevel@tonic-gate pp = page_lookup(vp, (u_offset_t)off, 12547c478bd9Sstevel@tonic-gate SE_EXCL); 12557c478bd9Sstevel@tonic-gate if (pp == NULL) 12567c478bd9Sstevel@tonic-gate panic("anon_decref_pages: " 12577c478bd9Sstevel@tonic-gate "no page"); 12587c478bd9Sstevel@tonic-gate 12597c478bd9Sstevel@tonic-gate (void) hat_pageunload(pp, 12607c478bd9Sstevel@tonic-gate HAT_FORCE_PGUNLOAD); 12617c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == ppa[0]->p_szc); 12627c478bd9Sstevel@tonic-gate ASSERT(page_pptonum(pp) - 1 == 12637c478bd9Sstevel@tonic-gate page_pptonum(ppa[j - i - 1])); 12647c478bd9Sstevel@tonic-gate ppa[j - i] = pp; 12657c478bd9Sstevel@tonic-gate if (ap->an_pvp != NULL && 12667c478bd9Sstevel@tonic-gate !vn_matchopval(ap->an_pvp, 12677c478bd9Sstevel@tonic-gate VOPNAME_DISPOSE, 12687c478bd9Sstevel@tonic-gate (fs_generic_func_p)fs_dispose)) 12697c478bd9Sstevel@tonic-gate dispose = 1; 12707c478bd9Sstevel@tonic-gate } 12717c478bd9Sstevel@tonic-gate for (j = i; j < i + curpgcnt; j++) { 12727c478bd9Sstevel@tonic-gate ap = anon_get_ptr(ahp, an_idx + j); 12737c478bd9Sstevel@tonic-gate ASSERT(ap != NULL && 12747c478bd9Sstevel@tonic-gate ap->an_refcnt == 1); 127523d9e5acSMichael Corcoran ahm = AH_MUTEX(ap->an_vp, ap->an_off); 12767c478bd9Sstevel@tonic-gate (void) anon_set_ptr(ahp, an_idx + j, 12777c478bd9Sstevel@tonic-gate NULL, ANON_SLEEP); 12787c478bd9Sstevel@tonic-gate mutex_enter(ahm); 12797c478bd9Sstevel@tonic-gate ap->an_refcnt--; 12807c478bd9Sstevel@tonic-gate ASSERT(ap->an_refcnt == 0); 12817c478bd9Sstevel@tonic-gate anon_rmhash(ap); 12827c478bd9Sstevel@tonic-gate if (ap->an_pvp) 12837c478bd9Sstevel@tonic-gate swap_phys_free(ap->an_pvp, 12847c478bd9Sstevel@tonic-gate ap->an_poff, PAGESIZE); 12857c478bd9Sstevel@tonic-gate mutex_exit(ahm); 12867c478bd9Sstevel@tonic-gate kmem_cache_free(anon_cache, ap); 12877c478bd9Sstevel@tonic-gate ANI_ADD(1); 12887c478bd9Sstevel@tonic-gate } 128920a2d3f6Sstans if (!dispose) { 129020a2d3f6Sstans VM_STAT_ADD(anonvmstats.decrefpages[6]); 129120a2d3f6Sstans page_destroy_pages(ppa[0]); 129220a2d3f6Sstans } else { 129320a2d3f6Sstans VM_STAT_ADD(anonvmstats.decrefpages[7]); 129420a2d3f6Sstans for (j = 0; j < curpgcnt; j++) { 129520a2d3f6Sstans ASSERT(PAGE_EXCL(ppa[j])); 129620a2d3f6Sstans ppa[j]->p_szc = 0; 129720a2d3f6Sstans } 129820a2d3f6Sstans for (j = 0; j < curpgcnt; j++) { 129920a2d3f6Sstans ASSERT(!hat_page_is_mapped( 130020a2d3f6Sstans ppa[j])); 130120a2d3f6Sstans /*LINTED*/ 130220a2d3f6Sstans VN_DISPOSE(ppa[j], B_INVAL, 0, 130320a2d3f6Sstans kcred); 130420a2d3f6Sstans } 130520a2d3f6Sstans } 130620a2d3f6Sstans kmem_free(ppa, ppasize); 13077c478bd9Sstevel@tonic-gate i += curpgcnt; 13087c478bd9Sstevel@tonic-gate } 13097c478bd9Sstevel@tonic-gate } else { 13107c478bd9Sstevel@tonic-gate 
VM_STAT_ADD(anonvmstats.decrefpages[8]);
13117c478bd9Sstevel@tonic-gate (void) anon_set_ptr(ahp, an_idx + i, NULL, ANON_SLEEP);
131223d9e5acSMichael Corcoran ahm = AH_MUTEX(ap->an_vp, ap->an_off);
13137c478bd9Sstevel@tonic-gate mutex_enter(ahm);
13147c478bd9Sstevel@tonic-gate ap->an_refcnt--;
13157c478bd9Sstevel@tonic-gate mutex_exit(ahm);
13167c478bd9Sstevel@tonic-gate i++;
13177c478bd9Sstevel@tonic-gate }
13187c478bd9Sstevel@tonic-gate }
13197c478bd9Sstevel@tonic-gate
13207c478bd9Sstevel@tonic-gate if (ahmpages != NULL) {
13217c478bd9Sstevel@tonic-gate mutex_exit(ahmpages);
13227c478bd9Sstevel@tonic-gate }
13237c478bd9Sstevel@tonic-gate }
13247c478bd9Sstevel@tonic-gate
13257c478bd9Sstevel@tonic-gate /*
13267c478bd9Sstevel@tonic-gate * Duplicate references to size bytes worth of anon pages.
13277c478bd9Sstevel@tonic-gate * Used when duplicating a segment that contains private anon pages.
13287c478bd9Sstevel@tonic-gate * This code assumes that the procedure calling this one has already used
13297c478bd9Sstevel@tonic-gate * hat_chgprot() to disable write access to the range of addresses that
13307c478bd9Sstevel@tonic-gate * *old actually refers to.
13317c478bd9Sstevel@tonic-gate */
13327c478bd9Sstevel@tonic-gate void
13337c478bd9Sstevel@tonic-gate anon_dup(struct anon_hdr *old, ulong_t old_idx, struct anon_hdr *new,
13347c478bd9Sstevel@tonic-gate ulong_t new_idx, size_t size)
13357c478bd9Sstevel@tonic-gate {
13367c478bd9Sstevel@tonic-gate spgcnt_t npages;
13377c478bd9Sstevel@tonic-gate kmutex_t *ahm;
13387c478bd9Sstevel@tonic-gate struct anon *ap;
13397c478bd9Sstevel@tonic-gate ulong_t off;
13407c478bd9Sstevel@tonic-gate ulong_t index;
13417c478bd9Sstevel@tonic-gate
13427c478bd9Sstevel@tonic-gate npages = btopr(size);
13437c478bd9Sstevel@tonic-gate while (npages > 0) {
13447c478bd9Sstevel@tonic-gate index = old_idx;
13457c478bd9Sstevel@tonic-gate if ((ap = anon_get_next_ptr(old, &index)) == NULL)
13467c478bd9Sstevel@tonic-gate break;
13477c478bd9Sstevel@tonic-gate
13487c478bd9Sstevel@tonic-gate ASSERT(!ANON_ISBUSY(anon_get_slot(old, index)));
13497c478bd9Sstevel@tonic-gate off = index - old_idx;
13507c478bd9Sstevel@tonic-gate npages -= off;
13517c478bd9Sstevel@tonic-gate if (npages <= 0)
13527c478bd9Sstevel@tonic-gate break;
13537c478bd9Sstevel@tonic-gate
13547c478bd9Sstevel@tonic-gate (void) anon_set_ptr(new, new_idx + off, ap, ANON_SLEEP);
135523d9e5acSMichael Corcoran ahm = AH_MUTEX(ap->an_vp, ap->an_off);
13567c478bd9Sstevel@tonic-gate
13577c478bd9Sstevel@tonic-gate mutex_enter(ahm);
13587c478bd9Sstevel@tonic-gate ap->an_refcnt++;
13597c478bd9Sstevel@tonic-gate mutex_exit(ahm);
13607c478bd9Sstevel@tonic-gate
13617c478bd9Sstevel@tonic-gate off++;
13627c478bd9Sstevel@tonic-gate new_idx += off;
13637c478bd9Sstevel@tonic-gate old_idx += off;
13647c478bd9Sstevel@tonic-gate npages--;
13657c478bd9Sstevel@tonic-gate }
13667c478bd9Sstevel@tonic-gate }
13677c478bd9Sstevel@tonic-gate
13687c478bd9Sstevel@tonic-gate /*
13697c478bd9Sstevel@tonic-gate * Just like anon_dup but also guarantees there are no holes (unallocated anon
13707c478bd9Sstevel@tonic-gate * slots) within any large page region. That means if a large page region is
13717c478bd9Sstevel@tonic-gate * empty in the old array it will skip it. If there are one or more valid slots
13727c478bd9Sstevel@tonic-gate * in the large page region of the old array it will make sure to fill in any
13737c478bd9Sstevel@tonic-gate * unallocated ones and also copy them to the new array. If noalloc is 1, a large
13747c478bd9Sstevel@tonic-gate * page region should either have no valid anon slots or all slots should be
13757c478bd9Sstevel@tonic-gate * valid.
13767c478bd9Sstevel@tonic-gate */
13777c478bd9Sstevel@tonic-gate void
13787c478bd9Sstevel@tonic-gate anon_dup_fill_holes(
13797c478bd9Sstevel@tonic-gate struct anon_hdr *old,
13807c478bd9Sstevel@tonic-gate ulong_t old_idx,
13817c478bd9Sstevel@tonic-gate struct anon_hdr *new,
13827c478bd9Sstevel@tonic-gate ulong_t new_idx,
13837c478bd9Sstevel@tonic-gate size_t size,
13847c478bd9Sstevel@tonic-gate uint_t szc,
13857c478bd9Sstevel@tonic-gate int noalloc)
13867c478bd9Sstevel@tonic-gate {
13877c478bd9Sstevel@tonic-gate struct anon *ap;
13887c478bd9Sstevel@tonic-gate spgcnt_t npages;
13897c478bd9Sstevel@tonic-gate kmutex_t *ahm, *ahmpages = NULL;
13907c478bd9Sstevel@tonic-gate pgcnt_t pgcnt, i;
13917c478bd9Sstevel@tonic-gate ulong_t index, off;
13927c478bd9Sstevel@tonic-gate #ifdef DEBUG
13937c478bd9Sstevel@tonic-gate int refcnt;
13947c478bd9Sstevel@tonic-gate #endif
13957c478bd9Sstevel@tonic-gate
13967c478bd9Sstevel@tonic-gate ASSERT(szc != 0);
13977c478bd9Sstevel@tonic-gate pgcnt = page_get_pagecnt(szc);
13987c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
13997c478bd9Sstevel@tonic-gate npages = btopr(size);
14007c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(npages, pgcnt));
14017c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(old_idx, pgcnt));
14027c478bd9Sstevel@tonic-gate
14037c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.dupfillholes[0]);
14047c478bd9Sstevel@tonic-gate
14057c478bd9Sstevel@tonic-gate while (npages > 0) {
14067c478bd9Sstevel@tonic-gate index = old_idx;
14077c478bd9Sstevel@tonic-gate
14087c478bd9Sstevel@tonic-gate /*
14097c478bd9Sstevel@tonic-gate * Find the next valid slot.
14107c478bd9Sstevel@tonic-gate */
14117c478bd9Sstevel@tonic-gate if (anon_get_next_ptr(old, &index) == NULL)
14127c478bd9Sstevel@tonic-gate break;
14137c478bd9Sstevel@tonic-gate
14147c478bd9Sstevel@tonic-gate ASSERT(!ANON_ISBUSY(anon_get_slot(old, index)));
14157c478bd9Sstevel@tonic-gate /*
14167c478bd9Sstevel@tonic-gate * Now backup index to the beginning of the
14177c478bd9Sstevel@tonic-gate * current large page region of the old array.
14187c478bd9Sstevel@tonic-gate */
14197c478bd9Sstevel@tonic-gate index = P2ALIGN(index, pgcnt);
14207c478bd9Sstevel@tonic-gate off = index - old_idx;
14217c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(off, pgcnt));
14227c478bd9Sstevel@tonic-gate npages -= off;
14237c478bd9Sstevel@tonic-gate if (npages <= 0)
14247c478bd9Sstevel@tonic-gate break;
14257c478bd9Sstevel@tonic-gate
14267c478bd9Sstevel@tonic-gate /*
14277c478bd9Sstevel@tonic-gate * Fill and copy a large page region's worth
14287c478bd9Sstevel@tonic-gate * of anon slots.
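 *
 * Illustrative numbers (added): with pgcnt == 8 and old_idx == 0, a first
 * valid slot found at index 11 is backed up by the P2ALIGN() above to
 * index 8, so this pass fills and copies slots 8..15 as one unit and the
 * large page region stays hole-free.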
14297c478bd9Sstevel@tonic-gate */
14307c478bd9Sstevel@tonic-gate for (i = 0; i < pgcnt; i++) {
14317c478bd9Sstevel@tonic-gate if ((ap = anon_get_ptr(old, index + i)) == NULL) {
14327c478bd9Sstevel@tonic-gate if (noalloc) {
14337c478bd9Sstevel@tonic-gate panic("anon_dup_fill_holes: "
14347c478bd9Sstevel@tonic-gate "empty anon slot\n");
14357c478bd9Sstevel@tonic-gate }
14367c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.dupfillholes[1]);
14377c478bd9Sstevel@tonic-gate ap = anon_alloc(NULL, 0);
14387c478bd9Sstevel@tonic-gate (void) anon_set_ptr(old, index + i, ap,
14397c478bd9Sstevel@tonic-gate ANON_SLEEP);
14407c478bd9Sstevel@tonic-gate } else if (i == 0) {
14417c478bd9Sstevel@tonic-gate /*
14427c478bd9Sstevel@tonic-gate * Make the increment of all refcnts of all
14437c478bd9Sstevel@tonic-gate * anon slots of a large page appear atomic by
14447c478bd9Sstevel@tonic-gate * getting an anonpages_hash_lock for the
14457c478bd9Sstevel@tonic-gate * first anon slot of a large page.
14467c478bd9Sstevel@tonic-gate */
14477c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.dupfillholes[2]);
14487c478bd9Sstevel@tonic-gate
144923d9e5acSMichael Corcoran ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
14507c478bd9Sstevel@tonic-gate mutex_enter(ahmpages);
14517c478bd9Sstevel@tonic-gate /*LINTED*/
14527c478bd9Sstevel@tonic-gate ASSERT(refcnt = ap->an_refcnt);
14537c478bd9Sstevel@tonic-gate
14547c478bd9Sstevel@tonic-gate VM_STAT_COND_ADD(ap->an_refcnt > 1,
14557c478bd9Sstevel@tonic-gate anonvmstats.dupfillholes[3]);
14567c478bd9Sstevel@tonic-gate }
14577c478bd9Sstevel@tonic-gate (void) anon_set_ptr(new, new_idx + off + i, ap,
14587c478bd9Sstevel@tonic-gate ANON_SLEEP);
145923d9e5acSMichael Corcoran ahm = AH_MUTEX(ap->an_vp, ap->an_off);
14607c478bd9Sstevel@tonic-gate mutex_enter(ahm);
14617c478bd9Sstevel@tonic-gate ASSERT(ahmpages != NULL || ap->an_refcnt == 1);
14627c478bd9Sstevel@tonic-gate ASSERT(i == 0 || ahmpages == NULL ||
14637c478bd9Sstevel@tonic-gate refcnt == ap->an_refcnt);
14647c478bd9Sstevel@tonic-gate ap->an_refcnt++;
14657c478bd9Sstevel@tonic-gate mutex_exit(ahm);
14667c478bd9Sstevel@tonic-gate }
14677c478bd9Sstevel@tonic-gate if (ahmpages != NULL) {
14687c478bd9Sstevel@tonic-gate mutex_exit(ahmpages);
14697c478bd9Sstevel@tonic-gate ahmpages = NULL;
14707c478bd9Sstevel@tonic-gate }
14717c478bd9Sstevel@tonic-gate off += pgcnt;
14727c478bd9Sstevel@tonic-gate new_idx += off;
14737c478bd9Sstevel@tonic-gate old_idx += off;
14747c478bd9Sstevel@tonic-gate npages -= pgcnt;
14757c478bd9Sstevel@tonic-gate }
14767c478bd9Sstevel@tonic-gate }
14777c478bd9Sstevel@tonic-gate
14787c478bd9Sstevel@tonic-gate /*
14797c478bd9Sstevel@tonic-gate * Used when a segment with a vnode changes szc. Similarly to
14807c478bd9Sstevel@tonic-gate * anon_dup_fill_holes() it makes sure each large page region either has no
14817c478bd9Sstevel@tonic-gate * anon slots or all of them, but here the new slots are created by COWing
14827c478bd9Sstevel@tonic-gate * the file pages. On entrance no anon slots should be shared.
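 *
 * Added restatement of the per-slot fill below, using the calls the loop
 * actually makes: for each empty slot in a partially populated region,
 *
 *	VOP_GETPAGE(vp, vp_off, ...)	reads the underlying file page;
 *	anon_private(&ap, seg, addr, prot, pl[0], pageflags, cred)
 *					copies it into a fresh anon page;
 *	anon_set_ptr(ahp, an_idx, ap, ANON_SLEEP)
 *					installs the new slot;
 *
 * leaving every slot in the region valid with an_refcnt == 1.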
14837c478bd9Sstevel@tonic-gate */ 14847c478bd9Sstevel@tonic-gate int 14857c478bd9Sstevel@tonic-gate anon_fill_cow_holes( 14867c478bd9Sstevel@tonic-gate struct seg *seg, 14877c478bd9Sstevel@tonic-gate caddr_t addr, 14887c478bd9Sstevel@tonic-gate struct anon_hdr *ahp, 14897c478bd9Sstevel@tonic-gate ulong_t an_idx, 14907c478bd9Sstevel@tonic-gate struct vnode *vp, 14917c478bd9Sstevel@tonic-gate u_offset_t vp_off, 14927c478bd9Sstevel@tonic-gate size_t size, 14937c478bd9Sstevel@tonic-gate uint_t szc, 14947c478bd9Sstevel@tonic-gate uint_t prot, 14957c478bd9Sstevel@tonic-gate struct vpage vpage[], 14967c478bd9Sstevel@tonic-gate struct cred *cred) 14977c478bd9Sstevel@tonic-gate { 14987c478bd9Sstevel@tonic-gate struct anon *ap; 14997c478bd9Sstevel@tonic-gate spgcnt_t npages; 15007c478bd9Sstevel@tonic-gate pgcnt_t pgcnt, i; 15017c478bd9Sstevel@tonic-gate ulong_t index, off; 15027c478bd9Sstevel@tonic-gate int err = 0; 15037c478bd9Sstevel@tonic-gate int pageflags = 0; 15047c478bd9Sstevel@tonic-gate 15057c478bd9Sstevel@tonic-gate ASSERT(szc != 0); 15067c478bd9Sstevel@tonic-gate pgcnt = page_get_pagecnt(szc); 15077c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 15087c478bd9Sstevel@tonic-gate npages = btopr(size); 15097c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(npages, pgcnt)); 15107c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(an_idx, pgcnt)); 15117c478bd9Sstevel@tonic-gate 15127c478bd9Sstevel@tonic-gate while (npages > 0) { 15137c478bd9Sstevel@tonic-gate index = an_idx; 15147c478bd9Sstevel@tonic-gate 15157c478bd9Sstevel@tonic-gate /* 15167c478bd9Sstevel@tonic-gate * Find the next valid slot. 15177c478bd9Sstevel@tonic-gate */ 15187c478bd9Sstevel@tonic-gate if (anon_get_next_ptr(ahp, &index) == NULL) { 15197c478bd9Sstevel@tonic-gate break; 15207c478bd9Sstevel@tonic-gate } 15217c478bd9Sstevel@tonic-gate 15227c478bd9Sstevel@tonic-gate ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index))); 15237c478bd9Sstevel@tonic-gate /* 15247c478bd9Sstevel@tonic-gate * Now backup index to the beginning of the 15257c478bd9Sstevel@tonic-gate * current large page region of the anon array. 15267c478bd9Sstevel@tonic-gate */ 15277c478bd9Sstevel@tonic-gate index = P2ALIGN(index, pgcnt); 15287c478bd9Sstevel@tonic-gate off = index - an_idx; 15297c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(off, pgcnt)); 15307c478bd9Sstevel@tonic-gate npages -= off; 15317c478bd9Sstevel@tonic-gate if (npages <= 0) 15327c478bd9Sstevel@tonic-gate break; 15337c478bd9Sstevel@tonic-gate an_idx += off; 15347c478bd9Sstevel@tonic-gate vp_off += ptob(off); 15357c478bd9Sstevel@tonic-gate addr += ptob(off); 15367c478bd9Sstevel@tonic-gate if (vpage != NULL) { 15377c478bd9Sstevel@tonic-gate vpage += off; 15387c478bd9Sstevel@tonic-gate } 15397c478bd9Sstevel@tonic-gate 15407c478bd9Sstevel@tonic-gate for (i = 0; i < pgcnt; i++, an_idx++, vp_off += PAGESIZE) { 15417c478bd9Sstevel@tonic-gate if ((ap = anon_get_ptr(ahp, an_idx)) == NULL) { 15427c478bd9Sstevel@tonic-gate page_t *pl[1 + 1]; 15437c478bd9Sstevel@tonic-gate page_t *pp; 15447c478bd9Sstevel@tonic-gate 15457c478bd9Sstevel@tonic-gate err = VOP_GETPAGE(vp, vp_off, PAGESIZE, NULL, 1546da6c28aaSamw pl, PAGESIZE, seg, addr, S_READ, cred, 1547da6c28aaSamw NULL); 15487c478bd9Sstevel@tonic-gate if (err) { 15497c478bd9Sstevel@tonic-gate break; 15507c478bd9Sstevel@tonic-gate } 15517c478bd9Sstevel@tonic-gate if (vpage != NULL) { 15527c478bd9Sstevel@tonic-gate prot = VPP_PROT(vpage); 15537c478bd9Sstevel@tonic-gate pageflags = VPP_ISPPLOCK(vpage) ? 
15547c478bd9Sstevel@tonic-gate LOCK_PAGE : 0; 15557c478bd9Sstevel@tonic-gate } 15567c478bd9Sstevel@tonic-gate pp = anon_private(&ap, seg, addr, prot, pl[0], 15577c478bd9Sstevel@tonic-gate pageflags, cred); 15587c478bd9Sstevel@tonic-gate if (pp == NULL) { 15597c478bd9Sstevel@tonic-gate err = ENOMEM; 15607c478bd9Sstevel@tonic-gate break; 15617c478bd9Sstevel@tonic-gate } 15627c478bd9Sstevel@tonic-gate (void) anon_set_ptr(ahp, an_idx, ap, 15637c478bd9Sstevel@tonic-gate ANON_SLEEP); 15647c478bd9Sstevel@tonic-gate page_unlock(pp); 15657c478bd9Sstevel@tonic-gate } 15667c478bd9Sstevel@tonic-gate ASSERT(ap->an_refcnt == 1); 15677c478bd9Sstevel@tonic-gate addr += PAGESIZE; 15687c478bd9Sstevel@tonic-gate if (vpage != NULL) { 15697c478bd9Sstevel@tonic-gate vpage++; 15707c478bd9Sstevel@tonic-gate } 15717c478bd9Sstevel@tonic-gate } 15727c478bd9Sstevel@tonic-gate npages -= pgcnt; 15737c478bd9Sstevel@tonic-gate } 15747c478bd9Sstevel@tonic-gate 15757c478bd9Sstevel@tonic-gate return (err); 15767c478bd9Sstevel@tonic-gate } 15777c478bd9Sstevel@tonic-gate 15787c478bd9Sstevel@tonic-gate /* 15797c478bd9Sstevel@tonic-gate * Free a group of "size" anon pages, size in bytes, 15807c478bd9Sstevel@tonic-gate * and clear out the pointers to the anon entries. 15817c478bd9Sstevel@tonic-gate */ 15827c478bd9Sstevel@tonic-gate void 15837c478bd9Sstevel@tonic-gate anon_free(struct anon_hdr *ahp, ulong_t index, size_t size) 15847c478bd9Sstevel@tonic-gate { 15857c478bd9Sstevel@tonic-gate spgcnt_t npages; 15867c478bd9Sstevel@tonic-gate struct anon *ap; 15877c478bd9Sstevel@tonic-gate ulong_t old; 15887c478bd9Sstevel@tonic-gate 15897c478bd9Sstevel@tonic-gate npages = btopr(size); 15907c478bd9Sstevel@tonic-gate 15917c478bd9Sstevel@tonic-gate while (npages > 0) { 15927c478bd9Sstevel@tonic-gate old = index; 15937c478bd9Sstevel@tonic-gate if ((ap = anon_get_next_ptr(ahp, &index)) == NULL) 15947c478bd9Sstevel@tonic-gate break; 15957c478bd9Sstevel@tonic-gate 15967c478bd9Sstevel@tonic-gate ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index))); 15977c478bd9Sstevel@tonic-gate npages -= index - old; 15987c478bd9Sstevel@tonic-gate if (npages <= 0) 15997c478bd9Sstevel@tonic-gate break; 16007c478bd9Sstevel@tonic-gate 16017c478bd9Sstevel@tonic-gate (void) anon_set_ptr(ahp, index, NULL, ANON_SLEEP); 16027c478bd9Sstevel@tonic-gate anon_decref(ap); 16037c478bd9Sstevel@tonic-gate /* 16047c478bd9Sstevel@tonic-gate * Bump index and decrement page count 16057c478bd9Sstevel@tonic-gate */ 16067c478bd9Sstevel@tonic-gate index++; 16077c478bd9Sstevel@tonic-gate npages--; 16087c478bd9Sstevel@tonic-gate } 16097c478bd9Sstevel@tonic-gate } 16107c478bd9Sstevel@tonic-gate 16117c478bd9Sstevel@tonic-gate void 16127c478bd9Sstevel@tonic-gate anon_free_pages( 16137c478bd9Sstevel@tonic-gate struct anon_hdr *ahp, 16147c478bd9Sstevel@tonic-gate ulong_t an_idx, 16157c478bd9Sstevel@tonic-gate size_t size, 16167c478bd9Sstevel@tonic-gate uint_t szc) 16177c478bd9Sstevel@tonic-gate { 16187c478bd9Sstevel@tonic-gate spgcnt_t npages; 16197c478bd9Sstevel@tonic-gate pgcnt_t pgcnt; 16207c478bd9Sstevel@tonic-gate ulong_t index, off; 16217c478bd9Sstevel@tonic-gate 16227c478bd9Sstevel@tonic-gate ASSERT(szc != 0); 16237c478bd9Sstevel@tonic-gate pgcnt = page_get_pagecnt(szc); 16247c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 16257c478bd9Sstevel@tonic-gate npages = btopr(size); 16267c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(npages, pgcnt)); 16277c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(an_idx, pgcnt)); 162807b65a64Saguzovsk ASSERT(an_idx < ahp->size); 
16297c478bd9Sstevel@tonic-gate 16307c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.freepages[0]); 16317c478bd9Sstevel@tonic-gate 16327c478bd9Sstevel@tonic-gate while (npages > 0) { 16337c478bd9Sstevel@tonic-gate index = an_idx; 16347c478bd9Sstevel@tonic-gate 16357c478bd9Sstevel@tonic-gate /* 16367c478bd9Sstevel@tonic-gate * Find the next valid slot. 16377c478bd9Sstevel@tonic-gate */ 16387c478bd9Sstevel@tonic-gate if (anon_get_next_ptr(ahp, &index) == NULL) 16397c478bd9Sstevel@tonic-gate break; 16407c478bd9Sstevel@tonic-gate 16417c478bd9Sstevel@tonic-gate ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index))); 16427c478bd9Sstevel@tonic-gate /* 16437c478bd9Sstevel@tonic-gate * Now backup index to the beginning of the 16447c478bd9Sstevel@tonic-gate * current large page region of the old array. 16457c478bd9Sstevel@tonic-gate */ 16467c478bd9Sstevel@tonic-gate index = P2ALIGN(index, pgcnt); 16477c478bd9Sstevel@tonic-gate off = index - an_idx; 16487c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(off, pgcnt)); 16497c478bd9Sstevel@tonic-gate npages -= off; 16507c478bd9Sstevel@tonic-gate if (npages <= 0) 16517c478bd9Sstevel@tonic-gate break; 16527c478bd9Sstevel@tonic-gate 16537c478bd9Sstevel@tonic-gate anon_decref_pages(ahp, index, szc); 16547c478bd9Sstevel@tonic-gate 16557c478bd9Sstevel@tonic-gate off += pgcnt; 16567c478bd9Sstevel@tonic-gate an_idx += off; 16577c478bd9Sstevel@tonic-gate npages -= pgcnt; 16587c478bd9Sstevel@tonic-gate } 16597c478bd9Sstevel@tonic-gate } 16607c478bd9Sstevel@tonic-gate 16617c478bd9Sstevel@tonic-gate /* 16627c478bd9Sstevel@tonic-gate * Make anonymous pages discardable 16637c478bd9Sstevel@tonic-gate */ 1664*5e76ec37SBryan Cantrill int 1665*5e76ec37SBryan Cantrill anon_disclaim(struct anon_map *amp, ulong_t index, size_t size, 1666*5e76ec37SBryan Cantrill uint_t behav, pgcnt_t *purged) 16677c478bd9Sstevel@tonic-gate { 16687c478bd9Sstevel@tonic-gate spgcnt_t npages = btopr(size); 16697c478bd9Sstevel@tonic-gate struct anon *ap; 16707c478bd9Sstevel@tonic-gate struct vnode *vp; 16717c478bd9Sstevel@tonic-gate anoff_t off; 16727c478bd9Sstevel@tonic-gate page_t *pp, *root_pp; 16737c478bd9Sstevel@tonic-gate kmutex_t *ahm; 1674*5e76ec37SBryan Cantrill pgcnt_t pgcnt, npurged = 0; 16757c478bd9Sstevel@tonic-gate ulong_t old_idx, idx, i; 16767c478bd9Sstevel@tonic-gate struct anon_hdr *ahp = amp->ahp; 16777c478bd9Sstevel@tonic-gate anon_sync_obj_t cookie; 1678*5e76ec37SBryan Cantrill int err = 0; 16797c478bd9Sstevel@tonic-gate 1680*5e76ec37SBryan Cantrill VERIFY(behav == MADV_FREE || behav == MADV_PURGE); 16817c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&->a_rwlock)); 16827c478bd9Sstevel@tonic-gate pgcnt = 1; 16837c478bd9Sstevel@tonic-gate for (; npages > 0; index = (pgcnt == 1) ? 
index + 1 : 16847c478bd9Sstevel@tonic-gate P2ROUNDUP(index + 1, pgcnt), npages -= pgcnt) { 16857c478bd9Sstevel@tonic-gate 16867c478bd9Sstevel@tonic-gate /* 16877c478bd9Sstevel@tonic-gate * get anon pointer and index for the first valid entry 16887c478bd9Sstevel@tonic-gate * in the anon list, starting from "index" 16897c478bd9Sstevel@tonic-gate */ 16907c478bd9Sstevel@tonic-gate old_idx = index; 16917c478bd9Sstevel@tonic-gate if ((ap = anon_get_next_ptr(ahp, &index)) == NULL) 16927c478bd9Sstevel@tonic-gate break; 16937c478bd9Sstevel@tonic-gate 16947c478bd9Sstevel@tonic-gate /* 16957c478bd9Sstevel@tonic-gate * decrement npages by number of NULL anon slots we skipped 16967c478bd9Sstevel@tonic-gate */ 16977c478bd9Sstevel@tonic-gate npages -= index - old_idx; 16987c478bd9Sstevel@tonic-gate if (npages <= 0) 16997c478bd9Sstevel@tonic-gate break; 17007c478bd9Sstevel@tonic-gate 17017c478bd9Sstevel@tonic-gate anon_array_enter(amp, index, &cookie); 17027c478bd9Sstevel@tonic-gate ap = anon_get_ptr(ahp, index); 17037c478bd9Sstevel@tonic-gate ASSERT(ap != NULL); 17047c478bd9Sstevel@tonic-gate 17057c478bd9Sstevel@tonic-gate /* 17067c478bd9Sstevel@tonic-gate * Get anonymous page and try to lock it SE_EXCL; 17072ba723d8Smec * if we couldn't grab the lock we skip to next page. 17087c478bd9Sstevel@tonic-gate */ 17097c478bd9Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 17107c478bd9Sstevel@tonic-gate pp = page_lookup_nowait(vp, (u_offset_t)off, SE_EXCL); 17117c478bd9Sstevel@tonic-gate if (pp == NULL) { 17127c478bd9Sstevel@tonic-gate segadvstat.MADV_FREE_miss.value.ul++; 17137c478bd9Sstevel@tonic-gate pgcnt = 1; 17147c478bd9Sstevel@tonic-gate anon_array_exit(&cookie); 17157c478bd9Sstevel@tonic-gate continue; 17167c478bd9Sstevel@tonic-gate } 17177c478bd9Sstevel@tonic-gate pgcnt = page_get_pagecnt(pp->p_szc); 17187c478bd9Sstevel@tonic-gate 17197c478bd9Sstevel@tonic-gate /* 17207c478bd9Sstevel@tonic-gate * we cannot free a page which is permanently locked. 17217c478bd9Sstevel@tonic-gate * The page_struct_lock need not be acquired to examine 17227c478bd9Sstevel@tonic-gate * these fields since the page has an "exclusive" lock. 17237c478bd9Sstevel@tonic-gate */ 17247c478bd9Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 17257c478bd9Sstevel@tonic-gate page_unlock(pp); 17267c478bd9Sstevel@tonic-gate segadvstat.MADV_FREE_miss.value.ul++; 17277c478bd9Sstevel@tonic-gate anon_array_exit(&cookie); 1728*5e76ec37SBryan Cantrill err = EBUSY; 17297c478bd9Sstevel@tonic-gate continue; 17307c478bd9Sstevel@tonic-gate } 17317c478bd9Sstevel@tonic-gate 173223d9e5acSMichael Corcoran ahm = AH_MUTEX(vp, off); 17337c478bd9Sstevel@tonic-gate mutex_enter(ahm); 17347c478bd9Sstevel@tonic-gate ASSERT(ap->an_refcnt != 0); 17357c478bd9Sstevel@tonic-gate /* 17367c478bd9Sstevel@tonic-gate * skip this one if copy-on-write is not yet broken. 
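 * (An an_refcnt greater than 1 means the page is still shared with
 * another process after fork(); discarding it here would also discard
 * data those other references may still need.)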
17377c478bd9Sstevel@tonic-gate */ 17387c478bd9Sstevel@tonic-gate if (ap->an_refcnt > 1) { 17397c478bd9Sstevel@tonic-gate mutex_exit(ahm); 17407c478bd9Sstevel@tonic-gate page_unlock(pp); 17417c478bd9Sstevel@tonic-gate segadvstat.MADV_FREE_miss.value.ul++; 17427c478bd9Sstevel@tonic-gate anon_array_exit(&cookie); 17437c478bd9Sstevel@tonic-gate continue; 17447c478bd9Sstevel@tonic-gate } 17457c478bd9Sstevel@tonic-gate 1746*5e76ec37SBryan Cantrill if (behav == MADV_PURGE && pp->p_szc != 0) { 1747*5e76ec37SBryan Cantrill /* 1748*5e76ec37SBryan Cantrill * If we're purging and we have a large page, simplify 1749*5e76ec37SBryan Cantrill * things a bit by demoting ourselves into the base 1750*5e76ec37SBryan Cantrill * page case. 1751*5e76ec37SBryan Cantrill */ 1752*5e76ec37SBryan Cantrill (void) page_try_demote_pages(pp); 1753*5e76ec37SBryan Cantrill } 1754*5e76ec37SBryan Cantrill 17557c478bd9Sstevel@tonic-gate if (pp->p_szc == 0) { 17567c478bd9Sstevel@tonic-gate pgcnt = 1; 17577c478bd9Sstevel@tonic-gate 17587c478bd9Sstevel@tonic-gate /* 17597c478bd9Sstevel@tonic-gate * free swap slot; 17607c478bd9Sstevel@tonic-gate */ 17617c478bd9Sstevel@tonic-gate if (ap->an_pvp) { 17627c478bd9Sstevel@tonic-gate swap_phys_free(ap->an_pvp, ap->an_poff, 17637c478bd9Sstevel@tonic-gate PAGESIZE); 17647c478bd9Sstevel@tonic-gate ap->an_pvp = NULL; 17657c478bd9Sstevel@tonic-gate ap->an_poff = 0; 17667c478bd9Sstevel@tonic-gate } 1767*5e76ec37SBryan Cantrill 1768*5e76ec37SBryan Cantrill if (behav == MADV_PURGE) { 1769*5e76ec37SBryan Cantrill /* 1770*5e76ec37SBryan Cantrill * If we're purging (instead of merely freeing), 1771*5e76ec37SBryan Cantrill * rip out this anon structure entirely to 1772*5e76ec37SBryan Cantrill * assure that any subsequent fault pulls from 1773*5e76ec37SBryan Cantrill * the backing vnode (if any). 1774*5e76ec37SBryan Cantrill */ 1775*5e76ec37SBryan Cantrill if (--ap->an_refcnt == 0) 1776*5e76ec37SBryan Cantrill anon_rmhash(ap); 1777*5e76ec37SBryan Cantrill 17787c478bd9Sstevel@tonic-gate mutex_exit(ahm); 1779*5e76ec37SBryan Cantrill (void) anon_set_ptr(ahp, index, 1780*5e76ec37SBryan Cantrill NULL, ANON_SLEEP); 1781*5e76ec37SBryan Cantrill npurged++; 1782*5e76ec37SBryan Cantrill ANI_ADD(1); 1783*5e76ec37SBryan Cantrill kmem_cache_free(anon_cache, ap); 1784*5e76ec37SBryan Cantrill } else { 1785*5e76ec37SBryan Cantrill mutex_exit(ahm); 1786*5e76ec37SBryan Cantrill } 1787*5e76ec37SBryan Cantrill 17887c478bd9Sstevel@tonic-gate segadvstat.MADV_FREE_hit.value.ul++; 17897c478bd9Sstevel@tonic-gate 17907c478bd9Sstevel@tonic-gate /* 17917c478bd9Sstevel@tonic-gate * while we are at it, unload all the translations 17927c478bd9Sstevel@tonic-gate * and attempt to free the page. 17937c478bd9Sstevel@tonic-gate */ 17947c478bd9Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 17957c478bd9Sstevel@tonic-gate /*LINTED: constant in conditional context */ 1796*5e76ec37SBryan Cantrill VN_DISPOSE(pp, 1797*5e76ec37SBryan Cantrill behav == MADV_FREE ? 
B_FREE : B_INVAL, 0, kcred); 1798*5e76ec37SBryan Cantrill 17997c478bd9Sstevel@tonic-gate anon_array_exit(&cookie); 18007c478bd9Sstevel@tonic-gate continue; 18017c478bd9Sstevel@tonic-gate } 18027c478bd9Sstevel@tonic-gate 18037c478bd9Sstevel@tonic-gate pgcnt = page_get_pagecnt(pp->p_szc); 180407b65a64Saguzovsk if (!IS_P2ALIGNED(index, pgcnt) || npages < pgcnt) { 18057c478bd9Sstevel@tonic-gate if (!page_try_demote_pages(pp)) { 18067c478bd9Sstevel@tonic-gate mutex_exit(ahm); 18077c478bd9Sstevel@tonic-gate page_unlock(pp); 18087c478bd9Sstevel@tonic-gate segadvstat.MADV_FREE_miss.value.ul++; 18097c478bd9Sstevel@tonic-gate anon_array_exit(&cookie); 1810*5e76ec37SBryan Cantrill err = EBUSY; 18117c478bd9Sstevel@tonic-gate continue; 18127c478bd9Sstevel@tonic-gate } else { 18137c478bd9Sstevel@tonic-gate pgcnt = 1; 18147c478bd9Sstevel@tonic-gate if (ap->an_pvp) { 18157c478bd9Sstevel@tonic-gate swap_phys_free(ap->an_pvp, 18167c478bd9Sstevel@tonic-gate ap->an_poff, PAGESIZE); 18177c478bd9Sstevel@tonic-gate ap->an_pvp = NULL; 18187c478bd9Sstevel@tonic-gate ap->an_poff = 0; 18197c478bd9Sstevel@tonic-gate } 18207c478bd9Sstevel@tonic-gate mutex_exit(ahm); 18217c478bd9Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 18227c478bd9Sstevel@tonic-gate /*LINTED*/ 18237c478bd9Sstevel@tonic-gate VN_DISPOSE(pp, B_FREE, 0, kcred); 18247c478bd9Sstevel@tonic-gate segadvstat.MADV_FREE_hit.value.ul++; 18257c478bd9Sstevel@tonic-gate anon_array_exit(&cookie); 18267c478bd9Sstevel@tonic-gate continue; 18277c478bd9Sstevel@tonic-gate } 18287c478bd9Sstevel@tonic-gate } 18297c478bd9Sstevel@tonic-gate mutex_exit(ahm); 18307c478bd9Sstevel@tonic-gate root_pp = pp; 18317c478bd9Sstevel@tonic-gate 18327c478bd9Sstevel@tonic-gate /* 18337c478bd9Sstevel@tonic-gate * try to lock remaining pages 18347c478bd9Sstevel@tonic-gate */ 18357c478bd9Sstevel@tonic-gate for (idx = 1; idx < pgcnt; idx++) { 1836affbd3ccSkchow pp++; 18377c478bd9Sstevel@tonic-gate if (!page_trylock(pp, SE_EXCL)) 18387c478bd9Sstevel@tonic-gate break; 18397c478bd9Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 18407c478bd9Sstevel@tonic-gate page_unlock(pp); 18417c478bd9Sstevel@tonic-gate break; 18427c478bd9Sstevel@tonic-gate } 18437c478bd9Sstevel@tonic-gate } 18447c478bd9Sstevel@tonic-gate 18457c478bd9Sstevel@tonic-gate if (idx == pgcnt) { 18467c478bd9Sstevel@tonic-gate for (i = 0; i < pgcnt; i++) { 18477c478bd9Sstevel@tonic-gate ap = anon_get_ptr(ahp, index + i); 18487c478bd9Sstevel@tonic-gate if (ap == NULL) 18497c478bd9Sstevel@tonic-gate break; 18507c478bd9Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 185123d9e5acSMichael Corcoran ahm = AH_MUTEX(vp, off); 18527c478bd9Sstevel@tonic-gate mutex_enter(ahm); 18537c478bd9Sstevel@tonic-gate ASSERT(ap->an_refcnt != 0); 18547c478bd9Sstevel@tonic-gate 18557c478bd9Sstevel@tonic-gate /* 18567c478bd9Sstevel@tonic-gate * skip this one if copy-on-write 18577c478bd9Sstevel@tonic-gate * is not yet broken. 
18587c478bd9Sstevel@tonic-gate */ 18597c478bd9Sstevel@tonic-gate if (ap->an_refcnt > 1) { 18607c478bd9Sstevel@tonic-gate mutex_exit(ahm); 18617c478bd9Sstevel@tonic-gate goto skiplp; 18627c478bd9Sstevel@tonic-gate } 18637c478bd9Sstevel@tonic-gate if (ap->an_pvp) { 18647c478bd9Sstevel@tonic-gate swap_phys_free(ap->an_pvp, 18657c478bd9Sstevel@tonic-gate ap->an_poff, PAGESIZE); 18667c478bd9Sstevel@tonic-gate ap->an_pvp = NULL; 18677c478bd9Sstevel@tonic-gate ap->an_poff = 0; 18687c478bd9Sstevel@tonic-gate } 18697c478bd9Sstevel@tonic-gate mutex_exit(ahm); 18707c478bd9Sstevel@tonic-gate } 18717c478bd9Sstevel@tonic-gate page_destroy_pages(root_pp); 18727c478bd9Sstevel@tonic-gate segadvstat.MADV_FREE_hit.value.ul += pgcnt; 18737c478bd9Sstevel@tonic-gate anon_array_exit(&cookie); 18747c478bd9Sstevel@tonic-gate continue; 18757c478bd9Sstevel@tonic-gate } 18767c478bd9Sstevel@tonic-gate skiplp: 18777c478bd9Sstevel@tonic-gate segadvstat.MADV_FREE_miss.value.ul += pgcnt; 1878affbd3ccSkchow for (i = 0, pp = root_pp; i < idx; pp++, i++) 18797c478bd9Sstevel@tonic-gate page_unlock(pp); 18807c478bd9Sstevel@tonic-gate anon_array_exit(&cookie); 18817c478bd9Sstevel@tonic-gate } 1882*5e76ec37SBryan Cantrill 1883*5e76ec37SBryan Cantrill if (purged != NULL) 1884*5e76ec37SBryan Cantrill *purged = npurged; 1885*5e76ec37SBryan Cantrill 1886*5e76ec37SBryan Cantrill return (err); 18877c478bd9Sstevel@tonic-gate } 18887c478bd9Sstevel@tonic-gate 18897c478bd9Sstevel@tonic-gate /* 18907c478bd9Sstevel@tonic-gate * Return the kept page(s) and protections back to the segment driver. 18917c478bd9Sstevel@tonic-gate */ 18927c478bd9Sstevel@tonic-gate int 18937c478bd9Sstevel@tonic-gate anon_getpage( 18947c478bd9Sstevel@tonic-gate struct anon **app, 18957c478bd9Sstevel@tonic-gate uint_t *protp, 18967c478bd9Sstevel@tonic-gate page_t *pl[], 18977c478bd9Sstevel@tonic-gate size_t plsz, 18987c478bd9Sstevel@tonic-gate struct seg *seg, 18997c478bd9Sstevel@tonic-gate caddr_t addr, 19007c478bd9Sstevel@tonic-gate enum seg_rw rw, 19017c478bd9Sstevel@tonic-gate struct cred *cred) 19027c478bd9Sstevel@tonic-gate { 19037c478bd9Sstevel@tonic-gate page_t *pp; 19047c478bd9Sstevel@tonic-gate struct anon *ap = *app; 19057c478bd9Sstevel@tonic-gate struct vnode *vp; 19067c478bd9Sstevel@tonic-gate anoff_t off; 19077c478bd9Sstevel@tonic-gate int err; 19087c478bd9Sstevel@tonic-gate kmutex_t *ahm; 19097c478bd9Sstevel@tonic-gate 19107c478bd9Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 19117c478bd9Sstevel@tonic-gate 19127c478bd9Sstevel@tonic-gate /* 19137c478bd9Sstevel@tonic-gate * Lookup the page. If page is being paged in, 19147c478bd9Sstevel@tonic-gate * wait for it to finish as we must return a list of 19157c478bd9Sstevel@tonic-gate * pages since this routine acts like the VOP_GETPAGE 19167c478bd9Sstevel@tonic-gate * routine does. 
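 *
 * Added illustrative call: the returned *protp encodes COW state, PROT_ALL
 * when an_refcnt == 1 but with PROT_WRITE masked off while the page is
 * still shared:
 *
 *	uint_t prot;
 *	page_t *pl[2];
 *	err = anon_getpage(&ap, &prot, pl, PAGESIZE, seg, addr, S_READ, cred);
 *	if (err == 0 && !(prot & PROT_WRITE))
 *		...writing requires breaking COW first, e.g. anon_private()...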
19177c478bd9Sstevel@tonic-gate */ 19187c478bd9Sstevel@tonic-gate if (pl != NULL && (pp = page_lookup(vp, (u_offset_t)off, SE_SHARED))) { 191923d9e5acSMichael Corcoran ahm = AH_MUTEX(ap->an_vp, ap->an_off); 19207c478bd9Sstevel@tonic-gate mutex_enter(ahm); 19217c478bd9Sstevel@tonic-gate if (ap->an_refcnt == 1) 19227c478bd9Sstevel@tonic-gate *protp = PROT_ALL; 19237c478bd9Sstevel@tonic-gate else 19247c478bd9Sstevel@tonic-gate *protp = PROT_ALL & ~PROT_WRITE; 19257c478bd9Sstevel@tonic-gate mutex_exit(ahm); 19267c478bd9Sstevel@tonic-gate pl[0] = pp; 19277c478bd9Sstevel@tonic-gate pl[1] = NULL; 19287c478bd9Sstevel@tonic-gate return (0); 19297c478bd9Sstevel@tonic-gate } 19307c478bd9Sstevel@tonic-gate 19317c478bd9Sstevel@tonic-gate /* 19327c478bd9Sstevel@tonic-gate * Simply treat it as a vnode fault on the anon vp. 19337c478bd9Sstevel@tonic-gate */ 19347c478bd9Sstevel@tonic-gate 19357c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_ANON_GETPAGE, 19367c478bd9Sstevel@tonic-gate "anon_getpage:seg %x addr %x vp %x", 19377c478bd9Sstevel@tonic-gate seg, addr, vp); 19387c478bd9Sstevel@tonic-gate 19397c478bd9Sstevel@tonic-gate err = VOP_GETPAGE(vp, (u_offset_t)off, PAGESIZE, protp, pl, plsz, 1940da6c28aaSamw seg, addr, rw, cred, NULL); 19417c478bd9Sstevel@tonic-gate 19427c478bd9Sstevel@tonic-gate if (err == 0 && pl != NULL) { 194323d9e5acSMichael Corcoran ahm = AH_MUTEX(ap->an_vp, ap->an_off); 19447c478bd9Sstevel@tonic-gate mutex_enter(ahm); 19457c478bd9Sstevel@tonic-gate if (ap->an_refcnt != 1) 19467c478bd9Sstevel@tonic-gate *protp &= ~PROT_WRITE; /* make read-only */ 19477c478bd9Sstevel@tonic-gate mutex_exit(ahm); 19487c478bd9Sstevel@tonic-gate } 19497c478bd9Sstevel@tonic-gate return (err); 19507c478bd9Sstevel@tonic-gate } 19517c478bd9Sstevel@tonic-gate 19527c478bd9Sstevel@tonic-gate /* 19537c478bd9Sstevel@tonic-gate * Creates or returns kept pages to the segment driver. returns -1 if a large 19547c478bd9Sstevel@tonic-gate * page cannot be allocated. returns -2 if some other process has allocated a 19557c478bd9Sstevel@tonic-gate * larger page. 19567c478bd9Sstevel@tonic-gate * 1957da6c28aaSamw * For cowfault it will allocate any size pages to fill the requested area to 1958da6c28aaSamw * avoid partially overwriting anon slots (i.e. sharing only some of the anon 19597c478bd9Sstevel@tonic-gate * slots within a large page with other processes). This policy greatly 19607c478bd9Sstevel@tonic-gate * simplifies large page freeing (which is only freed when all anon slot 19617c478bd9Sstevel@tonic-gate * refcnts are 0). 
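 *
 * Caller-side sketch (added; the variable names are illustrative) of
 * handling the special return values described above:
 *
 *	err = anon_map_getpages(amp, an_idx, szc, seg, addr, prot, &vpprot,
 *	    ppa, &ppa_szc, vpage, rw, brkcow, anypgsz, pgflags, cred);
 *	if (err == -2)
 *		...retry the fault at ppa_szc, the larger existing size...
 *	else if (err == -1)
 *		...no large page available; retry with a smaller szc...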
19627c478bd9Sstevel@tonic-gate */ 19637c478bd9Sstevel@tonic-gate int 19647c478bd9Sstevel@tonic-gate anon_map_getpages( 19657c478bd9Sstevel@tonic-gate struct anon_map *amp, 19667c478bd9Sstevel@tonic-gate ulong_t start_idx, 19677c478bd9Sstevel@tonic-gate uint_t szc, 19687c478bd9Sstevel@tonic-gate struct seg *seg, 19697c478bd9Sstevel@tonic-gate caddr_t addr, 19707c478bd9Sstevel@tonic-gate uint_t prot, 19717c478bd9Sstevel@tonic-gate uint_t *protp, 19727c478bd9Sstevel@tonic-gate page_t *ppa[], 19737c478bd9Sstevel@tonic-gate uint_t *ppa_szc, 19747c478bd9Sstevel@tonic-gate struct vpage vpage[], 19757c478bd9Sstevel@tonic-gate enum seg_rw rw, 19767c478bd9Sstevel@tonic-gate int brkcow, 19777c478bd9Sstevel@tonic-gate int anypgsz, 19782cb27123Saguzovsk int pgflags, 19797c478bd9Sstevel@tonic-gate struct cred *cred) 19807c478bd9Sstevel@tonic-gate { 19817c478bd9Sstevel@tonic-gate pgcnt_t pgcnt; 19827c478bd9Sstevel@tonic-gate struct anon *ap; 19837c478bd9Sstevel@tonic-gate struct vnode *vp; 19847c478bd9Sstevel@tonic-gate anoff_t off; 19857c478bd9Sstevel@tonic-gate page_t *pp, *pl[2], *conpp = NULL; 19867c478bd9Sstevel@tonic-gate caddr_t vaddr; 19877c478bd9Sstevel@tonic-gate ulong_t pg_idx, an_idx, i; 19887c478bd9Sstevel@tonic-gate spgcnt_t nreloc = 0; 19897c478bd9Sstevel@tonic-gate int prealloc = 1; 19907c478bd9Sstevel@tonic-gate int err, slotcreate; 19917c478bd9Sstevel@tonic-gate uint_t vpprot; 199207b65a64Saguzovsk int upsize = (szc < seg->s_szc); 19937c478bd9Sstevel@tonic-gate 19947c478bd9Sstevel@tonic-gate #if !defined(__i386) && !defined(__amd64) 19957c478bd9Sstevel@tonic-gate ASSERT(seg->s_szc != 0); 19967c478bd9Sstevel@tonic-gate #endif 19977c478bd9Sstevel@tonic-gate ASSERT(szc <= seg->s_szc); 19987c478bd9Sstevel@tonic-gate ASSERT(ppa_szc != NULL); 19997c478bd9Sstevel@tonic-gate ASSERT(rw != S_CREATE); 20007c478bd9Sstevel@tonic-gate 20017c478bd9Sstevel@tonic-gate *protp = PROT_ALL; 20027c478bd9Sstevel@tonic-gate 20037c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[0]); 20047c478bd9Sstevel@tonic-gate 20057c478bd9Sstevel@tonic-gate if (szc == 0) { 20067c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[1]); 20077c478bd9Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, start_idx)) != NULL) { 20087c478bd9Sstevel@tonic-gate err = anon_getpage(&ap, protp, pl, PAGESIZE, seg, 20097c478bd9Sstevel@tonic-gate addr, rw, cred); 20107c478bd9Sstevel@tonic-gate if (err) 20117c478bd9Sstevel@tonic-gate return (err); 20127c478bd9Sstevel@tonic-gate ppa[0] = pl[0]; 20137c478bd9Sstevel@tonic-gate if (brkcow == 0 || (*protp & PROT_WRITE)) { 20147c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[2]); 201507b65a64Saguzovsk if (ppa[0]->p_szc != 0 && upsize) { 20167c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[3]); 201707b65a64Saguzovsk *ppa_szc = MIN(ppa[0]->p_szc, 201807b65a64Saguzovsk seg->s_szc); 20197c478bd9Sstevel@tonic-gate page_unlock(ppa[0]); 20207c478bd9Sstevel@tonic-gate return (-2); 20217c478bd9Sstevel@tonic-gate } 20227c478bd9Sstevel@tonic-gate return (0); 20237c478bd9Sstevel@tonic-gate } 20247c478bd9Sstevel@tonic-gate panic("anon_map_getpages: cowfault for szc 0"); 20257c478bd9Sstevel@tonic-gate } else { 20267c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[4]); 20277c478bd9Sstevel@tonic-gate ppa[0] = anon_zero(seg, addr, &ap, cred); 20287c478bd9Sstevel@tonic-gate if (ppa[0] == NULL) 20297c478bd9Sstevel@tonic-gate return (ENOMEM); 20307c478bd9Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, start_idx, ap, 20317c478bd9Sstevel@tonic-gate ANON_SLEEP); 
20327c478bd9Sstevel@tonic-gate return (0);
20337c478bd9Sstevel@tonic-gate }
20347c478bd9Sstevel@tonic-gate }
20357c478bd9Sstevel@tonic-gate
20367c478bd9Sstevel@tonic-gate pgcnt = page_get_pagecnt(szc);
20377c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
20387c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(start_idx, pgcnt));
20397c478bd9Sstevel@tonic-gate
20407c478bd9Sstevel@tonic-gate /*
20417c478bd9Sstevel@tonic-gate * First we check for the case that the requested large
20427c478bd9Sstevel@tonic-gate * page or larger page already exists in the system.
20437c478bd9Sstevel@tonic-gate * Actually we only check if the first constituent page
20447c478bd9Sstevel@tonic-gate * exists and only preallocate if it's not found.
20457c478bd9Sstevel@tonic-gate */
20467c478bd9Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, start_idx);
20477c478bd9Sstevel@tonic-gate if (ap) {
20487c478bd9Sstevel@tonic-gate uint_t pszc;
20497c478bd9Sstevel@tonic-gate swap_xlate(ap, &vp, &off);
20507c478bd9Sstevel@tonic-gate if (page_exists_forreal(vp, (u_offset_t)off, &pszc)) {
205107b65a64Saguzovsk if (pszc > szc && upsize) {
205207b65a64Saguzovsk *ppa_szc = MIN(pszc, seg->s_szc);
20537c478bd9Sstevel@tonic-gate return (-2);
20547c478bd9Sstevel@tonic-gate }
205507b65a64Saguzovsk if (pszc >= szc) {
20567c478bd9Sstevel@tonic-gate prealloc = 0;
20577c478bd9Sstevel@tonic-gate }
20587c478bd9Sstevel@tonic-gate }
20597c478bd9Sstevel@tonic-gate }
20607c478bd9Sstevel@tonic-gate
20617c478bd9Sstevel@tonic-gate VM_STAT_COND_ADD(prealloc == 0, anonvmstats.getpages[5]);
20627c478bd9Sstevel@tonic-gate VM_STAT_COND_ADD(prealloc != 0, anonvmstats.getpages[6]);
20637c478bd9Sstevel@tonic-gate
20647c478bd9Sstevel@tonic-gate top:
20657c478bd9Sstevel@tonic-gate /*
20667c478bd9Sstevel@tonic-gate * If a smaller page or no page at all was found,
20677c478bd9Sstevel@tonic-gate * grab a large page off the freelist.
20687c478bd9Sstevel@tonic-gate */
20697c478bd9Sstevel@tonic-gate if (prealloc) {
20707c478bd9Sstevel@tonic-gate ASSERT(conpp == NULL);
2071e44bd21cSsusans if (page_alloc_pages(anon_vp, seg, addr, NULL, ppa,
20722cb27123Saguzovsk szc, 0, pgflags) != 0) {
20737c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[7]);
207478b03d3aSkchow if (brkcow == 0 || szc < seg->s_szc ||
207578b03d3aSkchow !anon_szcshare(amp->ahp, start_idx)) {
20767c478bd9Sstevel@tonic-gate /*
20777c478bd9Sstevel@tonic-gate * If the refcnts of all anon slots are <= 1
20787c478bd9Sstevel@tonic-gate * they can't increase since we are holding
20797c478bd9Sstevel@tonic-gate * the address space's lock. So segvn can
20807c478bd9Sstevel@tonic-gate * safely decrease szc without risking a cow
20817c478bd9Sstevel@tonic-gate * fault for a region smaller
20827c478bd9Sstevel@tonic-gate * than the segment's largest page size.
20837c478bd9Sstevel@tonic-gate */
20847c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[8]);
20857c478bd9Sstevel@tonic-gate return (-1);
20867c478bd9Sstevel@tonic-gate }
20877c478bd9Sstevel@tonic-gate docow:
20887c478bd9Sstevel@tonic-gate /*
20897c478bd9Sstevel@tonic-gate * This is a cow fault. Copy away the entire large
20907c478bd9Sstevel@tonic-gate * page region of this segment.
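 *
 * (Copying the full region rather than only the faulting page keeps all
 * anon slots of a large page at the same refcnt, the invariant that, per
 * the function header above, keeps large page freeing simple.)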
20917c478bd9Sstevel@tonic-gate */ 20927c478bd9Sstevel@tonic-gate if (szc != seg->s_szc) 20937c478bd9Sstevel@tonic-gate panic("anon_map_getpages: cowfault for szc %d", 20947c478bd9Sstevel@tonic-gate szc); 20957c478bd9Sstevel@tonic-gate vaddr = addr; 20967c478bd9Sstevel@tonic-gate for (pg_idx = 0, an_idx = start_idx; pg_idx < pgcnt; 20977c478bd9Sstevel@tonic-gate pg_idx++, an_idx++, vaddr += PAGESIZE) { 20987c478bd9Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, an_idx)) != 20997c478bd9Sstevel@tonic-gate NULL) { 21007c478bd9Sstevel@tonic-gate err = anon_getpage(&ap, &vpprot, pl, 21017c478bd9Sstevel@tonic-gate PAGESIZE, seg, vaddr, rw, cred); 21027c478bd9Sstevel@tonic-gate if (err) { 21037c478bd9Sstevel@tonic-gate for (i = 0; i < pg_idx; i++) { 21047c478bd9Sstevel@tonic-gate if ((pp = ppa[i]) != 21057c478bd9Sstevel@tonic-gate NULL) 21067c478bd9Sstevel@tonic-gate page_unlock(pp); 21077c478bd9Sstevel@tonic-gate } 21087c478bd9Sstevel@tonic-gate return (err); 21097c478bd9Sstevel@tonic-gate } 21107c478bd9Sstevel@tonic-gate ppa[pg_idx] = pl[0]; 21117c478bd9Sstevel@tonic-gate } else { 21127c478bd9Sstevel@tonic-gate /* 21137c478bd9Sstevel@tonic-gate * Since this is a cowfault we know 21147c478bd9Sstevel@tonic-gate * that this address space has a 21157c478bd9Sstevel@tonic-gate * parent or children which means 21167c478bd9Sstevel@tonic-gate * anon_dup_fill_holes() has initialized 21177c478bd9Sstevel@tonic-gate * all anon slots within a large page 21187c478bd9Sstevel@tonic-gate * region that had at least one anon 21197c478bd9Sstevel@tonic-gate * slot at the time of fork(). 21207c478bd9Sstevel@tonic-gate */ 21217c478bd9Sstevel@tonic-gate panic("anon_map_getpages: " 21227c478bd9Sstevel@tonic-gate "cowfault but anon slot is empty"); 21237c478bd9Sstevel@tonic-gate } 21247c478bd9Sstevel@tonic-gate } 21257c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[9]); 21267c478bd9Sstevel@tonic-gate *protp = PROT_ALL; 21277c478bd9Sstevel@tonic-gate return (anon_map_privatepages(amp, start_idx, szc, seg, 21282cb27123Saguzovsk addr, prot, ppa, vpage, anypgsz, pgflags, cred)); 21297c478bd9Sstevel@tonic-gate } 21307c478bd9Sstevel@tonic-gate } 21317c478bd9Sstevel@tonic-gate 21327c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[10]); 21337c478bd9Sstevel@tonic-gate 21347c478bd9Sstevel@tonic-gate an_idx = start_idx; 21357c478bd9Sstevel@tonic-gate pg_idx = 0; 21367c478bd9Sstevel@tonic-gate vaddr = addr; 21377c478bd9Sstevel@tonic-gate while (pg_idx < pgcnt) { 21387c478bd9Sstevel@tonic-gate slotcreate = 0; 21397c478bd9Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, an_idx)) == NULL) { 21407c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[11]); 21417c478bd9Sstevel@tonic-gate /* 21427c478bd9Sstevel@tonic-gate * For us to have decided not to preallocate 21437c478bd9Sstevel@tonic-gate * would have meant that a large page 21447c478bd9Sstevel@tonic-gate * was found. Which also means that all of the 21457c478bd9Sstevel@tonic-gate * anon slots for that page would have been 21467c478bd9Sstevel@tonic-gate * already created for us. 
21477c478bd9Sstevel@tonic-gate */
21487c478bd9Sstevel@tonic-gate if (prealloc == 0)
21497c478bd9Sstevel@tonic-gate panic("anon_map_getpages: prealloc = 0");
21507c478bd9Sstevel@tonic-gate
21517c478bd9Sstevel@tonic-gate slotcreate = 1;
21527c478bd9Sstevel@tonic-gate ap = anon_alloc(NULL, 0);
21537c478bd9Sstevel@tonic-gate }
21547c478bd9Sstevel@tonic-gate swap_xlate(ap, &vp, &off);
21557c478bd9Sstevel@tonic-gate
21567c478bd9Sstevel@tonic-gate /*
21577c478bd9Sstevel@tonic-gate * Now set up our preallocated page to pass down
21587c478bd9Sstevel@tonic-gate * to swap_getpage().
21597c478bd9Sstevel@tonic-gate */
21607c478bd9Sstevel@tonic-gate if (prealloc) {
21617c478bd9Sstevel@tonic-gate ASSERT(ppa[pg_idx]->p_szc == szc);
21627c478bd9Sstevel@tonic-gate conpp = ppa[pg_idx];
21637c478bd9Sstevel@tonic-gate }
21647c478bd9Sstevel@tonic-gate ASSERT(prealloc || conpp == NULL);
21657c478bd9Sstevel@tonic-gate
21667c478bd9Sstevel@tonic-gate /*
21677c478bd9Sstevel@tonic-gate * If we just created this anon slot then call
21687c478bd9Sstevel@tonic-gate * with S_CREATE to prevent doing IO on the page.
21697c478bd9Sstevel@tonic-gate * Similar to the anon_zero case.
21707c478bd9Sstevel@tonic-gate */
21717c478bd9Sstevel@tonic-gate err = swap_getconpage(vp, (u_offset_t)off, PAGESIZE,
217207b65a64Saguzovsk NULL, pl, PAGESIZE, conpp, ppa_szc, &nreloc, seg, vaddr,
21737c478bd9Sstevel@tonic-gate slotcreate == 1 ? S_CREATE : rw, cred);
21747c478bd9Sstevel@tonic-gate
21757c478bd9Sstevel@tonic-gate if (err) {
217607b65a64Saguzovsk ASSERT(err != -2 || upsize);
21777c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[12]);
21787c478bd9Sstevel@tonic-gate ASSERT(slotcreate == 0);
21797c478bd9Sstevel@tonic-gate goto io_err;
21807c478bd9Sstevel@tonic-gate }
21817c478bd9Sstevel@tonic-gate
21827c478bd9Sstevel@tonic-gate pp = pl[0];
21837c478bd9Sstevel@tonic-gate
218407b65a64Saguzovsk if (pp->p_szc < szc || (pp->p_szc > szc && upsize)) {
21857c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[13]);
21867c478bd9Sstevel@tonic-gate ASSERT(slotcreate == 0);
21877c478bd9Sstevel@tonic-gate ASSERT(prealloc == 0);
21887c478bd9Sstevel@tonic-gate ASSERT(pg_idx == 0);
21897c478bd9Sstevel@tonic-gate if (pp->p_szc > szc) {
219007b65a64Saguzovsk ASSERT(upsize);
219107b65a64Saguzovsk *ppa_szc = MIN(pp->p_szc, seg->s_szc);
21927c478bd9Sstevel@tonic-gate page_unlock(pp);
21937c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[14]);
21947c478bd9Sstevel@tonic-gate return (-2);
21957c478bd9Sstevel@tonic-gate }
21967c478bd9Sstevel@tonic-gate page_unlock(pp);
21977c478bd9Sstevel@tonic-gate prealloc = 1;
21987c478bd9Sstevel@tonic-gate goto top;
21997c478bd9Sstevel@tonic-gate }
22007c478bd9Sstevel@tonic-gate
22017c478bd9Sstevel@tonic-gate /*
22027c478bd9Sstevel@tonic-gate * If we decided to preallocate but VOP_GETPAGE
22037c478bd9Sstevel@tonic-gate * found a page in the system that satisfies our
22047c478bd9Sstevel@tonic-gate * request then free up our preallocated large page
22057c478bd9Sstevel@tonic-gate * and continue looping across the existing large
22067c478bd9Sstevel@tonic-gate * page via VOP_GETPAGE.
22077c478bd9Sstevel@tonic-gate */ 22087c478bd9Sstevel@tonic-gate if (prealloc && pp != ppa[pg_idx]) { 22097c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[15]); 22107c478bd9Sstevel@tonic-gate ASSERT(slotcreate == 0); 22117c478bd9Sstevel@tonic-gate ASSERT(pg_idx == 0); 22127c478bd9Sstevel@tonic-gate conpp = NULL; 22137c478bd9Sstevel@tonic-gate prealloc = 0; 22147c478bd9Sstevel@tonic-gate page_free_pages(ppa[0]); 22157c478bd9Sstevel@tonic-gate } 22167c478bd9Sstevel@tonic-gate 22177c478bd9Sstevel@tonic-gate if (prealloc && nreloc > 1) { 22187c478bd9Sstevel@tonic-gate /* 22197c478bd9Sstevel@tonic-gate * we have relocated out of a smaller large page. 22207c478bd9Sstevel@tonic-gate * skip npgs - 1 iterations and continue which will 22217c478bd9Sstevel@tonic-gate * increment by one the loop indices. 22227c478bd9Sstevel@tonic-gate */ 22237c478bd9Sstevel@tonic-gate spgcnt_t npgs = nreloc; 22247c478bd9Sstevel@tonic-gate 22257c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[16]); 22267c478bd9Sstevel@tonic-gate 22277c478bd9Sstevel@tonic-gate ASSERT(pp == ppa[pg_idx]); 22287c478bd9Sstevel@tonic-gate ASSERT(slotcreate == 0); 22297c478bd9Sstevel@tonic-gate ASSERT(pg_idx + npgs <= pgcnt); 22307c478bd9Sstevel@tonic-gate if ((*protp & PROT_WRITE) && 22317c478bd9Sstevel@tonic-gate anon_share(amp->ahp, an_idx, npgs)) { 22327c478bd9Sstevel@tonic-gate *protp &= ~PROT_WRITE; 22337c478bd9Sstevel@tonic-gate } 22347c478bd9Sstevel@tonic-gate pg_idx += npgs; 22357c478bd9Sstevel@tonic-gate an_idx += npgs; 22367c478bd9Sstevel@tonic-gate vaddr += PAGESIZE * npgs; 22377c478bd9Sstevel@tonic-gate continue; 22387c478bd9Sstevel@tonic-gate } 22397c478bd9Sstevel@tonic-gate 22407c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[17]); 22417c478bd9Sstevel@tonic-gate 22427c478bd9Sstevel@tonic-gate /* 22437c478bd9Sstevel@tonic-gate * Anon_zero case. 22447c478bd9Sstevel@tonic-gate */ 22457c478bd9Sstevel@tonic-gate if (slotcreate) { 22467c478bd9Sstevel@tonic-gate ASSERT(prealloc); 22477c478bd9Sstevel@tonic-gate pagezero(pp, 0, PAGESIZE); 22487c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(vm, zfod, 1); 22497c478bd9Sstevel@tonic-gate hat_setrefmod(pp); 22507c478bd9Sstevel@tonic-gate } 22517c478bd9Sstevel@tonic-gate 22527c478bd9Sstevel@tonic-gate ASSERT(prealloc == 0 || ppa[pg_idx] == pp); 22537c478bd9Sstevel@tonic-gate ASSERT(prealloc != 0 || PAGE_SHARED(pp)); 22547c478bd9Sstevel@tonic-gate ASSERT(prealloc == 0 || PAGE_EXCL(pp)); 22557c478bd9Sstevel@tonic-gate 22567c478bd9Sstevel@tonic-gate if (pg_idx > 0 && 22577c478bd9Sstevel@tonic-gate ((page_pptonum(pp) != page_pptonum(ppa[pg_idx - 1]) + 1) || 225807b65a64Saguzovsk (pp->p_szc != ppa[pg_idx - 1]->p_szc))) { 22597c478bd9Sstevel@tonic-gate panic("anon_map_getpages: unexpected page"); 226007b65a64Saguzovsk } else if (pg_idx == 0 && (page_pptonum(pp) & (pgcnt - 1))) { 226107b65a64Saguzovsk panic("anon_map_getpages: unaligned page"); 226207b65a64Saguzovsk } 22637c478bd9Sstevel@tonic-gate 22647c478bd9Sstevel@tonic-gate if (prealloc == 0) { 22657c478bd9Sstevel@tonic-gate ppa[pg_idx] = pp; 22667c478bd9Sstevel@tonic-gate } 22677c478bd9Sstevel@tonic-gate 22687c478bd9Sstevel@tonic-gate if (ap->an_refcnt > 1) { 22697c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[18]); 22707c478bd9Sstevel@tonic-gate *protp &= ~PROT_WRITE; 22717c478bd9Sstevel@tonic-gate } 22727c478bd9Sstevel@tonic-gate 22737c478bd9Sstevel@tonic-gate /* 22747c478bd9Sstevel@tonic-gate * If this is a new anon slot then initialize 22757c478bd9Sstevel@tonic-gate * the anon array entry. 
22767c478bd9Sstevel@tonic-gate */ 22777c478bd9Sstevel@tonic-gate if (slotcreate) { 22787c478bd9Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, an_idx, ap, ANON_SLEEP); 22797c478bd9Sstevel@tonic-gate } 22807c478bd9Sstevel@tonic-gate pg_idx++; 22817c478bd9Sstevel@tonic-gate an_idx++; 22827c478bd9Sstevel@tonic-gate vaddr += PAGESIZE; 22837c478bd9Sstevel@tonic-gate } 22847c478bd9Sstevel@tonic-gate 22857c478bd9Sstevel@tonic-gate /* 22867c478bd9Sstevel@tonic-gate * Since preallocated pages come off the freelist 22877c478bd9Sstevel@tonic-gate * they are locked SE_EXCL. Simply downgrade and return. 22887c478bd9Sstevel@tonic-gate */ 22897c478bd9Sstevel@tonic-gate if (prealloc) { 22907c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[19]); 22917c478bd9Sstevel@tonic-gate conpp = NULL; 22927c478bd9Sstevel@tonic-gate for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) { 22937c478bd9Sstevel@tonic-gate page_downgrade(ppa[pg_idx]); 22947c478bd9Sstevel@tonic-gate } 22957c478bd9Sstevel@tonic-gate } 22967c478bd9Sstevel@tonic-gate ASSERT(conpp == NULL); 22977c478bd9Sstevel@tonic-gate 22987c478bd9Sstevel@tonic-gate if (brkcow == 0 || (*protp & PROT_WRITE)) { 22997c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[20]); 23007c478bd9Sstevel@tonic-gate return (0); 23017c478bd9Sstevel@tonic-gate } 23027c478bd9Sstevel@tonic-gate 23037c478bd9Sstevel@tonic-gate if (szc < seg->s_szc) 23047c478bd9Sstevel@tonic-gate panic("anon_map_getpages: cowfault for szc %d", szc); 23057c478bd9Sstevel@tonic-gate 23067c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[21]); 23077c478bd9Sstevel@tonic-gate 23087c478bd9Sstevel@tonic-gate *protp = PROT_ALL; 23097c478bd9Sstevel@tonic-gate return (anon_map_privatepages(amp, start_idx, szc, seg, addr, prot, 23102cb27123Saguzovsk ppa, vpage, anypgsz, pgflags, cred)); 23117c478bd9Sstevel@tonic-gate io_err: 23127c478bd9Sstevel@tonic-gate /* 23137c478bd9Sstevel@tonic-gate * We got an IO error somewhere in our large page. 23147c478bd9Sstevel@tonic-gate * If we were using a preallocated page then just demote 23157c478bd9Sstevel@tonic-gate * all the constituent pages that we've succeeded with so far 23167c478bd9Sstevel@tonic-gate * to PAGESIZE pages and leave them in the system 23177c478bd9Sstevel@tonic-gate * unlocked.
23187c478bd9Sstevel@tonic-gate */ 23197c478bd9Sstevel@tonic-gate 232007b65a64Saguzovsk ASSERT(err != -2 || ((pg_idx == 0) && upsize)); 23217c478bd9Sstevel@tonic-gate 23227c478bd9Sstevel@tonic-gate VM_STAT_COND_ADD(err > 0, anonvmstats.getpages[22]); 23237c478bd9Sstevel@tonic-gate VM_STAT_COND_ADD(err == -1, anonvmstats.getpages[23]); 23247c478bd9Sstevel@tonic-gate VM_STAT_COND_ADD(err == -2, anonvmstats.getpages[24]); 23257c478bd9Sstevel@tonic-gate 23267c478bd9Sstevel@tonic-gate if (prealloc) { 23277c478bd9Sstevel@tonic-gate conpp = NULL; 23287c478bd9Sstevel@tonic-gate if (pg_idx > 0) { 23297c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[25]); 23307c478bd9Sstevel@tonic-gate for (i = 0; i < pgcnt; i++) { 23317c478bd9Sstevel@tonic-gate pp = ppa[i]; 23327c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 23337c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 23347c478bd9Sstevel@tonic-gate pp->p_szc = 0; 23357c478bd9Sstevel@tonic-gate } 23367c478bd9Sstevel@tonic-gate for (i = 0; i < pg_idx; i++) { 23377c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(ppa[i])); 23387c478bd9Sstevel@tonic-gate page_unlock(ppa[i]); 23397c478bd9Sstevel@tonic-gate } 23407c478bd9Sstevel@tonic-gate /* 23417c478bd9Sstevel@tonic-gate * Now free up the remaining unused constituent 23427c478bd9Sstevel@tonic-gate * pages. 23437c478bd9Sstevel@tonic-gate */ 23447c478bd9Sstevel@tonic-gate while (pg_idx < pgcnt) { 23457c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(ppa[pg_idx])); 23467c478bd9Sstevel@tonic-gate page_free(ppa[pg_idx], 0); 23477c478bd9Sstevel@tonic-gate pg_idx++; 23487c478bd9Sstevel@tonic-gate } 23497c478bd9Sstevel@tonic-gate } else { 23507c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[26]); 23517c478bd9Sstevel@tonic-gate page_free_pages(ppa[0]); 23527c478bd9Sstevel@tonic-gate } 23537c478bd9Sstevel@tonic-gate } else { 23547c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[27]); 23557c478bd9Sstevel@tonic-gate ASSERT(err > 0); 23567c478bd9Sstevel@tonic-gate for (i = 0; i < pg_idx; i++) 23577c478bd9Sstevel@tonic-gate page_unlock(ppa[i]); 23587c478bd9Sstevel@tonic-gate } 23597c478bd9Sstevel@tonic-gate ASSERT(conpp == NULL); 23607c478bd9Sstevel@tonic-gate if (err != -1) 23617c478bd9Sstevel@tonic-gate return (err); 23627c478bd9Sstevel@tonic-gate /* 23637c478bd9Sstevel@tonic-gate * we are here because we failed to relocate. 23647c478bd9Sstevel@tonic-gate */ 23657c478bd9Sstevel@tonic-gate ASSERT(prealloc); 236678b03d3aSkchow if (brkcow == 0 || szc < seg->s_szc || 236778b03d3aSkchow !anon_szcshare(amp->ahp, start_idx)) { 23687c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[28]); 23697c478bd9Sstevel@tonic-gate return (-1); 23707c478bd9Sstevel@tonic-gate } 23717c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[29]); 23727c478bd9Sstevel@tonic-gate goto docow; 23737c478bd9Sstevel@tonic-gate } 23747c478bd9Sstevel@tonic-gate 23757c478bd9Sstevel@tonic-gate 23767c478bd9Sstevel@tonic-gate /* 23777c478bd9Sstevel@tonic-gate * Turn a reference to an object or shared anon page 23787c478bd9Sstevel@tonic-gate * into a private page with a copy of the data from the 23797c478bd9Sstevel@tonic-gate * original page which is always locked by the caller. 23807c478bd9Sstevel@tonic-gate * This routine unloads the translation and unlocks the 23817c478bd9Sstevel@tonic-gate * original page, if it isn't being stolen, before returning 23827c478bd9Sstevel@tonic-gate * to the caller. 
23837c478bd9Sstevel@tonic-gate * 23847c478bd9Sstevel@tonic-gate * NOTE: The original anon slot is not freed by this routine 23857c478bd9Sstevel@tonic-gate * It must be freed by the caller while holding the 23867c478bd9Sstevel@tonic-gate * "anon_map" lock to prevent races which can occur if 23877c478bd9Sstevel@tonic-gate * a process has multiple lwps in its address space. 23887c478bd9Sstevel@tonic-gate */ 23897c478bd9Sstevel@tonic-gate page_t * 23907c478bd9Sstevel@tonic-gate anon_private( 23917c478bd9Sstevel@tonic-gate struct anon **app, 23927c478bd9Sstevel@tonic-gate struct seg *seg, 23937c478bd9Sstevel@tonic-gate caddr_t addr, 23947c478bd9Sstevel@tonic-gate uint_t prot, 23957c478bd9Sstevel@tonic-gate page_t *opp, 23967c478bd9Sstevel@tonic-gate int oppflags, 23977c478bd9Sstevel@tonic-gate struct cred *cred) 23987c478bd9Sstevel@tonic-gate { 23997c478bd9Sstevel@tonic-gate struct anon *old = *app; 24007c478bd9Sstevel@tonic-gate struct anon *new; 24017c478bd9Sstevel@tonic-gate page_t *pp = NULL; 24027c478bd9Sstevel@tonic-gate struct vnode *vp; 24037c478bd9Sstevel@tonic-gate anoff_t off; 24047c478bd9Sstevel@tonic-gate page_t *anon_pl[1 + 1]; 24057c478bd9Sstevel@tonic-gate int err; 24067c478bd9Sstevel@tonic-gate 24077c478bd9Sstevel@tonic-gate if (oppflags & STEAL_PAGE) 24087c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(opp)); 24097c478bd9Sstevel@tonic-gate else 24107c478bd9Sstevel@tonic-gate ASSERT(PAGE_LOCKED(opp)); 24117c478bd9Sstevel@tonic-gate 24127c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(vm, cow_fault, 1); 24137c478bd9Sstevel@tonic-gate 24147c478bd9Sstevel@tonic-gate /* Kernel probe */ 24157c478bd9Sstevel@tonic-gate TNF_PROBE_1(anon_private, "vm pagefault", /* CSTYLED */, 24167c478bd9Sstevel@tonic-gate tnf_opaque, address, addr); 24177c478bd9Sstevel@tonic-gate 24187c478bd9Sstevel@tonic-gate *app = new = anon_alloc(NULL, 0); 24197c478bd9Sstevel@tonic-gate swap_xlate(new, &vp, &off); 24207c478bd9Sstevel@tonic-gate 24217c478bd9Sstevel@tonic-gate if (oppflags & STEAL_PAGE) { 24227c478bd9Sstevel@tonic-gate page_rename(opp, vp, (u_offset_t)off); 24237c478bd9Sstevel@tonic-gate pp = opp; 24247c478bd9Sstevel@tonic-gate TRACE_5(TR_FAC_VM, TR_ANON_PRIVATE, 24257c478bd9Sstevel@tonic-gate "anon_private:seg %p addr %x pp %p vp %p off %lx", 24267c478bd9Sstevel@tonic-gate seg, addr, pp, vp, off); 24277c478bd9Sstevel@tonic-gate hat_setmod(pp); 24287c478bd9Sstevel@tonic-gate 24297c478bd9Sstevel@tonic-gate /* bug 4026339 */ 24307c478bd9Sstevel@tonic-gate page_downgrade(pp); 24317c478bd9Sstevel@tonic-gate return (pp); 24327c478bd9Sstevel@tonic-gate } 24337c478bd9Sstevel@tonic-gate 24347c478bd9Sstevel@tonic-gate /* 24357c478bd9Sstevel@tonic-gate * Call the VOP_GETPAGE routine to create the page, thereby 24367c478bd9Sstevel@tonic-gate * enabling the vnode driver to allocate any filesystem 24377c478bd9Sstevel@tonic-gate * space (e.g., disk block allocation for UFS). This also 24387c478bd9Sstevel@tonic-gate * prevents more than one page from being added to the 24397c478bd9Sstevel@tonic-gate * vnode at the same time. 
24407c478bd9Sstevel@tonic-gate */ 24417c478bd9Sstevel@tonic-gate err = VOP_GETPAGE(vp, (u_offset_t)off, PAGESIZE, NULL, 2442da6c28aaSamw anon_pl, PAGESIZE, seg, addr, S_CREATE, cred, NULL); 24437c478bd9Sstevel@tonic-gate if (err) 24447c478bd9Sstevel@tonic-gate goto out; 24457c478bd9Sstevel@tonic-gate 24467c478bd9Sstevel@tonic-gate pp = anon_pl[0]; 24477c478bd9Sstevel@tonic-gate 24487c478bd9Sstevel@tonic-gate /* 24497c478bd9Sstevel@tonic-gate * If the original page was locked, we need to move the lock 24507c478bd9Sstevel@tonic-gate * to the new page by transferring 'cowcnt/lckcnt' of the original 24517c478bd9Sstevel@tonic-gate * page to 'cowcnt/lckcnt' of the new page. 24527c478bd9Sstevel@tonic-gate * 24537c478bd9Sstevel@tonic-gate * See Statement at the beginning of segvn_lockop() and 24547c478bd9Sstevel@tonic-gate * comments in page_pp_useclaim() regarding the way 24557c478bd9Sstevel@tonic-gate * cowcnts/lckcnts are handled. 24567c478bd9Sstevel@tonic-gate * 24577c478bd9Sstevel@tonic-gate * Also availrmem must be decremented up front for a read-only mapping 24587c478bd9Sstevel@tonic-gate * before calling page_pp_useclaim. page_pp_useclaim will bump it back 24597c478bd9Sstevel@tonic-gate * if availrmem did not need to be decremented after all. 24607c478bd9Sstevel@tonic-gate */ 24617c478bd9Sstevel@tonic-gate if (oppflags & LOCK_PAGE) { 24627c478bd9Sstevel@tonic-gate if ((prot & PROT_WRITE) == 0) { 24637c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock); 24647c478bd9Sstevel@tonic-gate if (availrmem > pages_pp_maximum) { 24657c478bd9Sstevel@tonic-gate availrmem--; 24667c478bd9Sstevel@tonic-gate pages_useclaim++; 24677c478bd9Sstevel@tonic-gate } else { 24687c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock); 24697c478bd9Sstevel@tonic-gate goto out; 24707c478bd9Sstevel@tonic-gate } 24717c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock); 24727c478bd9Sstevel@tonic-gate } 24737c478bd9Sstevel@tonic-gate page_pp_useclaim(opp, pp, prot & PROT_WRITE); 24747c478bd9Sstevel@tonic-gate } 24757c478bd9Sstevel@tonic-gate 24767c478bd9Sstevel@tonic-gate /* 24777c478bd9Sstevel@tonic-gate * Now copy the contents from the original page, 24787c478bd9Sstevel@tonic-gate * which is locked and loaded in the MMU by 24797c478bd9Sstevel@tonic-gate * the caller to prevent yet another page fault. 24807c478bd9Sstevel@tonic-gate */ 24818b464eb8Smec /* XXX - should set mod bit in here */ 24828b464eb8Smec if (ppcopy(opp, pp) == 0) { 24838b464eb8Smec /* 24848b464eb8Smec * Before ppcopy could handle UE or other faults, we 24858b464eb8Smec * would have panicked here, and still have no option 24868b464eb8Smec * but to do so now. 24878b464eb8Smec */ 24888b464eb8Smec panic("anon_private, ppcopy failed, opp = 0x%p, pp = 0x%p", 24898793b36bSNick Todd (void *)opp, (void *)pp); 24908b464eb8Smec } 24917c478bd9Sstevel@tonic-gate 24927c478bd9Sstevel@tonic-gate hat_setrefmod(pp); /* mark as modified */ 24937c478bd9Sstevel@tonic-gate 24947c478bd9Sstevel@tonic-gate /* 24957c478bd9Sstevel@tonic-gate * Unload the old translation. 24967c478bd9Sstevel@tonic-gate */ 24977c478bd9Sstevel@tonic-gate hat_unload(seg->s_as->a_hat, addr, PAGESIZE, HAT_UNLOAD); 24987c478bd9Sstevel@tonic-gate 24997c478bd9Sstevel@tonic-gate /* 25007c478bd9Sstevel@tonic-gate * Free the unmapped, unmodified original page,
25017c478bd9Sstevel@tonic-gate * or release the lock on the original page; 25027c478bd9Sstevel@tonic-gate * otherwise the process will sleep forever in 25037c478bd9Sstevel@tonic-gate * anon_decref() waiting for the "exclusive" lock 25047c478bd9Sstevel@tonic-gate * on the page. 25057c478bd9Sstevel@tonic-gate */ 25067c478bd9Sstevel@tonic-gate (void) page_release(opp, 1); 25077c478bd9Sstevel@tonic-gate 25087c478bd9Sstevel@tonic-gate /* 25097c478bd9Sstevel@tonic-gate * We are done with page creation, so downgrade the new 25107c478bd9Sstevel@tonic-gate * page's selock to shared; this helps when multiple 25117c478bd9Sstevel@tonic-gate * as_fault(...SOFTLOCK...) are done to the same 25127c478bd9Sstevel@tonic-gate * page (aio). 25137c478bd9Sstevel@tonic-gate */ 25147c478bd9Sstevel@tonic-gate page_downgrade(pp); 25157c478bd9Sstevel@tonic-gate 25167c478bd9Sstevel@tonic-gate /* 25177c478bd9Sstevel@tonic-gate * NOTE: The original anon slot must be freed by the 25187c478bd9Sstevel@tonic-gate * caller while holding the "anon_map" lock, if we 25197c478bd9Sstevel@tonic-gate * copied away from an anonymous page. 25207c478bd9Sstevel@tonic-gate */ 25217c478bd9Sstevel@tonic-gate return (pp); 25227c478bd9Sstevel@tonic-gate 25237c478bd9Sstevel@tonic-gate out: 25247c478bd9Sstevel@tonic-gate *app = old; 25257c478bd9Sstevel@tonic-gate if (pp) 25267c478bd9Sstevel@tonic-gate page_unlock(pp); 25277c478bd9Sstevel@tonic-gate anon_decref(new); 25287c478bd9Sstevel@tonic-gate page_unlock(opp); 25297c478bd9Sstevel@tonic-gate return ((page_t *)NULL); 25307c478bd9Sstevel@tonic-gate } 25317c478bd9Sstevel@tonic-gate 25327c478bd9Sstevel@tonic-gate int 25337c478bd9Sstevel@tonic-gate anon_map_privatepages( 25347c478bd9Sstevel@tonic-gate struct anon_map *amp, 25357c478bd9Sstevel@tonic-gate ulong_t start_idx, 25367c478bd9Sstevel@tonic-gate uint_t szc, 25377c478bd9Sstevel@tonic-gate struct seg *seg, 25387c478bd9Sstevel@tonic-gate caddr_t addr, 25397c478bd9Sstevel@tonic-gate uint_t prot, 25407c478bd9Sstevel@tonic-gate page_t *ppa[], 25417c478bd9Sstevel@tonic-gate struct vpage vpage[], 25427c478bd9Sstevel@tonic-gate int anypgsz, 25432cb27123Saguzovsk int pgflags, 25447c478bd9Sstevel@tonic-gate struct cred *cred) 25457c478bd9Sstevel@tonic-gate { 25467c478bd9Sstevel@tonic-gate pgcnt_t pgcnt; 25477c478bd9Sstevel@tonic-gate struct vnode *vp; 25487c478bd9Sstevel@tonic-gate anoff_t off; 25497c478bd9Sstevel@tonic-gate page_t *pl[2], *conpp = NULL; 25507c478bd9Sstevel@tonic-gate int err; 25517c478bd9Sstevel@tonic-gate int prealloc = 1; 25527c478bd9Sstevel@tonic-gate struct anon *ap, *oldap; 25537c478bd9Sstevel@tonic-gate caddr_t vaddr; 25547c478bd9Sstevel@tonic-gate page_t *pplist, *pp; 25557c478bd9Sstevel@tonic-gate ulong_t pg_idx, an_idx; 25567c478bd9Sstevel@tonic-gate spgcnt_t nreloc = 0; 25577c478bd9Sstevel@tonic-gate int pagelock = 0; 25587c478bd9Sstevel@tonic-gate kmutex_t *ahmpages = NULL; 25597c478bd9Sstevel@tonic-gate #ifdef DEBUG 25607c478bd9Sstevel@tonic-gate int refcnt; 25617c478bd9Sstevel@tonic-gate #endif 25627c478bd9Sstevel@tonic-gate 25637c478bd9Sstevel@tonic-gate ASSERT(szc != 0); 25647c478bd9Sstevel@tonic-gate ASSERT(szc == seg->s_szc); 25657c478bd9Sstevel@tonic-gate 25667c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[0]); 25677c478bd9Sstevel@tonic-gate 25687c478bd9Sstevel@tonic-gate pgcnt = page_get_pagecnt(szc); 25697c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 25707c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(start_idx, pgcnt)); 25717c478bd9Sstevel@tonic-gate 25727c478bd9Sstevel@tonic-gate
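/*
 * Illustrative sketch added by the editor (not in the original source):
 * the two callers of anon_map_privatepages() visible in this file invoke
 * it roughly as follows.  From anon_map_getpages(), after a COW fault on
 * a shared large page:
 *
 *	*protp = PROT_ALL;
 *	return (anon_map_privatepages(amp, start_idx, szc, seg, addr,
 *	    prot, ppa, vpage, anypgsz, pgflags, cred));
 *
 * and from anon_map_demotepages(), which passes anypgsz == -1 and
 * pgflags == 0 so that no large page is preallocated and VOP_GETPAGE
 * supplies PAGESIZE pages instead.
 */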
ASSERT(amp != NULL); 25737c478bd9Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, start_idx); 25747c478bd9Sstevel@tonic-gate ASSERT(ap == NULL || ap->an_refcnt >= 1); 25757c478bd9Sstevel@tonic-gate 25767c478bd9Sstevel@tonic-gate VM_STAT_COND_ADD(ap == NULL, anonvmstats.privatepages[1]); 25777c478bd9Sstevel@tonic-gate 25787c478bd9Sstevel@tonic-gate /* 25797c478bd9Sstevel@tonic-gate * Now try to allocate the large page. If we fail then just 25807c478bd9Sstevel@tonic-gate * let VOP_GETPAGE give us PAGESIZE pages. Normally we let 25817c478bd9Sstevel@tonic-gate * the caller make this decision but to avoid added complexity 25827c478bd9Sstevel@tonic-gate * it's simpler to handle that case here. 25837c478bd9Sstevel@tonic-gate */ 25847c478bd9Sstevel@tonic-gate if (anypgsz == -1) { 25857c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[2]); 25867c478bd9Sstevel@tonic-gate prealloc = 0; 2587e44bd21cSsusans } else if (page_alloc_pages(anon_vp, seg, addr, &pplist, NULL, szc, 25882cb27123Saguzovsk anypgsz, pgflags) != 0) { 25897c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[3]); 25907c478bd9Sstevel@tonic-gate prealloc = 0; 25917c478bd9Sstevel@tonic-gate } 25927c478bd9Sstevel@tonic-gate 25937c478bd9Sstevel@tonic-gate /* 25947c478bd9Sstevel@tonic-gate * Make the decrement of all refcnts of all 25957c478bd9Sstevel@tonic-gate * anon slots of a large page appear atomic by 25967c478bd9Sstevel@tonic-gate * getting an anonpages_hash_lock for the 25977c478bd9Sstevel@tonic-gate * first anon slot of a large page. 25987c478bd9Sstevel@tonic-gate */ 25997c478bd9Sstevel@tonic-gate if (ap != NULL) { 260023d9e5acSMichael Corcoran ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); 26017c478bd9Sstevel@tonic-gate mutex_enter(ahmpages); 26027c478bd9Sstevel@tonic-gate if (ap->an_refcnt == 1) { 26037c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[4]); 26047c478bd9Sstevel@tonic-gate ASSERT(!anon_share(amp->ahp, start_idx, pgcnt)); 26057c478bd9Sstevel@tonic-gate mutex_exit(ahmpages); 26067c478bd9Sstevel@tonic-gate 26077c478bd9Sstevel@tonic-gate if (prealloc) { 26087c478bd9Sstevel@tonic-gate page_free_replacement_page(pplist); 26097c478bd9Sstevel@tonic-gate page_create_putback(pgcnt); 26107c478bd9Sstevel@tonic-gate } 26117c478bd9Sstevel@tonic-gate ASSERT(ppa[0]->p_szc <= szc); 26127c478bd9Sstevel@tonic-gate if (ppa[0]->p_szc == szc) { 26137c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[5]); 26147c478bd9Sstevel@tonic-gate return (0); 26157c478bd9Sstevel@tonic-gate } 26167c478bd9Sstevel@tonic-gate for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) { 26177c478bd9Sstevel@tonic-gate ASSERT(ppa[pg_idx] != NULL); 26187c478bd9Sstevel@tonic-gate page_unlock(ppa[pg_idx]); 26197c478bd9Sstevel@tonic-gate } 26207c478bd9Sstevel@tonic-gate return (-1); 26217c478bd9Sstevel@tonic-gate } 26227c478bd9Sstevel@tonic-gate } 26237c478bd9Sstevel@tonic-gate 26247c478bd9Sstevel@tonic-gate /* 26257c478bd9Sstevel@tonic-gate * If we are passed in the vpage array and this is 26267c478bd9Sstevel@tonic-gate * not PROT_WRITE then we need to decrement availrmem 26277c478bd9Sstevel@tonic-gate * up front before we try anything. If we need to and 26287c478bd9Sstevel@tonic-gate * can't decrement availrmem then it's better to fail now 26297c478bd9Sstevel@tonic-gate * than in the middle of processing the new large page. 26307c478bd9Sstevel@tonic-gate * page_pp_useclaim() on behalf of each constituent page 26317c478bd9Sstevel@tonic-gate * below will adjust availrmem back for the cases not needed.
26327c478bd9Sstevel@tonic-gate */ 26337c478bd9Sstevel@tonic-gate if (vpage != NULL && (prot & PROT_WRITE) == 0) { 26347c478bd9Sstevel@tonic-gate for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) { 26357c478bd9Sstevel@tonic-gate if (VPP_ISPPLOCK(&vpage[pg_idx])) { 26367c478bd9Sstevel@tonic-gate pagelock = 1; 26377c478bd9Sstevel@tonic-gate break; 26387c478bd9Sstevel@tonic-gate } 26397c478bd9Sstevel@tonic-gate } 26407c478bd9Sstevel@tonic-gate if (pagelock) { 26417c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[6]); 26427c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock); 26437c478bd9Sstevel@tonic-gate if (availrmem >= pages_pp_maximum + pgcnt) { 26447c478bd9Sstevel@tonic-gate availrmem -= pgcnt; 26457c478bd9Sstevel@tonic-gate pages_useclaim += pgcnt; 26467c478bd9Sstevel@tonic-gate } else { 26477c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[7]); 26487c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock); 26497c478bd9Sstevel@tonic-gate if (ahmpages != NULL) { 26507c478bd9Sstevel@tonic-gate mutex_exit(ahmpages); 26517c478bd9Sstevel@tonic-gate } 26527c478bd9Sstevel@tonic-gate if (prealloc) { 26537c478bd9Sstevel@tonic-gate page_free_replacement_page(pplist); 26547c478bd9Sstevel@tonic-gate page_create_putback(pgcnt); 26557c478bd9Sstevel@tonic-gate } 26567c478bd9Sstevel@tonic-gate for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) 26577c478bd9Sstevel@tonic-gate if (ppa[pg_idx] != NULL) 26587c478bd9Sstevel@tonic-gate page_unlock(ppa[pg_idx]); 26597c478bd9Sstevel@tonic-gate return (ENOMEM); 26607c478bd9Sstevel@tonic-gate } 26617c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock); 26627c478bd9Sstevel@tonic-gate } 26637c478bd9Sstevel@tonic-gate } 26647c478bd9Sstevel@tonic-gate 26657c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(vm, cow_fault, pgcnt); 26667c478bd9Sstevel@tonic-gate 26677c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[8]); 26687c478bd9Sstevel@tonic-gate 26697c478bd9Sstevel@tonic-gate an_idx = start_idx; 26707c478bd9Sstevel@tonic-gate pg_idx = 0; 26717c478bd9Sstevel@tonic-gate vaddr = addr; 26727c478bd9Sstevel@tonic-gate for (; pg_idx < pgcnt; pg_idx++, an_idx++, vaddr += PAGESIZE) { 26737c478bd9Sstevel@tonic-gate ASSERT(ppa[pg_idx] != NULL); 26747c478bd9Sstevel@tonic-gate oldap = anon_get_ptr(amp->ahp, an_idx); 26757c478bd9Sstevel@tonic-gate ASSERT(ahmpages != NULL || oldap == NULL); 26767c478bd9Sstevel@tonic-gate ASSERT(ahmpages == NULL || oldap != NULL); 26777c478bd9Sstevel@tonic-gate ASSERT(ahmpages == NULL || oldap->an_refcnt > 1); 26787c478bd9Sstevel@tonic-gate ASSERT(ahmpages == NULL || pg_idx != 0 || 26797c478bd9Sstevel@tonic-gate (refcnt = oldap->an_refcnt)); 26807c478bd9Sstevel@tonic-gate ASSERT(ahmpages == NULL || pg_idx == 0 || 26817c478bd9Sstevel@tonic-gate refcnt == oldap->an_refcnt); 26827c478bd9Sstevel@tonic-gate 26837c478bd9Sstevel@tonic-gate ap = anon_alloc(NULL, 0); 26847c478bd9Sstevel@tonic-gate 26857c478bd9Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 26867c478bd9Sstevel@tonic-gate 26877c478bd9Sstevel@tonic-gate /* 26887c478bd9Sstevel@tonic-gate * Now set up our preallocated page to pass down to 26897c478bd9Sstevel@tonic-gate * swap_getpage().
26907c478bd9Sstevel@tonic-gate */ 26917c478bd9Sstevel@tonic-gate if (prealloc) { 26927c478bd9Sstevel@tonic-gate pp = pplist; 26937c478bd9Sstevel@tonic-gate page_sub(&pplist, pp); 26947c478bd9Sstevel@tonic-gate conpp = pp; 26957c478bd9Sstevel@tonic-gate } 26967c478bd9Sstevel@tonic-gate 26977c478bd9Sstevel@tonic-gate err = swap_getconpage(vp, (u_offset_t)off, PAGESIZE, NULL, pl, 269807b65a64Saguzovsk PAGESIZE, conpp, NULL, &nreloc, seg, vaddr, 269907b65a64Saguzovsk S_CREATE, cred); 27007c478bd9Sstevel@tonic-gate 27017c478bd9Sstevel@tonic-gate /* 27027c478bd9Sstevel@tonic-gate * Impossible to fail since this is S_CREATE. 27037c478bd9Sstevel@tonic-gate */ 27047c478bd9Sstevel@tonic-gate if (err) 27057c478bd9Sstevel@tonic-gate panic("anon_map_privatepages: VOP_GETPAGE failed"); 27067c478bd9Sstevel@tonic-gate 27077c478bd9Sstevel@tonic-gate ASSERT(prealloc ? pp == pl[0] : pl[0]->p_szc == 0); 27087c478bd9Sstevel@tonic-gate ASSERT(prealloc == 0 || nreloc == 1); 27097c478bd9Sstevel@tonic-gate 27107c478bd9Sstevel@tonic-gate pp = pl[0]; 27117c478bd9Sstevel@tonic-gate 27127c478bd9Sstevel@tonic-gate /* 27137c478bd9Sstevel@tonic-gate * If the original page was locked, we need to move 27147c478bd9Sstevel@tonic-gate * the lock to the new page by transferring 27157c478bd9Sstevel@tonic-gate * 'cowcnt/lckcnt' of the original page to 'cowcnt/lckcnt' 27167c478bd9Sstevel@tonic-gate * of the new page. pg_idx can be used to index 27177c478bd9Sstevel@tonic-gate * into the vpage array since the caller will guarantee 27187c478bd9Sstevel@tonic-gate * that vpage struct passed in corresponds to addr 27197c478bd9Sstevel@tonic-gate * and forward. 27207c478bd9Sstevel@tonic-gate */ 27217c478bd9Sstevel@tonic-gate if (vpage != NULL && VPP_ISPPLOCK(&vpage[pg_idx])) { 27227c478bd9Sstevel@tonic-gate page_pp_useclaim(ppa[pg_idx], pp, prot & PROT_WRITE); 27237c478bd9Sstevel@tonic-gate } else if (pagelock) { 27247c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock); 27257c478bd9Sstevel@tonic-gate availrmem++; 27267c478bd9Sstevel@tonic-gate pages_useclaim--; 27277c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock); 27287c478bd9Sstevel@tonic-gate } 27297c478bd9Sstevel@tonic-gate 27307c478bd9Sstevel@tonic-gate /* 27317c478bd9Sstevel@tonic-gate * Now copy the contents from the original page. 27327c478bd9Sstevel@tonic-gate */ 27338b464eb8Smec if (ppcopy(ppa[pg_idx], pp) == 0) { 27348b464eb8Smec /* 27358b464eb8Smec * Before ppcopy could handle UE or other faults, we 27368b464eb8Smec * would have panicked here, and still have no option 27378b464eb8Smec * but to do so now. 27388b464eb8Smec */ 27398b464eb8Smec panic("anon_map_privatepages, ppcopy failed"); 27408b464eb8Smec } 27417c478bd9Sstevel@tonic-gate 27427c478bd9Sstevel@tonic-gate hat_setrefmod(pp); /* mark as modified */ 27437c478bd9Sstevel@tonic-gate 27447c478bd9Sstevel@tonic-gate /* 27457c478bd9Sstevel@tonic-gate * Release the lock on the original page, 27467c478bd9Sstevel@tonic-gate * decrement the old slot, and downgrade the lock 27477c478bd9Sstevel@tonic-gate * on the new copy. 27487c478bd9Sstevel@tonic-gate */ 27497c478bd9Sstevel@tonic-gate page_unlock(ppa[pg_idx]); 27507c478bd9Sstevel@tonic-gate 27517c478bd9Sstevel@tonic-gate if (!prealloc) 27527c478bd9Sstevel@tonic-gate page_downgrade(pp); 27537c478bd9Sstevel@tonic-gate 27547c478bd9Sstevel@tonic-gate ppa[pg_idx] = pp; 27557c478bd9Sstevel@tonic-gate 27567c478bd9Sstevel@tonic-gate /* 27577c478bd9Sstevel@tonic-gate * Now reflect the copy in the new anon array.
27587c478bd9Sstevel@tonic-gate */ 27597c478bd9Sstevel@tonic-gate ASSERT(ahmpages == NULL || oldap->an_refcnt > 1); 27607c478bd9Sstevel@tonic-gate if (oldap != NULL) 27617c478bd9Sstevel@tonic-gate anon_decref(oldap); 27627c478bd9Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, an_idx, ap, ANON_SLEEP); 27637c478bd9Sstevel@tonic-gate } 2764ae320ee6Speterte 2765ae320ee6Speterte /* 2766ae320ee6Speterte * Unload the old large page translation. 2767ae320ee6Speterte */ 2768ae320ee6Speterte hat_unload(seg->s_as->a_hat, addr, pgcnt << PAGESHIFT, HAT_UNLOAD); 2769ae320ee6Speterte 27707c478bd9Sstevel@tonic-gate if (ahmpages != NULL) { 27717c478bd9Sstevel@tonic-gate mutex_exit(ahmpages); 27727c478bd9Sstevel@tonic-gate } 27737c478bd9Sstevel@tonic-gate ASSERT(prealloc == 0 || pplist == NULL); 27747c478bd9Sstevel@tonic-gate if (prealloc) { 27757c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[9]); 27767c478bd9Sstevel@tonic-gate for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) { 27777c478bd9Sstevel@tonic-gate page_downgrade(ppa[pg_idx]); 27787c478bd9Sstevel@tonic-gate } 27797c478bd9Sstevel@tonic-gate } 27807c478bd9Sstevel@tonic-gate 27817c478bd9Sstevel@tonic-gate return (0); 27827c478bd9Sstevel@tonic-gate } 27837c478bd9Sstevel@tonic-gate 27847c478bd9Sstevel@tonic-gate /* 27857c478bd9Sstevel@tonic-gate * Allocate a private zero-filled anon page. 27867c478bd9Sstevel@tonic-gate */ 27877c478bd9Sstevel@tonic-gate page_t * 27887c478bd9Sstevel@tonic-gate anon_zero(struct seg *seg, caddr_t addr, struct anon **app, struct cred *cred) 27897c478bd9Sstevel@tonic-gate { 27907c478bd9Sstevel@tonic-gate struct anon *ap; 27917c478bd9Sstevel@tonic-gate page_t *pp; 27927c478bd9Sstevel@tonic-gate struct vnode *vp; 27937c478bd9Sstevel@tonic-gate anoff_t off; 27947c478bd9Sstevel@tonic-gate page_t *anon_pl[1 + 1]; 27957c478bd9Sstevel@tonic-gate int err; 27967c478bd9Sstevel@tonic-gate 27977c478bd9Sstevel@tonic-gate /* Kernel probe */ 27987c478bd9Sstevel@tonic-gate TNF_PROBE_1(anon_zero, "vm pagefault", /* CSTYLED */, 27997c478bd9Sstevel@tonic-gate tnf_opaque, address, addr); 28007c478bd9Sstevel@tonic-gate 28017c478bd9Sstevel@tonic-gate *app = ap = anon_alloc(NULL, 0); 28027c478bd9Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 28037c478bd9Sstevel@tonic-gate 28047c478bd9Sstevel@tonic-gate /* 28057c478bd9Sstevel@tonic-gate * Call the VOP_GETPAGE routine to create the page, thereby 28067c478bd9Sstevel@tonic-gate * enabling the vnode driver to allocate any filesystem 28077c478bd9Sstevel@tonic-gate * dependent structures (e.g., disk block allocation for UFS). 28087c478bd9Sstevel@tonic-gate * This also prevents more than one page from being added to 28097c478bd9Sstevel@tonic-gate * the vnode at the same time since it is locked.
28107c478bd9Sstevel@tonic-gate */ 28117c478bd9Sstevel@tonic-gate err = VOP_GETPAGE(vp, off, PAGESIZE, NULL, 2812da6c28aaSamw anon_pl, PAGESIZE, seg, addr, S_CREATE, cred, NULL); 28137c478bd9Sstevel@tonic-gate if (err) { 28147c478bd9Sstevel@tonic-gate *app = NULL; 28157c478bd9Sstevel@tonic-gate anon_decref(ap); 28167c478bd9Sstevel@tonic-gate return (NULL); 28177c478bd9Sstevel@tonic-gate } 28187c478bd9Sstevel@tonic-gate pp = anon_pl[0]; 28197c478bd9Sstevel@tonic-gate 28207c478bd9Sstevel@tonic-gate pagezero(pp, 0, PAGESIZE); /* XXX - should set mod bit */ 28217c478bd9Sstevel@tonic-gate page_downgrade(pp); 28227c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(vm, zfod, 1); 28237c478bd9Sstevel@tonic-gate hat_setrefmod(pp); /* mark as modified so pageout writes back */ 28247c478bd9Sstevel@tonic-gate return (pp); 28257c478bd9Sstevel@tonic-gate } 28267c478bd9Sstevel@tonic-gate 28277c478bd9Sstevel@tonic-gate 28287c478bd9Sstevel@tonic-gate /* 28297c478bd9Sstevel@tonic-gate * Allocate an array of private zero-filled anon pages for empty slots 28307c478bd9Sstevel@tonic-gate * and kept pages for non-empty slots within the given range. 28317c478bd9Sstevel@tonic-gate * 28327c478bd9Sstevel@tonic-gate * NOTE: This routine will try to use large pages 28337c478bd9Sstevel@tonic-gate * if available and supported by the underlying platform. 28347c478bd9Sstevel@tonic-gate */ 28357c478bd9Sstevel@tonic-gate int 28367c478bd9Sstevel@tonic-gate anon_map_createpages( 28377c478bd9Sstevel@tonic-gate struct anon_map *amp, 28387c478bd9Sstevel@tonic-gate ulong_t start_index, 28397c478bd9Sstevel@tonic-gate size_t len, 28407c478bd9Sstevel@tonic-gate page_t *ppa[], 28417c478bd9Sstevel@tonic-gate struct seg *seg, 28427c478bd9Sstevel@tonic-gate caddr_t addr, 28437c478bd9Sstevel@tonic-gate enum seg_rw rw, 28447c478bd9Sstevel@tonic-gate struct cred *cred) 28457c478bd9Sstevel@tonic-gate { 28467c478bd9Sstevel@tonic-gate 28477c478bd9Sstevel@tonic-gate struct anon *ap; 28487c478bd9Sstevel@tonic-gate struct vnode *ap_vp; 28497c478bd9Sstevel@tonic-gate page_t *pp, *pplist, *anon_pl[1 + 1], *conpp = NULL; 28507c478bd9Sstevel@tonic-gate int err = 0; 28517c478bd9Sstevel@tonic-gate ulong_t p_index, index; 28527c478bd9Sstevel@tonic-gate pgcnt_t npgs, pg_cnt; 28537c478bd9Sstevel@tonic-gate spgcnt_t nreloc = 0; 28547c478bd9Sstevel@tonic-gate uint_t l_szc, szc, prot; 28557c478bd9Sstevel@tonic-gate anoff_t ap_off; 28567c478bd9Sstevel@tonic-gate size_t pgsz; 28577c478bd9Sstevel@tonic-gate lgrp_t *lgrp; 28583230aa08Ssusans kmutex_t *ahm; 28597c478bd9Sstevel@tonic-gate 28607c478bd9Sstevel@tonic-gate /* 28617c478bd9Sstevel@tonic-gate * XXX For now only handle S_CREATE. 28627c478bd9Sstevel@tonic-gate */ 28637c478bd9Sstevel@tonic-gate ASSERT(rw == S_CREATE); 28647c478bd9Sstevel@tonic-gate 28657c478bd9Sstevel@tonic-gate index = start_index; 28667c478bd9Sstevel@tonic-gate p_index = 0; 28677c478bd9Sstevel@tonic-gate npgs = btopr(len); 28687c478bd9Sstevel@tonic-gate 28697c478bd9Sstevel@tonic-gate /* 28707c478bd9Sstevel@tonic-gate * If this platform supports multiple page sizes 28717c478bd9Sstevel@tonic-gate * then try to allocate directly from the free 28727c478bd9Sstevel@tonic-gate * list for pages larger than PAGESIZE. 28737c478bd9Sstevel@tonic-gate * 28747c478bd9Sstevel@tonic-gate * NOTE: When we have page_create_ru we can stop 28757c478bd9Sstevel@tonic-gate * directly allocating from the freelist.
28767c478bd9Sstevel@tonic-gate */ 28777c478bd9Sstevel@tonic-gate l_szc = seg->s_szc; 28787c478bd9Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); 28797c478bd9Sstevel@tonic-gate while (npgs) { 28807c478bd9Sstevel@tonic-gate 28817c478bd9Sstevel@tonic-gate /* 28827c478bd9Sstevel@tonic-gate * If the anon slot already exists 28837c478bd9Sstevel@tonic-gate * (meaning the page has been created), then: 28847c478bd9Sstevel@tonic-gate * 1) look up the page; 28857c478bd9Sstevel@tonic-gate * 2) if the page is still in memory, get it; 28867c478bd9Sstevel@tonic-gate * 3) if not, create a page and 28877c478bd9Sstevel@tonic-gate * page it in from the physical swap device. 28887c478bd9Sstevel@tonic-gate * These are all done in anon_getpage(). 28897c478bd9Sstevel@tonic-gate */ 28907c478bd9Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, index); 28917c478bd9Sstevel@tonic-gate if (ap) { 28927c478bd9Sstevel@tonic-gate err = anon_getpage(&ap, &prot, anon_pl, PAGESIZE, 28937c478bd9Sstevel@tonic-gate seg, addr, S_READ, cred); 28947c478bd9Sstevel@tonic-gate if (err) { 28957c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 28967c478bd9Sstevel@tonic-gate panic("anon_map_createpages: anon_getpage"); 28977c478bd9Sstevel@tonic-gate } 28987c478bd9Sstevel@tonic-gate pp = anon_pl[0]; 28997c478bd9Sstevel@tonic-gate ppa[p_index++] = pp; 29007c478bd9Sstevel@tonic-gate 29013230aa08Ssusans /* 29023230aa08Ssusans * an_pvp can become non-NULL after SysV's page was 29033230aa08Ssusans * paged out before ISM was attached to this SysV 29043230aa08Ssusans * shared memory segment. So free the swap slot if needed. 29053230aa08Ssusans */ 29063230aa08Ssusans if (ap->an_pvp != NULL) { 29073230aa08Ssusans page_io_lock(pp); 290823d9e5acSMichael Corcoran ahm = AH_MUTEX(ap->an_vp, ap->an_off); 29093230aa08Ssusans mutex_enter(ahm); 29103230aa08Ssusans if (ap->an_pvp != NULL) { 29113230aa08Ssusans swap_phys_free(ap->an_pvp, 29123230aa08Ssusans ap->an_poff, PAGESIZE); 29133230aa08Ssusans ap->an_pvp = NULL; 29143230aa08Ssusans ap->an_poff = 0; 29153230aa08Ssusans mutex_exit(ahm); 29163230aa08Ssusans hat_setmod(pp); 29173230aa08Ssusans } else { 29183230aa08Ssusans mutex_exit(ahm); 29193230aa08Ssusans } 29203230aa08Ssusans page_io_unlock(pp); 29213230aa08Ssusans } 29223230aa08Ssusans 29237c478bd9Sstevel@tonic-gate addr += PAGESIZE; 29247c478bd9Sstevel@tonic-gate index++; 29257c478bd9Sstevel@tonic-gate npgs--; 29267c478bd9Sstevel@tonic-gate continue; 29277c478bd9Sstevel@tonic-gate } 29287c478bd9Sstevel@tonic-gate /* 29297c478bd9Sstevel@tonic-gate * Now try to allocate the largest page possible 29307c478bd9Sstevel@tonic-gate * for the current address and range. 29317c478bd9Sstevel@tonic-gate * Keep dropping down in page size until: 29327c478bd9Sstevel@tonic-gate * 29337c478bd9Sstevel@tonic-gate * 1) Properly aligned. 29347c478bd9Sstevel@tonic-gate * 2) Does not overlap existing anon pages. 29357c478bd9Sstevel@tonic-gate * 3) Fits in remaining range. 29367c478bd9Sstevel@tonic-gate * 4) Able to allocate one. 29377c478bd9Sstevel@tonic-gate * 29387c478bd9Sstevel@tonic-gate * NOTE: XXX When page_create_ru is completed this code 29397c478bd9Sstevel@tonic-gate * will change.
29407c478bd9Sstevel@tonic-gate */ 29417c478bd9Sstevel@tonic-gate szc = l_szc; 29427c478bd9Sstevel@tonic-gate pplist = NULL; 29437c478bd9Sstevel@tonic-gate pg_cnt = 0; 29447c478bd9Sstevel@tonic-gate while (szc) { 29457c478bd9Sstevel@tonic-gate pgsz = page_get_pagesize(szc); 29467c478bd9Sstevel@tonic-gate pg_cnt = pgsz >> PAGESHIFT; 29477c478bd9Sstevel@tonic-gate if (IS_P2ALIGNED(addr, pgsz) && pg_cnt <= npgs && 29487c478bd9Sstevel@tonic-gate anon_pages(amp->ahp, index, pg_cnt) == 0) { 29497c478bd9Sstevel@tonic-gate /* 29507c478bd9Sstevel@tonic-gate * XXX 29517c478bd9Sstevel@tonic-gate * Since we are faking page_create() 29527c478bd9Sstevel@tonic-gate * we also need to do the freemem and 29537c478bd9Sstevel@tonic-gate * pcf accounting. 29547c478bd9Sstevel@tonic-gate */ 29557c478bd9Sstevel@tonic-gate (void) page_create_wait(pg_cnt, PG_WAIT); 29567c478bd9Sstevel@tonic-gate 29577c478bd9Sstevel@tonic-gate /* 29587c478bd9Sstevel@tonic-gate * Get the lgroup to allocate the next page of shared 29597c478bd9Sstevel@tonic-gate * memory from, and use it to specify where to 29607c478bd9Sstevel@tonic-gate * allocate the physical memory. 29617c478bd9Sstevel@tonic-gate */ 29627c478bd9Sstevel@tonic-gate lgrp = lgrp_mem_choose(seg, addr, pgsz); 29637c478bd9Sstevel@tonic-gate 29647c478bd9Sstevel@tonic-gate pplist = page_get_freelist( 2965e44bd21cSsusans anon_vp, (u_offset_t)0, seg, 29667c478bd9Sstevel@tonic-gate addr, pgsz, 0, lgrp); 29677c478bd9Sstevel@tonic-gate 29687c478bd9Sstevel@tonic-gate if (pplist == NULL) { 29697c478bd9Sstevel@tonic-gate page_create_putback(pg_cnt); 29707c478bd9Sstevel@tonic-gate } 29717c478bd9Sstevel@tonic-gate 29727c478bd9Sstevel@tonic-gate /* 29737c478bd9Sstevel@tonic-gate * If a request for a page of size 29747c478bd9Sstevel@tonic-gate * larger than PAGESIZE failed 29757c478bd9Sstevel@tonic-gate * then don't try that size anymore. 29767c478bd9Sstevel@tonic-gate */ 29777c478bd9Sstevel@tonic-gate if (pplist == NULL) { 29787c478bd9Sstevel@tonic-gate l_szc = szc - 1; 29797c478bd9Sstevel@tonic-gate } else { 29807c478bd9Sstevel@tonic-gate break; 29817c478bd9Sstevel@tonic-gate } 29827c478bd9Sstevel@tonic-gate } 29837c478bd9Sstevel@tonic-gate szc--; 29847c478bd9Sstevel@tonic-gate } 29857c478bd9Sstevel@tonic-gate 29867c478bd9Sstevel@tonic-gate /* 29877c478bd9Sstevel@tonic-gate * If just using PAGESIZE pages then don't 29887c478bd9Sstevel@tonic-gate * directly allocate from the free list. 29897c478bd9Sstevel@tonic-gate */ 29907c478bd9Sstevel@tonic-gate if (pplist == NULL) { 29917c478bd9Sstevel@tonic-gate ASSERT(szc == 0); 29927c478bd9Sstevel@tonic-gate pp = anon_zero(seg, addr, &ap, cred); 29937c478bd9Sstevel@tonic-gate if (pp == NULL) { 29947c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 29957c478bd9Sstevel@tonic-gate panic("anon_map_createpages: anon_zero"); 29967c478bd9Sstevel@tonic-gate } 29977c478bd9Sstevel@tonic-gate ppa[p_index++] = pp; 29987c478bd9Sstevel@tonic-gate 29997c478bd9Sstevel@tonic-gate ASSERT(anon_get_ptr(amp->ahp, index) == NULL); 30007c478bd9Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, index, ap, ANON_SLEEP); 30017c478bd9Sstevel@tonic-gate 30027c478bd9Sstevel@tonic-gate addr += PAGESIZE; 30037c478bd9Sstevel@tonic-gate index++; 30047c478bd9Sstevel@tonic-gate npgs--; 30057c478bd9Sstevel@tonic-gate continue; 30067c478bd9Sstevel@tonic-gate } 30077c478bd9Sstevel@tonic-gate 30087c478bd9Sstevel@tonic-gate /* 30097c478bd9Sstevel@tonic-gate * pplist is a list of pg_cnt PAGESIZE pages.
30107c478bd9Sstevel@tonic-gate * These pages are locked SE_EXCL since they 30117c478bd9Sstevel@tonic-gate * came directly off the free list. 30127c478bd9Sstevel@tonic-gate */ 30137c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pg_cnt, pg_cnt)); 30147c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(index, pg_cnt)); 30157c478bd9Sstevel@tonic-gate ASSERT(conpp == NULL); 30167c478bd9Sstevel@tonic-gate while (pg_cnt--) { 30177c478bd9Sstevel@tonic-gate 30187c478bd9Sstevel@tonic-gate ap = anon_alloc(NULL, 0); 30197c478bd9Sstevel@tonic-gate swap_xlate(ap, &ap_vp, &ap_off); 30207c478bd9Sstevel@tonic-gate 30217c478bd9Sstevel@tonic-gate ASSERT(pplist != NULL); 30227c478bd9Sstevel@tonic-gate pp = pplist; 30237c478bd9Sstevel@tonic-gate page_sub(&pplist, pp); 30247c478bd9Sstevel@tonic-gate PP_CLRFREE(pp); 30257c478bd9Sstevel@tonic-gate PP_CLRAGED(pp); 30267c478bd9Sstevel@tonic-gate conpp = pp; 30277c478bd9Sstevel@tonic-gate 30287c478bd9Sstevel@tonic-gate err = swap_getconpage(ap_vp, ap_off, PAGESIZE, 302907b65a64Saguzovsk (uint_t *)NULL, anon_pl, PAGESIZE, conpp, NULL, 303007b65a64Saguzovsk &nreloc, seg, addr, S_CREATE, cred); 30317c478bd9Sstevel@tonic-gate 30327c478bd9Sstevel@tonic-gate if (err) { 30337c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 30347c478bd9Sstevel@tonic-gate panic("anon_map_createpages: S_CREATE"); 30357c478bd9Sstevel@tonic-gate } 30367c478bd9Sstevel@tonic-gate 30377c478bd9Sstevel@tonic-gate ASSERT(anon_pl[0] == pp); 30387c478bd9Sstevel@tonic-gate ASSERT(nreloc == 1); 30397c478bd9Sstevel@tonic-gate pagezero(pp, 0, PAGESIZE); 30407c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(vm, zfod, 1); 30417c478bd9Sstevel@tonic-gate hat_setrefmod(pp); 30427c478bd9Sstevel@tonic-gate 30437c478bd9Sstevel@tonic-gate ASSERT(anon_get_ptr(amp->ahp, index) == NULL); 30447c478bd9Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, index, ap, ANON_SLEEP); 30457c478bd9Sstevel@tonic-gate 30467c478bd9Sstevel@tonic-gate ppa[p_index++] = pp; 30477c478bd9Sstevel@tonic-gate 30487c478bd9Sstevel@tonic-gate addr += PAGESIZE; 30497c478bd9Sstevel@tonic-gate index++; 30507c478bd9Sstevel@tonic-gate npgs--; 30517c478bd9Sstevel@tonic-gate } 30527c478bd9Sstevel@tonic-gate conpp = NULL; 30537c478bd9Sstevel@tonic-gate pg_cnt = pgsz >> PAGESHIFT; 30547c478bd9Sstevel@tonic-gate p_index = p_index - pg_cnt; 30557c478bd9Sstevel@tonic-gate while (pg_cnt--) { 30567c478bd9Sstevel@tonic-gate page_downgrade(ppa[p_index++]); 30577c478bd9Sstevel@tonic-gate } 30587c478bd9Sstevel@tonic-gate } 30597c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock); 30607c478bd9Sstevel@tonic-gate return (0); 30617c478bd9Sstevel@tonic-gate } 30627c478bd9Sstevel@tonic-gate 306307b65a64Saguzovsk static int 306407b65a64Saguzovsk anon_try_demote_pages( 306507b65a64Saguzovsk struct anon_hdr *ahp, 306607b65a64Saguzovsk ulong_t sidx, 306707b65a64Saguzovsk uint_t szc, 306807b65a64Saguzovsk page_t **ppa, 306907b65a64Saguzovsk int private) 30707c478bd9Sstevel@tonic-gate { 30717c478bd9Sstevel@tonic-gate struct anon *ap; 30727c478bd9Sstevel@tonic-gate pgcnt_t pgcnt = page_get_pagecnt(szc); 30737c478bd9Sstevel@tonic-gate page_t *pp; 307407b65a64Saguzovsk pgcnt_t i; 30757c478bd9Sstevel@tonic-gate kmutex_t *ahmpages = NULL; 307607b65a64Saguzovsk int root = 0; 307707b65a64Saguzovsk pgcnt_t npgs; 307807b65a64Saguzovsk pgcnt_t curnpgs = 0; 307907b65a64Saguzovsk size_t ppasize = 0; 30807c478bd9Sstevel@tonic-gate 308107b65a64Saguzovsk ASSERT(szc != 0); 30827c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 308307b65a64Saguzovsk ASSERT(IS_P2ALIGNED(sidx, pgcnt));
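/*
 * Added commentary (editor's note, not in the original source):
 * anon_try_demote_pages() attempts to demote the large page spanning
 * anon slots [sidx, sidx + pgcnt) to PAGESIZE pages.  As the code below
 * shows, it returns 0 without demoting if the first anon slot is still
 * shared (an_refcnt > 1); otherwise it unloads each constituent page
 * from the hat, clears its p_szc, unlocks it, and returns 1.  Passing a
 * NULL ppa asks the routine to allocate a scratch page array itself.
 */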
308407b65a64Saguzovsk ASSERT(sidx < ahp->size); 30857c478bd9Sstevel@tonic-gate 308607b65a64Saguzovsk if (ppa == NULL) { 308707b65a64Saguzovsk ppasize = pgcnt * sizeof (page_t *); 308807b65a64Saguzovsk ppa = kmem_alloc(ppasize, KM_SLEEP); 308907b65a64Saguzovsk } 30907c478bd9Sstevel@tonic-gate 309107b65a64Saguzovsk ap = anon_get_ptr(ahp, sidx); 309207b65a64Saguzovsk if (ap != NULL && private) { 30937c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[1]); 309423d9e5acSMichael Corcoran ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); 30957c478bd9Sstevel@tonic-gate mutex_enter(ahmpages); 30967c478bd9Sstevel@tonic-gate } 30977c478bd9Sstevel@tonic-gate 309807b65a64Saguzovsk if (ap != NULL && ap->an_refcnt > 1) { 309907b65a64Saguzovsk if (ahmpages != NULL) { 31007c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[2]); 31017c478bd9Sstevel@tonic-gate mutex_exit(ahmpages); 310207b65a64Saguzovsk } 310307b65a64Saguzovsk if (ppasize != 0) { 310407b65a64Saguzovsk kmem_free(ppa, ppasize); 310507b65a64Saguzovsk } 310607b65a64Saguzovsk return (0); 310707b65a64Saguzovsk } 310807b65a64Saguzovsk if (ahmpages != NULL) { 310907b65a64Saguzovsk mutex_exit(ahmpages); 311007b65a64Saguzovsk } 311107b65a64Saguzovsk if (ahp->size - sidx < pgcnt) { 311207b65a64Saguzovsk ASSERT(private == 0); 311307b65a64Saguzovsk pgcnt = ahp->size - sidx; 311407b65a64Saguzovsk } 311507b65a64Saguzovsk for (i = 0; i < pgcnt; i++, sidx++) { 311607b65a64Saguzovsk ap = anon_get_ptr(ahp, sidx); 31177c478bd9Sstevel@tonic-gate if (ap != NULL) { 311807b65a64Saguzovsk if (ap->an_refcnt != 1) { 311907b65a64Saguzovsk panic("anon_try_demote_pages: an_refcnt != 1"); 312007b65a64Saguzovsk } 31217c478bd9Sstevel@tonic-gate pp = ppa[i] = page_lookup(ap->an_vp, ap->an_off, 31227c478bd9Sstevel@tonic-gate SE_EXCL); 31237c478bd9Sstevel@tonic-gate if (pp != NULL) { 31247c478bd9Sstevel@tonic-gate (void) hat_pageunload(pp, 31257c478bd9Sstevel@tonic-gate HAT_FORCE_PGUNLOAD); 31267c478bd9Sstevel@tonic-gate } 31277c478bd9Sstevel@tonic-gate } else { 31287c478bd9Sstevel@tonic-gate ppa[i] = NULL; 31297c478bd9Sstevel@tonic-gate } 31307c478bd9Sstevel@tonic-gate } 31317c478bd9Sstevel@tonic-gate for (i = 0; i < pgcnt; i++) { 31327c478bd9Sstevel@tonic-gate if ((pp = ppa[i]) != NULL && pp->p_szc != 0) { 31337c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc <= szc); 31347c478bd9Sstevel@tonic-gate if (!root) { 31357c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[3]); 31367c478bd9Sstevel@tonic-gate if (curnpgs != 0) 313707b65a64Saguzovsk panic("anon_try_demote_pages: " 31387c478bd9Sstevel@tonic-gate "bad large page"); 31397c478bd9Sstevel@tonic-gate 31407c478bd9Sstevel@tonic-gate root = 1; 31417c478bd9Sstevel@tonic-gate curnpgs = npgs = 31427c478bd9Sstevel@tonic-gate page_get_pagecnt(pp->p_szc); 31437c478bd9Sstevel@tonic-gate 31447c478bd9Sstevel@tonic-gate ASSERT(npgs <= pgcnt); 31457c478bd9Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(npgs, npgs)); 314678b03d3aSkchow ASSERT(!(page_pptonum(pp) & (npgs - 1))); 31477c478bd9Sstevel@tonic-gate } else { 31487c478bd9Sstevel@tonic-gate ASSERT(i > 0); 31497c478bd9Sstevel@tonic-gate ASSERT(page_pptonum(pp) - 1 == 31507c478bd9Sstevel@tonic-gate page_pptonum(ppa[i - 1])); 31517c478bd9Sstevel@tonic-gate if ((page_pptonum(pp) & (npgs - 1)) == 31527c478bd9Sstevel@tonic-gate npgs - 1) 31537c478bd9Sstevel@tonic-gate root = 0; 31547c478bd9Sstevel@tonic-gate } 31557c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 31567c478bd9Sstevel@tonic-gate pp->p_szc = 0; 315707b65a64Saguzovsk ASSERT(curnpgs > 0); 
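/*
 * Added commentary (editor's note): curnpgs counts down the remaining
 * constituent pages of the large page currently being demoted; root was
 * set at its first, properly aligned constituent and is cleared again
 * once the last constituent has been visited.
 */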
31587c478bd9Sstevel@tonic-gate curnpgs--; 31597c478bd9Sstevel@tonic-gate } 31607c478bd9Sstevel@tonic-gate } 31617c478bd9Sstevel@tonic-gate if (root != 0 || curnpgs != 0) 316207b65a64Saguzovsk panic("anon_try_demote_pages: bad large page"); 31637c478bd9Sstevel@tonic-gate 31647c478bd9Sstevel@tonic-gate for (i = 0; i < pgcnt; i++) { 31657c478bd9Sstevel@tonic-gate if ((pp = ppa[i]) != NULL) { 31667c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(pp)); 31677c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 31687c478bd9Sstevel@tonic-gate page_unlock(pp); 31697c478bd9Sstevel@tonic-gate } 31707c478bd9Sstevel@tonic-gate } 317107b65a64Saguzovsk if (ppasize != 0) { 31727c478bd9Sstevel@tonic-gate kmem_free(ppa, ppasize); 317307b65a64Saguzovsk } 317407b65a64Saguzovsk return (1); 317507b65a64Saguzovsk } 317607b65a64Saguzovsk 317707b65a64Saguzovsk /* 317807b65a64Saguzovsk * anon_map_demotepages() can only be called by MAP_PRIVATE segments. 317907b65a64Saguzovsk */ 318007b65a64Saguzovsk int 318107b65a64Saguzovsk anon_map_demotepages( 318207b65a64Saguzovsk struct anon_map *amp, 318307b65a64Saguzovsk ulong_t start_idx, 318407b65a64Saguzovsk struct seg *seg, 318507b65a64Saguzovsk caddr_t addr, 318607b65a64Saguzovsk uint_t prot, 318707b65a64Saguzovsk struct vpage vpage[], 318807b65a64Saguzovsk struct cred *cred) 318907b65a64Saguzovsk { 319007b65a64Saguzovsk struct anon *ap; 319107b65a64Saguzovsk uint_t szc = seg->s_szc; 319207b65a64Saguzovsk pgcnt_t pgcnt = page_get_pagecnt(szc); 319307b65a64Saguzovsk size_t ppasize = pgcnt * sizeof (page_t *); 319407b65a64Saguzovsk page_t **ppa = kmem_alloc(ppasize, KM_SLEEP); 319507b65a64Saguzovsk page_t *pp; 319607b65a64Saguzovsk page_t *pl[2]; 319707b65a64Saguzovsk pgcnt_t i, pg_idx; 319807b65a64Saguzovsk ulong_t an_idx; 319907b65a64Saguzovsk caddr_t vaddr; 320007b65a64Saguzovsk int err; 320107b65a64Saguzovsk int retry = 0; 320207b65a64Saguzovsk uint_t vpprot; 320307b65a64Saguzovsk 320407b65a64Saguzovsk ASSERT(RW_WRITE_HELD(&amp->a_rwlock)); 320507b65a64Saguzovsk ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 320607b65a64Saguzovsk ASSERT(IS_P2ALIGNED(start_idx, pgcnt)); 320707b65a64Saguzovsk ASSERT(ppa != NULL); 320807b65a64Saguzovsk ASSERT(szc != 0); 320907b65a64Saguzovsk ASSERT(szc == amp->a_szc); 321007b65a64Saguzovsk 321107b65a64Saguzovsk VM_STAT_ADD(anonvmstats.demotepages[0]); 321207b65a64Saguzovsk 321307b65a64Saguzovsk top: 321407b65a64Saguzovsk if (anon_try_demote_pages(amp->ahp, start_idx, szc, ppa, 1)) { 32151b101e68Saguzovsk kmem_free(ppa, ppasize); 32167c478bd9Sstevel@tonic-gate return (0); 32177c478bd9Sstevel@tonic-gate } 32187c478bd9Sstevel@tonic-gate 32197c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[4]); 32207c478bd9Sstevel@tonic-gate 32217c478bd9Sstevel@tonic-gate ASSERT(retry == 0); /* we can be here only once */ 32227c478bd9Sstevel@tonic-gate 32237c478bd9Sstevel@tonic-gate vaddr = addr; 32247c478bd9Sstevel@tonic-gate for (pg_idx = 0, an_idx = start_idx; pg_idx < pgcnt; 32257c478bd9Sstevel@tonic-gate pg_idx++, an_idx++, vaddr += PAGESIZE) { 32267c478bd9Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, an_idx); 32277c478bd9Sstevel@tonic-gate if (ap == NULL) 32287c478bd9Sstevel@tonic-gate panic("anon_map_demotepages: no anon slot"); 32297c478bd9Sstevel@tonic-gate err = anon_getpage(&ap, &vpprot, pl, PAGESIZE, seg, vaddr, 32307c478bd9Sstevel@tonic-gate S_READ, cred); 32317c478bd9Sstevel@tonic-gate if (err) { 32327c478bd9Sstevel@tonic-gate for (i = 0; i < pg_idx; i++) { 32337c478bd9Sstevel@tonic-gate if ((pp = ppa[i]) != NULL)
32347c478bd9Sstevel@tonic-gate page_unlock(pp); 32357c478bd9Sstevel@tonic-gate } 32367c478bd9Sstevel@tonic-gate kmem_free(ppa, ppasize); 32377c478bd9Sstevel@tonic-gate return (err); 32387c478bd9Sstevel@tonic-gate } 32397c478bd9Sstevel@tonic-gate ppa[pg_idx] = pl[0]; 32407c478bd9Sstevel@tonic-gate } 32417c478bd9Sstevel@tonic-gate 32427c478bd9Sstevel@tonic-gate err = anon_map_privatepages(amp, start_idx, szc, seg, addr, prot, ppa, 32432cb27123Saguzovsk vpage, -1, 0, cred); 32447c478bd9Sstevel@tonic-gate if (err > 0) { 32457c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[5]); 32467c478bd9Sstevel@tonic-gate kmem_free(ppa, ppasize); 32477c478bd9Sstevel@tonic-gate return (err); 32487c478bd9Sstevel@tonic-gate } 32497c478bd9Sstevel@tonic-gate ASSERT(err == 0 || err == -1); 32507c478bd9Sstevel@tonic-gate if (err == -1) { 32517c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[6]); 32527c478bd9Sstevel@tonic-gate retry = 1; 32537c478bd9Sstevel@tonic-gate goto top; 32547c478bd9Sstevel@tonic-gate } 32557c478bd9Sstevel@tonic-gate for (i = 0; i < pgcnt; i++) { 32567c478bd9Sstevel@tonic-gate ASSERT(ppa[i] != NULL); 32577c478bd9Sstevel@tonic-gate if (ppa[i]->p_szc != 0) 32587c478bd9Sstevel@tonic-gate retry = 1; 32597c478bd9Sstevel@tonic-gate page_unlock(ppa[i]); 32607c478bd9Sstevel@tonic-gate } 32617c478bd9Sstevel@tonic-gate if (retry) { 32627c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[7]); 32637c478bd9Sstevel@tonic-gate goto top; 32647c478bd9Sstevel@tonic-gate } 32657c478bd9Sstevel@tonic-gate 32667c478bd9Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[8]); 32677c478bd9Sstevel@tonic-gate 32687c478bd9Sstevel@tonic-gate kmem_free(ppa, ppasize); 32697c478bd9Sstevel@tonic-gate 32707c478bd9Sstevel@tonic-gate return (0); 32717c478bd9Sstevel@tonic-gate } 32727c478bd9Sstevel@tonic-gate 32737c478bd9Sstevel@tonic-gate /* 327407b65a64Saguzovsk * Free pages of shared anon map. It's assumed that anon maps don't share anon 327507b65a64Saguzovsk * structures with private anon maps. Therefore all anon structures should 327607b65a64Saguzovsk * have at most one reference at this point. This means underlying pages can 327707b65a64Saguzovsk * be exclusively locked and demoted or freed. If we are not freeing entire 327807b65a64Saguzovsk * large pages, demote the ends of the region we free so that we are able to 3279da6c28aaSamw * free subpages. Page roots correspond to aligned index positions in the anon map.
328007b65a64Saguzovsk */ 328107b65a64Saguzovsk void 328207b65a64Saguzovsk anon_shmap_free_pages(struct anon_map *amp, ulong_t sidx, size_t len) 328307b65a64Saguzovsk { 328407b65a64Saguzovsk ulong_t eidx = sidx + btopr(len); 328507b65a64Saguzovsk pgcnt_t pages = page_get_pagecnt(amp->a_szc); 328607b65a64Saguzovsk struct anon_hdr *ahp = amp->ahp; 328707b65a64Saguzovsk ulong_t tidx; 328807b65a64Saguzovsk size_t size; 328907b65a64Saguzovsk ulong_t sidx_aligned; 329007b65a64Saguzovsk ulong_t eidx_aligned; 329107b65a64Saguzovsk 3292a98e9dbfSaguzovsk ASSERT(ANON_WRITE_HELD(&amp->a_rwlock)); 329307b65a64Saguzovsk ASSERT(amp->refcnt <= 1); 329407b65a64Saguzovsk ASSERT(amp->a_szc > 0); 329507b65a64Saguzovsk ASSERT(eidx <= ahp->size); 329607b65a64Saguzovsk ASSERT(!anon_share(ahp, sidx, btopr(len))); 329707b65a64Saguzovsk 329807b65a64Saguzovsk if (len == 0) { /* XXX */ 329907b65a64Saguzovsk return; 330007b65a64Saguzovsk } 330107b65a64Saguzovsk 330207b65a64Saguzovsk sidx_aligned = P2ALIGN(sidx, pages); 330307b65a64Saguzovsk if (sidx_aligned != sidx || 330407b65a64Saguzovsk (eidx < sidx_aligned + pages && eidx < ahp->size)) { 330507b65a64Saguzovsk if (!anon_try_demote_pages(ahp, sidx_aligned, 330607b65a64Saguzovsk amp->a_szc, NULL, 0)) { 330707b65a64Saguzovsk panic("anon_shmap_free_pages: demote failed"); 330807b65a64Saguzovsk } 330907b65a64Saguzovsk size = (eidx <= sidx_aligned + pages) ? (eidx - sidx) : 331007b65a64Saguzovsk P2NPHASE(sidx, pages); 331107b65a64Saguzovsk size <<= PAGESHIFT; 331207b65a64Saguzovsk anon_free(ahp, sidx, size); 331307b65a64Saguzovsk sidx = sidx_aligned + pages; 331407b65a64Saguzovsk if (eidx <= sidx) { 331507b65a64Saguzovsk return; 331607b65a64Saguzovsk } 331707b65a64Saguzovsk } 331807b65a64Saguzovsk eidx_aligned = P2ALIGN(eidx, pages); 331907b65a64Saguzovsk if (sidx < eidx_aligned) { 332007b65a64Saguzovsk anon_free_pages(ahp, sidx, 332107b65a64Saguzovsk (eidx_aligned - sidx) << PAGESHIFT, 332207b65a64Saguzovsk amp->a_szc); 332307b65a64Saguzovsk sidx = eidx_aligned; 332407b65a64Saguzovsk } 332507b65a64Saguzovsk ASSERT(sidx == eidx_aligned); 332607b65a64Saguzovsk if (eidx == eidx_aligned) { 332707b65a64Saguzovsk return; 332807b65a64Saguzovsk } 332907b65a64Saguzovsk tidx = eidx; 333007b65a64Saguzovsk if (eidx != ahp->size && anon_get_next_ptr(ahp, &tidx) != NULL && 333107b65a64Saguzovsk tidx - sidx < pages) { 333207b65a64Saguzovsk if (!anon_try_demote_pages(ahp, sidx, amp->a_szc, NULL, 0)) { 333307b65a64Saguzovsk panic("anon_shmap_free_pages: demote failed"); 333407b65a64Saguzovsk } 333507b65a64Saguzovsk size = (eidx - sidx) << PAGESHIFT; 333607b65a64Saguzovsk anon_free(ahp, sidx, size); 333707b65a64Saguzovsk } else { 333807b65a64Saguzovsk anon_free_pages(ahp, sidx, pages << PAGESHIFT, amp->a_szc); 333907b65a64Saguzovsk } 334007b65a64Saguzovsk } 334107b65a64Saguzovsk 334207b65a64Saguzovsk /* 3343a98e9dbfSaguzovsk * This routine should be called with amp's writer lock when there are no other 3344a98e9dbfSaguzovsk * users of amp. All pcache entries of this amp must already have been 3345a98e9dbfSaguzovsk * inactivated. We must not drop a_rwlock here to prevent new users from 3346a98e9dbfSaguzovsk * attaching to this amp.
3347a98e9dbfSaguzovsk */ 3348a98e9dbfSaguzovsk void 3349a98e9dbfSaguzovsk anonmap_purge(struct anon_map *amp) 3350a98e9dbfSaguzovsk { 3351a98e9dbfSaguzovsk ASSERT(ANON_WRITE_HELD(&amp->a_rwlock)); 3352a98e9dbfSaguzovsk ASSERT(amp->refcnt <= 1); 3353a98e9dbfSaguzovsk 3354a98e9dbfSaguzovsk if (amp->a_softlockcnt != 0) { 3355a98e9dbfSaguzovsk seg_ppurge(NULL, amp, 0); 3356a98e9dbfSaguzovsk } 3357a98e9dbfSaguzovsk 3358a98e9dbfSaguzovsk /* 3359a98e9dbfSaguzovsk * Since all pcache entries were already inactive before this routine 3360a98e9dbfSaguzovsk * was called, seg_ppurge() could not have returned while there are 3361a98e9dbfSaguzovsk * still entries that can be found via the list anchored at a_phead. 3362a98e9dbfSaguzovsk * So we can assert this list is empty now. a_softlockcnt may still be 3363a98e9dbfSaguzovsk * non-zero if the asynchronous thread that manages pcache has already 3364a98e9dbfSaguzovsk * removed pcache entries but hasn't unlocked the pages yet. If 3365a98e9dbfSaguzovsk * a_softlockcnt is non-zero we just wait on a_purgecv for 3366a98e9dbfSaguzovsk * shamp_reclaim() to finish. Even if a_softlockcnt is 0 we grab 3367a98e9dbfSaguzovsk * a_purgemtx to avoid freeing the anon map before shamp_reclaim() is 3368a98e9dbfSaguzovsk * done with it. a_purgemtx, also taken by shamp_reclaim() while 3369a98e9dbfSaguzovsk * a_softlockcnt was still non-zero, acts as a barrier that prevents 3370a98e9dbfSaguzovsk * anonmap_purge() from completing while shamp_reclaim() may still be referencing this amp. 3371a98e9dbfSaguzovsk */ 3372a98e9dbfSaguzovsk ASSERT(amp->a_phead.p_lnext == &amp->a_phead); 3373a98e9dbfSaguzovsk ASSERT(amp->a_phead.p_lprev == &amp->a_phead); 3374a98e9dbfSaguzovsk 3375a98e9dbfSaguzovsk mutex_enter(&amp->a_purgemtx); 3376a98e9dbfSaguzovsk while (amp->a_softlockcnt != 0) { 3377a98e9dbfSaguzovsk ASSERT(amp->a_phead.p_lnext == &amp->a_phead); 3378a98e9dbfSaguzovsk ASSERT(amp->a_phead.p_lprev == &amp->a_phead); 3379a98e9dbfSaguzovsk amp->a_purgewait = 1; 3380a98e9dbfSaguzovsk cv_wait(&amp->a_purgecv, &amp->a_purgemtx); 3381a98e9dbfSaguzovsk } 3382a98e9dbfSaguzovsk mutex_exit(&amp->a_purgemtx); 3383a98e9dbfSaguzovsk 3384a98e9dbfSaguzovsk ASSERT(amp->a_phead.p_lnext == &amp->a_phead); 3385a98e9dbfSaguzovsk ASSERT(amp->a_phead.p_lprev == &amp->a_phead); 3386a98e9dbfSaguzovsk ASSERT(amp->a_softlockcnt == 0); 3387a98e9dbfSaguzovsk } 3388a98e9dbfSaguzovsk 3389a98e9dbfSaguzovsk /* 33907c478bd9Sstevel@tonic-gate * Allocate and initialize an anon_map structure for seg, 33917c478bd9Sstevel@tonic-gate * associating the given swap reservation with the new anon_map. 33927c478bd9Sstevel@tonic-gate */ 33937c478bd9Sstevel@tonic-gate struct anon_map * 33942cb27123Saguzovsk anonmap_alloc(size_t size, size_t swresv, int flags) 33957c478bd9Sstevel@tonic-gate { 33967c478bd9Sstevel@tonic-gate struct anon_map *amp; 33972cb27123Saguzovsk int kmflags = (flags & ANON_NOSLEEP) ?
/*
 * Allocate and initialize an anon_map structure for a segment,
 * associating the given swap reservation with the new anon_map.
 */
struct anon_map *
anonmap_alloc(size_t size, size_t swresv, int flags)
{
        struct anon_map *amp;
        int kmflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;

        amp = kmem_cache_alloc(anonmap_cache, kmflags);
        if (amp == NULL) {
                ASSERT(kmflags == KM_NOSLEEP);
                return (NULL);
        }

        amp->ahp = anon_create(btopr(size), flags);
        if (amp->ahp == NULL) {
                ASSERT(flags == ANON_NOSLEEP);
                kmem_cache_free(anonmap_cache, amp);
                return (NULL);
        }
        amp->refcnt = 1;
        amp->size = size;
        amp->swresv = swresv;
        amp->locality = 0;
        amp->a_szc = 0;
        amp->a_sp = NULL;
        amp->a_softlockcnt = 0;
        amp->a_purgewait = 0;
        amp->a_phead.p_lnext = &amp->a_phead;
        amp->a_phead.p_lprev = &amp->a_phead;

        return (amp);
}

void
anonmap_free(struct anon_map *amp)
{
        ASSERT(amp->ahp != NULL);
        ASSERT(amp->refcnt == 0);
        ASSERT(amp->a_softlockcnt == 0);
        ASSERT(amp->a_phead.p_lnext == &amp->a_phead);
        ASSERT(amp->a_phead.p_lprev == &amp->a_phead);

        lgrp_shm_policy_fini(amp, NULL);
        anon_release(amp->ahp, btopr(amp->size));
        kmem_cache_free(anonmap_cache, amp);
}
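
/*
 * Minimal lifecycle sketch (illustrative; locking context assumed and
 * error handling elided). A client allocates an anon_map, bumps refcnt
 * for each sharer, and frees it when the last reference goes away:
 *
 *      struct anon_map *amp;
 *
 *      amp = anonmap_alloc(seg_size, 0, ANON_SLEEP);
 *      ...     use amp->ahp; amp->refcnt++ per additional sharer   ...
 *      ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
 *      if (--amp->refcnt == 0) {
 *              ANON_LOCK_EXIT(&amp->a_rwlock);
 *              anonmap_free(amp);      -- asserts refcnt == 0
 *      } else {
 *              ANON_LOCK_EXIT(&amp->a_rwlock);
 *      }
 */
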
/*
 * Returns true if the anon array has some empty slots in the checked
 * range. The offp and lenp parameters are in/out parameters. On entry
 * these values represent the starting offset and length of the
 * mapping. When true is returned, these values are modified to describe
 * the range spanning from the first through the last empty slot.
 */
int
non_anon(struct anon_hdr *ahp, ulong_t anon_idx, u_offset_t *offp,
    size_t *lenp)
{
        ulong_t i, el;
        ssize_t low, high;
        struct anon *ap;

        low = -1;
        for (i = 0, el = *lenp; i < el; i += PAGESIZE, anon_idx++) {
                ap = anon_get_ptr(ahp, anon_idx);
                if (ap == NULL) {
                        if (low == -1)
                                low = i;
                        high = i;
                }
        }
        if (low != -1) {
                /*
                 * Found at least one non-anon page.
                 * Set up the off and len return values.
                 */
                if (low != 0)
                        *offp += low;
                *lenp = high - low + PAGESIZE;
                return (1);
        }
        return (0);
}

/*
 * Return a count of the number of existing anon pages in the anon array
 * ahp in the range [anon_index, anon_index + nslots). The array and
 * slots must be guaranteed stable by the caller.
 */
pgcnt_t
anon_pages(struct anon_hdr *ahp, ulong_t anon_index, pgcnt_t nslots)
{
        pgcnt_t cnt = 0;

        while (nslots-- > 0) {
                if ((anon_get_ptr(ahp, anon_index)) != NULL)
                        cnt++;
                anon_index++;
        }
        return (cnt);
}
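
/*
 * Illustrative example (values assumed): for a mapping of eight pages
 * starting at *offp with *lenp = 8 * PAGESIZE, if only slots 2 through
 * 4 of the range are empty, non_anon() returns 1 and rewrites the range
 * to the span bounded by the first and last empty slots:
 *
 *      *offp += 2 * PAGESIZE;
 *      *lenp = 3 * PAGESIZE;
 *
 * anon_pages() over the same original range would report 5 existing
 * anon pages (the eight slots minus the three empty ones).
 */
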
/*
 * Move reserved phys swap into memory swap (unreserve phys swap
 * and reserve mem swap by the same amount).
 * Used by segspt when it needs to lock npages of reserved swap in
 * memory.
 */
int
anon_swap_adjust(pgcnt_t npages)
{
        pgcnt_t unlocked_mem_swap;

        mutex_enter(&anoninfo_lock);

        ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
        ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);

        unlocked_mem_swap = k_anoninfo.ani_mem_resv
            - k_anoninfo.ani_locked_swap;
        if (npages > unlocked_mem_swap) {
                spgcnt_t adjusted_swap = npages - unlocked_mem_swap;

                /*
                 * If there is not enough unlocked mem swap, we take the
                 * missing amount from phys swap and give it to mem swap.
                 */
                if (!page_reclaim_mem(adjusted_swap, segspt_minfree, 1)) {
                        mutex_exit(&anoninfo_lock);
                        return (ENOMEM);
                }

                k_anoninfo.ani_mem_resv += adjusted_swap;
                ASSERT(k_anoninfo.ani_phys_resv >= adjusted_swap);
                k_anoninfo.ani_phys_resv -= adjusted_swap;

                ANI_ADD(adjusted_swap);
        }
        k_anoninfo.ani_locked_swap += npages;

        ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
        ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);

        mutex_exit(&anoninfo_lock);

        return (0);
}

/*
 * 'Unlock' reserved mem swap so that when it is unreserved it
 * can be moved back to phys (disk) swap.
 */
void
anon_swap_restore(pgcnt_t npages)
{
        mutex_enter(&anoninfo_lock);

        ASSERT(k_anoninfo.ani_locked_swap <= k_anoninfo.ani_mem_resv);

        ASSERT(k_anoninfo.ani_locked_swap >= npages);
        k_anoninfo.ani_locked_swap -= npages;

        ASSERT(k_anoninfo.ani_locked_swap <= k_anoninfo.ani_mem_resv);

        mutex_exit(&anoninfo_lock);
}
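
/*
 * Illustrative pairing (assumed caller, in the spirit of the segspt
 * locking mentioned above):
 *
 *      if (anon_swap_adjust(npages) != 0)
 *              return (ENOMEM);
 *      ...     pages are now backed by locked memory swap     ...
 *      anon_swap_restore(npages);
 *
 * Between the two calls k_anoninfo.ani_locked_swap is raised by npages,
 * and the invariant ani_locked_swap <= ani_mem_resv asserted in both
 * routines continues to hold.
 */
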
/*
 * Return a pointer to the anon array slot for the
 * specified anon index.
 */
ulong_t *
anon_get_slot(struct anon_hdr *ahp, ulong_t an_idx)
{
        struct anon **app;
        void **ppp;

        ASSERT(an_idx < ahp->size);

        /*
         * Single level case.
         */
        if ((ahp->size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) {
                return ((ulong_t *)&ahp->array_chunk[an_idx]);
        } else {

                /*
                 * 2 level case.
                 */
                ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
                if (*ppp == NULL) {
                        mutex_enter(&ahp->serial_lock);
                        ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
                        if (*ppp == NULL)
                                *ppp = kmem_zalloc(PAGESIZE, KM_SLEEP);
                        mutex_exit(&ahp->serial_lock);
                }
                app = *ppp;
                return ((ulong_t *)&app[an_idx & ANON_CHUNK_OFF]);
        }
}

void
anon_array_enter(struct anon_map *amp, ulong_t an_idx, anon_sync_obj_t *sobj)
{
        ulong_t *ap_slot;
        kmutex_t *mtx;
        kcondvar_t *cv;
        int hash;

        /*
         * Use szc to determine the anon slot(s) that should appear atomic.
         * If szc = 0, then lock the anon slot and mark it busy.
         * If szc > 0, then lock the range of slots by getting the
         * anon_array_lock for the first anon slot, and mark only the
         * first anon slot busy to represent the whole range being busy.
         */

        ASSERT(RW_READ_HELD(&amp->a_rwlock));
        an_idx = P2ALIGN(an_idx, page_get_pagecnt(amp->a_szc));
        hash = ANON_ARRAY_HASH(amp, an_idx);
        sobj->sync_mutex = mtx = &anon_array_lock[hash].pad_mutex;
        sobj->sync_cv = cv = &anon_array_cv[hash];
        mutex_enter(mtx);
        ap_slot = anon_get_slot(amp->ahp, an_idx);
        while (ANON_ISBUSY(ap_slot))
                cv_wait(cv, mtx);
        ANON_SETBUSY(ap_slot);
        sobj->sync_data = ap_slot;
        mutex_exit(mtx);
}
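
/*
 * Typical usage sketch (illustrative): serialize work on one anon slot
 * (or on the large-page range containing it) while holding the amp
 * reader lock:
 *
 *      anon_sync_obj_t cookie;
 *
 *      ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
 *      anon_array_enter(amp, an_idx, &cookie);
 *      ...     examine or install the anon slot       ...
 *      anon_array_exit(&cookie);
 *      ANON_LOCK_EXIT(&amp->a_rwlock);
 */
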
int
anon_array_try_enter(struct anon_map *amp, ulong_t an_idx,
    anon_sync_obj_t *sobj)
{
        ulong_t *ap_slot;
        kmutex_t *mtx;
        int hash;

        /*
         * Try to lock a range of anon slots.
         * Use szc to determine the anon slot(s) that should appear atomic.
         * If szc = 0, then lock the anon slot and mark it busy.
         * If szc > 0, then lock the range of slots by getting the
         * anon_array_lock for the first anon slot, and mark only the
         * first anon slot busy to represent the whole range being busy.
         * Fail if the mutex or the anon array slot is busy.
         */

        ASSERT(RW_READ_HELD(&amp->a_rwlock));
        an_idx = P2ALIGN(an_idx, page_get_pagecnt(amp->a_szc));
        hash = ANON_ARRAY_HASH(amp, an_idx);
        sobj->sync_mutex = mtx = &anon_array_lock[hash].pad_mutex;
        sobj->sync_cv = &anon_array_cv[hash];
        if (!mutex_tryenter(mtx)) {
                return (EWOULDBLOCK);
        }
        ap_slot = anon_get_slot(amp->ahp, an_idx);
        if (ANON_ISBUSY(ap_slot)) {
                mutex_exit(mtx);
                return (EWOULDBLOCK);
        }
        ANON_SETBUSY(ap_slot);
        sobj->sync_data = ap_slot;
        mutex_exit(mtx);
        return (0);
}

void
anon_array_exit(anon_sync_obj_t *sobj)
{
        mutex_enter(sobj->sync_mutex);
        ASSERT(ANON_ISBUSY(sobj->sync_data));
        ANON_CLRBUSY(sobj->sync_data);
        if (CV_HAS_WAITERS(sobj->sync_cv))
                cv_broadcast(sobj->sync_cv);
        mutex_exit(sobj->sync_mutex);
}
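
/*
 * Non-blocking variant sketch (illustrative): a caller that must not
 * sleep can fall back when the slot is contended and retry later via
 * the blocking anon_array_enter() path:
 *
 *      if (anon_array_try_enter(amp, an_idx, &cookie) == EWOULDBLOCK) {
 *              ...     bail out without sleeping      ...
 *      }
 *      ...     slot is marked busy; do the work       ...
 *      anon_array_exit(&cookie);
 */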