/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


/*
 * VM - generic vnode page mapping interfaces.
 *
 * Mechanism to provide temporary mappings to vnode pages.
 * The typical use would be to copy/access file data.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>
#include <vm/vpm.h>


#ifdef SEGKPM_SUPPORT
/*
 * VPM can be disabled by setting vpm_enable = 0 in
 * /etc/system.
 */
int vpm_enable = 1;

#else

int vpm_enable = 0;

#endif

#ifdef SEGKPM_SUPPORT


int vpm_cache_enable = 1;
long vpm_cache_percent = 12;
long vpm_cache_size;
int vpm_nfreelist = 0;
int vpmd_freemsk = 0;

#define	VPM_S_PAD	64
union vpm_cpu {
	struct {
		int vcpu_free_ndx;
		ulong_t vcpu_hits;
		ulong_t vcpu_misses;
	} vcpu;
	char vpm_pad[VPM_S_PAD];
};
static union vpm_cpu *vpmd_cpu;

#define	vfree_ndx	vcpu.vcpu_free_ndx

int vpm_cachemode = VPMCACHE_LRU;

#define	PPMTX(pp)	(&(pp)->p_ilock)

static struct vpmap *vpmd_vpmap;	/* list of vpmap structs preallocated */
static struct vpmfree *vpmd_free;
#define	VPMAPMTX(vpm)	(&vpm->vpm_mtx)
#define	VPMAP2VMF(vpm)	(&vpmd_free[(vpm - vpmd_vpmap) & vpmd_freemsk])
#define	VPMAP2VMF_NDX(vpm)	(ushort_t)((vpm - vpmd_vpmap) & vpmd_freemsk)
#define	VPMP(id)	(&vpmd_vpmap[id - 1])
#define	VPMID(vpm)	(uint_t)((vpm - vpmd_vpmap) + 1)


#ifdef DEBUG

struct vpm_debug {
	int vpmd_steals;
	int vpmd_contend;
	int vpmd_prevpagelocked;
	int vpmd_getpagefailed;
	int vpmd_zerostart;
	int vpmd_emptyfreelist;
	int vpmd_nofreevpms;
} vpm_debug;

#define	VPM_DEBUG(x)	((vpm_debug.x)++)

int steals;
int steals_mtbf = 7;
int contend;
int contend_mtbf = 127;

#define	VPM_MTBF(v, f)	(((++(v)) & (f)) != (f))

#else	/* DEBUG */

#define	VPM_MTBF(v, f)	(1)
#define	VPM_DEBUG(x)	/* nothing */

#endif

/*
 * The vpm cache.
 *
 * The main purpose of having a cache here is to speed up page_lookup()
 * operations and also provide LRU (the default) behaviour for file pages.
 * The page_lookup() operation tends to be expensive if a page has to be
 * reclaimed from the system page cache (the "cachelist"). Once the
 * page_lookup()->page_reclaim() path is sped up, there should be no need
 * for this cache; the system page cache (cachelist) should effectively
 * serve the purpose of caching file pages.
 *
 * This cache is very similar to segmap's smap cache. Each page in the
 * cache is tracked by the structure vpmap_t.
 * But unlike segmap, there is no hash table. The page_t has a reference
 * to the vpmap_t when cached. For a given vnode and offset, the page is
 * found by means of a page_lookup() operation. Any page which has a
 * mapping (i.e., when cached) will not be in the system 'cachelist'.
 * Hence the page_lookup() will not have to do a page_reclaim(). That is
 * how the cache serves to speed up page_lookup() operations.
 *
 * This cache can be disabled by setting vpm_cache_enable = 0 in /etc/system.
 */

void
vpm_init()
{
	long npages;
	struct vpmap *vpm;
	struct vpmfree *vpmflp;
	int i, ndx;
	extern void prefetch_smap_w(void *);

	if (!kpm_enable) {
		vpm_enable = 0;
	}

	if (!vpm_enable || !vpm_cache_enable) {
		return;
	}

	/*
	 * Set the size of the cache.
	 */
	vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent)/100);
	if (vpm_cache_size < VPMAP_MINCACHE) {
		vpm_cache_size = VPMAP_MINCACHE;
	}

	if (vpm_cache_size > VPMAP_MAXCACHE) {
		vpm_cache_size = VPMAP_MAXCACHE;
	}

	/*
	 * Number of freelists.
	 */
	if (vpm_nfreelist == 0) {
		vpm_nfreelist = max_ncpus;
	} else if (vpm_nfreelist < 0 || vpm_nfreelist > 2 * max_ncpus) {
		cmn_err(CE_WARN, "vpm_init: number of freelists "
		    "vpm_nfreelist %d out of range, using %d",
		    vpm_nfreelist, 2 * max_ncpus);
		vpm_nfreelist = 2 * max_ncpus;
	}

	/*
	 * Round it up to the next power of 2.
	 */
	if (!ISP2(vpm_nfreelist)) {
		vpm_nfreelist = 1 << (highbit(vpm_nfreelist));
	}
	vpmd_freemsk = vpm_nfreelist - 1;

	/*
	 * Use a per cpu rotor index to spread the allocations evenly
	 * across the available vpm freelists.
	 */
	vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP);
	ndx = 0;
	for (i = 0; i < max_ncpus; i++) {

		vpmd_cpu[i].vfree_ndx = ndx;
		ndx = (ndx + 1) & vpmd_freemsk;
	}

	/*
	 * Allocate and initialize the freelist.
	 */
	vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree),
	    KM_SLEEP);
	for (i = 0; i < vpm_nfreelist; i++) {

		vpmflp = &vpmd_free[i];
		/*
		 * Set up initial queue pointers. They will get flipped
		 * back and forth.
		 */
		vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ];
		vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ];
	}

	npages = mmu_btop(vpm_cache_size);


	/*
	 * Allocate and initialize the vpmap structs. We need to
	 * walk the array backwards as the prefetch happens in reverse
	 * order.
	 */
	vpmd_vpmap = kmem_alloc(sizeof (struct vpmap) * npages, KM_SLEEP);
	for (vpm = &vpmd_vpmap[npages - 1]; vpm >= vpmd_vpmap; vpm--) {
		struct vpmfree *vpmflp;
		union vpm_freeq *releq;
		struct vpmap *vpmapf;

		/*
		 * Use prefetch as we have to walk through a large number of
		 * these data structures. We just use the smap's prefetch
		 * routine as it does the same.
		 */
		prefetch_smap_w((void *)vpm);

		vpm->vpm_vp = NULL;
		vpm->vpm_off = 0;
		vpm->vpm_pp = NULL;
		vpm->vpm_refcnt = 0;
		mutex_init(&vpm->vpm_mtx, NULL, MUTEX_DEFAULT, NULL);
		vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm);

		vpmflp = VPMAP2VMF(vpm);
		releq = vpmflp->vpm_releq;

		vpmapf = releq->vpmq_free;
		if (vpmapf == NULL) {
			releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		} else {
			vpm->vpm_next = vpmapf;
			vpm->vpm_prev = vpmapf->vpm_prev;
			vpmapf->vpm_prev = vpm;
			vpm->vpm_prev->vpm_next = vpm;
			releq->vpmq_free = vpm->vpm_next;
		}

		/*
		 * Indicate that the vpmap is on the releq at start.
		 */
		vpm->vpm_ndxflg = VPMRELEQ;
	}
}

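/*
 * Illustrative /etc/system fragment for the tunables consumed by
 * vpm_init() above. The values below are arbitrary examples, not
 * recommendations; the variables themselves are the ones declared
 * earlier in this file.
 *
 *	set vpm_enable = 1
 *	set vpm_cache_enable = 1
 *	set vpm_cache_percent = 15
 *	set vpm_nfreelist = 16
 */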

/*
 * Unhooks vpm from the freelist if it is still on the freelist.
 */
#define	VPMAP_RMFREELIST(vpm) \
	{ \
		if (vpm->vpm_next != NULL) { \
			union vpm_freeq *freeq; \
			struct vpmfree *vpmflp; \
			vpmflp = &vpmd_free[vpm->vpm_free_ndx]; \
			freeq = &vpmflp->vpm_freeq[vpm->vpm_ndxflg]; \
			mutex_enter(&freeq->vpmq_mtx); \
			if (freeq->vpmq_free != vpm) { \
				vpm->vpm_prev->vpm_next = vpm->vpm_next; \
				vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
			} else if (vpm == vpm->vpm_next) { \
				freeq->vpmq_free = NULL; \
			} else { \
				freeq->vpmq_free = vpm->vpm_next; \
				vpm->vpm_prev->vpm_next = vpm->vpm_next; \
				vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
			} \
			mutex_exit(&freeq->vpmq_mtx); \
			vpm->vpm_next = vpm->vpm_prev = NULL; \
		} \
	}

/*
 * Pick the freelist to allocate from. With the default VPMCACHE_LRU
 * mode this is a simple per-CPU rotor over the freelists.
 */
static int
get_freelndx(int mode)
{
	int ndx;

	ndx = vpmd_cpu[CPU->cpu_seqid].vfree_ndx & vpmd_freemsk;
	switch (mode) {

	case VPMCACHE_LRU:
	default:
		vpmd_cpu[CPU->cpu_seqid].vfree_ndx++;
		break;
	}
	return (ndx);
}


/*
 * Find one vpmap structure from the free lists and use it for the newpage.
 * The previous page it cached is disassociated and released. The page_t's
 * p_vpmref is cleared only when the vpm it points to is locked (or, on
 * amd64, when the page is exclusively locked in page_unload(), because
 * the p_vpmref is treated as a mapping).
 *
 * The page's p_vpmref is set when the page is
 * locked (at least SHARED locked).
 */
static struct vpmap *
get_free_vpmap(page_t *newpage)
{
	struct vpmfree *vpmflp;
	kmutex_t *vmtx;
	struct vpmap *vpm, *first;
	union vpm_freeq *allocq, *releq;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;
	int free_ndx;

	/*
	 * Get the freelist bin index.
	 */
	free_ndx = get_freelndx(vpm_cachemode);

	end_ndx = free_ndx;
	vpmflp = &vpmd_free[free_ndx];

retry_queue:
	allocq = vpmflp->vpm_allocq;
	mutex_enter(&allocq->vpmq_mtx);

	if ((vpm = allocq->vpmq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (vpmflp->vpm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->vpmq_mtx);
			goto retry_queue;
		}
		releq = vpmflp->vpm_releq;
		if (!mutex_tryenter(&releq->vpmq_mtx)) {
			/* cannot get releq; a free vpmap may be there now */
			mutex_exit(&allocq->vpmq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->vpmq_mtx. In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->vpmq_mtx);
			mutex_exit(&releq->vpmq_mtx);
			goto retry_queue;
		}
		if (releq->vpmq_free == NULL) {
			VPM_DEBUG(vpmd_emptyfreelist);
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the vpmap after
			 * accessing the data. Before resorting
			 * to sleeping, try the next list of the same color.
			 */
			free_ndx = (free_ndx + 1) & vpmd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->vpmq_mtx);
				mutex_exit(&allocq->vpmq_mtx);
				vpmflp = &vpmd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists.
			 * Wait on this list and hope something gets freed.
			 */
			vpmflp->vpm_want++;
			mutex_exit(&vpmflp->vpm_freeq[1].vpmq_mtx);
			cv_wait(&vpmflp->vpm_free_cv,
			    &vpmflp->vpm_freeq[0].vpmq_mtx);
			vpmflp->vpm_want--;
			mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
			vpmflp = &vpmd_free[free_ndx];
			VPM_DEBUG(vpmd_nofreevpms);
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			vpmflp->vpm_allocq = releq;
			vpmflp->vpm_releq = allocq;
			mutex_exit(&allocq->vpmq_mtx);
			mutex_exit(&releq->vpmq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		int gotnewvpm;
		kmutex_t *pmtx;
		uint_t vpmref;

		/*
		 * Fast path the case where we get the vpmap mutex
		 * on the first try.
		 */
		first = vpm;
next_vpmap:
		vmtx = VPMAPMTX(vpm);
		if (!mutex_tryenter(vmtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or vpmap.
			 */
			if ((vpm = vpm->vpm_next) == first) {
				goto skip_queue;
			} else {
				goto next_vpmap;
			}
		}

		/*
		 * Assign this vpm to the newpage.
		 */
		pmtx = PPMTX(newpage);
		gotnewvpm = 0;
		mutex_enter(pmtx);

		/*
		 * Check if some other thread already assigned a vpm to
		 * this page.
		 */
		if ((vpmref = newpage->p_vpmref) == 0) {
			newpage->p_vpmref = VPMID(vpm);
			gotnewvpm = 1;
		} else {
			VPM_DEBUG(vpmd_contend);
			mutex_exit(vmtx);
		}
		mutex_exit(pmtx);

		if (gotnewvpm) {

			/*
			 * At this point, we've selected the vpm. Remove vpm
			 * from its freelist. If vpm is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == vpm) {
				ASSERT(first == allocq->vpmq_free);
				allocq->vpmq_free = vpm->vpm_next;
			}

			/*
			 * If the head of the freelist still points to vpm,
			 * then there are no more free vpmaps in that list.
			 */
			if (allocq->vpmq_free == vpm)
				/*
				 * Took the last one
				 */
				allocq->vpmq_free = NULL;
			else {
				vpm->vpm_prev->vpm_next = vpm->vpm_next;
				vpm->vpm_next->vpm_prev = vpm->vpm_prev;
			}
			mutex_exit(&allocq->vpmq_mtx);
			vpm->vpm_prev = vpm->vpm_next = NULL;

			/*
			 * Disassociate the previous page.
			 * p_vpmref is used as a mapping reference to the page.
			 */
			if ((pp = vpm->vpm_pp) != NULL &&
			    vpm->vpm_vp == pp->p_vnode &&
			    vpm->vpm_off == pp->p_offset) {

				pmtx = PPMTX(pp);
				if (page_trylock(pp, SE_SHARED)) {
					/*
					 * Now verify that it is the correct
					 * page. If not, someone else stole it,
					 * so just unlock it and leave.
					 */
					mutex_enter(pmtx);
					if (PP_ISFREE(pp) ||
					    vpm->vpm_vp != pp->p_vnode ||
					    vpm->vpm_off != pp->p_offset ||
					    pp->p_vpmref != VPMID(vpm)) {
						mutex_exit(pmtx);

						page_unlock(pp);
					} else {
						/*
						 * Release the page.
						 */
						pp->p_vpmref = 0;
						mutex_exit(pmtx);
						(void) page_release(pp, 1);
					}
				} else {
					/*
					 * If the page cannot be locked, just
					 * clear the p_vpmref and go.
					 */
					mutex_enter(pmtx);
					if (pp->p_vpmref == VPMID(vpm)) {
						pp->p_vpmref = 0;
					}
					mutex_exit(pmtx);
					VPM_DEBUG(vpmd_prevpagelocked);
				}
			}

			/*
			 * Set up vpm to point to the new page.
			 */
			vpm->vpm_pp = newpage;
			vpm->vpm_vp = newpage->p_vnode;
			vpm->vpm_off = newpage->p_offset;

		} else {
			int steal = !VPM_MTBF(steals, steals_mtbf);
			/*
			 * The page already has a vpm assigned; just use that.
			 * Grab the vpm mutex and verify that it is still
			 * the correct one. The pp->p_vpmref should not change
			 * once we have the vpm mutex and the page lock.
			 */
			mutex_exit(&allocq->vpmq_mtx);
			vpm = VPMP(vpmref);
			vmtx = VPMAPMTX(vpm);
			mutex_enter(vmtx);
			if ((steal && vpm->vpm_refcnt == 0) ||
			    vpm->vpm_pp != newpage) {
				/*
				 * The vpm got stolen; clear the p_vpmref
				 * and retry.
				 */
				pmtx = PPMTX(newpage);
				mutex_enter(pmtx);
				if (newpage->p_vpmref == vpmref) {
					newpage->p_vpmref = 0;
				}
				mutex_exit(pmtx);

				mutex_exit(vmtx);
				VPM_DEBUG(vpmd_steals);
				goto retry_queue;
			} else if (vpm->vpm_refcnt == 0) {
				/*
				 * Remove it from the free list if it
				 * exists there.
				 */
				VPMAP_RMFREELIST(vpm);
			}
		}
		return (vpm);
	}
}

/*
 * Return a vpmap to its freelist. Called with the vpmap mutex held;
 * the vpmap must no longer be referenced (vpm_refcnt == 0).
 */
static void
free_vpmap(struct vpmap *vpm)
{
	struct vpmfree *vpmflp;
	struct vpmap *vpmfreelist;
	union vpm_freeq *releq;

	ASSERT(MUTEX_HELD(VPMAPMTX(vpm)));

	if (vpm->vpm_refcnt != 0) {
		panic("free_vpmap");
		/*NOTREACHED*/
	}

	vpmflp = &vpmd_free[vpm->vpm_free_ndx];
	/*
	 * Add to the tail of the release queue. Note that vpm_releq
	 * and vpm_allocq could toggle before we get the lock. This
	 * does not affect correctness as the two queues are only
	 * maintained to reduce lock pressure.
	 */
	releq = vpmflp->vpm_releq;
	if (releq == &vpmflp->vpm_freeq[0]) {
		vpm->vpm_ndxflg = 0;
	} else {
		vpm->vpm_ndxflg = 1;
	}
	mutex_enter(&releq->vpmq_mtx);
	vpmfreelist = releq->vpmq_free;
	if (vpmfreelist == 0) {
		int want;

		releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		/*
		 * Both queue mutexes are held to set vpm_want;
		 * snapshot the value before dropping the releq mutex.
		 * If vpm_want appears after the releq mutex is dropped,
		 * then the vpmap just freed is already gone.
		 */
		want = vpmflp->vpm_want;
		mutex_exit(&releq->vpmq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex,
		 * then recheck after obtaining the vpm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&vpmflp->vpm_freeq[0].vpmq_mtx);
			if (vpmflp->vpm_want)
				cv_signal(&vpmflp->vpm_free_cv);
			mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
		}
	} else {
		vpm->vpm_next = vpmfreelist;
		vpm->vpm_prev = vpmfreelist->vpm_prev;
		vpmfreelist->vpm_prev = vpm;
		vpm->vpm_prev->vpm_next = vpm;
		mutex_exit(&releq->vpmq_mtx);
	}
}

/*
 * Get the vpmap for the page.
 * The refcnt of this vpm is incremented.
 */
static struct vpmap *
get_vpmap(page_t *pp)
{
	struct vpmap *vpm = NULL;
	kmutex_t *vmtx;
	kmutex_t *pmtx;
	unsigned int refid;

	ASSERT((pp != NULL) && PAGE_LOCKED(pp));

	if (VPM_MTBF(contend, contend_mtbf) && (refid = pp->p_vpmref) != 0) {
		vpm = VPMP(refid);
		vmtx = VPMAPMTX(vpm);
		mutex_enter(vmtx);
		/*
		 * Since we have the page lock and the vpm mutex, the
		 * pp->p_vpmref cannot change.
		 */
		if (vpm->vpm_pp != pp) {
			pmtx = PPMTX(pp);

			/*
			 * Clear the p_vpmref as it is incorrect.
			 * This can happen if the page was stolen.
			 * On amd64 this should not happen as p_vpmref
			 * is treated as a mapping on the page. So
			 * if the page is stolen, the mapping would have
			 * been cleared in page_unload().
			 */
			mutex_enter(pmtx);
			if (pp->p_vpmref == refid)
				pp->p_vpmref = 0;
			mutex_exit(pmtx);

			mutex_exit(vmtx);
			vpm = NULL;
		} else if (vpm->vpm_refcnt == 0) {
			/*
			 * Got the vpm; remove it from the free
			 * list if it exists there.
			 */
			VPMAP_RMFREELIST(vpm);
		}
	}
	if (vpm == NULL) {
		/*
		 * get_free_vpmap() returns with the vpmap mutex held.
		 */
		vpm = get_free_vpmap(pp);
		vmtx = VPMAPMTX(vpm);
		vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_misses++;
	} else {
		vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_hits++;
	}

	vpm->vpm_refcnt++;
	mutex_exit(vmtx);

	return (vpm);
}

/* END --- vpm cache ---- */

/*
 * The vnode page mapping (vpm) interface routines.
 */

/*
 * Find or create the pages starting from baseoff for the specified
 * length 'len'.
 */
static int
vpm_pagecreate(
	struct vnode *vp,
	u_offset_t baseoff,
	size_t len,
	vmap_t vml[],
	int nseg,
	int *newpage)
{

	page_t *pp = NULL;
	caddr_t base;
	u_offset_t off = baseoff;
	int i;
	ASSERT(nseg >= MINVMAPS && nseg <= MAXVMAPS);

	for (i = 0; len > 0; len -= PAGESIZE, i++) {
		struct vpmap *vpm;


		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {

			base = segkpm_create_va(off);

			/*
			 * The seg pointer passed in is just advisory. Just
			 * pass segkmap for now like segmap does with
			 * segmap_kpm enabled.
			 */
			if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
			    segkmap, base)) == NULL) {
				panic("segmap_pagecreate_vpm: "
				    "page_create failed");
				/*NOTREACHED*/
			}
			if (newpage != NULL)
				*newpage = 1;

			page_io_unlock(pp);
		}

		/*
		 * Get the vpm for this page_t.
		 */
		if (vpm_cache_enable) {
			vpm = get_vpmap(pp);
			vml[i].vs_data = (void *)&vpm->vpm_pp;
		} else {
			vml[i].vs_data = (void *)pp;
			pp->p_vpmref = 0;
		}

		vml[i].vs_addr = hat_kpm_mapin(pp, 0);
		vml[i].vs_len = PAGESIZE;

		off += PAGESIZE;
	}
	vml[i].vs_data = NULL;
	vml[i].vs_addr = (caddr_t)NULL;
	return (0);
}


/*
 * Returns vpm mappings of pages in the range [off, off + len], where
 * len is rounded up to the PAGESIZE boundary. The list of pages and
 * the page addresses are returned in the SGL vml (vmap_t) array passed in.
 * nseg is the number of vmap_t entries in the array.
 * (An illustrative caller-side sketch follows the function below.)
 *
 * The segmap SM_LOCKPROTO usage is not supported by these interfaces.
 * For such cases, use the seg_map interfaces.
 */
int
vpm_map_pages(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int fetchpage,
	vmap_t *vml,
	int nseg,
	int *newpage,
	enum seg_rw rw)
{
	extern struct vnode *common_specvp();
	u_offset_t baseoff;
	uint_t prot;
	caddr_t base;
	page_t *pp, *pplist[MAXVMAPS];
	struct vpmap *vpm;
	int i, error = 0;
	size_t tlen;

	ASSERT(nseg >= MINVMAPS && nseg <= MAXVMAPS);
	baseoff = off & (offset_t)PAGEMASK;
	vml[0].vs_data = NULL;
	vml[0].vs_addr = (caddr_t)NULL;

	tlen = P2ROUNDUP(off + len, PAGESIZE) - baseoff;
	/*
	 * Restrict the length to VPMMAXPGS pages.
	 */
	if (tlen > (VPMMAXPGS * PAGESIZE)) {
		tlen = VPMMAXPGS * PAGESIZE;
	}
	/*
	 * Ensure length fits within the vml[] array. One element of
	 * the array is used to mark the end of the scatter/gather list
	 * of valid mappings by setting its vs_addr = NULL. Leave space
	 * for this element.
	 */
	if (tlen > ((nseg - 1) * PAGESIZE)) {
		tlen = ((nseg - 1) * PAGESIZE);
	}
	len = tlen;

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);


	if (!fetchpage)
		return (vpm_pagecreate(vp, baseoff, len, vml, nseg, newpage));

	for (i = 0; len > 0; len -= PAGESIZE, i++, pplist[i] = NULL) {

		pp = page_lookup(vp, baseoff, SE_SHARED);

		/*
		 * If we did not find the page or if this page was not
		 * in the vpm cache (p_vpmref == 0), then let VOP_GETPAGE get
		 * all the pages.
		 * We need to call VOP_GETPAGE so that filesystems can do any
		 * necessary tracking for sequential access.
		 */

		if (pp == NULL || (vpm_cache_enable && pp->p_vpmref == 0) ||
		    (rw == S_WRITE && hat_page_getattr(pp, P_MOD | P_REF)
		    != (P_MOD | P_REF))) {
			int j;
			if (pp != NULL) {
				page_unlock(pp);
			}
			/*
			 * If we did not find the desired set of pages
			 * in the page cache, just call VOP_GETPAGE to get
			 * all the pages.
			 */
			for (j = 0; j < i; j++) {
				page_unlock(pplist[j]);
			}


			baseoff = off & (offset_t)PAGEMASK;
			/*
			 * Pass a dummy address as it will be required
			 * by page_create_va(). We pass segkmap as the seg
			 * as some file systems (UFS) check it.
			 */
			base = segkpm_create_va(baseoff);

			error = VOP_GETPAGE(vp, baseoff, tlen, &prot, pplist,
			    tlen, segkmap, base, rw, CRED(), NULL);
			if (error) {
				VPM_DEBUG(vpmd_getpagefailed);
				pplist[0] = NULL;
			}
			break;
		} else {
			pplist[i] = pp;
			baseoff += PAGESIZE;
		}
	}

	if (error) {
		for (i = 0; pplist[i] != NULL; i++) {
			page_unlock(pplist[i]);
			pplist[i] = NULL;
		}
		vml[0].vs_addr = NULL;
		vml[0].vs_data = NULL;
		return (error);
	}

	/*
	 * Get the vpm's for pages.
	 */
	for (i = 0; pplist[i] != NULL; i++) {
		if (vpm_cache_enable) {
			vpm = get_vpmap(pplist[i]);
			vml[i].vs_data = (void *)&(vpm->vpm_pp);
		} else {
			vml[i].vs_data = (void *)pplist[i];
			pplist[i]->p_vpmref = 0;
		}

		vml[i].vs_addr = hat_kpm_mapin(pplist[i], 0);
		vml[i].vs_len = PAGESIZE;
	}

	vml[i].vs_data = NULL;
	vml[i].vs_addr = (caddr_t)NULL;

	return (0);
}

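/*
 * Illustrative caller-side sketch (not compiled) of the scatter/gather
 * usage of vpm_map_pages() and vpm_unmap_pages() above. The vnode 'vp',
 * offset 'off' and length 'n' stand for hypothetical caller state.
 *
 *	vmap_t vml[MINVMAPS];
 *	int error, i;
 *
 *	error = vpm_map_pages(vp, off, n, 1, vml, MINVMAPS, NULL, S_READ);
 *	if (error == 0) {
 *		for (i = 0; vml[i].vs_addr != NULL; i++)
 *			access vml[i].vs_len bytes at vml[i].vs_addr;
 *		vpm_unmap_pages(vml, S_READ);
 *	}
 */
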
/*
 * Release the vpm mappings on the pages and unlock them.
 */
void
vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
{
	int i;
	struct vpmap *vpm;
	kmutex_t *mtx;
	page_t *pp;

	for (i = 0; vml[i].vs_data != NULL; i++) {
		ASSERT(IS_KPM_ADDR(vml[i].vs_addr));

		if (vpm_cache_enable) {
			pp = *(((page_t **)vml[i].vs_data));
		} else {
			pp = (page_t *)vml[i].vs_data;
		}

		/*
		 * Mark the page as modified or referenced, because vpm pages
		 * would not cause faults where it would be set normally.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			ASSERT(rw == S_READ);
			hat_setref(pp);
		}

		if (vpm_cache_enable) {
			vpm = (struct vpmap *)((char *)vml[i].vs_data
			    - offsetof(struct vpmap, vpm_pp));
			hat_kpm_mapout(pp, 0, vml[i].vs_addr);
			page_unlock(pp);
			mtx = VPMAPMTX(vpm);
			mutex_enter(mtx);

			if (--vpm->vpm_refcnt == 0) {
				free_vpmap(vpm);
			}
			mutex_exit(mtx);
		} else {
			hat_kpm_mapout(pp, 0, vml[i].vs_addr);
			(void) page_release(pp, 1);
		}
		vml[i].vs_data = NULL;
		vml[i].vs_addr = NULL;
	}
}

/*
 * Given the vp, off and the uio structure, this routine will do
 * the copy (uiomove). If the last page created is partially written,
 * the rest of the page is zeroed out. It also zeros the beginning of
 * the first page up to the start offset if requested (zerostart).
 * If pages are to be fetched, it will call the filesystem's getpage
 * function (VOP_GETPAGE) to get them; otherwise they will be created if
 * not already present in the page cache.
 * (An illustrative caller-side sketch follows the function below.)
 */
int
vpm_data_copy(struct vnode *vp,
	u_offset_t off,
	size_t len,
	struct uio *uio,
	int fetchpage,
	int *newpage,
	int zerostart,
	enum seg_rw rw)
{
	int error;
	struct vmap vml[MINVMAPS];
	enum uio_rw uiorw;
	int npages = 0;

	uiorw = (rw == S_WRITE) ? UIO_WRITE : UIO_READ;
	/*
	 * 'off' will be the offset where the I/O starts.
	 * We get the pages starting at the (off & PAGEMASK)
	 * page boundary.
	 */
	error = vpm_map_pages(vp, off, (uint_t)len,
	    fetchpage, vml, MINVMAPS, &npages, rw);

	if (newpage != NULL)
		*newpage = npages;
	if (!error) {
		int i, pn, slen = len;
		int pon = off & PAGEOFFSET;

		/*
		 * Clear from the beginning of the page to the start offset
		 * if requested.
		 */
		if (!fetchpage && zerostart) {
			(void) kzero(vml[0].vs_addr, (uint_t)pon);
			VPM_DEBUG(vpmd_zerostart);
		}

		for (i = 0; !error && slen > 0 &&
		    vml[i].vs_addr != NULL; i++) {
			pn = (int)MIN(slen, (PAGESIZE - pon));
			error = uiomove(vml[i].vs_addr + pon,
			    (long)pn, uiorw, uio);
			slen -= pn;
			pon = 0;
		}

		/*
		 * When new pages are created, zero out part of the
		 * page we did not copy to.
		 */
		if (!fetchpage && npages &&
		    uio->uio_loffset < roundup(off + len, PAGESIZE)) {
			int nzero;

			pon = (uio->uio_loffset & PAGEOFFSET);
			nzero = PAGESIZE - pon;
			i = (uio->uio_loffset - (off & PAGEMASK)) / PAGESIZE;
			(void) kzero(vml[i].vs_addr + pon, (uint_t)nzero);
		}
		vpm_unmap_pages(vml, rw);
	}
	return (error);
}

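/*
 * Illustrative sketch (not compiled) of the typical copy path through
 * vpm_data_copy() above, as a file system write routine might use it.
 * 'vp', 'uio', 'off', 'n', 'pon', 'base' and 'pagecreate' stand for
 * hypothetical caller state; the segmap branch shows the older interface
 * this replaces.
 *
 *	if (vpm_enable) {
 *		error = vpm_data_copy(vp, off, n, uio, !pagecreate, NULL,
 *		    0, S_WRITE);
 *	} else {
 *		base = segmap_getmapflt(segkmap, vp, off, n, !pagecreate,
 *		    S_WRITE);
 *		error = uiomove(base + pon, n, UIO_WRITE, uio);
 *		...
 *	}
 *
 * The dirty range can later be pushed out, asynchronously in this
 * example, with vpm_sync_pages() below:
 *
 *	(void) vpm_sync_pages(vp, off, n, SM_ASYNC);
 */
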
/*
 * Called to flush pages of the given vnode covering the
 * range [off, off + len].
 */
int
vpm_sync_pages(struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t flags)
{
	extern struct vnode *common_specvp();
	int bflags = 0;
	int error = 0;
	size_t psize = roundup(len, PAGESIZE);

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_ASYNC)
			bflags |= B_ASYNC;
		if (flags & SM_INVAL)
			bflags |= B_INVAL;
		if (flags & SM_DESTROY)
			bflags |= (B_INVAL|B_TRUNC);
		if (flags & SM_FREE)
			bflags |= B_FREE;
		if (flags & SM_DONTNEED)
			bflags |= B_DONTNEED;

		error = VOP_PUTPAGE(vp, off, psize, bflags, CRED(), NULL);
	}

	return (error);
}


#else	/* SEGKPM_SUPPORT */

/* vpm stubs */
void
vpm_init()
{
}

/*ARGSUSED*/
int
vpm_pagecreate(
	struct vnode *vp,
	u_offset_t baseoff,
	size_t len,
	vmap_t vml[],
	int nseg,
	int *newpage)
{
	return (0);
}

/*ARGSUSED*/
int
vpm_map_pages(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int fetchpage,
	vmap_t vml[],
	int nseg,
	int *newpage,
	enum seg_rw rw)
{
	return (0);
}

/*ARGSUSED*/
int
vpm_data_copy(struct vnode *vp,
	u_offset_t off,
	size_t len,
	struct uio *uio,
	int fetchpage,
	int *newpage,
	int zerostart,
	enum seg_rw rw)
{
	return (0);
}

/*ARGSUSED*/
void
vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
{
}
/*ARGSUSED*/
int
vpm_sync_pages(struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t flags)
{
	return (0);
}
#endif	/* SEGKPM_SUPPORT */