/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


/*
 * VM - generic vnode page mapping interfaces.
 *
 * Mechanism to provide temporary mappings to vnode pages.
 * The typical use would be to copy/access file data.
 */
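
/*
 * Illustrative usage sketch (a hypothetical caller, not code from this
 * file): a file system read or write path would typically let
 * vpm_data_copy() do the page mapping, uiomove and unmap in one call.
 * The vnode, uio and byte count below are assumed to come from the
 * caller's VOP_READ/VOP_WRITE arguments; error handling is elided.
 *
 *	error = vpm_data_copy(vp, uio->uio_loffset, n, uio,
 *	    1, NULL, 0, S_READ);
 *
 * For a write that fully overwrites the affected pages, the 'fetchpage'
 * argument (the fifth one) can be 0 so the pages are created rather than
 * read in from the file system.
 */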

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>
#include <vm/vpm.h>


#ifdef	SEGKPM_SUPPORT
/*
 * VPM can be disabled by setting vpm_enable = 0 in
 * /etc/system.
 *
 */
int vpm_enable = 1;

#else

int vpm_enable = 0;

#endif

#ifdef	SEGKPM_SUPPORT


int	vpm_cache_enable = 1;
long	vpm_cache_percent = 12;
long	vpm_cache_size;
int	vpm_nfreelist = 0;
int	vpmd_freemsk = 0;

#define	VPM_S_PAD	64
union vpm_cpu {
	struct {
		int	vcpu_free_ndx;
		ulong_t	vcpu_hits;
		ulong_t	vcpu_misses;
	} vcpu;
	char vpm_pad[VPM_S_PAD];
};
static union vpm_cpu	*vpmd_cpu;

#define	vfree_ndx	vcpu.vcpu_free_ndx

int	vpm_cachemode = VPMCACHE_LRU;

#define	PPMTX(pp)	(&(pp)->p_ilock)

static struct vpmap *vpmd_vpmap;	/* list of vpmap structs preallocated */
static struct vpmfree *vpmd_free;
#define	VPMAPMTX(vpm)	(&vpm->vpm_mtx)
#define	VPMAP2VMF(vpm)	(&vpmd_free[(vpm - vpmd_vpmap) & vpmd_freemsk])
#define	VPMAP2VMF_NDX(vpm)	(ushort_t)((vpm - vpmd_vpmap) & vpmd_freemsk)
#define	VPMP(id)	(&vpmd_vpmap[id - 1])
#define	VPMID(vpm)	(uint_t)((vpm - vpmd_vpmap) + 1)


#ifdef	DEBUG

struct vpm_debug {
	int vpmd_steals;
	int vpmd_contend;
	int vpmd_prevpagelocked;
	int vpmd_getpagefailed;
	int vpmd_zerostart;
	int vpmd_emptyfreelist;
	int vpmd_nofreevpms;
} vpm_debug;

#define	VPM_DEBUG(x)	((vpm_debug.x)++)

int	steals;
int	steals_mtbf = 7;
int	contend;
int	contend_mtbf = 127;

#define	VPM_MTBF(v, f)	(((++(v)) & (f)) != (f))

#else	/* DEBUG */

#define	VPM_MTBF(v, f)	(1)
#define	VPM_DEBUG(x)	/* nothing */

#endif

/*
 * The vpm cache.
 *
 * The main purpose of having a cache here is to speed up page_lookup()
 * operations and also provide an LRU (default) behaviour for file pages.
 * The page_lookup() operation tends to be expensive if a page has to be
 * reclaimed from the system page cache ("cachelist"). Once we speed up the
 * page_lookup()->page_reclaim() path there should be no need for
 * this cache. The system page cache (cachelist) should effectively serve
 * the purpose of caching file pages.
 *
 * This cache is very similar to segmap's smap cache. Each page in the
 * cache is tracked by the structure vpmap_t. But unlike segmap, there is no
 * hash table. The page_t has a reference to the vpmap_t when cached. For a
 * given vnode and offset the page is found by means of a page_lookup()
 * operation. Any page which has a mapping (i.e. when cached) will not be
 * in the system 'cachelist'. Hence the page_lookup() will not have to do
 * a page_reclaim(). That is how the cache serves to speed up page_lookup()
 * operations.
 *
 * This cache can be disabled by setting vpm_cache_enable = 0 in /etc/system.
 */

void
vpm_init()
{
	long npages;
	struct vpmap *vpm;
	struct vpmfree *vpmflp;
	int i, ndx;
	extern void prefetch_smap_w(void *);

	if (!kpm_enable) {
		vpm_enable = 0;
	}

	if (!vpm_enable || !vpm_cache_enable) {
		return;
	}

	/*
	 * Set the size of the cache.
	 */
	vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent)/100);
	if (vpm_cache_size < VPMAP_MINCACHE) {
		vpm_cache_size = VPMAP_MINCACHE;
	}

	if (vpm_cache_size > VPMAP_MAXCACHE) {
		vpm_cache_size = VPMAP_MAXCACHE;
	}

	/*
	 * Number of freelists.
	 */
	if (vpm_nfreelist == 0) {
		vpm_nfreelist = max_ncpus;
	} else if (vpm_nfreelist < 0 || vpm_nfreelist > 2 * max_ncpus) {
		cmn_err(CE_WARN, "vpmap create : number of freelist "
		    "vpm_nfreelist %d using %d", vpm_nfreelist, max_ncpus);
		vpm_nfreelist = 2 * max_ncpus;
	}

	/*
	 * Round it up to the next power of 2
	 */
	if (!ISP2(vpm_nfreelist)) {
		vpm_nfreelist = 1 << (highbit(vpm_nfreelist));
	}
	vpmd_freemsk = vpm_nfreelist - 1;

	/*
	 * Use a per cpu rotor index to spread the allocations evenly
	 * across the available vpm freelists.
	 */
	vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP);
	ndx = 0;
	for (i = 0; i < max_ncpus; i++) {

		vpmd_cpu[i].vfree_ndx = ndx;
		ndx = (ndx + 1) & vpmd_freemsk;
	}

	/*
	 * Allocate and initialize the freelist.
	 */
	vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree),
	    KM_SLEEP);
	for (i = 0; i < vpm_nfreelist; i++) {

		vpmflp = &vpmd_free[i];
		/*
		 * Set up initial queue pointers. They will get flipped
		 * back and forth.
		 */
		vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ];
		vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ];
	}

	npages = mmu_btop(vpm_cache_size);


	/*
	 * Allocate and initialize the vpmap structs. We need to
	 * walk the array backwards as the prefetch happens in reverse
	 * order.
	 */
	vpmd_vpmap = kmem_alloc(sizeof (struct vpmap) * npages, KM_SLEEP);
	for (vpm = &vpmd_vpmap[npages - 1]; vpm >= vpmd_vpmap; vpm--) {
		struct vpmfree *vpmflp;
		union vpm_freeq *releq;
		struct vpmap *vpmapf;

		/*
		 * Use prefetch as we have to walk thru a large number of
		 * these data structures. We just use the smap's prefetch
		 * routine as it does the same.
		 */
		prefetch_smap_w((void *)vpm);

		vpm->vpm_vp = NULL;
		vpm->vpm_off = 0;
		vpm->vpm_pp = NULL;
		vpm->vpm_refcnt = 0;
		mutex_init(&vpm->vpm_mtx, NULL, MUTEX_DEFAULT, NULL);
		vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm);

		vpmflp = VPMAP2VMF(vpm);
		releq = vpmflp->vpm_releq;

		vpmapf = releq->vpmq_free;
		if (vpmapf == NULL) {
			releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		} else {
			vpm->vpm_next = vpmapf;
			vpm->vpm_prev = vpmapf->vpm_prev;
			vpmapf->vpm_prev = vpm;
			vpm->vpm_prev->vpm_next = vpm;
			releq->vpmq_free = vpm->vpm_next;
		}

		/*
		 * Indicate that the vpmap is on the releq at start
		 */
		vpm->vpm_ndxflg = VPMRELEQ;
	}
}


/*
 * unhooks vpm from the freelist if it is still on the freelist.
 */
#define	VPMAP_RMFREELIST(vpm) \
	{ \
		if (vpm->vpm_next != NULL) { \
			union vpm_freeq *freeq; \
			struct vpmfree *vpmflp; \
			vpmflp = &vpmd_free[vpm->vpm_free_ndx]; \
			freeq = &vpmflp->vpm_freeq[vpm->vpm_ndxflg]; \
			mutex_enter(&freeq->vpmq_mtx); \
			if (freeq->vpmq_free != vpm) { \
				vpm->vpm_prev->vpm_next = vpm->vpm_next; \
				vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
			} else if (vpm == vpm->vpm_next) { \
				freeq->vpmq_free = NULL; \
			} else { \
				freeq->vpmq_free = vpm->vpm_next; \
				vpm->vpm_prev->vpm_next = vpm->vpm_next; \
				vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
			} \
			mutex_exit(&freeq->vpmq_mtx); \
			vpm->vpm_next = vpm->vpm_prev = NULL; \
		} \
	}

static int
get_freelndx(int mode)
{
	int ndx;

	ndx = vpmd_cpu[CPU->cpu_seqid].vfree_ndx & vpmd_freemsk;
	switch (mode) {

	case VPMCACHE_LRU:
	default:
		vpmd_cpu[CPU->cpu_seqid].vfree_ndx++;
		break;
	}
	return (ndx);
}


/*
 * Find one vpmap structure from the free lists and use it for the newpage.
 * The previous page it cached is dissociated and released. The page_t's
 * p_vpmref is cleared only when the vpm it points to is locked (or, on
 * AMD64, when the page is exclusively locked in page_unload(); that is
 * because the p_vpmref is treated as a mapping).
 *
 * The page's p_vpmref is set when the page is
 * locked (at least SHARED locked).
 */
static struct vpmap *
get_free_vpmap(page_t *newpage)
{
	struct vpmfree *vpmflp;
	kmutex_t *vmtx;
	struct vpmap *vpm, *first;
	union vpm_freeq *allocq, *releq;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;
	int free_ndx;

	/*
	 * get the freelist bin index.
	 */
	free_ndx = get_freelndx(vpm_cachemode);

	end_ndx = free_ndx;
	vpmflp = &vpmd_free[free_ndx];

retry_queue:
	allocq = vpmflp->vpm_allocq;
	mutex_enter(&allocq->vpmq_mtx);

	if ((vpm = allocq->vpmq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (vpmflp->vpm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->vpmq_mtx);
			goto retry_queue;
		}
		releq = vpmflp->vpm_releq;
		if (!mutex_tryenter(&releq->vpmq_mtx)) {
			/* cannot get releq; a free vpmap may be there now */
			mutex_exit(&allocq->vpmq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->vpmq_mtx. In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->vpmq_mtx);
			mutex_exit(&releq->vpmq_mtx);
			goto retry_queue;
		}
		if (releq->vpmq_free == NULL) {
			VPM_DEBUG(vpmd_emptyfreelist);
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the vpmap after
			 * accessing the data. Before resorting
			 * to sleeping, try the next list of the same color.
			 */
			free_ndx = (free_ndx + 1) & vpmd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->vpmq_mtx);
				mutex_exit(&allocq->vpmq_mtx);
				vpmflp = &vpmd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists.
			 * wait on this list and hope something gets freed.
			 */
			vpmflp->vpm_want++;
			mutex_exit(&vpmflp->vpm_freeq[1].vpmq_mtx);
			cv_wait(&vpmflp->vpm_free_cv,
			    &vpmflp->vpm_freeq[0].vpmq_mtx);
			vpmflp->vpm_want--;
			mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
			vpmflp = &vpmd_free[free_ndx];
			VPM_DEBUG(vpmd_nofreevpms);
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			vpmflp->vpm_allocq = releq;
			vpmflp->vpm_releq = allocq;
			mutex_exit(&allocq->vpmq_mtx);
			mutex_exit(&releq->vpmq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		int gotnewvpm;
		kmutex_t *pmtx;
		uint_t vpmref;

		/*
		 * Fastpath the case we get the vpmap mutex
		 * on the first try.
		 */
		first = vpm;
next_vpmap:
		vmtx = VPMAPMTX(vpm);
		if (!mutex_tryenter(vmtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or vpmap.
			 */
			if ((vpm = vpm->vpm_next) == first) {
				goto skip_queue;
			} else {
				goto next_vpmap;
			}
		}

		/*
		 * Assign this vpm to the newpage.
		 */
		pmtx = PPMTX(newpage);
		gotnewvpm = 0;
		mutex_enter(pmtx);

		/*
		 * Check if some other thread already assigned a vpm to
		 * this page.
		 */
		if ((vpmref = newpage->p_vpmref) == 0) {
			newpage->p_vpmref = VPMID(vpm);
			gotnewvpm = 1;
		} else {
			VPM_DEBUG(vpmd_contend);
			mutex_exit(vmtx);
		}
		mutex_exit(pmtx);

		if (gotnewvpm) {

			/*
			 * At this point, we've selected the vpm. Remove vpm
			 * from its freelist. If vpm is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == vpm) {
				ASSERT(first == allocq->vpmq_free);
				allocq->vpmq_free = vpm->vpm_next;
			}

			/*
			 * If the head of the freelist still points to vpm,
			 * then there are no more free vpmaps in that list.
			 */
			if (allocq->vpmq_free == vpm)
				/*
				 * Took the last one
				 */
				allocq->vpmq_free = NULL;
			else {
				vpm->vpm_prev->vpm_next = vpm->vpm_next;
				vpm->vpm_next->vpm_prev = vpm->vpm_prev;
			}
			mutex_exit(&allocq->vpmq_mtx);
			vpm->vpm_prev = vpm->vpm_next = NULL;

			/*
			 * Disassociate the previous page.
			 * p_vpmref is used as a mapping reference to the page.
			 */
			if ((pp = vpm->vpm_pp) != NULL &&
			    vpm->vpm_vp == pp->p_vnode &&
			    vpm->vpm_off == pp->p_offset) {

				pmtx = PPMTX(pp);
				if (page_trylock(pp, SE_SHARED)) {
					/*
					 * Now verify that it is the correct
					 * page. If not, someone else stole it,
					 * so just unlock it and leave.
					 */
					mutex_enter(pmtx);
					if (PP_ISFREE(pp) ||
					    vpm->vpm_vp != pp->p_vnode ||
					    vpm->vpm_off != pp->p_offset ||
					    pp->p_vpmref != VPMID(vpm)) {
						mutex_exit(pmtx);

						page_unlock(pp);
					} else {
						/*
						 * Release the page.
						 */
						pp->p_vpmref = 0;
						mutex_exit(pmtx);
						(void) page_release(pp, 1);
					}
				} else {
					/*
					 * If the page cannot be locked, just
					 * clear the p_vpmref and go.
					 */
					mutex_enter(pmtx);
					if (pp->p_vpmref == VPMID(vpm)) {
						pp->p_vpmref = 0;
					}
					mutex_exit(pmtx);
					VPM_DEBUG(vpmd_prevpagelocked);
				}
			}

			/*
			 * Setup vpm to point to the new page.
			 */
			vpm->vpm_pp = newpage;
			vpm->vpm_vp = newpage->p_vnode;
			vpm->vpm_off = newpage->p_offset;

		} else {
			int steal = !VPM_MTBF(steals, steals_mtbf);
			/*
			 * Page already has a vpm assigned; just use that.
			 * Grab the vpm mutex and verify that it is still
			 * the correct one. The pp->p_vpmref should not change
			 * once we have the vpm mutex and the page lock.
			 */
			mutex_exit(&allocq->vpmq_mtx);
			vpm = VPMP(vpmref);
			vmtx = VPMAPMTX(vpm);
			mutex_enter(vmtx);
			if ((steal && vpm->vpm_refcnt == 0) ||
			    vpm->vpm_pp != newpage) {
				/*
				 * The vpm got stolen, retry.
				 * Clear the p_vpmref.
				 */
				pmtx = PPMTX(newpage);
				mutex_enter(pmtx);
				if (newpage->p_vpmref == vpmref) {
					newpage->p_vpmref = 0;
				}
				mutex_exit(pmtx);

				mutex_exit(vmtx);
				VPM_DEBUG(vpmd_steals);
				goto retry_queue;
			} else if (vpm->vpm_refcnt == 0) {
				/*
				 * Remove it from the free list if it
				 * exists there.
				 */
				VPMAP_RMFREELIST(vpm);
			}
		}
		return (vpm);
	}
}

static void
free_vpmap(struct vpmap *vpm)
{
	struct vpmfree *vpmflp;
	struct vpmap *vpmfreelist;
	union vpm_freeq *releq;

	ASSERT(MUTEX_HELD(VPMAPMTX(vpm)));

	if (vpm->vpm_refcnt != 0) {
		panic("free_vpmap");
		/*NOTREACHED*/
	}

	vpmflp = &vpmd_free[vpm->vpm_free_ndx];
	/*
	 * Add to the tail of the release queue.
	 * Note that vpm_releq and vpm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = vpmflp->vpm_releq;
	if (releq == &vpmflp->vpm_freeq[0]) {
		vpm->vpm_ndxflg = 0;
	} else {
		vpm->vpm_ndxflg = 1;
	}
	mutex_enter(&releq->vpmq_mtx);
	vpmfreelist = releq->vpmq_free;
	if (vpmfreelist == 0) {
		int want;

		releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		/*
		 * Both queue mutexes are held to set vpm_want;
		 * snapshot the value before dropping releq mutex.
		 * If vpm_want appears after the releq mutex is dropped,
		 * then the vpmap just freed is already gone.
		 */
		want = vpmflp->vpm_want;
		mutex_exit(&releq->vpmq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex,
		 * then recheck after obtaining the vpm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&vpmflp->vpm_freeq[0].vpmq_mtx);
			if (vpmflp->vpm_want)
				cv_signal(&vpmflp->vpm_free_cv);
			mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
		}
	} else {
		vpm->vpm_next = vpmfreelist;
		vpm->vpm_prev = vpmfreelist->vpm_prev;
		vpmfreelist->vpm_prev = vpm;
		vpm->vpm_prev->vpm_next = vpm;
		mutex_exit(&releq->vpmq_mtx);
	}
}

/*
 * Get the vpmap for the page.
 * The refcnt of this vpm is incremented.
 */
static struct vpmap *
get_vpmap(page_t *pp)
{
	struct vpmap *vpm = NULL;
	kmutex_t *vmtx;
	kmutex_t *pmtx;
	unsigned int refid;

	ASSERT((pp != NULL) && PAGE_LOCKED(pp));

	if (VPM_MTBF(contend, contend_mtbf) && (refid = pp->p_vpmref) != 0) {
		vpm = VPMP(refid);
		vmtx = VPMAPMTX(vpm);
		mutex_enter(vmtx);
		/*
		 * Since we have the page lock and the vpm mutex, the
		 * pp->p_vpmref cannot change.
		 */
		if (vpm->vpm_pp != pp) {
			pmtx = PPMTX(pp);

			/*
			 * Clear the p_vpmref as it is incorrect.
			 * This can happen if the page was stolen.
			 * On x64 this should not happen as p_vpmref
			 * is treated as a mapping on the page. So
			 * if the page is stolen, the mapping would have
			 * been cleared in page_unload().
			 */
			mutex_enter(pmtx);
			if (pp->p_vpmref == refid)
				pp->p_vpmref = 0;
			mutex_exit(pmtx);

			mutex_exit(vmtx);
			vpm = NULL;
		} else if (vpm->vpm_refcnt == 0) {
			/*
			 * Got the vpm, remove it from the free
			 * list if it exists there.
			 */
			VPMAP_RMFREELIST(vpm);
		}
	}
	if (vpm == NULL) {
		/*
		 * get_free_vpmap() returns with the vpmap mutex held.
		 */
		vpm = get_free_vpmap(pp);
		vmtx = VPMAPMTX(vpm);
		vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_misses++;
	} else {
		vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_hits++;
	}

	vpm->vpm_refcnt++;
	mutex_exit(vmtx);

	return (vpm);
}

/* END --- vpm cache ---- */

/*
 * The vnode page mapping(vpm) interface routines.
 */

/*
 * Find or create the pages starting from baseoff for specified
 * length 'len'.
 */
static int
vpm_pagecreate(
	struct vnode *vp,
	u_offset_t baseoff,
	size_t len,
	vmap_t vml[],
	int nseg,
	int *newpage)
{

	page_t *pp = NULL;
	caddr_t base;
	u_offset_t off = baseoff;
	int i;
	ASSERT(nseg >= MINVMAPS && nseg <= MAXVMAPS);

	for (i = 0; len > 0; len -= PAGESIZE, i++) {
		struct vpmap *vpm;


		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {

			base = segkpm_create_va(off);

			/*
			 * the seg pointer passed in is just advisory. Just
			 * pass segkmap for now like segmap does with
			 * segmap_kpm enabled.
			 */
			if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
			    segkmap, base)) == NULL) {
				panic("segmap_pagecreate_vpm: "
				    "page_create failed");
				/*NOTREACHED*/
			}
			if (newpage != NULL)
				*newpage = 1;

			page_io_unlock(pp);
		}

		/*
		 * Get the vpm for this page_t.
		 */
		if (vpm_cache_enable) {
			vpm = get_vpmap(pp);
			vml[i].vs_data = (void *)&vpm->vpm_pp;
		} else {
			vml[i].vs_data = (void *)pp;
			pp->p_vpmref = 0;
		}

		vml[i].vs_addr = hat_kpm_mapin(pp, 0);
		vml[i].vs_len = PAGESIZE;

		off += PAGESIZE;
	}
	vml[i].vs_data = NULL;
	vml[i].vs_addr = (caddr_t)NULL;
	return (0);
}


/*
 * Returns vpm mappings of pages in the range [off, off+len], where
 * len is rounded up to the PAGESIZE boundary. The list of pages and
 * the page addresses are returned in the SGL vml (vmap_t) array passed in.
 * The nseg is the number of vmap_t entries in the array.
 *
 * The segmap's SM_LOCKPROTO usage is not supported by these interfaces.
 * For such cases, use the seg_map interfaces.
 */
int
vpm_map_pages(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int fetchpage,
	vmap_t *vml,
	int nseg,
	int *newpage,
	enum seg_rw rw)
{
	extern struct vnode *common_specvp();
	u_offset_t baseoff;
	uint_t prot;
	caddr_t base;
	page_t *pp, *pplist[MAXVMAPS];
	struct vpmap *vpm;
	int i, error = 0;
	size_t tlen;

	ASSERT(nseg >= MINVMAPS && nseg <= MAXVMAPS);
	baseoff = off & (offset_t)PAGEMASK;
	vml[0].vs_data = NULL;
	vml[0].vs_addr = (caddr_t)NULL;

	tlen = P2ROUNDUP(off + len, PAGESIZE) - baseoff;
	/*
	 * Restrict it to VPMMAXLEN.
	 */
	if (tlen > (VPMMAXPGS * PAGESIZE)) {
		tlen = VPMMAXPGS * PAGESIZE;
	}
	/*
	 * Ensure length fits within the vml[] array. One element of
	 * the array is used to mark the end of the scatter/gather list
	 * of valid mappings by setting its vs_addr = NULL. Leave space
	 * for this element.
	 */
	if (tlen > ((nseg - 1) * PAGESIZE)) {
		tlen = ((nseg - 1) * PAGESIZE);
	}
	len = tlen;

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);


	if (!fetchpage)
		return (vpm_pagecreate(vp, baseoff, len, vml, nseg, newpage));

	for (i = 0; len > 0; len -= PAGESIZE, i++, pplist[i] = NULL) {

		pp = page_lookup(vp, baseoff, SE_SHARED);

		/*
		 * If we did not find the page or if this page was not
		 * in the vpm cache (p_vpmref == 0), then let VOP_GETPAGE get
		 * all the pages.
		 * We need to call VOP_GETPAGE so that filesystems can do some
		 * (un)necessary tracking for sequential access.
		 */

		if (pp == NULL || (vpm_cache_enable && pp->p_vpmref == 0) ||
		    (rw == S_WRITE && hat_page_getattr(pp, P_MOD | P_REF)
		    != (P_MOD | P_REF))) {
			int j;
			if (pp != NULL) {
				page_unlock(pp);
			}
			/*
			 * If we did not find the desired set of pages
			 * in the page cache, just call VOP_GETPAGE to get
			 * all the pages.
			 */
			for (j = 0; j < i; j++) {
				page_unlock(pplist[j]);
			}


			baseoff = off & (offset_t)PAGEMASK;
			/*
			 * Pass a dummy address as it will be required
			 * by page_create_va(). We pass segkmap as the seg
			 * as some file systems(UFS) check it.
			 */
			base = segkpm_create_va(baseoff);

			error = VOP_GETPAGE(vp, baseoff, tlen, &prot, pplist,
			    tlen, segkmap, base, rw, CRED(), NULL);
			if (error) {
				VPM_DEBUG(vpmd_getpagefailed);
				pplist[0] = NULL;
			}
			break;
		} else {
			pplist[i] = pp;
			baseoff += PAGESIZE;
		}
	}

	if (error) {
		for (i = 0; pplist[i] != NULL; i++) {
			page_unlock(pplist[i]);
			pplist[i] = NULL;
		}
		vml[0].vs_addr = NULL;
		vml[0].vs_data = NULL;
		return (error);
	}

	/*
	 * Get the vpm's for pages.
	 */
	for (i = 0; pplist[i] != NULL; i++) {
		if (vpm_cache_enable) {
			vpm = get_vpmap(pplist[i]);
			vml[i].vs_data = (void *)&(vpm->vpm_pp);
		} else {
			vml[i].vs_data = (void *)pplist[i];
			pplist[i]->p_vpmref = 0;
		}

		vml[i].vs_addr = hat_kpm_mapin(pplist[i], 0);
		vml[i].vs_len = PAGESIZE;
	}

	vml[i].vs_data = NULL;
	vml[i].vs_addr = (caddr_t)NULL;

	return (0);
}

/*
 * Release the vpm mappings on the pages and unlock them.
 */
void
vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
{
	int i;
	struct vpmap *vpm;
	kmutex_t *mtx;
	page_t *pp;

	for (i = 0; vml[i].vs_data != NULL; i++) {
		ASSERT(IS_KPM_ADDR(vml[i].vs_addr));

		if (vpm_cache_enable) {
			pp = *(((page_t **)vml[i].vs_data));
		} else {
			pp = (page_t *)vml[i].vs_data;
		}

		/*
		 * Mark page as being modified or referenced, because vpm pages
		 * would not cause faults where it would be set normally.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			ASSERT(rw == S_READ);
			hat_setref(pp);
		}

		if (vpm_cache_enable) {
			vpm = (struct vpmap *)((char *)vml[i].vs_data
			    - offsetof(struct vpmap, vpm_pp));
			hat_kpm_mapout(pp, 0, vml[i].vs_addr);
			page_unlock(pp);
			mtx = VPMAPMTX(vpm);
			mutex_enter(mtx);

			if (--vpm->vpm_refcnt == 0) {
				free_vpmap(vpm);
			}
			mutex_exit(mtx);
		} else {
			hat_kpm_mapout(pp, 0, vml[i].vs_addr);
			(void) page_release(pp, 1);
		}
		vml[i].vs_data = NULL;
		vml[i].vs_addr = NULL;
	}
}
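
/*
 * Illustrative sketch of using the map/unmap pair directly (a hypothetical
 * caller, not code from this file): create and zero the pages backing a
 * page-aligned range [off, off + len) of vnode 'vp'.  With fetchpage == 0
 * the pages are created (via vpm_pagecreate()) rather than read in, and
 * the vml[] scatter/gather list is terminated by an entry whose vs_addr
 * and vs_data are NULL.  Error handling is elided.
 *
 *	vmap_t vml[MINVMAPS];
 *	int i, err;
 *
 *	err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS, NULL, S_WRITE);
 *	for (i = 0; !err && vml[i].vs_addr != NULL; i++)
 *		(void) kzero(vml[i].vs_addr, (uint_t)vml[i].vs_len);
 *	if (!err)
 *		vpm_unmap_pages(vml, S_WRITE);
 */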

/*
 * Given the vp, off and the uio structure, this routine will do the
 * copy (uiomove). If the last page created is partially written,
 * the rest of the page is zeroed out. It also zeros the beginning of
 * the first page till the start offset if requested (zerostart).
 * If pages are to be fetched, it will call the filesystem's getpage
 * function (VOP_GETPAGE) to get them, otherwise they will be created if
 * not already present in the page cache.
 */
int
vpm_data_copy(struct vnode *vp,
	u_offset_t off,
	size_t len,
	struct uio *uio,
	int fetchpage,
	int *newpage,
	int zerostart,
	enum seg_rw rw)
{
	int error;
	struct vmap vml[MINVMAPS];
	enum uio_rw uiorw;
	int npages = 0;

	uiorw = (rw == S_WRITE) ? UIO_WRITE : UIO_READ;
	/*
	 * 'off' will be the offset where the I/O starts.
	 * We get the pages starting at the (off & PAGEMASK)
	 * page boundary.
	 */
	error = vpm_map_pages(vp, off, (uint_t)len,
	    fetchpage, vml, MINVMAPS, &npages, rw);

	if (newpage != NULL)
		*newpage = npages;
	if (!error) {
		int i, pn, slen = len;
		int pon = off & PAGEOFFSET;

		/*
		 * Clear from the beginning of the page to start offset
		 * if requested.
		 */
		if (!fetchpage && zerostart) {
			(void) kzero(vml[0].vs_addr, (uint_t)pon);
			VPM_DEBUG(vpmd_zerostart);
		}

		for (i = 0; !error && slen > 0 &&
		    vml[i].vs_addr != NULL; i++) {
			pn = (int)MIN(slen, (PAGESIZE - pon));
			error = uiomove(vml[i].vs_addr + pon,
			    (long)pn, uiorw, uio);
			slen -= pn;
			pon = 0;
		}

		/*
		 * When new pages are created, zero out part of the
		 * page we did not copy to.
		 */
		if (!fetchpage && npages &&
		    uio->uio_loffset < roundup(off + len, PAGESIZE)) {
			int nzero;

			pon = (uio->uio_loffset & PAGEOFFSET);
			nzero = PAGESIZE - pon;
			i = (uio->uio_loffset - (off & PAGEMASK)) / PAGESIZE;
			(void) kzero(vml[i].vs_addr + pon, (uint_t)nzero);
		}
		vpm_unmap_pages(vml, rw);
	}
	return (error);
}

/*
 * called to flush pages for the given vnode covering
 * [off, off+len] range.
 */
int
vpm_sync_pages(struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t flags)
{
	extern struct vnode *common_specvp();
	int bflags = 0;
	int error = 0;
	size_t psize = roundup(len, PAGESIZE);

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_ASYNC)
			bflags |= B_ASYNC;
		if (flags & SM_INVAL)
			bflags |= B_INVAL;
		if (flags & SM_DESTROY)
			bflags |= (B_INVAL|B_TRUNC);
		if (flags & SM_FREE)
			bflags |= B_FREE;
		if (flags & SM_DONTNEED)
			bflags |= B_DONTNEED;

		error = VOP_PUTPAGE(vp, off, psize, bflags, CRED(), NULL);
	}

	return (error);
}


#else	/* SEGKPM_SUPPORT */

/* vpm stubs */
void
vpm_init()
{
}

/*ARGSUSED*/
int
vpm_pagecreate(
	struct vnode *vp,
	u_offset_t baseoff,
	size_t len,
	vmap_t vml[],
	int nseg,
	int *newpage)
{
	return (0);
}

/*ARGSUSED*/
int
vpm_map_pages(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int fetchpage,
	vmap_t vml[],
	int nseg,
	int *newpage,
	enum seg_rw rw)
{
	return (0);
}

/*ARGSUSED*/
int
vpm_data_copy(struct vnode *vp,
	u_offset_t off,
	size_t len,
	struct uio *uio,
	int fetchpage,
	int *newpage,
	int zerostart,
	enum seg_rw rw)
{
	return (0);
}

/*ARGSUSED*/
void
vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
{
}
/*ARGSUSED*/
int
vpm_sync_pages(struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t flags)
{
	return (0);
}
#endif	/* SEGKPM_SUPPORT */