/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 */
257c478bd9Sstevel@tonic-gate
267c478bd9Sstevel@tonic-gate #include <sys/types.h>
277c478bd9Sstevel@tonic-gate #include <sys/param.h>
287c478bd9Sstevel@tonic-gate #include <sys/systm.h>
297c478bd9Sstevel@tonic-gate #include <sys/buf.h>
307c478bd9Sstevel@tonic-gate #include <sys/cred.h>
317c478bd9Sstevel@tonic-gate #include <sys/errno.h>
327c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
33aa59c4cbSrsb #include <sys/vfs_opreg.h>
347c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
357c478bd9Sstevel@tonic-gate #include <sys/swap.h>
367c478bd9Sstevel@tonic-gate #include <sys/mman.h>
377c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
387c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
397c478bd9Sstevel@tonic-gate #include <sys/debug.h>
407c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
417c478bd9Sstevel@tonic-gate #include <sys/vm.h>
427c478bd9Sstevel@tonic-gate
437c478bd9Sstevel@tonic-gate #include <sys/fs/swapnode.h>
447c478bd9Sstevel@tonic-gate
457c478bd9Sstevel@tonic-gate #include <vm/seg.h>
467c478bd9Sstevel@tonic-gate #include <vm/page.h>
477c478bd9Sstevel@tonic-gate #include <vm/pvn.h>
487c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h>
497c478bd9Sstevel@tonic-gate
507c478bd9Sstevel@tonic-gate #include <vm/seg_kp.h>
517c478bd9Sstevel@tonic-gate
527c478bd9Sstevel@tonic-gate /*
537c478bd9Sstevel@tonic-gate * Define the routines within this file.
547c478bd9Sstevel@tonic-gate */
557c478bd9Sstevel@tonic-gate static int swap_getpage(struct vnode *vp, offset_t off, size_t len,
56da6c28aaSamw uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
57da6c28aaSamw caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
587c478bd9Sstevel@tonic-gate static int swap_putpage(struct vnode *vp, offset_t off, size_t len,
59da6c28aaSamw int flags, struct cred *cr, caller_context_t *ct);
60da6c28aaSamw static void swap_inactive(struct vnode *vp, struct cred *cr,
61da6c28aaSamw caller_context_t *ct);
627c478bd9Sstevel@tonic-gate static void swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
63da6c28aaSamw cred_t *cr, caller_context_t *ct);
647c478bd9Sstevel@tonic-gate
657c478bd9Sstevel@tonic-gate static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
667c478bd9Sstevel@tonic-gate uint_t *protp, page_t **plarr, size_t plsz,
677c478bd9Sstevel@tonic-gate struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
687c478bd9Sstevel@tonic-gate
697c478bd9Sstevel@tonic-gate int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
7007b65a64Saguzovsk uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
7107b65a64Saguzovsk uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
7207b65a64Saguzovsk enum seg_rw rw, struct cred *cr);
737c478bd9Sstevel@tonic-gate
747c478bd9Sstevel@tonic-gate static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
757c478bd9Sstevel@tonic-gate size_t *lenp, int flags, struct cred *cr);
767c478bd9Sstevel@tonic-gate
777c478bd9Sstevel@tonic-gate const fs_operation_def_t swap_vnodeops_template[] = {
78aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = swap_inactive },
79aa59c4cbSrsb VOPNAME_GETPAGE, { .vop_getpage = swap_getpage },
80aa59c4cbSrsb VOPNAME_PUTPAGE, { .vop_putpage = swap_putpage },
81aa59c4cbSrsb VOPNAME_DISPOSE, { .vop_dispose = swap_dispose },
82aa59c4cbSrsb VOPNAME_SETFL, { .error = fs_error },
83aa59c4cbSrsb VOPNAME_POLL, { .error = fs_error },
84aa59c4cbSrsb VOPNAME_PATHCONF, { .error = fs_error },
85aa59c4cbSrsb VOPNAME_GETSECATTR, { .error = fs_error },
86aa59c4cbSrsb VOPNAME_SHRLOCK, { .error = fs_error },
877c478bd9Sstevel@tonic-gate NULL, NULL
887c478bd9Sstevel@tonic-gate };
897c478bd9Sstevel@tonic-gate
907c478bd9Sstevel@tonic-gate vnodeops_t *swap_vnodeops;
917c478bd9Sstevel@tonic-gate
927c478bd9Sstevel@tonic-gate /* ARGSUSED */
937c478bd9Sstevel@tonic-gate static void
swap_inactive(struct vnode * vp,struct cred * cr,caller_context_t * ct)947c478bd9Sstevel@tonic-gate swap_inactive(
957c478bd9Sstevel@tonic-gate struct vnode *vp,
96da6c28aaSamw struct cred *cr,
97da6c28aaSamw caller_context_t *ct)
987c478bd9Sstevel@tonic-gate {
997c478bd9Sstevel@tonic-gate SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
1007c478bd9Sstevel@tonic-gate }
1017c478bd9Sstevel@tonic-gate
1027c478bd9Sstevel@tonic-gate /*
1037c478bd9Sstevel@tonic-gate * Return all the pages from [off..off+len] in given file
1047c478bd9Sstevel@tonic-gate */
105da6c28aaSamw /*ARGSUSED*/
1067c478bd9Sstevel@tonic-gate static int
swap_getpage(struct vnode * vp,offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr,caller_context_t * ct)1077c478bd9Sstevel@tonic-gate swap_getpage(
1087c478bd9Sstevel@tonic-gate struct vnode *vp,
1097c478bd9Sstevel@tonic-gate offset_t off,
1107c478bd9Sstevel@tonic-gate size_t len,
1117c478bd9Sstevel@tonic-gate uint_t *protp,
1127c478bd9Sstevel@tonic-gate page_t *pl[],
1137c478bd9Sstevel@tonic-gate size_t plsz,
1147c478bd9Sstevel@tonic-gate struct seg *seg,
1157c478bd9Sstevel@tonic-gate caddr_t addr,
1167c478bd9Sstevel@tonic-gate enum seg_rw rw,
117da6c28aaSamw struct cred *cr,
118da6c28aaSamw caller_context_t *ct)
1197c478bd9Sstevel@tonic-gate {
1207c478bd9Sstevel@tonic-gate SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
1217c478bd9Sstevel@tonic-gate (void *)vp, off, len, 0, 0);
1227c478bd9Sstevel@tonic-gate
1237c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
1247c478bd9Sstevel@tonic-gate "swapfs getpage:vp %p off %llx len %ld",
1257c478bd9Sstevel@tonic-gate (void *)vp, off, len);
1267c478bd9Sstevel@tonic-gate
127*2bf00e07SJosef 'Jeff' Sipek return (pvn_getpages(swap_getapage, vp, (u_offset_t)off, len, protp,
128*2bf00e07SJosef 'Jeff' Sipek pl, plsz, seg, addr, rw, cr));
1297c478bd9Sstevel@tonic-gate }
1307c478bd9Sstevel@tonic-gate
1317c478bd9Sstevel@tonic-gate /*
132*2bf00e07SJosef 'Jeff' Sipek * Called from pvn_getpages to get a particular page.
1337c478bd9Sstevel@tonic-gate */
1347c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1357c478bd9Sstevel@tonic-gate static int
swap_getapage(struct vnode * vp,u_offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr)1367c478bd9Sstevel@tonic-gate swap_getapage(
1377c478bd9Sstevel@tonic-gate struct vnode *vp,
1387c478bd9Sstevel@tonic-gate u_offset_t off,
1397c478bd9Sstevel@tonic-gate size_t len,
1407c478bd9Sstevel@tonic-gate uint_t *protp,
1417c478bd9Sstevel@tonic-gate page_t *pl[],
1427c478bd9Sstevel@tonic-gate size_t plsz,
1437c478bd9Sstevel@tonic-gate struct seg *seg,
1447c478bd9Sstevel@tonic-gate caddr_t addr,
1457c478bd9Sstevel@tonic-gate enum seg_rw rw,
1467c478bd9Sstevel@tonic-gate struct cred *cr)
1477c478bd9Sstevel@tonic-gate {
1487c478bd9Sstevel@tonic-gate struct page *pp, *rpp;
1497c478bd9Sstevel@tonic-gate int flags;
1507c478bd9Sstevel@tonic-gate int err = 0;
1517c478bd9Sstevel@tonic-gate struct vnode *pvp = NULL;
1527c478bd9Sstevel@tonic-gate u_offset_t poff;
1537c478bd9Sstevel@tonic-gate int flag_noreloc;
1547c478bd9Sstevel@tonic-gate se_t lock;
1557c478bd9Sstevel@tonic-gate extern int kcage_on;
1567c478bd9Sstevel@tonic-gate int upgrade = 0;
1577c478bd9Sstevel@tonic-gate
1587c478bd9Sstevel@tonic-gate SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
1597c478bd9Sstevel@tonic-gate vp, off, len, 0, 0);
1607c478bd9Sstevel@tonic-gate
1617c478bd9Sstevel@tonic-gate /*
1627c478bd9Sstevel@tonic-gate * Until there is a call-back mechanism to cause SEGKP
1637c478bd9Sstevel@tonic-gate * pages to be unlocked, make them non-relocatable.
1647c478bd9Sstevel@tonic-gate */
1657c478bd9Sstevel@tonic-gate if (SEG_IS_SEGKP(seg))
1667c478bd9Sstevel@tonic-gate flag_noreloc = PG_NORELOC;
1677c478bd9Sstevel@tonic-gate else
1687c478bd9Sstevel@tonic-gate flag_noreloc = 0;
1697c478bd9Sstevel@tonic-gate
1707c478bd9Sstevel@tonic-gate if (protp != NULL)
1717c478bd9Sstevel@tonic-gate *protp = PROT_ALL;
1727c478bd9Sstevel@tonic-gate
1737c478bd9Sstevel@tonic-gate lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
1747c478bd9Sstevel@tonic-gate
1757c478bd9Sstevel@tonic-gate again:
1767c478bd9Sstevel@tonic-gate if (pp = page_lookup(vp, off, lock)) {
1777c478bd9Sstevel@tonic-gate /*
1787c478bd9Sstevel@tonic-gate * In very rare instances, a segkp page may have been
1797c478bd9Sstevel@tonic-gate * relocated outside of the kernel by the kernel cage
1807c478bd9Sstevel@tonic-gate * due to the window between page_unlock() and
1817c478bd9Sstevel@tonic-gate * VOP_PUTPAGE() in segkp_unlock(). Due to the
1827c478bd9Sstevel@tonic-gate * rareness of these occurances, the solution is to
1837c478bd9Sstevel@tonic-gate * relocate the page to a P_NORELOC page.
1847c478bd9Sstevel@tonic-gate */
1857c478bd9Sstevel@tonic-gate if (flag_noreloc != 0) {
1867c478bd9Sstevel@tonic-gate if (!PP_ISNORELOC(pp) && kcage_on) {
1877c478bd9Sstevel@tonic-gate if (lock != SE_EXCL) {
1887c478bd9Sstevel@tonic-gate upgrade = 1;
1897c478bd9Sstevel@tonic-gate if (!page_tryupgrade(pp)) {
1907c478bd9Sstevel@tonic-gate page_unlock(pp);
1917c478bd9Sstevel@tonic-gate lock = SE_EXCL;
1927c478bd9Sstevel@tonic-gate goto again;
1937c478bd9Sstevel@tonic-gate }
1947c478bd9Sstevel@tonic-gate }
1957c478bd9Sstevel@tonic-gate
1967c478bd9Sstevel@tonic-gate if (page_relocate_cage(&pp, &rpp) != 0)
1977c478bd9Sstevel@tonic-gate panic("swap_getapage: "
1987c478bd9Sstevel@tonic-gate "page_relocate_cage failed");
1997c478bd9Sstevel@tonic-gate
2007c478bd9Sstevel@tonic-gate pp = rpp;
2017c478bd9Sstevel@tonic-gate }
2027c478bd9Sstevel@tonic-gate }
2037c478bd9Sstevel@tonic-gate
2047c478bd9Sstevel@tonic-gate if (pl) {
2057c478bd9Sstevel@tonic-gate if (upgrade)
2067c478bd9Sstevel@tonic-gate page_downgrade(pp);
2077c478bd9Sstevel@tonic-gate
2087c478bd9Sstevel@tonic-gate pl[0] = pp;
2097c478bd9Sstevel@tonic-gate pl[1] = NULL;
2107c478bd9Sstevel@tonic-gate } else {
2117c478bd9Sstevel@tonic-gate page_unlock(pp);
2127c478bd9Sstevel@tonic-gate }
2137c478bd9Sstevel@tonic-gate } else {
2147c478bd9Sstevel@tonic-gate pp = page_create_va(vp, off, PAGESIZE,
2157c478bd9Sstevel@tonic-gate PG_WAIT | PG_EXCL | flag_noreloc,
2167c478bd9Sstevel@tonic-gate seg, addr);
2177c478bd9Sstevel@tonic-gate /*
2187c478bd9Sstevel@tonic-gate * Someone raced in and created the page after we did the
2197c478bd9Sstevel@tonic-gate * lookup but before we did the create, so go back and
2207c478bd9Sstevel@tonic-gate * try to look it up again.
2217c478bd9Sstevel@tonic-gate */
2227c478bd9Sstevel@tonic-gate if (pp == NULL)
2237c478bd9Sstevel@tonic-gate goto again;
2247c478bd9Sstevel@tonic-gate if (rw != S_CREATE) {
2257c478bd9Sstevel@tonic-gate err = swap_getphysname(vp, off, &pvp, &poff);
2267c478bd9Sstevel@tonic-gate if (pvp) {
2277c478bd9Sstevel@tonic-gate struct anon *ap;
2287c478bd9Sstevel@tonic-gate kmutex_t *ahm;
2297c478bd9Sstevel@tonic-gate
2307c478bd9Sstevel@tonic-gate flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
2317c478bd9Sstevel@tonic-gate err = VOP_PAGEIO(pvp, pp, poff,
232da6c28aaSamw PAGESIZE, flags, cr, NULL);
2337c478bd9Sstevel@tonic-gate
2347c478bd9Sstevel@tonic-gate if (!err) {
23523d9e5acSMichael Corcoran ahm = AH_MUTEX(vp, off);
2367c478bd9Sstevel@tonic-gate mutex_enter(ahm);
2377c478bd9Sstevel@tonic-gate
2387c478bd9Sstevel@tonic-gate ap = swap_anon(vp, off);
239a98e9dbfSaguzovsk if (ap == NULL) {
240a98e9dbfSaguzovsk panic("swap_getapage:"
241a98e9dbfSaguzovsk " null anon");
242a98e9dbfSaguzovsk }
2437c478bd9Sstevel@tonic-gate
2447c478bd9Sstevel@tonic-gate if (ap->an_pvp == pvp &&
2457c478bd9Sstevel@tonic-gate ap->an_poff == poff) {
2467c478bd9Sstevel@tonic-gate swap_phys_free(pvp, poff,
2477c478bd9Sstevel@tonic-gate PAGESIZE);
2487c478bd9Sstevel@tonic-gate ap->an_pvp = NULL;
2497c478bd9Sstevel@tonic-gate ap->an_poff = NULL;
2507c478bd9Sstevel@tonic-gate hat_setmod(pp);
2517c478bd9Sstevel@tonic-gate }
2527c478bd9Sstevel@tonic-gate
2537c478bd9Sstevel@tonic-gate mutex_exit(ahm);
2547c478bd9Sstevel@tonic-gate }
2557c478bd9Sstevel@tonic-gate } else {
2567c478bd9Sstevel@tonic-gate if (!err)
2577c478bd9Sstevel@tonic-gate pagezero(pp, 0, PAGESIZE);
2587c478bd9Sstevel@tonic-gate
2597c478bd9Sstevel@tonic-gate /*
2607c478bd9Sstevel@tonic-gate * If it's a fault ahead, release page_io_lock
2617c478bd9Sstevel@tonic-gate * and SE_EXCL we grabbed in page_create_va
2627c478bd9Sstevel@tonic-gate *
2637c478bd9Sstevel@tonic-gate * If we are here, we haven't called VOP_PAGEIO
2647c478bd9Sstevel@tonic-gate * and thus calling pvn_read_done(pp, B_READ)
2657c478bd9Sstevel@tonic-gate * below may mislead that we tried i/o. Besides,
2667c478bd9Sstevel@tonic-gate * in case of async, pvn_read_done() should
2677c478bd9Sstevel@tonic-gate * not be called by *getpage()
2687c478bd9Sstevel@tonic-gate */
2697c478bd9Sstevel@tonic-gate if (pl == NULL) {
2707c478bd9Sstevel@tonic-gate /*
2717c478bd9Sstevel@tonic-gate * swap_getphysname can return error
2727c478bd9Sstevel@tonic-gate * only when we are getting called from
2737c478bd9Sstevel@tonic-gate * swapslot_free which passes non-NULL
2747c478bd9Sstevel@tonic-gate * pl to VOP_GETPAGE.
2757c478bd9Sstevel@tonic-gate */
2767c478bd9Sstevel@tonic-gate ASSERT(err == 0);
2777c478bd9Sstevel@tonic-gate page_io_unlock(pp);
2787c478bd9Sstevel@tonic-gate page_unlock(pp);
2797c478bd9Sstevel@tonic-gate }
2807c478bd9Sstevel@tonic-gate }
2817c478bd9Sstevel@tonic-gate }
2827c478bd9Sstevel@tonic-gate
2837c478bd9Sstevel@tonic-gate ASSERT(pp != NULL);
2847c478bd9Sstevel@tonic-gate
2857c478bd9Sstevel@tonic-gate if (err && pl)
2867c478bd9Sstevel@tonic-gate pvn_read_done(pp, B_ERROR);
2877c478bd9Sstevel@tonic-gate
2887c478bd9Sstevel@tonic-gate if (!err && pl)
2897c478bd9Sstevel@tonic-gate pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
2907c478bd9Sstevel@tonic-gate }
2917c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
2927c478bd9Sstevel@tonic-gate "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
2937c478bd9Sstevel@tonic-gate return (err);
2947c478bd9Sstevel@tonic-gate }
2957c478bd9Sstevel@tonic-gate
2967c478bd9Sstevel@tonic-gate /*
2977c478bd9Sstevel@tonic-gate * Called from large page anon routines only! This is an ugly hack where
2987c478bd9Sstevel@tonic-gate * the anon layer directly calls into swapfs with a preallocated large page.
2997c478bd9Sstevel@tonic-gate * Another method would have been to change to VOP and add an extra arg for
3007c478bd9Sstevel@tonic-gate * the preallocated large page. This all could be cleaned up later when we
3017c478bd9Sstevel@tonic-gate * solve the anonymous naming problem and no longer need to loop across of
3027c478bd9Sstevel@tonic-gate * the VOP in PAGESIZE increments to fill in or initialize a large page as
3037c478bd9Sstevel@tonic-gate * is done today. I think the latter is better since it avoid a change to
3047c478bd9Sstevel@tonic-gate * the VOP interface that could later be avoided.
3057c478bd9Sstevel@tonic-gate */
3067c478bd9Sstevel@tonic-gate int
swap_getconpage(struct vnode * vp,u_offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,page_t * conpp,uint_t * pszc,spgcnt_t * nreloc,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr)3077c478bd9Sstevel@tonic-gate swap_getconpage(
3087c478bd9Sstevel@tonic-gate struct vnode *vp,
3097c478bd9Sstevel@tonic-gate u_offset_t off,
3107c478bd9Sstevel@tonic-gate size_t len,
3117c478bd9Sstevel@tonic-gate uint_t *protp,
3127c478bd9Sstevel@tonic-gate page_t *pl[],
3137c478bd9Sstevel@tonic-gate size_t plsz,
3147c478bd9Sstevel@tonic-gate page_t *conpp,
31507b65a64Saguzovsk uint_t *pszc,
3167c478bd9Sstevel@tonic-gate spgcnt_t *nreloc,
3177c478bd9Sstevel@tonic-gate struct seg *seg,
3187c478bd9Sstevel@tonic-gate caddr_t addr,
3197c478bd9Sstevel@tonic-gate enum seg_rw rw,
3207c478bd9Sstevel@tonic-gate struct cred *cr)
3217c478bd9Sstevel@tonic-gate {
3227c478bd9Sstevel@tonic-gate struct page *pp;
3237c478bd9Sstevel@tonic-gate int err = 0;
3247c478bd9Sstevel@tonic-gate struct vnode *pvp = NULL;
3257c478bd9Sstevel@tonic-gate u_offset_t poff;
3267c478bd9Sstevel@tonic-gate
3277c478bd9Sstevel@tonic-gate ASSERT(len == PAGESIZE);
3287c478bd9Sstevel@tonic-gate ASSERT(pl != NULL);
3297c478bd9Sstevel@tonic-gate ASSERT(plsz == PAGESIZE);
3307c478bd9Sstevel@tonic-gate ASSERT(protp == NULL);
3317c478bd9Sstevel@tonic-gate ASSERT(nreloc != NULL);
3327c478bd9Sstevel@tonic-gate ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
3337c478bd9Sstevel@tonic-gate SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
3347c478bd9Sstevel@tonic-gate vp, off, len, 0, 0);
3357c478bd9Sstevel@tonic-gate
3367c478bd9Sstevel@tonic-gate /*
3377c478bd9Sstevel@tonic-gate * If we are not using a preallocated page then we know one already
3387c478bd9Sstevel@tonic-gate * exists. So just let the old code handle it.
3397c478bd9Sstevel@tonic-gate */
3407c478bd9Sstevel@tonic-gate if (conpp == NULL) {
3417c478bd9Sstevel@tonic-gate err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
3427c478bd9Sstevel@tonic-gate seg, addr, rw, cr);
3437c478bd9Sstevel@tonic-gate return (err);
3447c478bd9Sstevel@tonic-gate }
3457c478bd9Sstevel@tonic-gate ASSERT(conpp->p_szc != 0);
3467c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(conpp));
3477c478bd9Sstevel@tonic-gate
3487c478bd9Sstevel@tonic-gate
3497c478bd9Sstevel@tonic-gate ASSERT(conpp->p_next == conpp);
3507c478bd9Sstevel@tonic-gate ASSERT(conpp->p_prev == conpp);
3517c478bd9Sstevel@tonic-gate ASSERT(!PP_ISAGED(conpp));
3527c478bd9Sstevel@tonic-gate ASSERT(!PP_ISFREE(conpp));
3537c478bd9Sstevel@tonic-gate
3547c478bd9Sstevel@tonic-gate *nreloc = 0;
3557c478bd9Sstevel@tonic-gate pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);
3567c478bd9Sstevel@tonic-gate
3577c478bd9Sstevel@tonic-gate /*
3587c478bd9Sstevel@tonic-gate * If existing page is found we may need to relocate.
3597c478bd9Sstevel@tonic-gate */
3607c478bd9Sstevel@tonic-gate if (pp != conpp) {
3617c478bd9Sstevel@tonic-gate ASSERT(rw != S_CREATE);
36207b65a64Saguzovsk ASSERT(pszc != NULL);
3637c478bd9Sstevel@tonic-gate ASSERT(PAGE_SHARED(pp));
3647c478bd9Sstevel@tonic-gate if (pp->p_szc < conpp->p_szc) {
36507b65a64Saguzovsk *pszc = pp->p_szc;
3667c478bd9Sstevel@tonic-gate page_unlock(pp);
3677c478bd9Sstevel@tonic-gate err = -1;
36807b65a64Saguzovsk } else if (pp->p_szc > conpp->p_szc &&
36907b65a64Saguzovsk seg->s_szc > conpp->p_szc) {
37007b65a64Saguzovsk *pszc = MIN(pp->p_szc, seg->s_szc);
3717c478bd9Sstevel@tonic-gate page_unlock(pp);
3727c478bd9Sstevel@tonic-gate err = -2;
3737c478bd9Sstevel@tonic-gate } else {
3747c478bd9Sstevel@tonic-gate pl[0] = pp;
3757c478bd9Sstevel@tonic-gate pl[1] = NULL;
3767c478bd9Sstevel@tonic-gate if (page_pptonum(pp) &
37707b65a64Saguzovsk (page_get_pagecnt(conpp->p_szc) - 1))
3787c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "swap_getconpage: no root");
3797c478bd9Sstevel@tonic-gate }
3807c478bd9Sstevel@tonic-gate return (err);
3817c478bd9Sstevel@tonic-gate }
3827c478bd9Sstevel@tonic-gate
3837c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp));
3847c478bd9Sstevel@tonic-gate
3857c478bd9Sstevel@tonic-gate if (*nreloc != 0) {
3867c478bd9Sstevel@tonic-gate ASSERT(rw != S_CREATE);
3877c478bd9Sstevel@tonic-gate pl[0] = pp;
3887c478bd9Sstevel@tonic-gate pl[1] = NULL;
3897c478bd9Sstevel@tonic-gate return (0);
3907c478bd9Sstevel@tonic-gate }
3917c478bd9Sstevel@tonic-gate
3927c478bd9Sstevel@tonic-gate *nreloc = 1;
3937c478bd9Sstevel@tonic-gate
3947c478bd9Sstevel@tonic-gate /*
3957c478bd9Sstevel@tonic-gate * If necessary do the page io.
3967c478bd9Sstevel@tonic-gate */
3977c478bd9Sstevel@tonic-gate if (rw != S_CREATE) {
3987c478bd9Sstevel@tonic-gate /*
3997c478bd9Sstevel@tonic-gate * Since we are only called now on behalf of an
4007c478bd9Sstevel@tonic-gate * address space operation it's impossible for
4017c478bd9Sstevel@tonic-gate * us to fail unlike swap_getapge() which
4027c478bd9Sstevel@tonic-gate * also gets called from swapslot_free().
4037c478bd9Sstevel@tonic-gate */
4047c478bd9Sstevel@tonic-gate if (swap_getphysname(vp, off, &pvp, &poff)) {
4057c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC,
4067c478bd9Sstevel@tonic-gate "swap_getconpage: swap_getphysname failed!");
4077c478bd9Sstevel@tonic-gate }
4087c478bd9Sstevel@tonic-gate
409a98e9dbfSaguzovsk if (pvp != NULL) {
410a98e9dbfSaguzovsk err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
411a98e9dbfSaguzovsk cr, NULL);
412a98e9dbfSaguzovsk if (err == 0) {
413a98e9dbfSaguzovsk struct anon *ap;
414a98e9dbfSaguzovsk kmutex_t *ahm;
415a98e9dbfSaguzovsk
41623d9e5acSMichael Corcoran ahm = AH_MUTEX(vp, off);
417a98e9dbfSaguzovsk mutex_enter(ahm);
418a98e9dbfSaguzovsk ap = swap_anon(vp, off);
419a98e9dbfSaguzovsk if (ap == NULL)
420a98e9dbfSaguzovsk panic("swap_getconpage: null anon");
421a98e9dbfSaguzovsk if (ap->an_pvp != pvp || ap->an_poff != poff)
422a98e9dbfSaguzovsk panic("swap_getconpage: bad anon");
423a98e9dbfSaguzovsk
424a98e9dbfSaguzovsk swap_phys_free(pvp, poff, PAGESIZE);
425a98e9dbfSaguzovsk ap->an_pvp = NULL;
426a98e9dbfSaguzovsk ap->an_poff = NULL;
427a98e9dbfSaguzovsk hat_setmod(pp);
428a98e9dbfSaguzovsk mutex_exit(ahm);
429a98e9dbfSaguzovsk }
4307c478bd9Sstevel@tonic-gate } else {
4317c478bd9Sstevel@tonic-gate pagezero(pp, 0, PAGESIZE);
4327c478bd9Sstevel@tonic-gate }
4337c478bd9Sstevel@tonic-gate }
4347c478bd9Sstevel@tonic-gate
4357c478bd9Sstevel@tonic-gate /*
4367c478bd9Sstevel@tonic-gate * Normally we would let pvn_read_done() destroy
4377c478bd9Sstevel@tonic-gate * the page on IO error. But since this is a preallocated
4387c478bd9Sstevel@tonic-gate * page we'll let the anon layer handle it.
4397c478bd9Sstevel@tonic-gate */
4407c478bd9Sstevel@tonic-gate page_io_unlock(pp);
4417c478bd9Sstevel@tonic-gate if (err != 0)
4427c478bd9Sstevel@tonic-gate page_hashout(pp, NULL);
4437c478bd9Sstevel@tonic-gate ASSERT(pp->p_next == pp);
4447c478bd9Sstevel@tonic-gate ASSERT(pp->p_prev == pp);
4457c478bd9Sstevel@tonic-gate
4467c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
4477c478bd9Sstevel@tonic-gate "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);
4487c478bd9Sstevel@tonic-gate
4497c478bd9Sstevel@tonic-gate pl[0] = pp;
4507c478bd9Sstevel@tonic-gate pl[1] = NULL;
4517c478bd9Sstevel@tonic-gate return (err);
4527c478bd9Sstevel@tonic-gate }
4537c478bd9Sstevel@tonic-gate
4547c478bd9Sstevel@tonic-gate /* Async putpage klustering stuff */
4557c478bd9Sstevel@tonic-gate int sw_pending_size;
4567c478bd9Sstevel@tonic-gate extern int klustsize;
4577c478bd9Sstevel@tonic-gate extern struct async_reqs *sw_getreq();
4587c478bd9Sstevel@tonic-gate extern void sw_putreq(struct async_reqs *);
4597c478bd9Sstevel@tonic-gate extern void sw_putbackreq(struct async_reqs *);
4607c478bd9Sstevel@tonic-gate extern struct async_reqs *sw_getfree();
4617c478bd9Sstevel@tonic-gate extern void sw_putfree(struct async_reqs *);
4627c478bd9Sstevel@tonic-gate
4637c478bd9Sstevel@tonic-gate static size_t swap_putpagecnt, swap_pagespushed;
4647c478bd9Sstevel@tonic-gate static size_t swap_otherfail, swap_otherpages;
4657c478bd9Sstevel@tonic-gate static size_t swap_klustfail, swap_klustpages;
4667c478bd9Sstevel@tonic-gate static size_t swap_getiofail, swap_getiopages;
4677c478bd9Sstevel@tonic-gate
4687c478bd9Sstevel@tonic-gate /*
4697c478bd9Sstevel@tonic-gate * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED}.
4707c478bd9Sstevel@tonic-gate * If len == 0, do from off to EOF.
4717c478bd9Sstevel@tonic-gate */
4727c478bd9Sstevel@tonic-gate static int swap_nopage = 0; /* Don't do swap_putpage's if set */
4737c478bd9Sstevel@tonic-gate
4747c478bd9Sstevel@tonic-gate /* ARGSUSED */
4757c478bd9Sstevel@tonic-gate static int
swap_putpage(struct vnode * vp,offset_t off,size_t len,int flags,struct cred * cr,caller_context_t * ct)4767c478bd9Sstevel@tonic-gate swap_putpage(
4777c478bd9Sstevel@tonic-gate struct vnode *vp,
4787c478bd9Sstevel@tonic-gate offset_t off,
4797c478bd9Sstevel@tonic-gate size_t len,
4807c478bd9Sstevel@tonic-gate int flags,
481da6c28aaSamw struct cred *cr,
482da6c28aaSamw caller_context_t *ct)
4837c478bd9Sstevel@tonic-gate {
4847c478bd9Sstevel@tonic-gate page_t *pp;
4857c478bd9Sstevel@tonic-gate u_offset_t io_off;
4867c478bd9Sstevel@tonic-gate size_t io_len = 0;
4877c478bd9Sstevel@tonic-gate int err = 0;
488ed0efa68SDonghai Qiao int nowait;
4897c478bd9Sstevel@tonic-gate struct async_reqs *arg;
4907c478bd9Sstevel@tonic-gate
4917c478bd9Sstevel@tonic-gate if (swap_nopage)
4927c478bd9Sstevel@tonic-gate return (0);
4937c478bd9Sstevel@tonic-gate
4947c478bd9Sstevel@tonic-gate ASSERT(vp->v_count != 0);
4957c478bd9Sstevel@tonic-gate
496ed0efa68SDonghai Qiao nowait = flags & B_PAGE_NOWAIT;
497ed0efa68SDonghai Qiao
4988c12346dSsl108498 /*
4998c12346dSsl108498 * Clear force flag so that p_lckcnt pages are not invalidated.
5008c12346dSsl108498 */
501ed0efa68SDonghai Qiao flags &= ~(B_FORCE | B_PAGE_NOWAIT);
5028c12346dSsl108498
5037c478bd9Sstevel@tonic-gate SWAPFS_PRINT(SWAP_VOPS,
5047c478bd9Sstevel@tonic-gate "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
5057c478bd9Sstevel@tonic-gate (void *)vp, off, len, flags, 0);
5067c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
5077c478bd9Sstevel@tonic-gate "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);
5087c478bd9Sstevel@tonic-gate
5097c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOMAP)
5107c478bd9Sstevel@tonic-gate return (ENOSYS);
5117c478bd9Sstevel@tonic-gate
5127c478bd9Sstevel@tonic-gate if (!vn_has_cached_data(vp))
5137c478bd9Sstevel@tonic-gate return (0);
5147c478bd9Sstevel@tonic-gate
5157c478bd9Sstevel@tonic-gate if (len == 0) {
5167c478bd9Sstevel@tonic-gate if (curproc == proc_pageout)
5177c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "swapfs: pageout can't block");
5187c478bd9Sstevel@tonic-gate
5197c478bd9Sstevel@tonic-gate /* Search the entire vp list for pages >= off. */
5207c478bd9Sstevel@tonic-gate err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
5217c478bd9Sstevel@tonic-gate flags, cr);
5227c478bd9Sstevel@tonic-gate } else {
5237c478bd9Sstevel@tonic-gate u_offset_t eoff;
5247c478bd9Sstevel@tonic-gate
5257c478bd9Sstevel@tonic-gate /*
5267c478bd9Sstevel@tonic-gate * Loop over all offsets in the range [off...off + len]
5277c478bd9Sstevel@tonic-gate * looking for pages to deal with.
5287c478bd9Sstevel@tonic-gate */
5297c478bd9Sstevel@tonic-gate eoff = off + len;
5307c478bd9Sstevel@tonic-gate for (io_off = (u_offset_t)off; io_off < eoff;
5317c478bd9Sstevel@tonic-gate io_off += io_len) {
5327c478bd9Sstevel@tonic-gate /*
5337c478bd9Sstevel@tonic-gate * If we run out of the async req slot, put the page
5347c478bd9Sstevel@tonic-gate * now instead of queuing.
5357c478bd9Sstevel@tonic-gate */
5367c478bd9Sstevel@tonic-gate if (flags == (B_ASYNC | B_FREE) &&
5377c478bd9Sstevel@tonic-gate sw_pending_size < klustsize &&
5387c478bd9Sstevel@tonic-gate (arg = sw_getfree())) {
5397c478bd9Sstevel@tonic-gate /*
5407c478bd9Sstevel@tonic-gate * If we are clustering, we should allow
5417c478bd9Sstevel@tonic-gate * pageout to feed us more pages because # of
5427c478bd9Sstevel@tonic-gate * pushes is limited by # of I/Os, and one
5437c478bd9Sstevel@tonic-gate * cluster is considered to be one I/O.
5447c478bd9Sstevel@tonic-gate */
5457c478bd9Sstevel@tonic-gate if (pushes)
5467c478bd9Sstevel@tonic-gate pushes--;
5477c478bd9Sstevel@tonic-gate
5487c478bd9Sstevel@tonic-gate arg->a_vp = vp;
5497c478bd9Sstevel@tonic-gate arg->a_off = io_off;
5507c478bd9Sstevel@tonic-gate arg->a_len = PAGESIZE;
5517c478bd9Sstevel@tonic-gate arg->a_flags = B_ASYNC | B_FREE;
5527c478bd9Sstevel@tonic-gate arg->a_cred = kcred;
5537c478bd9Sstevel@tonic-gate sw_putreq(arg);
5547c478bd9Sstevel@tonic-gate io_len = PAGESIZE;
5557c478bd9Sstevel@tonic-gate continue;
5567c478bd9Sstevel@tonic-gate }
5577c478bd9Sstevel@tonic-gate /*
5587c478bd9Sstevel@tonic-gate * If we are not invalidating pages, use the
5597c478bd9Sstevel@tonic-gate * routine page_lookup_nowait() to prevent
5607c478bd9Sstevel@tonic-gate * reclaiming them from the free list.
5617c478bd9Sstevel@tonic-gate */
562ed0efa68SDonghai Qiao if (!nowait && ((flags & B_INVAL) ||
563ed0efa68SDonghai Qiao (flags & (B_ASYNC | B_FREE)) == B_FREE))
5647c478bd9Sstevel@tonic-gate pp = page_lookup(vp, io_off, SE_EXCL);
5657c478bd9Sstevel@tonic-gate else
5667c478bd9Sstevel@tonic-gate pp = page_lookup_nowait(vp, io_off,
567ed0efa68SDonghai Qiao (flags & (B_FREE | B_INVAL)) ?
568ed0efa68SDonghai Qiao SE_EXCL : SE_SHARED);
5697c478bd9Sstevel@tonic-gate
5707c478bd9Sstevel@tonic-gate if (pp == NULL || pvn_getdirty(pp, flags) == 0)
5717c478bd9Sstevel@tonic-gate io_len = PAGESIZE;
5727c478bd9Sstevel@tonic-gate else {
5737c478bd9Sstevel@tonic-gate err = swap_putapage(vp, pp, &io_off, &io_len,
5747c478bd9Sstevel@tonic-gate flags, cr);
5757c478bd9Sstevel@tonic-gate if (err != 0)
5767c478bd9Sstevel@tonic-gate break;
5777c478bd9Sstevel@tonic-gate }
5787c478bd9Sstevel@tonic-gate }
5797c478bd9Sstevel@tonic-gate }
5807c478bd9Sstevel@tonic-gate /* If invalidating, verify all pages on vnode list are gone. */
5817c478bd9Sstevel@tonic-gate if (err == 0 && off == 0 && len == 0 &&
5827c478bd9Sstevel@tonic-gate (flags & B_INVAL) && vn_has_cached_data(vp)) {
5837c478bd9Sstevel@tonic-gate cmn_err(CE_WARN,
5847c478bd9Sstevel@tonic-gate "swap_putpage: B_INVAL, pages not gone");
5857c478bd9Sstevel@tonic-gate }
5867c478bd9Sstevel@tonic-gate return (err);
5877c478bd9Sstevel@tonic-gate }
5887c478bd9Sstevel@tonic-gate
5897c478bd9Sstevel@tonic-gate /*
5907c478bd9Sstevel@tonic-gate * Write out a single page.
5917c478bd9Sstevel@tonic-gate * For swapfs this means choose a physical swap slot and write the page
5927c478bd9Sstevel@tonic-gate * out using VOP_PAGEIO.
5937c478bd9Sstevel@tonic-gate * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
5947c478bd9Sstevel@tonic-gate * swapfs pages, a bunch of contiguous swap slots and then write them
5957c478bd9Sstevel@tonic-gate * all out in one clustered i/o.
5967c478bd9Sstevel@tonic-gate */
5977c478bd9Sstevel@tonic-gate /*ARGSUSED*/
5987c478bd9Sstevel@tonic-gate static int
swap_putapage(struct vnode * vp,page_t * pp,u_offset_t * offp,size_t * lenp,int flags,struct cred * cr)5997c478bd9Sstevel@tonic-gate swap_putapage(
6007c478bd9Sstevel@tonic-gate struct vnode *vp,
6017c478bd9Sstevel@tonic-gate page_t *pp,
6027c478bd9Sstevel@tonic-gate u_offset_t *offp,
6037c478bd9Sstevel@tonic-gate size_t *lenp,
6047c478bd9Sstevel@tonic-gate int flags,
6057c478bd9Sstevel@tonic-gate struct cred *cr)
6067c478bd9Sstevel@tonic-gate {
6077c478bd9Sstevel@tonic-gate int err;
6087c478bd9Sstevel@tonic-gate struct vnode *pvp;
6097c478bd9Sstevel@tonic-gate u_offset_t poff, off;
6107c478bd9Sstevel@tonic-gate u_offset_t doff;
6117c478bd9Sstevel@tonic-gate size_t dlen;
6127c478bd9Sstevel@tonic-gate size_t klsz = 0;
6137c478bd9Sstevel@tonic-gate u_offset_t klstart = 0;
6147c478bd9Sstevel@tonic-gate struct vnode *klvp = NULL;
6157c478bd9Sstevel@tonic-gate page_t *pplist;
6167c478bd9Sstevel@tonic-gate se_t se;
6177c478bd9Sstevel@tonic-gate struct async_reqs *arg;
6187c478bd9Sstevel@tonic-gate size_t swap_klustsize;
6197c478bd9Sstevel@tonic-gate
6207c478bd9Sstevel@tonic-gate /*
6217c478bd9Sstevel@tonic-gate * This check is added for callers who access swap_putpage with len = 0.
6227c478bd9Sstevel@tonic-gate * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty.
6237c478bd9Sstevel@tonic-gate * And it's necessary to do the same queuing if users have the same
6247c478bd9Sstevel@tonic-gate * B_ASYNC|B_FREE flags on.
6257c478bd9Sstevel@tonic-gate */
6267c478bd9Sstevel@tonic-gate if (flags == (B_ASYNC | B_FREE) &&
6277c478bd9Sstevel@tonic-gate sw_pending_size < klustsize && (arg = sw_getfree())) {
6287c478bd9Sstevel@tonic-gate
6297c478bd9Sstevel@tonic-gate hat_setmod(pp);
6307c478bd9Sstevel@tonic-gate page_io_unlock(pp);
6317c478bd9Sstevel@tonic-gate page_unlock(pp);
6327c478bd9Sstevel@tonic-gate
6337c478bd9Sstevel@tonic-gate arg->a_vp = vp;
6347c478bd9Sstevel@tonic-gate arg->a_off = pp->p_offset;
6357c478bd9Sstevel@tonic-gate arg->a_len = PAGESIZE;
6367c478bd9Sstevel@tonic-gate arg->a_flags = B_ASYNC | B_FREE;
6377c478bd9Sstevel@tonic-gate arg->a_cred = kcred;
6387c478bd9Sstevel@tonic-gate sw_putreq(arg);
6397c478bd9Sstevel@tonic-gate
6407c478bd9Sstevel@tonic-gate return (0);
6417c478bd9Sstevel@tonic-gate }
6427c478bd9Sstevel@tonic-gate
6437c478bd9Sstevel@tonic-gate SWAPFS_PRINT(SWAP_PUTP,
6447c478bd9Sstevel@tonic-gate "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
6457c478bd9Sstevel@tonic-gate pp, vp, pp->p_offset, flags, 0);
6467c478bd9Sstevel@tonic-gate
6477c478bd9Sstevel@tonic-gate ASSERT(PAGE_LOCKED(pp));
6487c478bd9Sstevel@tonic-gate
6497c478bd9Sstevel@tonic-gate off = pp->p_offset;
6507c478bd9Sstevel@tonic-gate
6517c478bd9Sstevel@tonic-gate doff = off;
6527c478bd9Sstevel@tonic-gate dlen = PAGESIZE;
6537c478bd9Sstevel@tonic-gate
6547c478bd9Sstevel@tonic-gate if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
6557c478bd9Sstevel@tonic-gate err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
6567c478bd9Sstevel@tonic-gate hat_setmod(pp);
6577c478bd9Sstevel@tonic-gate page_io_unlock(pp);
6587c478bd9Sstevel@tonic-gate page_unlock(pp);
6597c478bd9Sstevel@tonic-gate goto out;
6607c478bd9Sstevel@tonic-gate }
6617c478bd9Sstevel@tonic-gate
6627c478bd9Sstevel@tonic-gate klvp = pvp;
6637c478bd9Sstevel@tonic-gate klstart = poff;
6647c478bd9Sstevel@tonic-gate pplist = pp;
6657c478bd9Sstevel@tonic-gate /*
6667c478bd9Sstevel@tonic-gate * If this is ASYNC | FREE and we've accumulated a bunch of such
6677c478bd9Sstevel@tonic-gate * pending requests, kluster.
6687c478bd9Sstevel@tonic-gate */
6697c478bd9Sstevel@tonic-gate if (flags == (B_ASYNC | B_FREE))
6707c478bd9Sstevel@tonic-gate swap_klustsize = klustsize;
6717c478bd9Sstevel@tonic-gate else
6727c478bd9Sstevel@tonic-gate swap_klustsize = PAGESIZE;
6737c478bd9Sstevel@tonic-gate se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
6747c478bd9Sstevel@tonic-gate klsz = PAGESIZE;
6757c478bd9Sstevel@tonic-gate while (klsz < swap_klustsize) {
6767c478bd9Sstevel@tonic-gate if ((arg = sw_getreq()) == NULL) {
6777c478bd9Sstevel@tonic-gate swap_getiofail++;
6787c478bd9Sstevel@tonic-gate swap_getiopages += btop(klsz);
6797c478bd9Sstevel@tonic-gate break;
6807c478bd9Sstevel@tonic-gate }
6817c478bd9Sstevel@tonic-gate ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
6827c478bd9Sstevel@tonic-gate vp = arg->a_vp;
6837c478bd9Sstevel@tonic-gate off = arg->a_off;
6847c478bd9Sstevel@tonic-gate
6857c478bd9Sstevel@tonic-gate if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
6867c478bd9Sstevel@tonic-gate swap_otherfail++;
6877c478bd9Sstevel@tonic-gate swap_otherpages += btop(klsz);
6887c478bd9Sstevel@tonic-gate sw_putfree(arg);
6897c478bd9Sstevel@tonic-gate break;
6907c478bd9Sstevel@tonic-gate }
6917c478bd9Sstevel@tonic-gate if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
6927c478bd9Sstevel@tonic-gate sw_putfree(arg);
6937c478bd9Sstevel@tonic-gate continue;
6947c478bd9Sstevel@tonic-gate }
6957c478bd9Sstevel@tonic-gate /* Get new physical backing store for the page */
6967c478bd9Sstevel@tonic-gate doff = off;
6977c478bd9Sstevel@tonic-gate dlen = PAGESIZE;
6987c478bd9Sstevel@tonic-gate if (err = swap_newphysname(vp, off, &doff, &dlen,
6997c478bd9Sstevel@tonic-gate &pvp, &poff)) {
7007c478bd9Sstevel@tonic-gate swap_otherfail++;
7017c478bd9Sstevel@tonic-gate swap_otherpages += btop(klsz);
7027c478bd9Sstevel@tonic-gate hat_setmod(pp);
7037c478bd9Sstevel@tonic-gate page_io_unlock(pp);
7047c478bd9Sstevel@tonic-gate page_unlock(pp);
7057c478bd9Sstevel@tonic-gate sw_putbackreq(arg);
7067c478bd9Sstevel@tonic-gate break;
7077c478bd9Sstevel@tonic-gate }
7087c478bd9Sstevel@tonic-gate /* Try to cluster new physical name with previous ones */
7097c478bd9Sstevel@tonic-gate if (klvp == pvp && poff == klstart + klsz) {
7107c478bd9Sstevel@tonic-gate klsz += PAGESIZE;
7117c478bd9Sstevel@tonic-gate page_add(&pplist, pp);
7127c478bd9Sstevel@tonic-gate pplist = pplist->p_next;
7137c478bd9Sstevel@tonic-gate sw_putfree(arg);
7147c478bd9Sstevel@tonic-gate } else if (klvp == pvp && poff == klstart - PAGESIZE) {
7157c478bd9Sstevel@tonic-gate klsz += PAGESIZE;
7167c478bd9Sstevel@tonic-gate klstart -= PAGESIZE;
7177c478bd9Sstevel@tonic-gate page_add(&pplist, pp);
7187c478bd9Sstevel@tonic-gate sw_putfree(arg);
7197c478bd9Sstevel@tonic-gate } else {
7207c478bd9Sstevel@tonic-gate swap_klustfail++;
7217c478bd9Sstevel@tonic-gate swap_klustpages += btop(klsz);
7227c478bd9Sstevel@tonic-gate hat_setmod(pp);
7237c478bd9Sstevel@tonic-gate page_io_unlock(pp);
7247c478bd9Sstevel@tonic-gate page_unlock(pp);
7257c478bd9Sstevel@tonic-gate sw_putbackreq(arg);
7267c478bd9Sstevel@tonic-gate break;
7277c478bd9Sstevel@tonic-gate }
7287c478bd9Sstevel@tonic-gate }
7297c478bd9Sstevel@tonic-gate
7307c478bd9Sstevel@tonic-gate err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
731da6c28aaSamw B_WRITE | flags, cr, NULL);
7327c478bd9Sstevel@tonic-gate
7337c478bd9Sstevel@tonic-gate if ((flags & B_ASYNC) == 0)
7347c478bd9Sstevel@tonic-gate pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
7357c478bd9Sstevel@tonic-gate
7367c478bd9Sstevel@tonic-gate /* Statistics */
7377c478bd9Sstevel@tonic-gate if (!err) {
7387c478bd9Sstevel@tonic-gate swap_putpagecnt++;
7397c478bd9Sstevel@tonic-gate swap_pagespushed += btop(klsz);
7407c478bd9Sstevel@tonic-gate }
7417c478bd9Sstevel@tonic-gate out:
7427c478bd9Sstevel@tonic-gate TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
7437c478bd9Sstevel@tonic-gate "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
7447c478bd9Sstevel@tonic-gate vp, klvp, klstart, klsz);
7457c478bd9Sstevel@tonic-gate if (err && err != ENOMEM)
7467c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
7477c478bd9Sstevel@tonic-gate if (lenp)
7487c478bd9Sstevel@tonic-gate *lenp = PAGESIZE;
7497c478bd9Sstevel@tonic-gate return (err);
7507c478bd9Sstevel@tonic-gate }
7517c478bd9Sstevel@tonic-gate
7527c478bd9Sstevel@tonic-gate static void
swap_dispose(vnode_t * vp,page_t * pp,int fl,int dn,cred_t * cr,caller_context_t * ct)753da6c28aaSamw swap_dispose(
754da6c28aaSamw vnode_t *vp,
755da6c28aaSamw page_t *pp,
756da6c28aaSamw int fl,
757da6c28aaSamw int dn,
758da6c28aaSamw cred_t *cr,
759da6c28aaSamw caller_context_t *ct)
7607c478bd9Sstevel@tonic-gate {
7617c478bd9Sstevel@tonic-gate int err;
7627c478bd9Sstevel@tonic-gate u_offset_t off = pp->p_offset;
7637c478bd9Sstevel@tonic-gate vnode_t *pvp;
7647c478bd9Sstevel@tonic-gate u_offset_t poff;
7657c478bd9Sstevel@tonic-gate
7667c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp));
7677c478bd9Sstevel@tonic-gate
7687c478bd9Sstevel@tonic-gate /*
7697c478bd9Sstevel@tonic-gate * The caller will free/invalidate large page in one shot instead of
7707c478bd9Sstevel@tonic-gate * one small page at a time.
7717c478bd9Sstevel@tonic-gate */
7727c478bd9Sstevel@tonic-gate if (pp->p_szc != 0) {
7737c478bd9Sstevel@tonic-gate page_unlock(pp);
7747c478bd9Sstevel@tonic-gate return;
7757c478bd9Sstevel@tonic-gate }
7767c478bd9Sstevel@tonic-gate
7777c478bd9Sstevel@tonic-gate err = swap_getphysname(vp, off, &pvp, &poff);
7787c478bd9Sstevel@tonic-gate if (!err && pvp != NULL)
779da6c28aaSamw VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
7807c478bd9Sstevel@tonic-gate else
781da6c28aaSamw fs_dispose(vp, pp, fl, dn, cr, ct);
7827c478bd9Sstevel@tonic-gate }
783