xref: /titanic_41/usr/src/uts/common/fs/swapfs/swap_vnops.c (revision 2e14588420ccfbaa5be20605ed2be8b9802d1d49)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/buf.h>
32 #include <sys/cred.h>
33 #include <sys/errno.h>
34 #include <sys/vnode.h>
35 #include <sys/vfs_opreg.h>
36 #include <sys/cmn_err.h>
37 #include <sys/swap.h>
38 #include <sys/mman.h>
39 #include <sys/vmsystm.h>
40 #include <sys/vtrace.h>
41 #include <sys/debug.h>
42 #include <sys/sysmacros.h>
43 #include <sys/vm.h>
44 
45 #include <sys/fs/swapnode.h>
46 
47 #include <vm/seg.h>
48 #include <vm/page.h>
49 #include <vm/pvn.h>
50 #include <fs/fs_subr.h>
51 
52 #include <vm/seg_kp.h>
53 
54 /*
55  * Define the routines within this file.
56  */
57 static int	swap_getpage(struct vnode *vp, offset_t off, size_t len,
58     uint_t *protp, struct page **plarr, size_t plsz,
59     struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
60 static int	swap_putpage(struct vnode *vp, offset_t off, size_t len,
61     int flags, struct cred *cr);
62 static void	swap_inactive(struct vnode *vp, struct cred *cr);
63 static void	swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
64     cred_t *cr);
65 
66 static int	swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
67     uint_t *protp, page_t **plarr, size_t plsz,
68     struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
69 
70 int	swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
71     uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
72     uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
73     enum seg_rw rw, struct cred *cr);
74 
75 static int 	swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
76     size_t *lenp, int flags, struct cred *cr);
77 
78 const fs_operation_def_t swap_vnodeops_template[] = {
79 	VOPNAME_INACTIVE,	{ .vop_inactive = swap_inactive },
80 	VOPNAME_GETPAGE,	{ .vop_getpage = swap_getpage },
81 	VOPNAME_PUTPAGE,	{ .vop_putpage = swap_putpage },
82 	VOPNAME_DISPOSE,	{ .vop_dispose = swap_dispose },
83 	VOPNAME_SETFL,		{ .error = fs_error },
84 	VOPNAME_POLL,		{ .error = fs_error },
85 	VOPNAME_PATHCONF,	{ .error = fs_error },
86 	VOPNAME_GETSECATTR,	{ .error = fs_error },
87 	VOPNAME_SHRLOCK,	{ .error = fs_error },
88 	NULL,			NULL
89 };
90 
91 vnodeops_t *swap_vnodeops;
92 
93 /* ARGSUSED */
94 static void
95 swap_inactive(
96 	struct vnode *vp,
97 	struct cred *cr)
98 {
99 	SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
100 }
101 
102 /*
103  * Return all the pages from [off..off+len] in given file
104  */
105 static int
106 swap_getpage(
107 	struct vnode *vp,
108 	offset_t off,
109 	size_t len,
110 	uint_t *protp,
111 	page_t *pl[],
112 	size_t plsz,
113 	struct seg *seg,
114 	caddr_t addr,
115 	enum seg_rw rw,
116 	struct cred *cr)
117 {
118 	int err;
119 
120 	SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
121 	    (void *)vp, off, len, 0, 0);
122 
123 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
124 	    "swapfs getpage:vp %p off %llx len %ld",
125 	    (void *)vp, off, len);
126 
127 	if (len <= PAGESIZE) {
128 		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
129 		    seg, addr, rw, cr);
130 	} else {
131 		err = pvn_getpages(swap_getapage, vp, (u_offset_t)off, len,
132 		    protp, pl, plsz, seg, addr, rw, cr);
133 	}
134 
135 	return (err);
136 }
137 
138 /*
139  * Called from pvn_getpages or swap_getpage to get a particular page.
140  */
141 /*ARGSUSED*/
142 static int
143 swap_getapage(
144 	struct vnode *vp,
145 	u_offset_t off,
146 	size_t len,
147 	uint_t *protp,
148 	page_t *pl[],
149 	size_t plsz,
150 	struct seg *seg,
151 	caddr_t addr,
152 	enum seg_rw rw,
153 	struct cred *cr)
154 {
155 	struct page *pp, *rpp;
156 	int flags;
157 	int err = 0;
158 	struct vnode *pvp = NULL;
159 	u_offset_t poff;
160 	int flag_noreloc;
161 	se_t lock;
162 	extern int kcage_on;
163 	int upgrade = 0;
164 
165 	SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
166 		vp, off, len, 0, 0);
167 
168 	/*
169 	 * Until there is a call-back mechanism to cause SEGKP
170 	 * pages to be unlocked, make them non-relocatable.
171 	 */
172 	if (SEG_IS_SEGKP(seg))
173 		flag_noreloc = PG_NORELOC;
174 	else
175 		flag_noreloc = 0;
176 
177 	if (protp != NULL)
178 		*protp = PROT_ALL;
179 
180 	lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
181 
182 again:
183 	if (pp = page_lookup(vp, off, lock)) {
184 		/*
185 		 * In very rare instances, a segkp page may have been
186 		 * relocated outside of the kernel by the kernel cage
187 		 * due to the window between page_unlock() and
188 		 * VOP_PUTPAGE() in segkp_unlock().  Due to the
189 		 * rareness of these occurances, the solution is to
190 		 * relocate the page to a P_NORELOC page.
191 		 */
192 		if (flag_noreloc != 0) {
193 			if (!PP_ISNORELOC(pp) && kcage_on) {
194 				if (lock != SE_EXCL) {
195 					upgrade = 1;
196 					if (!page_tryupgrade(pp)) {
197 						page_unlock(pp);
198 						lock = SE_EXCL;
199 						goto again;
200 					}
201 				}
202 
203 				if (page_relocate_cage(&pp, &rpp) != 0)
204 					panic("swap_getapage: "
205 					    "page_relocate_cage failed");
206 
207 				pp = rpp;
208 			}
209 		}
210 
211 		if (pl) {
212 			if (upgrade)
213 				page_downgrade(pp);
214 
215 			pl[0] = pp;
216 			pl[1] = NULL;
217 		} else {
218 			page_unlock(pp);
219 		}
220 	} else {
221 		pp = page_create_va(vp, off, PAGESIZE,
222 		    PG_WAIT | PG_EXCL | flag_noreloc,
223 		    seg, addr);
224 		/*
225 		 * Someone raced in and created the page after we did the
226 		 * lookup but before we did the create, so go back and
227 		 * try to look it up again.
228 		 */
229 		if (pp == NULL)
230 			goto again;
231 		if (rw != S_CREATE) {
232 			err = swap_getphysname(vp, off, &pvp, &poff);
233 			if (pvp) {
234 				struct anon *ap;
235 				kmutex_t *ahm;
236 
237 				flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
238 				err = VOP_PAGEIO(pvp, pp, poff,
239 				    PAGESIZE, flags, cr);
240 
241 				if (!err) {
242 					ahm = &anonhash_lock[AH_LOCK(vp, off)];
243 					mutex_enter(ahm);
244 
245 					ap = swap_anon(vp, off);
246 					if (ap == NULL)
247 					    panic("swap_getapage: null anon");
248 
249 					if (ap->an_pvp == pvp &&
250 					    ap->an_poff == poff) {
251 						swap_phys_free(pvp, poff,
252 						    PAGESIZE);
253 						ap->an_pvp = NULL;
254 						ap->an_poff = NULL;
255 						hat_setmod(pp);
256 					}
257 
258 					mutex_exit(ahm);
259 				}
260 			} else {
261 				if (!err)
262 					pagezero(pp, 0, PAGESIZE);
263 
264 				/*
265 				 * If it's a fault ahead, release page_io_lock
266 				 * and SE_EXCL we grabbed in page_create_va
267 				 *
268 				 * If we are here, we haven't called VOP_PAGEIO
269 				 * and thus calling pvn_read_done(pp, B_READ)
270 				 * below may mislead that we tried i/o. Besides,
271 				 * in case of async, pvn_read_done() should
272 				 * not be called by *getpage()
273 				 */
274 				if (pl == NULL) {
275 					/*
276 					 * swap_getphysname can return error
277 					 * only when we are getting called from
278 					 * swapslot_free which passes non-NULL
279 					 * pl to VOP_GETPAGE.
280 					 */
281 					ASSERT(err == 0);
282 					page_io_unlock(pp);
283 					page_unlock(pp);
284 				}
285 			}
286 		}
287 
288 		ASSERT(pp != NULL);
289 
290 		if (err && pl)
291 			pvn_read_done(pp, B_ERROR);
292 
293 		if (!err && pl)
294 			pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
295 	}
296 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
297 		"swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
298 	return (err);
299 }
300 
301 /*
302  * Called from large page anon routines only! This is an ugly hack where
303  * the anon layer directly calls into swapfs with a preallocated large page.
304  * Another method would have been to change to VOP and add an extra arg for
305  * the preallocated large page. This all could be cleaned up later when we
306  * solve the anonymous naming problem and no longer need to loop across of
307  * the VOP in PAGESIZE increments to fill in or initialize a large page as
308  * is done today. I think the latter is better since it avoid a change to
309  * the VOP interface that could later be avoided.
310  */
311 int
312 swap_getconpage(
313 	struct vnode *vp,
314 	u_offset_t off,
315 	size_t len,
316 	uint_t *protp,
317 	page_t *pl[],
318 	size_t plsz,
319 	page_t	*conpp,
320 	uint_t	*pszc,
321 	spgcnt_t *nreloc,
322 	struct seg *seg,
323 	caddr_t addr,
324 	enum seg_rw rw,
325 	struct cred *cr)
326 {
327 	struct page	*pp;
328 	int 		err = 0;
329 	struct vnode	*pvp = NULL;
330 	u_offset_t	poff;
331 
332 	ASSERT(len == PAGESIZE);
333 	ASSERT(pl != NULL);
334 	ASSERT(plsz == PAGESIZE);
335 	ASSERT(protp == NULL);
336 	ASSERT(nreloc != NULL);
337 	ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
338 	SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
339 		vp, off, len, 0, 0);
340 
341 	/*
342 	 * If we are not using a preallocated page then we know one already
343 	 * exists. So just let the old code handle it.
344 	 */
345 	if (conpp == NULL) {
346 		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
347 		    seg, addr, rw, cr);
348 		return (err);
349 	}
350 	ASSERT(conpp->p_szc != 0);
351 	ASSERT(PAGE_EXCL(conpp));
352 
353 
354 	ASSERT(conpp->p_next == conpp);
355 	ASSERT(conpp->p_prev == conpp);
356 	ASSERT(!PP_ISAGED(conpp));
357 	ASSERT(!PP_ISFREE(conpp));
358 
359 	*nreloc = 0;
360 	pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);
361 
362 	/*
363 	 * If existing page is found we may need to relocate.
364 	 */
365 	if (pp != conpp) {
366 		ASSERT(rw != S_CREATE);
367 		ASSERT(pszc != NULL);
368 		ASSERT(PAGE_SHARED(pp));
369 		if (pp->p_szc < conpp->p_szc) {
370 			*pszc = pp->p_szc;
371 			page_unlock(pp);
372 			err = -1;
373 		} else if (pp->p_szc > conpp->p_szc &&
374 		    seg->s_szc > conpp->p_szc) {
375 			*pszc = MIN(pp->p_szc, seg->s_szc);
376 			page_unlock(pp);
377 			err = -2;
378 		} else {
379 			pl[0] = pp;
380 			pl[1] = NULL;
381 			if (page_pptonum(pp) &
382 			    (page_get_pagecnt(conpp->p_szc) - 1))
383 			    cmn_err(CE_PANIC, "swap_getconpage: no root");
384 		}
385 		return (err);
386 	}
387 
388 	ASSERT(PAGE_EXCL(pp));
389 
390 	if (*nreloc != 0) {
391 		ASSERT(rw != S_CREATE);
392 		pl[0] = pp;
393 		pl[1] = NULL;
394 		return (0);
395 	}
396 
397 	*nreloc = 1;
398 
399 	/*
400 	 * If necessary do the page io.
401 	 */
402 	if (rw != S_CREATE) {
403 		/*
404 		 * Since we are only called now on behalf of an
405 		 * address space operation it's impossible for
406 		 * us to fail unlike swap_getapge() which
407 		 * also gets called from swapslot_free().
408 		 */
409 		if (swap_getphysname(vp, off, &pvp, &poff)) {
410 			cmn_err(CE_PANIC,
411 			    "swap_getconpage: swap_getphysname failed!");
412 		}
413 
414 		if (pvp) {
415 			err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ, cr);
416 		} else {
417 			pagezero(pp, 0, PAGESIZE);
418 		}
419 	}
420 
421 	/*
422 	 * Normally we would let pvn_read_done() destroy
423 	 * the page on IO error. But since this is a preallocated
424 	 * page we'll let the anon layer handle it.
425 	 */
426 	page_io_unlock(pp);
427 	if (err != 0)
428 		page_hashout(pp, NULL);
429 	ASSERT(pp->p_next == pp);
430 	ASSERT(pp->p_prev == pp);
431 
432 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
433 		"swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);
434 
435 	pl[0] = pp;
436 	pl[1] = NULL;
437 	return (err);
438 }
439 
440 /* Async putpage klustering stuff */
441 int sw_pending_size;
442 extern int klustsize;
443 extern struct async_reqs *sw_getreq();
444 extern void sw_putreq(struct async_reqs *);
445 extern void sw_putbackreq(struct async_reqs *);
446 extern struct async_reqs *sw_getfree();
447 extern void sw_putfree(struct async_reqs *);
448 
449 static size_t swap_putpagecnt, swap_pagespushed;
450 static size_t swap_otherfail, swap_otherpages;
451 static size_t swap_klustfail, swap_klustpages;
452 static size_t swap_getiofail, swap_getiopages;
453 
454 /*
455  * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED}.
456  * If len == 0, do from off to EOF.
457  */
458 static int swap_nopage = 0;	/* Don't do swap_putpage's if set */
459 
460 /* ARGSUSED */
461 static int
462 swap_putpage(
463 	struct vnode *vp,
464 	offset_t off,
465 	size_t len,
466 	int flags,
467 	struct cred *cr)
468 {
469 	page_t *pp;
470 	u_offset_t io_off;
471 	size_t io_len = 0;
472 	int err = 0;
473 	struct async_reqs *arg;
474 
475 	if (swap_nopage)
476 		return (0);
477 
478 	ASSERT(vp->v_count != 0);
479 
480 	/*
481 	 * Clear force flag so that p_lckcnt pages are not invalidated.
482 	 */
483 	flags &= ~B_FORCE;
484 
485 	SWAPFS_PRINT(SWAP_VOPS,
486 	    "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
487 	    (void *)vp, off, len, flags, 0);
488 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
489 	    "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);
490 
491 	if (vp->v_flag & VNOMAP)
492 		return (ENOSYS);
493 
494 	if (!vn_has_cached_data(vp))
495 		return (0);
496 
497 	if (len == 0) {
498 		if (curproc == proc_pageout)
499 			cmn_err(CE_PANIC, "swapfs: pageout can't block");
500 
501 		/* Search the entire vp list for pages >= off. */
502 		err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
503 		    flags, cr);
504 	} else {
505 		u_offset_t eoff;
506 
507 		/*
508 		 * Loop over all offsets in the range [off...off + len]
509 		 * looking for pages to deal with.
510 		 */
511 		eoff = off + len;
512 		for (io_off = (u_offset_t)off; io_off < eoff;
513 		    io_off += io_len) {
514 			/*
515 			 * If we run out of the async req slot, put the page
516 			 * now instead of queuing.
517 			 */
518 			if (flags == (B_ASYNC | B_FREE) &&
519 			    sw_pending_size < klustsize &&
520 			    (arg = sw_getfree())) {
521 				/*
522 				 * If we are clustering, we should allow
523 				 * pageout to feed us more pages because # of
524 				 * pushes is limited by # of I/Os, and one
525 				 * cluster is considered to be one I/O.
526 				 */
527 				if (pushes)
528 					pushes--;
529 
530 				arg->a_vp = vp;
531 				arg->a_off = io_off;
532 				arg->a_len = PAGESIZE;
533 				arg->a_flags = B_ASYNC | B_FREE;
534 				arg->a_cred = kcred;
535 				sw_putreq(arg);
536 				io_len = PAGESIZE;
537 				continue;
538 			}
539 			/*
540 			 * If we are not invalidating pages, use the
541 			 * routine page_lookup_nowait() to prevent
542 			 * reclaiming them from the free list.
543 			 */
544 			if ((flags & B_INVAL) ||
545 			    (flags & (B_ASYNC | B_FREE)) == B_FREE)
546 				pp = page_lookup(vp, io_off, SE_EXCL);
547 			else
548 				pp = page_lookup_nowait(vp, io_off,
549 					(flags & B_FREE) ? SE_EXCL : SE_SHARED);
550 
551 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
552 				io_len = PAGESIZE;
553 			else {
554 				err = swap_putapage(vp, pp, &io_off, &io_len,
555 				    flags, cr);
556 				if (err != 0)
557 					break;
558 			}
559 		}
560 	}
561 	/* If invalidating, verify all pages on vnode list are gone. */
562 	if (err == 0 && off == 0 && len == 0 &&
563 	    (flags & B_INVAL) && vn_has_cached_data(vp)) {
564 		cmn_err(CE_WARN,
565 		    "swap_putpage: B_INVAL, pages not gone");
566 	}
567 	return (err);
568 }
569 
570 /*
571  * Write out a single page.
572  * For swapfs this means choose a physical swap slot and write the page
573  * out using VOP_PAGEIO.
574  * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
575  * swapfs pages, a bunch of contiguous swap slots and then write them
576  * all out in one clustered i/o.
577  */
578 /*ARGSUSED*/
579 static int
580 swap_putapage(
581 	struct vnode *vp,
582 	page_t *pp,
583 	u_offset_t *offp,
584 	size_t *lenp,
585 	int flags,
586 	struct cred *cr)
587 {
588 	int err;
589 	struct vnode *pvp;
590 	u_offset_t poff, off;
591 	u_offset_t doff;
592 	size_t dlen;
593 	size_t klsz = 0;
594 	u_offset_t klstart = 0;
595 	struct vnode *klvp = NULL;
596 	page_t *pplist;
597 	se_t se;
598 	struct async_reqs *arg;
599 	size_t swap_klustsize;
600 
601 	/*
602 	 * This check is added for callers who access swap_putpage with len = 0.
603 	 * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty.
604 	 * And it's necessary to do the same queuing if users have the same
605 	 * B_ASYNC|B_FREE flags on.
606 	 */
607 	if (flags == (B_ASYNC | B_FREE) &&
608 	    sw_pending_size < klustsize && (arg = sw_getfree())) {
609 
610 		hat_setmod(pp);
611 		page_io_unlock(pp);
612 		page_unlock(pp);
613 
614 		arg->a_vp = vp;
615 		arg->a_off = pp->p_offset;
616 		arg->a_len = PAGESIZE;
617 		arg->a_flags = B_ASYNC | B_FREE;
618 		arg->a_cred = kcred;
619 		sw_putreq(arg);
620 
621 		return (0);
622 	}
623 
624 	SWAPFS_PRINT(SWAP_PUTP,
625 		"swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
626 		pp, vp, pp->p_offset, flags, 0);
627 
628 	ASSERT(PAGE_LOCKED(pp));
629 
630 	off = pp->p_offset;
631 
632 	doff = off;
633 	dlen = PAGESIZE;
634 
635 	if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
636 		err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
637 		hat_setmod(pp);
638 		page_io_unlock(pp);
639 		page_unlock(pp);
640 		goto out;
641 	}
642 
643 	klvp = pvp;
644 	klstart = poff;
645 	pplist = pp;
646 	/*
647 	 * If this is ASYNC | FREE and we've accumulated a bunch of such
648 	 * pending requests, kluster.
649 	 */
650 	if (flags == (B_ASYNC | B_FREE))
651 		swap_klustsize = klustsize;
652 	else
653 		swap_klustsize = PAGESIZE;
654 	se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
655 	klsz = PAGESIZE;
656 	while (klsz < swap_klustsize) {
657 		if ((arg = sw_getreq()) == NULL) {
658 			swap_getiofail++;
659 			swap_getiopages += btop(klsz);
660 			break;
661 		}
662 		ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
663 		vp = arg->a_vp;
664 		off = arg->a_off;
665 
666 		if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
667 			swap_otherfail++;
668 			swap_otherpages += btop(klsz);
669 			sw_putfree(arg);
670 			break;
671 		}
672 		if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
673 			sw_putfree(arg);
674 			continue;
675 		}
676 		/* Get new physical backing store for the page */
677 		doff = off;
678 		dlen = PAGESIZE;
679 		if (err = swap_newphysname(vp, off, &doff, &dlen,
680 						&pvp, &poff)) {
681 			swap_otherfail++;
682 			swap_otherpages += btop(klsz);
683 			hat_setmod(pp);
684 			page_io_unlock(pp);
685 			page_unlock(pp);
686 			sw_putbackreq(arg);
687 			break;
688 		}
689 		/* Try to cluster new physical name with previous ones */
690 		if (klvp == pvp && poff == klstart + klsz) {
691 			klsz += PAGESIZE;
692 			page_add(&pplist, pp);
693 			pplist = pplist->p_next;
694 			sw_putfree(arg);
695 		} else if (klvp == pvp && poff == klstart - PAGESIZE) {
696 			klsz += PAGESIZE;
697 			klstart -= PAGESIZE;
698 			page_add(&pplist, pp);
699 			sw_putfree(arg);
700 		} else {
701 			swap_klustfail++;
702 			swap_klustpages += btop(klsz);
703 			hat_setmod(pp);
704 			page_io_unlock(pp);
705 			page_unlock(pp);
706 			sw_putbackreq(arg);
707 			break;
708 		}
709 	}
710 
711 	err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
712 		    B_WRITE | flags, cr);
713 
714 	if ((flags & B_ASYNC) == 0)
715 		pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
716 
717 	/* Statistics */
718 	if (!err) {
719 		swap_putpagecnt++;
720 		swap_pagespushed += btop(klsz);
721 	}
722 out:
723 	TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
724 		"swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
725 		vp, klvp, klstart, klsz);
726 	if (err && err != ENOMEM)
727 		cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
728 	if (lenp)
729 		*lenp = PAGESIZE;
730 	return (err);
731 }
732 
733 static void
734 swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr)
735 {
736 	int err;
737 	u_offset_t off = pp->p_offset;
738 	vnode_t *pvp;
739 	u_offset_t poff;
740 
741 	ASSERT(PAGE_EXCL(pp));
742 
743 	/*
744 	 * The caller will free/invalidate large page in one shot instead of
745 	 * one small page at a time.
746 	 */
747 	if (pp->p_szc != 0) {
748 		page_unlock(pp);
749 		return;
750 	}
751 
752 	err = swap_getphysname(vp, off, &pvp, &poff);
753 	if (!err && pvp != NULL)
754 		VOP_DISPOSE(pvp, pp, fl, dn, cr);
755 	else
756 		fs_dispose(vp, pp, fl, dn, cr);
757 }
758