xref: /titanic_50/usr/src/uts/common/fs/swapfs/swap_vnops.c (revision 381a2a9a387f449fab7d0c7e97c4184c26963abf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/buf.h>
32 #include <sys/cred.h>
33 #include <sys/errno.h>
34 #include <sys/vnode.h>
35 #include <sys/cmn_err.h>
36 #include <sys/swap.h>
37 #include <sys/mman.h>
38 #include <sys/vmsystm.h>
39 #include <sys/vtrace.h>
40 #include <sys/debug.h>
41 #include <sys/sysmacros.h>
42 #include <sys/vm.h>
43 
44 #include <sys/fs/swapnode.h>
45 
46 #include <vm/seg.h>
47 #include <vm/page.h>
48 #include <vm/pvn.h>
49 #include <fs/fs_subr.h>
50 
51 #include <vm/seg_kp.h>
52 
53 /*
54  * Define the routines within this file.
55  */
56 static int	swap_getpage(struct vnode *vp, offset_t off, size_t len,
57     uint_t *protp, struct page **plarr, size_t plsz,
58     struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
59 static int	swap_putpage(struct vnode *vp, offset_t off, size_t len,
60     int flags, struct cred *cr);
61 static void	swap_inactive(struct vnode *vp, struct cred *cr);
62 static void	swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
63     cred_t *cr);
64 
65 static int	swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
66     uint_t *protp, page_t **plarr, size_t plsz,
67     struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
68 
69 int	swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
70     uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
71     uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
72     enum seg_rw rw, struct cred *cr);
73 
74 static int 	swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
75     size_t *lenp, int flags, struct cred *cr);
76 
77 const fs_operation_def_t swap_vnodeops_template[] = {
78 	VOPNAME_INACTIVE, (fs_generic_func_p) swap_inactive,
79 	VOPNAME_GETPAGE, swap_getpage,
80 	VOPNAME_PUTPAGE, swap_putpage,
81 	VOPNAME_DISPOSE, (fs_generic_func_p) swap_dispose,
82 	VOPNAME_SETFL, fs_error,
83 	VOPNAME_POLL, fs_error,
84 	VOPNAME_PATHCONF, fs_error,
85 	VOPNAME_GETSECATTR, fs_error,
86 	VOPNAME_SHRLOCK, fs_error,
87 	NULL, NULL
88 };
89 
90 vnodeops_t *swap_vnodeops;
91 
92 /* ARGSUSED */
93 static void
94 swap_inactive(
95 	struct vnode *vp,
96 	struct cred *cr)
97 {
98 	SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
99 }
100 
101 /*
102  * Return all the pages from [off..off+len] in given file
103  */
104 static int
105 swap_getpage(
106 	struct vnode *vp,
107 	offset_t off,
108 	size_t len,
109 	uint_t *protp,
110 	page_t *pl[],
111 	size_t plsz,
112 	struct seg *seg,
113 	caddr_t addr,
114 	enum seg_rw rw,
115 	struct cred *cr)
116 {
117 	int err;
118 
119 	SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
120 	    (void *)vp, off, len, 0, 0);
121 
122 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
123 	    "swapfs getpage:vp %p off %llx len %ld",
124 	    (void *)vp, off, len);
125 
126 	if (len <= PAGESIZE) {
127 		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
128 		    seg, addr, rw, cr);
129 	} else {
130 		err = pvn_getpages(swap_getapage, vp, (u_offset_t)off, len,
131 		    protp, pl, plsz, seg, addr, rw, cr);
132 	}
133 
134 	return (err);
135 }
136 
137 /*
138  * Called from pvn_getpages or swap_getpage to get a particular page.
139  */
140 /*ARGSUSED*/
141 static int
142 swap_getapage(
143 	struct vnode *vp,
144 	u_offset_t off,
145 	size_t len,
146 	uint_t *protp,
147 	page_t *pl[],
148 	size_t plsz,
149 	struct seg *seg,
150 	caddr_t addr,
151 	enum seg_rw rw,
152 	struct cred *cr)
153 {
154 	struct page *pp, *rpp;
155 	int flags;
156 	int err = 0;
157 	struct vnode *pvp = NULL;
158 	u_offset_t poff;
159 	int flag_noreloc;
160 	se_t lock;
161 	extern int kcage_on;
162 	int upgrade = 0;
163 
164 	SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
165 		vp, off, len, 0, 0);
166 
167 	/*
168 	 * Until there is a call-back mechanism to cause SEGKP
169 	 * pages to be unlocked, make them non-relocatable.
170 	 */
171 	if (SEG_IS_SEGKP(seg))
172 		flag_noreloc = PG_NORELOC;
173 	else
174 		flag_noreloc = 0;
175 
176 	if (protp != NULL)
177 		*protp = PROT_ALL;
178 
179 	lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
180 
181 again:
182 	if (pp = page_lookup(vp, off, lock)) {
183 		/*
184 		 * In very rare instances, a segkp page may have been
185 		 * relocated outside of the kernel by the kernel cage
186 		 * due to the window between page_unlock() and
187 		 * VOP_PUTPAGE() in segkp_unlock().  Due to the
188 		 * rareness of these occurances, the solution is to
189 		 * relocate the page to a P_NORELOC page.
190 		 */
191 		if (flag_noreloc != 0) {
192 			if (!PP_ISNORELOC(pp) && kcage_on) {
193 				if (lock != SE_EXCL) {
194 					upgrade = 1;
195 					if (!page_tryupgrade(pp)) {
196 						page_unlock(pp);
197 						lock = SE_EXCL;
198 						goto again;
199 					}
200 				}
201 
202 				if (page_relocate_cage(&pp, &rpp) != 0)
203 					panic("swap_getapage: "
204 					    "page_relocate_cage failed");
205 
206 				pp = rpp;
207 			}
208 		}
209 
210 		if (pl) {
211 			if (upgrade)
212 				page_downgrade(pp);
213 
214 			pl[0] = pp;
215 			pl[1] = NULL;
216 		} else {
217 			page_unlock(pp);
218 		}
219 	} else {
220 		pp = page_create_va(vp, off, PAGESIZE,
221 		    PG_WAIT | PG_EXCL | flag_noreloc,
222 		    seg, addr);
223 		/*
224 		 * Someone raced in and created the page after we did the
225 		 * lookup but before we did the create, so go back and
226 		 * try to look it up again.
227 		 */
228 		if (pp == NULL)
229 			goto again;
230 		if (rw != S_CREATE) {
231 			err = swap_getphysname(vp, off, &pvp, &poff);
232 			if (pvp) {
233 				struct anon *ap;
234 				kmutex_t *ahm;
235 
236 				flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
237 				err = VOP_PAGEIO(pvp, pp, poff,
238 				    PAGESIZE, flags, cr);
239 
240 				if (!err) {
241 					ahm = &anonhash_lock[AH_LOCK(vp, off)];
242 					mutex_enter(ahm);
243 
244 					ap = swap_anon(vp, off);
245 					if (ap == NULL)
246 					    panic("swap_getapage: null anon");
247 
248 					if (ap->an_pvp == pvp &&
249 					    ap->an_poff == poff) {
250 						swap_phys_free(pvp, poff,
251 						    PAGESIZE);
252 						ap->an_pvp = NULL;
253 						ap->an_poff = NULL;
254 						hat_setmod(pp);
255 					}
256 
257 					mutex_exit(ahm);
258 				}
259 			} else {
260 				if (!err)
261 					pagezero(pp, 0, PAGESIZE);
262 
263 				/*
264 				 * If it's a fault ahead, release page_io_lock
265 				 * and SE_EXCL we grabbed in page_create_va
266 				 *
267 				 * If we are here, we haven't called VOP_PAGEIO
268 				 * and thus calling pvn_read_done(pp, B_READ)
269 				 * below may mislead that we tried i/o. Besides,
270 				 * in case of async, pvn_read_done() should
271 				 * not be called by *getpage()
272 				 */
273 				if (pl == NULL) {
274 					/*
275 					 * swap_getphysname can return error
276 					 * only when we are getting called from
277 					 * swapslot_free which passes non-NULL
278 					 * pl to VOP_GETPAGE.
279 					 */
280 					ASSERT(err == 0);
281 					page_io_unlock(pp);
282 					page_unlock(pp);
283 				}
284 			}
285 		}
286 
287 		ASSERT(pp != NULL);
288 
289 		if (err && pl)
290 			pvn_read_done(pp, B_ERROR);
291 
292 		if (!err && pl)
293 			pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
294 	}
295 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
296 		"swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
297 	return (err);
298 }
299 
300 /*
301  * Called from large page anon routines only! This is an ugly hack where
302  * the anon layer directly calls into swapfs with a preallocated large page.
303  * Another method would have been to change to VOP and add an extra arg for
304  * the preallocated large page. This all could be cleaned up later when we
305  * solve the anonymous naming problem and no longer need to loop across of
306  * the VOP in PAGESIZE increments to fill in or initialize a large page as
307  * is done today. I think the latter is better since it avoid a change to
308  * the VOP interface that could later be avoided.
309  */
310 int
311 swap_getconpage(
312 	struct vnode *vp,
313 	u_offset_t off,
314 	size_t len,
315 	uint_t *protp,
316 	page_t *pl[],
317 	size_t plsz,
318 	page_t	*conpp,
319 	uint_t	*pszc,
320 	spgcnt_t *nreloc,
321 	struct seg *seg,
322 	caddr_t addr,
323 	enum seg_rw rw,
324 	struct cred *cr)
325 {
326 	struct page	*pp;
327 	int 		err = 0;
328 	struct vnode	*pvp = NULL;
329 	u_offset_t	poff;
330 
331 	ASSERT(len == PAGESIZE);
332 	ASSERT(pl != NULL);
333 	ASSERT(plsz == PAGESIZE);
334 	ASSERT(protp == NULL);
335 	ASSERT(nreloc != NULL);
336 	ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
337 	SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
338 		vp, off, len, 0, 0);
339 
340 	/*
341 	 * If we are not using a preallocated page then we know one already
342 	 * exists. So just let the old code handle it.
343 	 */
344 	if (conpp == NULL) {
345 		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
346 		    seg, addr, rw, cr);
347 		return (err);
348 	}
349 	ASSERT(conpp->p_szc != 0);
350 	ASSERT(PAGE_EXCL(conpp));
351 
352 
353 	ASSERT(conpp->p_next == conpp);
354 	ASSERT(conpp->p_prev == conpp);
355 	ASSERT(!PP_ISAGED(conpp));
356 	ASSERT(!PP_ISFREE(conpp));
357 
358 	*nreloc = 0;
359 	pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);
360 
361 	/*
362 	 * If existing page is found we may need to relocate.
363 	 */
364 	if (pp != conpp) {
365 		ASSERT(rw != S_CREATE);
366 		ASSERT(pszc != NULL);
367 		ASSERT(PAGE_SHARED(pp));
368 		if (pp->p_szc < conpp->p_szc) {
369 			*pszc = pp->p_szc;
370 			page_unlock(pp);
371 			err = -1;
372 		} else if (pp->p_szc > conpp->p_szc &&
373 		    seg->s_szc > conpp->p_szc) {
374 			*pszc = MIN(pp->p_szc, seg->s_szc);
375 			page_unlock(pp);
376 			err = -2;
377 		} else {
378 			pl[0] = pp;
379 			pl[1] = NULL;
380 			if (page_pptonum(pp) &
381 			    (page_get_pagecnt(conpp->p_szc) - 1))
382 			    cmn_err(CE_PANIC, "swap_getconpage: no root");
383 		}
384 		return (err);
385 	}
386 
387 	ASSERT(PAGE_EXCL(pp));
388 
389 	if (*nreloc != 0) {
390 		ASSERT(rw != S_CREATE);
391 		pl[0] = pp;
392 		pl[1] = NULL;
393 		return (0);
394 	}
395 
396 	*nreloc = 1;
397 
398 	/*
399 	 * If necessary do the page io.
400 	 */
401 	if (rw != S_CREATE) {
402 		/*
403 		 * Since we are only called now on behalf of an
404 		 * address space operation it's impossible for
405 		 * us to fail unlike swap_getapge() which
406 		 * also gets called from swapslot_free().
407 		 */
408 		if (swap_getphysname(vp, off, &pvp, &poff)) {
409 			cmn_err(CE_PANIC,
410 			    "swap_getconpage: swap_getphysname failed!");
411 		}
412 
413 		if (pvp) {
414 			err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ, cr);
415 		} else {
416 			pagezero(pp, 0, PAGESIZE);
417 		}
418 	}
419 
420 	/*
421 	 * Normally we would let pvn_read_done() destroy
422 	 * the page on IO error. But since this is a preallocated
423 	 * page we'll let the anon layer handle it.
424 	 */
425 	page_io_unlock(pp);
426 	if (err != 0)
427 		page_hashout(pp, NULL);
428 	ASSERT(pp->p_next == pp);
429 	ASSERT(pp->p_prev == pp);
430 
431 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
432 		"swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);
433 
434 	pl[0] = pp;
435 	pl[1] = NULL;
436 	return (err);
437 }
438 
439 /* Async putpage klustering stuff */
440 int sw_pending_size;
441 extern int klustsize;
442 extern struct async_reqs *sw_getreq();
443 extern void sw_putreq(struct async_reqs *);
444 extern void sw_putbackreq(struct async_reqs *);
445 extern struct async_reqs *sw_getfree();
446 extern void sw_putfree(struct async_reqs *);
447 
448 static size_t swap_putpagecnt, swap_pagespushed;
449 static size_t swap_otherfail, swap_otherpages;
450 static size_t swap_klustfail, swap_klustpages;
451 static size_t swap_getiofail, swap_getiopages;
452 
453 /*
454  * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED}.
455  * If len == 0, do from off to EOF.
456  */
457 static int swap_nopage = 0;	/* Don't do swap_putpage's if set */
458 
459 /* ARGSUSED */
460 static int
461 swap_putpage(
462 	struct vnode *vp,
463 	offset_t off,
464 	size_t len,
465 	int flags,
466 	struct cred *cr)
467 {
468 	page_t *pp;
469 	u_offset_t io_off;
470 	size_t io_len = 0;
471 	int err = 0;
472 	struct async_reqs *arg;
473 
474 	if (swap_nopage)
475 		return (0);
476 
477 	ASSERT(vp->v_count != 0);
478 
479 	/*
480 	 * Clear force flag so that p_lckcnt pages are not invalidated.
481 	 */
482 	flags &= ~B_FORCE;
483 
484 	SWAPFS_PRINT(SWAP_VOPS,
485 	    "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
486 	    (void *)vp, off, len, flags, 0);
487 	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
488 	    "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);
489 
490 	if (vp->v_flag & VNOMAP)
491 		return (ENOSYS);
492 
493 	if (!vn_has_cached_data(vp))
494 		return (0);
495 
496 	if (len == 0) {
497 		if (curproc == proc_pageout)
498 			cmn_err(CE_PANIC, "swapfs: pageout can't block");
499 
500 		/* Search the entire vp list for pages >= off. */
501 		err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
502 		    flags, cr);
503 	} else {
504 		u_offset_t eoff;
505 
506 		/*
507 		 * Loop over all offsets in the range [off...off + len]
508 		 * looking for pages to deal with.
509 		 */
510 		eoff = off + len;
511 		for (io_off = (u_offset_t)off; io_off < eoff;
512 		    io_off += io_len) {
513 			/*
514 			 * If we run out of the async req slot, put the page
515 			 * now instead of queuing.
516 			 */
517 			if (flags == (B_ASYNC | B_FREE) &&
518 			    sw_pending_size < klustsize &&
519 			    (arg = sw_getfree())) {
520 				/*
521 				 * If we are clustering, we should allow
522 				 * pageout to feed us more pages because # of
523 				 * pushes is limited by # of I/Os, and one
524 				 * cluster is considered to be one I/O.
525 				 */
526 				if (pushes)
527 					pushes--;
528 
529 				arg->a_vp = vp;
530 				arg->a_off = io_off;
531 				arg->a_len = PAGESIZE;
532 				arg->a_flags = B_ASYNC | B_FREE;
533 				arg->a_cred = kcred;
534 				sw_putreq(arg);
535 				io_len = PAGESIZE;
536 				continue;
537 			}
538 			/*
539 			 * If we are not invalidating pages, use the
540 			 * routine page_lookup_nowait() to prevent
541 			 * reclaiming them from the free list.
542 			 */
543 			if ((flags & B_INVAL) ||
544 			    (flags & (B_ASYNC | B_FREE)) == B_FREE)
545 				pp = page_lookup(vp, io_off, SE_EXCL);
546 			else
547 				pp = page_lookup_nowait(vp, io_off,
548 					(flags & B_FREE) ? SE_EXCL : SE_SHARED);
549 
550 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
551 				io_len = PAGESIZE;
552 			else {
553 				err = swap_putapage(vp, pp, &io_off, &io_len,
554 				    flags, cr);
555 				if (err != 0)
556 					break;
557 			}
558 		}
559 	}
560 	/* If invalidating, verify all pages on vnode list are gone. */
561 	if (err == 0 && off == 0 && len == 0 &&
562 	    (flags & B_INVAL) && vn_has_cached_data(vp)) {
563 		cmn_err(CE_WARN,
564 		    "swap_putpage: B_INVAL, pages not gone");
565 	}
566 	return (err);
567 }
568 
569 /*
570  * Write out a single page.
571  * For swapfs this means choose a physical swap slot and write the page
572  * out using VOP_PAGEIO.
573  * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
574  * swapfs pages, a bunch of contiguous swap slots and then write them
575  * all out in one clustered i/o.
576  */
577 /*ARGSUSED*/
578 static int
579 swap_putapage(
580 	struct vnode *vp,
581 	page_t *pp,
582 	u_offset_t *offp,
583 	size_t *lenp,
584 	int flags,
585 	struct cred *cr)
586 {
587 	int err;
588 	struct vnode *pvp;
589 	u_offset_t poff, off;
590 	u_offset_t doff;
591 	size_t dlen;
592 	size_t klsz = 0;
593 	u_offset_t klstart = 0;
594 	struct vnode *klvp = NULL;
595 	page_t *pplist;
596 	se_t se;
597 	struct async_reqs *arg;
598 	size_t swap_klustsize;
599 
600 	/*
601 	 * This check is added for callers who access swap_putpage with len = 0.
602 	 * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty.
603 	 * And it's necessary to do the same queuing if users have the same
604 	 * B_ASYNC|B_FREE flags on.
605 	 */
606 	if (flags == (B_ASYNC | B_FREE) &&
607 	    sw_pending_size < klustsize && (arg = sw_getfree())) {
608 
609 		hat_setmod(pp);
610 		page_io_unlock(pp);
611 		page_unlock(pp);
612 
613 		arg->a_vp = vp;
614 		arg->a_off = pp->p_offset;
615 		arg->a_len = PAGESIZE;
616 		arg->a_flags = B_ASYNC | B_FREE;
617 		arg->a_cred = kcred;
618 		sw_putreq(arg);
619 
620 		return (0);
621 	}
622 
623 	SWAPFS_PRINT(SWAP_PUTP,
624 		"swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
625 		pp, vp, pp->p_offset, flags, 0);
626 
627 	ASSERT(PAGE_LOCKED(pp));
628 
629 	off = pp->p_offset;
630 
631 	doff = off;
632 	dlen = PAGESIZE;
633 
634 	if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
635 		err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
636 		hat_setmod(pp);
637 		page_io_unlock(pp);
638 		page_unlock(pp);
639 		goto out;
640 	}
641 
642 	klvp = pvp;
643 	klstart = poff;
644 	pplist = pp;
645 	/*
646 	 * If this is ASYNC | FREE and we've accumulated a bunch of such
647 	 * pending requests, kluster.
648 	 */
649 	if (flags == (B_ASYNC | B_FREE))
650 		swap_klustsize = klustsize;
651 	else
652 		swap_klustsize = PAGESIZE;
653 	se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
654 	klsz = PAGESIZE;
655 	while (klsz < swap_klustsize) {
656 		if ((arg = sw_getreq()) == NULL) {
657 			swap_getiofail++;
658 			swap_getiopages += btop(klsz);
659 			break;
660 		}
661 		ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
662 		vp = arg->a_vp;
663 		off = arg->a_off;
664 
665 		if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
666 			swap_otherfail++;
667 			swap_otherpages += btop(klsz);
668 			sw_putfree(arg);
669 			break;
670 		}
671 		if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
672 			sw_putfree(arg);
673 			continue;
674 		}
675 		/* Get new physical backing store for the page */
676 		doff = off;
677 		dlen = PAGESIZE;
678 		if (err = swap_newphysname(vp, off, &doff, &dlen,
679 						&pvp, &poff)) {
680 			swap_otherfail++;
681 			swap_otherpages += btop(klsz);
682 			hat_setmod(pp);
683 			page_io_unlock(pp);
684 			page_unlock(pp);
685 			sw_putbackreq(arg);
686 			break;
687 		}
688 		/* Try to cluster new physical name with previous ones */
689 		if (klvp == pvp && poff == klstart + klsz) {
690 			klsz += PAGESIZE;
691 			page_add(&pplist, pp);
692 			pplist = pplist->p_next;
693 			sw_putfree(arg);
694 		} else if (klvp == pvp && poff == klstart - PAGESIZE) {
695 			klsz += PAGESIZE;
696 			klstart -= PAGESIZE;
697 			page_add(&pplist, pp);
698 			sw_putfree(arg);
699 		} else {
700 			swap_klustfail++;
701 			swap_klustpages += btop(klsz);
702 			hat_setmod(pp);
703 			page_io_unlock(pp);
704 			page_unlock(pp);
705 			sw_putbackreq(arg);
706 			break;
707 		}
708 	}
709 
710 	err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
711 		    B_WRITE | flags, cr);
712 
713 	if ((flags & B_ASYNC) == 0)
714 		pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
715 
716 	/* Statistics */
717 	if (!err) {
718 		swap_putpagecnt++;
719 		swap_pagespushed += btop(klsz);
720 	}
721 out:
722 	TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
723 		"swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
724 		vp, klvp, klstart, klsz);
725 	if (err && err != ENOMEM)
726 		cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
727 	if (lenp)
728 		*lenp = PAGESIZE;
729 	return (err);
730 }
731 
732 static void
733 swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr)
734 {
735 	int err;
736 	u_offset_t off = pp->p_offset;
737 	vnode_t *pvp;
738 	u_offset_t poff;
739 
740 	ASSERT(PAGE_EXCL(pp));
741 
742 	/*
743 	 * The caller will free/invalidate large page in one shot instead of
744 	 * one small page at a time.
745 	 */
746 	if (pp->p_szc != 0) {
747 		page_unlock(pp);
748 		return;
749 	}
750 
751 	err = swap_getphysname(vp, off, &pvp, &poff);
752 	if (!err && pvp != NULL)
753 		VOP_DISPOSE(pvp, pp, fl, dn, cr);
754 	else
755 		fs_dispose(vp, pp, fl, dn, cr);
756 }
757