1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/buf.h>
30 #include <sys/cred.h>
31 #include <sys/errno.h>
32 #include <sys/vnode.h>
33 #include <sys/vfs_opreg.h>
34 #include <sys/cmn_err.h>
35 #include <sys/swap.h>
36 #include <sys/mman.h>
37 #include <sys/vmsystm.h>
38 #include <sys/vtrace.h>
39 #include <sys/debug.h>
40 #include <sys/sysmacros.h>
41 #include <sys/vm.h>
42
43 #include <sys/fs/swapnode.h>
44
45 #include <vm/seg.h>
46 #include <vm/page.h>
47 #include <vm/pvn.h>
48 #include <fs/fs_subr.h>
49
50 #include <vm/seg_kp.h>
51
52 /*
53 * Define the routines within this file.
54 */
55 static int swap_getpage(struct vnode *vp, offset_t off, size_t len,
56 uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
57 caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
58 static int swap_putpage(struct vnode *vp, offset_t off, size_t len,
59 int flags, struct cred *cr, caller_context_t *ct);
60 static void swap_inactive(struct vnode *vp, struct cred *cr,
61 caller_context_t *ct);
62 static void swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
63 cred_t *cr, caller_context_t *ct);
64
65 static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
66 uint_t *protp, page_t **plarr, size_t plsz,
67 struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
68
69 int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
70 uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
71 uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
72 enum seg_rw rw, struct cred *cr);
73
74 static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
75 size_t *lenp, int flags, struct cred *cr);
76
77 const fs_operation_def_t swap_vnodeops_template[] = {
78 VOPNAME_INACTIVE, { .vop_inactive = swap_inactive },
79 VOPNAME_GETPAGE, { .vop_getpage = swap_getpage },
80 VOPNAME_PUTPAGE, { .vop_putpage = swap_putpage },
81 VOPNAME_DISPOSE, { .vop_dispose = swap_dispose },
82 VOPNAME_SETFL, { .error = fs_error },
83 VOPNAME_POLL, { .error = fs_error },
84 VOPNAME_PATHCONF, { .error = fs_error },
85 VOPNAME_GETSECATTR, { .error = fs_error },
86 VOPNAME_SHRLOCK, { .error = fs_error },
87 NULL, NULL
88 };
89
90 vnodeops_t *swap_vnodeops;
91
92 /* ARGSUSED */
93 static void
swap_inactive(struct vnode * vp,struct cred * cr,caller_context_t * ct)94 swap_inactive(
95 struct vnode *vp,
96 struct cred *cr,
97 caller_context_t *ct)
98 {
99 SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
100 }
101
102 /*
103 * Return all the pages from [off..off+len] in given file
104 */
105 /*ARGSUSED*/
106 static int
swap_getpage(struct vnode * vp,offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr,caller_context_t * ct)107 swap_getpage(
108 struct vnode *vp,
109 offset_t off,
110 size_t len,
111 uint_t *protp,
112 page_t *pl[],
113 size_t plsz,
114 struct seg *seg,
115 caddr_t addr,
116 enum seg_rw rw,
117 struct cred *cr,
118 caller_context_t *ct)
119 {
120 SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
121 (void *)vp, off, len, 0, 0);
122
123 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
124 "swapfs getpage:vp %p off %llx len %ld",
125 (void *)vp, off, len);
126
127 return (pvn_getpages(swap_getapage, vp, (u_offset_t)off, len, protp,
128 pl, plsz, seg, addr, rw, cr));
129 }
130
131 /*
132 * Called from pvn_getpages to get a particular page.
133 */
134 /*ARGSUSED*/
135 static int
swap_getapage(struct vnode * vp,u_offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr)136 swap_getapage(
137 struct vnode *vp,
138 u_offset_t off,
139 size_t len,
140 uint_t *protp,
141 page_t *pl[],
142 size_t plsz,
143 struct seg *seg,
144 caddr_t addr,
145 enum seg_rw rw,
146 struct cred *cr)
147 {
148 struct page *pp, *rpp;
149 int flags;
150 int err = 0;
151 struct vnode *pvp = NULL;
152 u_offset_t poff;
153 int flag_noreloc;
154 se_t lock;
155 extern int kcage_on;
156 int upgrade = 0;
157
158 SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
159 vp, off, len, 0, 0);
160
161 /*
162 * Until there is a call-back mechanism to cause SEGKP
163 * pages to be unlocked, make them non-relocatable.
164 */
165 if (SEG_IS_SEGKP(seg))
166 flag_noreloc = PG_NORELOC;
167 else
168 flag_noreloc = 0;
169
170 if (protp != NULL)
171 *protp = PROT_ALL;
172
173 lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
174
175 again:
176 if (pp = page_lookup(vp, off, lock)) {
177 /*
178 * In very rare instances, a segkp page may have been
179 * relocated outside of the kernel by the kernel cage
180 * due to the window between page_unlock() and
181 * VOP_PUTPAGE() in segkp_unlock(). Due to the
182 * rareness of these occurances, the solution is to
183 * relocate the page to a P_NORELOC page.
184 */
185 if (flag_noreloc != 0) {
186 if (!PP_ISNORELOC(pp) && kcage_on) {
187 if (lock != SE_EXCL) {
188 upgrade = 1;
189 if (!page_tryupgrade(pp)) {
190 page_unlock(pp);
191 lock = SE_EXCL;
192 goto again;
193 }
194 }
195
196 if (page_relocate_cage(&pp, &rpp) != 0)
197 panic("swap_getapage: "
198 "page_relocate_cage failed");
199
200 pp = rpp;
201 }
202 }
203
204 if (pl) {
205 if (upgrade)
206 page_downgrade(pp);
207
208 pl[0] = pp;
209 pl[1] = NULL;
210 } else {
211 page_unlock(pp);
212 }
213 } else {
214 pp = page_create_va(vp, off, PAGESIZE,
215 PG_WAIT | PG_EXCL | flag_noreloc,
216 seg, addr);
217 /*
218 * Someone raced in and created the page after we did the
219 * lookup but before we did the create, so go back and
220 * try to look it up again.
221 */
222 if (pp == NULL)
223 goto again;
224 if (rw != S_CREATE) {
225 err = swap_getphysname(vp, off, &pvp, &poff);
226 if (pvp) {
227 struct anon *ap;
228 kmutex_t *ahm;
229
230 flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
231 err = VOP_PAGEIO(pvp, pp, poff,
232 PAGESIZE, flags, cr, NULL);
233
234 if (!err) {
235 ahm = AH_MUTEX(vp, off);
236 mutex_enter(ahm);
237
238 ap = swap_anon(vp, off);
239 if (ap == NULL) {
240 panic("swap_getapage:"
241 " null anon");
242 }
243
244 if (ap->an_pvp == pvp &&
245 ap->an_poff == poff) {
246 swap_phys_free(pvp, poff,
247 PAGESIZE);
248 ap->an_pvp = NULL;
249 ap->an_poff = 0;
250 hat_setmod(pp);
251 }
252
253 mutex_exit(ahm);
254 }
255 } else {
256 if (!err)
257 pagezero(pp, 0, PAGESIZE);
258
259 /*
260 * If it's a fault ahead, release page_io_lock
261 * and SE_EXCL we grabbed in page_create_va
262 *
263 * If we are here, we haven't called VOP_PAGEIO
264 * and thus calling pvn_read_done(pp, B_READ)
265 * below may mislead that we tried i/o. Besides,
266 * in case of async, pvn_read_done() should
267 * not be called by *getpage()
268 */
269 if (pl == NULL) {
270 /*
271 * swap_getphysname can return error
272 * only when we are getting called from
273 * swapslot_free which passes non-NULL
274 * pl to VOP_GETPAGE.
275 */
276 ASSERT(err == 0);
277 page_io_unlock(pp);
278 page_unlock(pp);
279 }
280 }
281 }
282
283 ASSERT(pp != NULL);
284
285 if (err && pl)
286 pvn_read_done(pp, B_ERROR);
287
288 if (!err && pl)
289 pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
290 }
291 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
292 "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
293 return (err);
294 }
295
296 /*
297 * Called from large page anon routines only! This is an ugly hack where
298 * the anon layer directly calls into swapfs with a preallocated large page.
299 * Another method would have been to change to VOP and add an extra arg for
300 * the preallocated large page. This all could be cleaned up later when we
301 * solve the anonymous naming problem and no longer need to loop across of
302 * the VOP in PAGESIZE increments to fill in or initialize a large page as
303 * is done today. I think the latter is better since it avoid a change to
304 * the VOP interface that could later be avoided.
305 */
306 int
swap_getconpage(struct vnode * vp,u_offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,page_t * conpp,uint_t * pszc,spgcnt_t * nreloc,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr)307 swap_getconpage(
308 struct vnode *vp,
309 u_offset_t off,
310 size_t len,
311 uint_t *protp,
312 page_t *pl[],
313 size_t plsz,
314 page_t *conpp,
315 uint_t *pszc,
316 spgcnt_t *nreloc,
317 struct seg *seg,
318 caddr_t addr,
319 enum seg_rw rw,
320 struct cred *cr)
321 {
322 struct page *pp;
323 int err = 0;
324 struct vnode *pvp = NULL;
325 u_offset_t poff;
326
327 ASSERT(len == PAGESIZE);
328 ASSERT(pl != NULL);
329 ASSERT(plsz == PAGESIZE);
330 ASSERT(protp == NULL);
331 ASSERT(nreloc != NULL);
332 ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
333 SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
334 vp, off, len, 0, 0);
335
336 /*
337 * If we are not using a preallocated page then we know one already
338 * exists. So just let the old code handle it.
339 */
340 if (conpp == NULL) {
341 err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
342 seg, addr, rw, cr);
343 return (err);
344 }
345 ASSERT(conpp->p_szc != 0);
346 ASSERT(PAGE_EXCL(conpp));
347
348
349 ASSERT(conpp->p_next == conpp);
350 ASSERT(conpp->p_prev == conpp);
351 ASSERT(!PP_ISAGED(conpp));
352 ASSERT(!PP_ISFREE(conpp));
353
354 *nreloc = 0;
355 pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);
356
357 /*
358 * If existing page is found we may need to relocate.
359 */
360 if (pp != conpp) {
361 ASSERT(rw != S_CREATE);
362 ASSERT(pszc != NULL);
363 ASSERT(PAGE_SHARED(pp));
364 if (pp->p_szc < conpp->p_szc) {
365 *pszc = pp->p_szc;
366 page_unlock(pp);
367 err = -1;
368 } else if (pp->p_szc > conpp->p_szc &&
369 seg->s_szc > conpp->p_szc) {
370 *pszc = MIN(pp->p_szc, seg->s_szc);
371 page_unlock(pp);
372 err = -2;
373 } else {
374 pl[0] = pp;
375 pl[1] = NULL;
376 if (page_pptonum(pp) &
377 (page_get_pagecnt(conpp->p_szc) - 1))
378 cmn_err(CE_PANIC, "swap_getconpage: no root");
379 }
380 return (err);
381 }
382
383 ASSERT(PAGE_EXCL(pp));
384
385 if (*nreloc != 0) {
386 ASSERT(rw != S_CREATE);
387 pl[0] = pp;
388 pl[1] = NULL;
389 return (0);
390 }
391
392 *nreloc = 1;
393
394 /*
395 * If necessary do the page io.
396 */
397 if (rw != S_CREATE) {
398 /*
399 * Since we are only called now on behalf of an
400 * address space operation it's impossible for
401 * us to fail unlike swap_getapge() which
402 * also gets called from swapslot_free().
403 */
404 if (swap_getphysname(vp, off, &pvp, &poff)) {
405 cmn_err(CE_PANIC,
406 "swap_getconpage: swap_getphysname failed!");
407 }
408
409 if (pvp != NULL) {
410 err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
411 cr, NULL);
412 if (err == 0) {
413 struct anon *ap;
414 kmutex_t *ahm;
415
416 ahm = AH_MUTEX(vp, off);
417 mutex_enter(ahm);
418 ap = swap_anon(vp, off);
419 if (ap == NULL)
420 panic("swap_getconpage: null anon");
421 if (ap->an_pvp != pvp || ap->an_poff != poff)
422 panic("swap_getconpage: bad anon");
423
424 swap_phys_free(pvp, poff, PAGESIZE);
425 ap->an_pvp = NULL;
426 ap->an_poff = 0;
427 hat_setmod(pp);
428 mutex_exit(ahm);
429 }
430 } else {
431 pagezero(pp, 0, PAGESIZE);
432 }
433 }
434
435 /*
436 * Normally we would let pvn_read_done() destroy
437 * the page on IO error. But since this is a preallocated
438 * page we'll let the anon layer handle it.
439 */
440 page_io_unlock(pp);
441 if (err != 0)
442 page_hashout(pp, NULL);
443 ASSERT(pp->p_next == pp);
444 ASSERT(pp->p_prev == pp);
445
446 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
447 "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);
448
449 pl[0] = pp;
450 pl[1] = NULL;
451 return (err);
452 }
453
454 /* Async putpage klustering stuff */
455 int sw_pending_size;
456 extern int klustsize;
457 extern struct async_reqs *sw_getreq();
458 extern void sw_putreq(struct async_reqs *);
459 extern void sw_putbackreq(struct async_reqs *);
460 extern struct async_reqs *sw_getfree();
461 extern void sw_putfree(struct async_reqs *);
462
463 static size_t swap_putpagecnt, swap_pagespushed;
464 static size_t swap_otherfail, swap_otherpages;
465 static size_t swap_klustfail, swap_klustpages;
466 static size_t swap_getiofail, swap_getiopages;
467
468 /*
469 * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED}.
470 * If len == 0, do from off to EOF.
471 */
472 static int swap_nopage = 0; /* Don't do swap_putpage's if set */
473
474 /* ARGSUSED */
475 static int
swap_putpage(struct vnode * vp,offset_t off,size_t len,int flags,struct cred * cr,caller_context_t * ct)476 swap_putpage(
477 struct vnode *vp,
478 offset_t off,
479 size_t len,
480 int flags,
481 struct cred *cr,
482 caller_context_t *ct)
483 {
484 page_t *pp;
485 u_offset_t io_off;
486 size_t io_len = 0;
487 int err = 0;
488 int nowait;
489 struct async_reqs *arg;
490
491 if (swap_nopage)
492 return (0);
493
494 ASSERT(vp->v_count != 0);
495
496 nowait = flags & B_PAGE_NOWAIT;
497
498 /*
499 * Clear force flag so that p_lckcnt pages are not invalidated.
500 */
501 flags &= ~(B_FORCE | B_PAGE_NOWAIT);
502
503 SWAPFS_PRINT(SWAP_VOPS,
504 "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
505 (void *)vp, off, len, flags, 0);
506 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
507 "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);
508
509 if (vp->v_flag & VNOMAP)
510 return (ENOSYS);
511
512 if (!vn_has_cached_data(vp))
513 return (0);
514
515 if (len == 0) {
516 if (curproc == proc_pageout)
517 cmn_err(CE_PANIC, "swapfs: pageout can't block");
518
519 /* Search the entire vp list for pages >= off. */
520 err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
521 flags, cr);
522 } else {
523 u_offset_t eoff;
524
525 /*
526 * Loop over all offsets in the range [off...off + len]
527 * looking for pages to deal with.
528 */
529 eoff = off + len;
530 for (io_off = (u_offset_t)off; io_off < eoff;
531 io_off += io_len) {
532 /*
533 * If we run out of the async req slot, put the page
534 * now instead of queuing.
535 */
536 if (flags == (B_ASYNC | B_FREE) &&
537 sw_pending_size < klustsize &&
538 (arg = sw_getfree())) {
539 /*
540 * If we are clustering, we should allow
541 * pageout to feed us more pages because # of
542 * pushes is limited by # of I/Os, and one
543 * cluster is considered to be one I/O.
544 */
545 if (pushes)
546 pushes--;
547
548 arg->a_vp = vp;
549 arg->a_off = io_off;
550 arg->a_len = PAGESIZE;
551 arg->a_flags = B_ASYNC | B_FREE;
552 arg->a_cred = kcred;
553 sw_putreq(arg);
554 io_len = PAGESIZE;
555 continue;
556 }
557 /*
558 * If we are not invalidating pages, use the
559 * routine page_lookup_nowait() to prevent
560 * reclaiming them from the free list.
561 */
562 if (!nowait && ((flags & B_INVAL) ||
563 (flags & (B_ASYNC | B_FREE)) == B_FREE))
564 pp = page_lookup(vp, io_off, SE_EXCL);
565 else
566 pp = page_lookup_nowait(vp, io_off,
567 (flags & (B_FREE | B_INVAL)) ?
568 SE_EXCL : SE_SHARED);
569
570 if (pp == NULL || pvn_getdirty(pp, flags) == 0)
571 io_len = PAGESIZE;
572 else {
573 err = swap_putapage(vp, pp, &io_off, &io_len,
574 flags, cr);
575 if (err != 0)
576 break;
577 }
578 }
579 }
580 /* If invalidating, verify all pages on vnode list are gone. */
581 if (err == 0 && off == 0 && len == 0 &&
582 (flags & B_INVAL) && vn_has_cached_data(vp)) {
583 cmn_err(CE_WARN,
584 "swap_putpage: B_INVAL, pages not gone");
585 }
586 return (err);
587 }
588
589 /*
590 * Write out a single page.
591 * For swapfs this means choose a physical swap slot and write the page
592 * out using VOP_PAGEIO.
593 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
594 * swapfs pages, a bunch of contiguous swap slots and then write them
595 * all out in one clustered i/o.
596 */
597 /*ARGSUSED*/
598 static int
swap_putapage(struct vnode * vp,page_t * pp,u_offset_t * offp,size_t * lenp,int flags,struct cred * cr)599 swap_putapage(
600 struct vnode *vp,
601 page_t *pp,
602 u_offset_t *offp,
603 size_t *lenp,
604 int flags,
605 struct cred *cr)
606 {
607 int err;
608 struct vnode *pvp;
609 u_offset_t poff, off;
610 u_offset_t doff;
611 size_t dlen;
612 size_t klsz = 0;
613 u_offset_t klstart = 0;
614 struct vnode *klvp = NULL;
615 page_t *pplist;
616 se_t se;
617 struct async_reqs *arg;
618 size_t swap_klustsize;
619
620 /*
621 * This check is added for callers who access swap_putpage with len = 0.
622 * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty.
623 * And it's necessary to do the same queuing if users have the same
624 * B_ASYNC|B_FREE flags on.
625 */
626 if (flags == (B_ASYNC | B_FREE) &&
627 sw_pending_size < klustsize && (arg = sw_getfree())) {
628
629 hat_setmod(pp);
630 page_io_unlock(pp);
631 page_unlock(pp);
632
633 arg->a_vp = vp;
634 arg->a_off = pp->p_offset;
635 arg->a_len = PAGESIZE;
636 arg->a_flags = B_ASYNC | B_FREE;
637 arg->a_cred = kcred;
638 sw_putreq(arg);
639
640 return (0);
641 }
642
643 SWAPFS_PRINT(SWAP_PUTP,
644 "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
645 pp, vp, pp->p_offset, flags, 0);
646
647 ASSERT(PAGE_LOCKED(pp));
648
649 off = pp->p_offset;
650
651 doff = off;
652 dlen = PAGESIZE;
653
654 if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
655 err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
656 hat_setmod(pp);
657 page_io_unlock(pp);
658 page_unlock(pp);
659 goto out;
660 }
661
662 klvp = pvp;
663 klstart = poff;
664 pplist = pp;
665 /*
666 * If this is ASYNC | FREE and we've accumulated a bunch of such
667 * pending requests, kluster.
668 */
669 if (flags == (B_ASYNC | B_FREE))
670 swap_klustsize = klustsize;
671 else
672 swap_klustsize = PAGESIZE;
673 se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
674 klsz = PAGESIZE;
675 while (klsz < swap_klustsize) {
676 if ((arg = sw_getreq()) == NULL) {
677 swap_getiofail++;
678 swap_getiopages += btop(klsz);
679 break;
680 }
681 ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
682 vp = arg->a_vp;
683 off = arg->a_off;
684
685 if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
686 swap_otherfail++;
687 swap_otherpages += btop(klsz);
688 sw_putfree(arg);
689 break;
690 }
691 if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
692 sw_putfree(arg);
693 continue;
694 }
695 /* Get new physical backing store for the page */
696 doff = off;
697 dlen = PAGESIZE;
698 if (err = swap_newphysname(vp, off, &doff, &dlen,
699 &pvp, &poff)) {
700 swap_otherfail++;
701 swap_otherpages += btop(klsz);
702 hat_setmod(pp);
703 page_io_unlock(pp);
704 page_unlock(pp);
705 sw_putbackreq(arg);
706 break;
707 }
708 /* Try to cluster new physical name with previous ones */
709 if (klvp == pvp && poff == klstart + klsz) {
710 klsz += PAGESIZE;
711 page_add(&pplist, pp);
712 pplist = pplist->p_next;
713 sw_putfree(arg);
714 } else if (klvp == pvp && poff == klstart - PAGESIZE) {
715 klsz += PAGESIZE;
716 klstart -= PAGESIZE;
717 page_add(&pplist, pp);
718 sw_putfree(arg);
719 } else {
720 swap_klustfail++;
721 swap_klustpages += btop(klsz);
722 hat_setmod(pp);
723 page_io_unlock(pp);
724 page_unlock(pp);
725 sw_putbackreq(arg);
726 break;
727 }
728 }
729
730 err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
731 B_WRITE | flags, cr, NULL);
732
733 if ((flags & B_ASYNC) == 0)
734 pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
735
736 /* Statistics */
737 if (!err) {
738 swap_putpagecnt++;
739 swap_pagespushed += btop(klsz);
740 }
741 out:
742 TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
743 "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
744 vp, klvp, klstart, klsz);
745 if (err && err != ENOMEM)
746 cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
747 if (lenp)
748 *lenp = PAGESIZE;
749 return (err);
750 }
751
752 static void
swap_dispose(vnode_t * vp,page_t * pp,int fl,int dn,cred_t * cr,caller_context_t * ct)753 swap_dispose(
754 vnode_t *vp,
755 page_t *pp,
756 int fl,
757 int dn,
758 cred_t *cr,
759 caller_context_t *ct)
760 {
761 int err;
762 u_offset_t off = pp->p_offset;
763 vnode_t *pvp;
764 u_offset_t poff;
765
766 ASSERT(PAGE_EXCL(pp));
767
768 /*
769 * The caller will free/invalidate large page in one shot instead of
770 * one small page at a time.
771 */
772 if (pp->p_szc != 0) {
773 page_unlock(pp);
774 return;
775 }
776
777 err = swap_getphysname(vp, off, &pvp, &poff);
778 if (!err && pvp != NULL)
779 VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
780 else
781 fs_dispose(vp, pp, fl, dn, cr, ct);
782 }
783