/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Each physical swap area has an associated bitmap representing
 * its physical storage. The bitmap records which swap slots are
 * currently allocated or free. Allocation is done by searching
 * through the bitmap for the first free slot. Thus, there's
 * no linear relation between offset within the swap device and the
 * address (within its segment(s)) of the page that the slot backs;
 * instead, it's an arbitrary one-to-one mapping.
 *
 * Associated with each swap area is a swapinfo structure. These
 * structures are linked into a linear list that determines the
 * ordering of swap areas in the logical swap device. Each contains a
 * pointer to the corresponding bitmap, the area's size, and its
 * associated vnode.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/swap.h>
#include <sys/dumphdr.h>
#include <sys/debug.h>
#include <sys/fs/snode.h>
#include <sys/fs/swapnode.h>
#include <sys/policy.h>
#include <sys/zone.h>

#include <vm/as.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <vm/seg_vn.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/seg_map.h>

/*
 * To balance the load among multiple swap areas, we don't allow
 * more than swap_maxcontig allocations to be satisfied from a
 * single swap area before moving on to the next swap area. This
 * effectively "interleaves" allocations among the many swap areas.
 */
int swap_maxcontig;	/* set by anon_init() to 1 Mb */

#define	MINIROOTSIZE	12000	/* ~6 Meg XXX */
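
/*
 * Illustrative arithmetic (not from the original source): MINIROOTSIZE
 * is in 512-byte disk sectors, so 12000 sectors * 512 bytes = 6,144,000
 * bytes, i.e. roughly 6 megabytes -- hence the "~6 Meg" note above.
 */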

/*
 * XXX - this lock is a kludge. It serializes some aspects of swapadd() and
 * swapdel() (namely VOP_OPEN, VOP_CLOSE, VN_RELE). It protects against
 * somebody swapadd'ing and getting swap slots from a vnode, while someone
 * else is in the process of closing or rele'ing it.
 */
static kmutex_t swap_lock;

kmutex_t swapinfo_lock;

/*
 * protected by the swapinfo_lock
 */
struct swapinfo *swapinfo;

static struct swapinfo *silast;
static int nswapfiles;

static u_offset_t	swap_getoff(struct swapinfo *);
static int	swapadd(struct vnode *, ulong_t, ulong_t, char *);
static int	swapdel(struct vnode *, ulong_t);
static int	swapslot_free(struct vnode *, u_offset_t, struct swapinfo *);

/*
 * swap device bitmap allocation macros
 */
#define	MAPSHIFT	5
#define	NBBW		(NBPW * NBBY)	/* number of bits per word */
#define	TESTBIT(map, i)	(((map)[(i) >> MAPSHIFT] & (1 << (i) % NBBW)))
#define	SETBIT(map, i)	(((map)[(i) >> MAPSHIFT] |= (1 << (i) % NBBW)))
#define	CLEARBIT(map, i)	(((map)[(i) >> MAPSHIFT] &= ~(1 << (i) % NBBW)))

int swap_debug = 0;	/* set for debug printf's */
int swap_verify = 0;	/* set to verify slots when freeing and allocating */

uint_t swapalloc_maxcontig;

/*
 * Allocate a range of up to *lenp contiguous slots (pages) from a physical
 * swap device. Flags are one of:
 *	SA_NOT	Must have a slot from a physical swap device other than
 *		the one containing input (*vpp, *offp).
 * Fewer slots than requested may be returned. *lenp allocated slots are
 * returned starting at *offp on *vpp.
 * Returns 1 on a successful allocation, 0 if no slots could be allocated.
 */
int
swap_phys_alloc(
	struct vnode **vpp,
	u_offset_t *offp,
	size_t *lenp,
	uint_t flags)
{
	struct swapinfo *sip;
	offset_t soff, noff;
	size_t len;

	mutex_enter(&swapinfo_lock);
	sip = silast;

	/* Find a desirable physical device and allocate from it. */
	do {
		if (sip == NULL)
			break;
		if (!(sip->si_flags & ST_INDEL) &&
		    (spgcnt_t)sip->si_nfpgs > 0) {
			/* Caller wants other than specified swap device */
			if (flags & SA_NOT) {
				if (*vpp != sip->si_vp ||
				    *offp < sip->si_soff ||
				    *offp >= sip->si_eoff)
					goto found;
			/* Caller is loose, will take anything */
			} else
				goto found;
		} else if (sip->si_nfpgs == 0)
			sip->si_allocs = 0;
		if ((sip = sip->si_next) == NULL)
			sip = swapinfo;
	} while (sip != silast);
	mutex_exit(&swapinfo_lock);
	return (0);
found:
	soff = swap_getoff(sip);
	sip->si_nfpgs--;
	if (soff == -1)
		panic("swap_alloc: swap_getoff failed!");

	for (len = PAGESIZE; len < *lenp; len += PAGESIZE) {
		if (sip->si_nfpgs == 0)
			break;
		if (swapalloc_maxcontig && len >= swapalloc_maxcontig)
			break;
		noff = swap_getoff(sip);
		if (noff == -1) {
			break;
		} else if (noff != soff + len) {
			CLEARBIT(sip->si_swapslots, btop(noff - sip->si_soff));
			break;
		}
		sip->si_nfpgs--;
	}
	*vpp = sip->si_vp;
	*offp = soff;
	*lenp = len;
	ASSERT((spgcnt_t)sip->si_nfpgs >= 0);
	sip->si_allocs += btop(len);
	if (sip->si_allocs >= swap_maxcontig) {
		sip->si_allocs = 0;
		if ((silast = sip->si_next) == NULL)
			silast = swapinfo;
	}
	TRACE_2(TR_FAC_VM, TR_SWAP_ALLOC,
	    "swap_alloc:sip %p offset %lx", sip, soff);
	mutex_exit(&swapinfo_lock);
	return (1);
}

int swap_backsearch = 0;
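
/*
 * Illustrative note (page-size figure is an assumption, not from the
 * source): the si_allocs/swap_maxcontig logic above advances silast to
 * the next area once swap_maxcontig pages have been handed out from one
 * area.  With anon_init()'s 1 Mb setting and, say, 4K pages, an area
 * satisfies at most 256 pages' worth of allocations in a row before
 * allocation rotates to the next area, interleaving load across all
 * configured swap devices.
 */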

/*
 * Get a free offset on swap device sip.
 * Return a >= 0 offset on success, (u_offset_t)-1 on failure.
 */
static u_offset_t
swap_getoff(struct swapinfo *sip)
{
	uint_t *sp, *ep;
	size_t aoff, boff, poff, slotnumber;

	ASSERT(MUTEX_HELD(&swapinfo_lock));

	sip->si_alloccnt++;
	for (sp = &sip->si_swapslots[sip->si_hint >> MAPSHIFT],
	    ep = &sip->si_swapslots[sip->si_mapsize / NBPW]; sp < ep; sp++) {
		if (*sp != (uint_t)0xffffffff)
			goto foundentry;
		else
			sip->si_checkcnt++;
	}
	SWAP_PRINT(SW_ALLOC,
	    "swap_getoff: couldn't find slot from hint %ld to end\n",
	    sip->si_hint, 0, 0, 0, 0);
	/*
	 * Go backwards? Check for faster method XXX
	 */
	if (swap_backsearch) {
		for (sp = &sip->si_swapslots[sip->si_hint >> MAPSHIFT],
		    ep = sip->si_swapslots; sp > ep; sp--) {
			if (*sp != (uint_t)0xffffffff)
				goto foundentry;
			else
				sip->si_checkcnt++;
		}
	} else {
		for (sp = sip->si_swapslots,
		    ep = &sip->si_swapslots[sip->si_hint >> MAPSHIFT];
		    sp < ep; sp++) {
			if (*sp != (uint_t)0xffffffff)
				goto foundentry;
			else
				sip->si_checkcnt++;
		}
	}
	if (*sp == 0xffffffff) {
		cmn_err(CE_WARN, "No free swap slots!");
		return ((u_offset_t)-1);
	}

foundentry:
	/*
	 * aoff is the slot number of the first bit of the si_swapslots
	 * word containing a free slot.
	 *
	 * boff is the bit offset of the free slot (i.e. cleared bit)
	 * within that word, so aoff + boff is the free slot's number.
	 */
	aoff = ((char *)sp - (char *)sip->si_swapslots) * NBBY;

	for (boff = (sip->si_hint % NBBW); boff < NBBW; boff++) {
		if (!TESTBIT(sip->si_swapslots, aoff + boff))
			goto foundslot;
		else
			sip->si_checkcnt++;
	}
	for (boff = 0; boff < (sip->si_hint % NBBW); boff++) {
		if (!TESTBIT(sip->si_swapslots, aoff + boff))
			goto foundslot;
		else
			sip->si_checkcnt++;
	}
	panic("swap_getoff: didn't find slot in word hint %ld", sip->si_hint);

foundslot:
	/*
	 * Return the offset of the free page in the swap device.
	 * Convert the slot number to a byte offset and add the
	 * starting offset of the swap device.
	 */
	slotnumber = aoff + boff;
	SWAP_PRINT(SW_ALLOC, "swap_getoff: allocating slot %ld\n",
	    slotnumber, 0, 0, 0, 0);
	poff = ptob(slotnumber);
	if (poff + sip->si_soff >= sip->si_eoff)
		printf("ptob(aoff(%ld) + boff(%ld))(%ld) >= eoff(%ld)\n",
		    aoff, boff, ptob(slotnumber), (long)sip->si_eoff);
	ASSERT(poff < sip->si_eoff);
	/*
	 * We could verify here that the slot isn't already allocated
	 * by looking through all the anon slots.
	 */
	SETBIT(sip->si_swapslots, slotnumber);
	sip->si_hint = slotnumber + 1;	/* hint = next slot */
	return (poff + sip->si_soff);
}
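
/*
 * Worked example of the bitmap arithmetic above (illustrative; assumes
 * the usual NBPW == 4, so NBBW == 32): slot 70 lives in map word
 * 70 >> MAPSHIFT == 2 at bit 70 % NBBW == 6, so TESTBIT(map, 70)
 * evaluates (map[2] & (1 << 6)).  With an assumed 8K page size that
 * slot backs device bytes [si_soff + 70 * 8192, si_soff + 71 * 8192).
 */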

/*
 * Free a swap page.
 */
void
swap_phys_free(struct vnode *vp, u_offset_t off, size_t len)
{
	struct swapinfo *sip;
	ssize_t pagenumber, npage;

	mutex_enter(&swapinfo_lock);
	sip = swapinfo;

	do {
		if (sip->si_vp == vp &&
		    sip->si_soff <= off && off < sip->si_eoff) {
			for (pagenumber = btop(off - sip->si_soff),
			    npage = btop(len) + pagenumber;
			    pagenumber < npage; pagenumber++) {
				SWAP_PRINT(SW_ALLOC,
				    "swap_phys_free: freeing slot %ld on "
				    "sip %p\n",
				    pagenumber, sip, 0, 0, 0);
				if (!TESTBIT(sip->si_swapslots, pagenumber)) {
					panic(
					    "swap_phys_free: freeing free slot "
					    "%p,%lx\n", (void *)vp,
					    ptob(pagenumber) + sip->si_soff);
				}
				CLEARBIT(sip->si_swapslots, pagenumber);
				sip->si_nfpgs++;
			}
			ASSERT(sip->si_nfpgs <= sip->si_npgs);
			mutex_exit(&swapinfo_lock);
			return;
		}
	} while ((sip = sip->si_next) != NULL);
	panic("swap_phys_free");
	/*NOTREACHED*/
}

/*
 * Return the anon struct corresponding to the given <vnode, off>
 * if it is part of the virtual swap device.
 * Return the anon struct if found, otherwise NULL.
 */
struct anon *
swap_anon(struct vnode *vp, u_offset_t off)
{
	struct anon *ap;

	ASSERT(MUTEX_HELD(&anonhash_lock[AH_LOCK(vp, off)]));

	for (ap = anon_hash[ANON_HASH(vp, off)]; ap != NULL; ap = ap->an_hash) {
		if (ap->an_vp == vp && ap->an_off == off)
			return (ap);
	}
	return (NULL);
}


/*
 * Determine if the vp offset range overlaps a swap device.
 */
int
swap_in_range(struct vnode *vp, u_offset_t offset, size_t len)
{
	struct swapinfo *sip;
	u_offset_t eoff;

	eoff = offset + len;
	ASSERT(eoff > offset);

	mutex_enter(&swapinfo_lock);
	sip = swapinfo;
	if (vp && sip) {
		do {
			if (vp != sip->si_vp || eoff <= sip->si_soff ||
			    offset >= sip->si_eoff)
				continue;
			mutex_exit(&swapinfo_lock);
			return (1);
		} while ((sip = sip->si_next) != NULL);
	}
	mutex_exit(&swapinfo_lock);
	return (0);
}

/*
 * See if name is one of our swap files
 * even though lookupname failed.
 * This can be used by swapdel to delete
 * swap resources on remote machines
 * where the link has gone down.
 */
static struct vnode *
swapdel_byname(
	char	*name,			/* pathname to delete */
	ulong_t lowblk)			/* Low block number of area to delete */
{
	struct swapinfo **sipp, *osip;
	u_offset_t soff;

	/*
	 * Find the swap file entry for the file to
	 * be deleted. Skip any entries that are in
	 * transition.
	 */

	soff = ptob(btopr(lowblk << SCTRSHFT)); /* must be page aligned */

	mutex_enter(&swapinfo_lock);
	for (sipp = &swapinfo; (osip = *sipp) != NULL; sipp = &osip->si_next) {
		if ((strcmp(osip->si_pname, name) == 0) &&
		    (osip->si_soff == soff) && (osip->si_flags == 0)) {
			struct vnode *vp = osip->si_vp;

			VN_HOLD(vp);
			mutex_exit(&swapinfo_lock);
			return (vp);
		}
	}
	mutex_exit(&swapinfo_lock);
	return (NULL);
}
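
/*
 * Illustrative arithmetic for the alignment above (values are
 * assumptions): SCTRSHFT converts 512-byte sectors to bytes, so with
 * lowblk == 1, lowblk << SCTRSHFT == 512; btopr() rounds that up to one
 * whole page and ptob() converts back to bytes, giving soff == PAGESIZE.
 * A lowblk that is not page aligned is thus rounded up to the next page
 * boundary before the list is searched.
 */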

/*
 * New system call to manipulate swap files.
 */
int
swapctl(int sc_cmd, void *sc_arg, int *rv)
{
	struct swapinfo *sip, *csip, *tsip;
	int error = 0;
	struct swapent st, *ust;
	struct swapres sr;
	struct vnode *vp;
	int cnt = 0;
	int tmp_nswapfiles;
	int nswap;
	int length, nlen;
	int gplen = 0, plen;
	char *swapname;
	char *pname;
	char *tpname;
	struct anoninfo ai;
	spgcnt_t avail;
	int global = INGLOBALZONE(curproc);

	/*
	 * When running in a zone we want to hide the details of the swap
	 * devices: we report only a single swap device named "swap" whose
	 * size is the sum of the sizes of all real swap devices on the
	 * system.
	 */
	switch (sc_cmd) {
	case SC_GETNSWP:
		if (global)
			*rv = nswapfiles;
		else
			*rv = 1;
		return (0);

	case SC_AINFO:
		/*
		 * Return anoninfo information with these changes:
		 * ani_max = maximum amount of swap space
		 *	(including potentially available physical memory)
		 * ani_free = amount of unallocated anonymous memory
		 *	(some of which might be reserved and including
		 *	potentially available physical memory)
		 * ani_resv = amount of claimed (reserved) anonymous memory
		 */
		avail = MAX((spgcnt_t)(availrmem - swapfs_minfree), 0);
		ai.ani_max = (k_anoninfo.ani_max +
		    k_anoninfo.ani_mem_resv) + avail;

		ai.ani_free = k_anoninfo.ani_free + avail;

		ai.ani_resv = k_anoninfo.ani_phys_resv +
		    k_anoninfo.ani_mem_resv;

		if (copyout(&ai, sc_arg, sizeof (struct anoninfo)) != 0)
			return (EFAULT);
		return (0);

	case SC_LIST:
		if (copyin(sc_arg, &length, sizeof (int)) != 0)
			return (EFAULT);
		if (!global) {
			struct swapent st;
			char *swappath = "swap";

			if (length < 1)
				return (ENOMEM);
			ust = (swapent_t *)((swaptbl_t *)sc_arg)->swt_ent;
			if (copyin(ust, &st, sizeof (swapent_t)) != 0)
				return (EFAULT);
			st.ste_start = PAGESIZE >> SCTRSHFT;
			st.ste_length = (off_t)0;
			st.ste_pages = 0;
			st.ste_free = 0;
			st.ste_flags = 0;
			mutex_enter(&swapinfo_lock);
			for (sip = swapinfo, nswap = 0;
			    sip != NULL && nswap < nswapfiles;
			    sip = sip->si_next, nswap++) {
				st.ste_length +=
				    (sip->si_eoff - sip->si_soff) >> SCTRSHFT;
				st.ste_pages += sip->si_npgs;
				st.ste_free += sip->si_nfpgs;
			}
			mutex_exit(&swapinfo_lock);
			if (copyout(&st, ust, sizeof (swapent_t)) != 0 ||
			    copyout(swappath, st.ste_path,
			    strlen(swappath) + 1) != 0) {
				return (EFAULT);
			}
			*rv = 1;
			return (0);
		}
beginning:
		tmp_nswapfiles = nswapfiles;
		/* Return an error if not enough space for the whole table. */
		if (length < tmp_nswapfiles)
			return (ENOMEM);
		/*
		 * Get memory to hold the swap entries and their names. We'll
		 * copy the real entries into these and then copy these out.
		 * Allocating the pathname memory is only a guess so we may
		 * find that we need more and have to do it again.
		 * All this is because we have to hold the swapinfo lock while
		 * traversing the swapinfo list, and we can't be doing copyouts
		 * and/or kmem_alloc()s during this.
		 */
		csip = kmem_zalloc(tmp_nswapfiles * sizeof (struct swapinfo),
		    KM_SLEEP);
retry:
		nlen = tmp_nswapfiles * (gplen += 100);
		pname = kmem_zalloc(nlen, KM_SLEEP);

		mutex_enter(&swapinfo_lock);

		if (tmp_nswapfiles != nswapfiles) {
			mutex_exit(&swapinfo_lock);
			kmem_free(pname, nlen);
			kmem_free(csip,
			    tmp_nswapfiles * sizeof (struct swapinfo));
			gplen = 0;
			goto beginning;
		}
		for (sip = swapinfo, tsip = csip, tpname = pname, nswap = 0;
		    sip && nswap < tmp_nswapfiles;
		    sip = sip->si_next, tsip++, tpname += plen, nswap++) {
			plen = sip->si_pnamelen;
			if (tpname + plen - pname > nlen) {
				mutex_exit(&swapinfo_lock);
				kmem_free(pname, nlen);
				goto retry;
			}
			*tsip = *sip;
			tsip->si_pname = tpname;
			(void) strcpy(tsip->si_pname, sip->si_pname);
		}
		mutex_exit(&swapinfo_lock);

		if (sip) {
			error = ENOMEM;
			goto lout;
		}
		ust = (swapent_t *)((swaptbl_t *)sc_arg)->swt_ent;
		for (tsip = csip, cnt = 0; cnt < nswap; tsip++, ust++, cnt++) {
			if (copyin(ust, &st, sizeof (swapent_t)) != 0) {
				error = EFAULT;
				goto lout;
			}
			st.ste_flags = tsip->si_flags;
			st.ste_length =
			    (tsip->si_eoff - tsip->si_soff) >> SCTRSHFT;
			st.ste_start = tsip->si_soff >> SCTRSHFT;
			st.ste_pages = tsip->si_npgs;
			st.ste_free = tsip->si_nfpgs;
			if (copyout(&st, ust, sizeof (swapent_t)) != 0) {
				error = EFAULT;
				goto lout;
			}
			if (!tsip->si_pnamelen)
				continue;
			if (copyout(tsip->si_pname, st.ste_path,
			    tsip->si_pnamelen) != 0) {
				error = EFAULT;
				goto lout;
			}
		}
		*rv = nswap;
lout:
		kmem_free(csip, tmp_nswapfiles * sizeof (struct swapinfo));
		kmem_free(pname, nlen);
		return (error);

	case SC_ADD:
	case SC_REMOVE:
		break;
	default:
		return (EINVAL);
	}
	if ((error = secpolicy_swapctl(CRED())) != 0)
		return (error);

	if (copyin(sc_arg, &sr, sizeof (swapres_t)))
		return (EFAULT);

	/* Allocate the space to read in pathname */
	if ((swapname = kmem_alloc(MAXPATHLEN, KM_NOSLEEP)) == NULL)
		return (ENOMEM);

	error = copyinstr(sr.sr_name, swapname, MAXPATHLEN, 0);
	if (error)
		goto out;

	error = lookupname(swapname, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
	if (error) {
		if (sc_cmd == SC_ADD)
			goto out;
		/* see if we match by name */
		vp = swapdel_byname(swapname, (size_t)sr.sr_start);
		if (vp == NULL)
			goto out;
	}

	if (vp->v_flag & (VNOMAP | VNOSWAP)) {
		VN_RELE(vp);
		error = ENOSYS;
		goto out;
	}
	switch (vp->v_type) {
	case VBLK:
		break;

	case VREG:
		if (vp->v_vfsp && vn_is_readonly(vp))
			error = EROFS;
		else
			error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL);
		break;

	case VDIR:
		error = EISDIR;
		break;
	default:
		error = ENOSYS;
		break;
	}
	if (error == 0) {
		if (sc_cmd == SC_REMOVE)
			error = swapdel(vp, sr.sr_start);
		else
			error = swapadd(vp, sr.sr_start,
			    sr.sr_length, swapname);
	}
	VN_RELE(vp);
out:
	kmem_free(swapname, MAXPATHLEN);
	return (error);
}
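
/*
 * Userland usage sketch (illustrative only; see swapctl(2) for the
 * actual contract).  A caller typically sizes the table with
 * SC_GETNSWP, then issues SC_LIST with swt_n and the per-entry
 * ste_path buffers filled in:
 *
 *	int n = swapctl(SC_GETNSWP, NULL);
 *	swaptbl_t *swt = malloc(sizeof (int) + n * sizeof (swapent_t));
 *	swt->swt_n = n;
 *	... point each swt_ent[i].ste_path at a MAXPATHLEN buffer ...
 *	n = swapctl(SC_LIST, swt);
 *
 * The ENOMEM return above is what such a caller sees if swap devices
 * were added between the two calls and the table is now too small.
 */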

#if defined(_LP64) && defined(_SYSCALL32)

int
swapctl32(int sc_cmd, void *sc_arg, int *rv)
{
	struct swapinfo *sip, *csip, *tsip;
	int error = 0;
	struct swapent32 st, *ust;
	struct swapres32 sr;
	struct vnode *vp;
	int cnt = 0;
	int tmp_nswapfiles;
	int nswap;
	int length, nlen;
	int gplen = 0, plen;
	char *swapname;
	char *pname;
	char *tpname;
	struct anoninfo32 ai;
	size_t s;
	spgcnt_t avail;

	switch (sc_cmd) {
	case SC_GETNSWP:
		*rv = nswapfiles;
		return (0);

	case SC_AINFO:
		/*
		 * Return anoninfo information with these changes:
		 * ani_max = maximum amount of swap space
		 *	(including potentially available physical memory)
		 * ani_free = amount of unallocated anonymous memory
		 *	(some of which might be reserved and including
		 *	potentially available physical memory)
		 * ani_resv = amount of claimed (reserved) anonymous memory
		 */
		avail = MAX((spgcnt_t)(availrmem - swapfs_minfree), 0);
		s = (k_anoninfo.ani_max + k_anoninfo.ani_mem_resv) + avail;
		if (s > UINT32_MAX)
			return (EOVERFLOW);
		ai.ani_max = s;

		s = k_anoninfo.ani_free + avail;
		if (s > UINT32_MAX)
			return (EOVERFLOW);
		ai.ani_free = s;

		s = k_anoninfo.ani_phys_resv + k_anoninfo.ani_mem_resv;
		if (s > UINT32_MAX)
			return (EOVERFLOW);
		ai.ani_resv = s;

		if (copyout(&ai, sc_arg, sizeof (ai)) != 0)
			return (EFAULT);
		return (0);
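
	/*
	 * Note on the overflow checks above (illustrative arithmetic):
	 * these counts are in pages, so UINT32_MAX pages is roughly
	 * 16 TB with an assumed 4K page size; totals a 32-bit caller
	 * cannot represent in anoninfo32 produce EOVERFLOW instead.
	 */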

	case SC_LIST:
		if (copyin(sc_arg, &length, sizeof (int32_t)) != 0)
			return (EFAULT);
beginning:
		tmp_nswapfiles = nswapfiles;
		/* Return an error if not enough space for the whole table. */
		if (length < tmp_nswapfiles)
			return (ENOMEM);
		/*
		 * Get memory to hold the swap entries and their names. We'll
		 * copy the real entries into these and then copy these out.
		 * Allocating the pathname memory is only a guess so we may
		 * find that we need more and have to do it again.
		 * All this is because we have to hold the swapinfo lock while
		 * traversing the swapinfo list, and we can't be doing copyouts
		 * and/or kmem_alloc()s during this.
		 */
		csip = kmem_zalloc(tmp_nswapfiles * sizeof (*csip), KM_SLEEP);
retry:
		nlen = tmp_nswapfiles * (gplen += 100);
		pname = kmem_zalloc(nlen, KM_SLEEP);

		mutex_enter(&swapinfo_lock);

		if (tmp_nswapfiles != nswapfiles) {
			mutex_exit(&swapinfo_lock);
			kmem_free(pname, nlen);
			kmem_free(csip, tmp_nswapfiles * sizeof (*csip));
			gplen = 0;
			goto beginning;
		}
		for (sip = swapinfo, tsip = csip, tpname = pname, nswap = 0;
		    (sip != NULL) && (nswap < tmp_nswapfiles);
		    sip = sip->si_next, tsip++, tpname += plen, nswap++) {
			plen = sip->si_pnamelen;
			if (tpname + plen - pname > nlen) {
				mutex_exit(&swapinfo_lock);
				kmem_free(pname, nlen);
				goto retry;
			}
			*tsip = *sip;
			tsip->si_pname = tpname;
			(void) strcpy(tsip->si_pname, sip->si_pname);
		}
		mutex_exit(&swapinfo_lock);

		if (sip != NULL) {
			error = ENOMEM;
			goto lout;
		}
		ust = (swapent32_t *)((swaptbl32_t *)sc_arg)->swt_ent;
		for (tsip = csip, cnt = 0; cnt < nswap; tsip++, ust++, cnt++) {
			if (copyin(ust, &st, sizeof (*ust)) != 0) {
				error = EFAULT;
				goto lout;
			}
			st.ste_flags = tsip->si_flags;
			st.ste_length =
			    (tsip->si_eoff - tsip->si_soff) >> SCTRSHFT;
			st.ste_start = tsip->si_soff >> SCTRSHFT;
			st.ste_pages = tsip->si_npgs;
			st.ste_free = tsip->si_nfpgs;
			if (copyout(&st, ust, sizeof (st)) != 0) {
				error = EFAULT;
				goto lout;
			}
			if (!tsip->si_pnamelen)
				continue;
			if (copyout(tsip->si_pname,
			    (caddr_t)(uintptr_t)st.ste_path,
			    tsip->si_pnamelen) != 0) {
				error = EFAULT;
				goto lout;
			}
		}
		*rv = nswap;
lout:
		kmem_free(csip, tmp_nswapfiles * sizeof (*csip));
		kmem_free(pname, nlen);
		return (error);

	case SC_ADD:
	case SC_REMOVE:
		break;
	default:
		return (EINVAL);
	}
	if ((error = secpolicy_swapctl(CRED())) != 0)
		return (error);

	if (copyin(sc_arg, &sr, sizeof (sr)))
		return (EFAULT);

	/* Allocate the space to read in pathname */
	if ((swapname = kmem_alloc(MAXPATHLEN, KM_NOSLEEP)) == NULL)
		return (ENOMEM);

	error = copyinstr((caddr_t)(uintptr_t)sr.sr_name,
	    swapname, MAXPATHLEN, NULL);
	if (error)
		goto out;

	error = lookupname(swapname, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
	if (error) {
		if (sc_cmd == SC_ADD)
			goto out;
		/* see if we match by name */
		vp = swapdel_byname(swapname, (uint_t)sr.sr_start);
		if (vp == NULL)
			goto out;
	}

	if (vp->v_flag & (VNOMAP | VNOSWAP)) {
		VN_RELE(vp);
		error = ENOSYS;
		goto out;
	}
	switch (vp->v_type) {
	case VBLK:
		break;

	case VREG:
		if (vp->v_vfsp && vn_is_readonly(vp))
			error = EROFS;
		else
			error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL);
		break;

	case VDIR:
		error = EISDIR;
		break;
	default:
		error = ENOSYS;
		break;
	}
	if (error == 0) {
		if (sc_cmd == SC_REMOVE)
			error = swapdel(vp, sr.sr_start);
		else
			error = swapadd(vp, sr.sr_start, sr.sr_length,
			    swapname);
	}
	VN_RELE(vp);
out:
	kmem_free(swapname, MAXPATHLEN);
	return (error);
}

#endif /* _LP64 && _SYSCALL32 */

/*
 * Add a new swap file.
 */
int
swapadd(struct vnode *vp, ulong_t lowblk, ulong_t nblks, char *swapname)
{
	struct swapinfo **sipp, *nsip = NULL, *esip = NULL;
	struct vnode *cvp;
	struct vattr vattr;
	pgcnt_t pages;
	u_offset_t soff, eoff;
	int error;
	ssize_t i, start, end;
	ushort_t wasswap;
	ulong_t startblk;
	size_t	returned_mem;

	SWAP_PRINT(SW_CTL, "swapadd: vp %p lowblk %ld nblks %ld swapname %s\n",
	    vp, lowblk, nblks, swapname, 0);
	/*
	 * Get the real vnode. (If vp is not a specnode it just returns vp, so
	 * it does the right thing, but having this code know about specnodes
	 * violates the spirit of having it be independent of vnode type.)
	 */
	cvp = common_specvp(vp);

	/*
	 * OR in VISSWAP so the file system has a chance to deny swap-ons
	 * during open.
	 */
	mutex_enter(&cvp->v_lock);
	wasswap = cvp->v_flag & VISSWAP;
	cvp->v_flag |= VISSWAP;
	mutex_exit(&cvp->v_lock);

	mutex_enter(&swap_lock);
	if (error = VOP_OPEN(&cvp, FREAD|FWRITE, CRED(), NULL)) {
		mutex_exit(&swap_lock);
		/* restore state of v_flag */
		if (!wasswap) {
			mutex_enter(&cvp->v_lock);
			cvp->v_flag &= ~VISSWAP;
			mutex_exit(&cvp->v_lock);
		}
		return (error);
	}
	mutex_exit(&swap_lock);

	/*
	 * Get partition size. Return error if empty partition,
	 * or if request does not fit within the partition.
	 * If this is the first swap device, we can reduce
	 * the size of the swap area to match what is
	 * available. This can happen if the system was built
	 * on a machine with a different size swap partition.
	 */
	vattr.va_mask = AT_SIZE;
	if (error = VOP_GETATTR(cvp, &vattr, ATTR_COMM, CRED(), NULL))
		goto out;

	/*
	 * Specfs returns a va_size of MAXOFFSET_T (UNKNOWN_SIZE) when the
	 * size of the device can't be determined.
	 */
	if ((vattr.va_size == 0) || (vattr.va_size == MAXOFFSET_T)) {
		error = EINVAL;
		goto out;
	}

#ifdef	_ILP32
	/*
	 * No support for large swap in 32-bit OS; if the size of the swap is
	 * bigger than MAXOFF32_T then the size used by swapfs must be limited.
	 * This limitation is imposed by the swap subsystem itself; a D_64BIT
	 * driver as the target of the swap operation should be able to field
	 * the IO.
	 */
	if (vattr.va_size > MAXOFF32_T) {
		cmn_err(CE_NOTE,
		    "!swap device %s truncated from 0x%llx to 0x%x bytes",
		    swapname, vattr.va_size, MAXOFF32_T);
		vattr.va_size = MAXOFF32_T;
	}
#endif	/* _ILP32 */
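
	/*
	 * Illustrative note (MAXOFF32_T value assumed from the standard
	 * headers): MAXOFF32_T is 2^31 - 1, so on a 32-bit kernel a
	 * single swap device is capped at just under 2 GB regardless of
	 * its actual size.
	 */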

	/* Fail if file not writeable (try to set size to current size) */
	vattr.va_mask = AT_SIZE;
	if (error = VOP_SETATTR(cvp, &vattr, 0, CRED(), NULL))
		goto out;

	/* Fail if fs does not support VOP_PAGEIO */
	error = VOP_PAGEIO(cvp, (page_t *)NULL, (u_offset_t)0, 0, 0, CRED(),
	    NULL);

	if (error == ENOSYS)
		goto out;
	else
		error = 0;
	/*
	 * If swapping on the root filesystem don't put swap blocks that
	 * correspond to the miniroot filesystem on the swap free list.
	 */
	if (cvp == rootdir)
		startblk = roundup(MINIROOTSIZE<<SCTRSHFT, klustsize)>>SCTRSHFT;
	else	/* Skip 1st page (disk label) */
		startblk = (ulong_t)(lowblk ? lowblk : 1);

	soff = startblk << SCTRSHFT;
	if (soff >= vattr.va_size) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If user specified 0 blks, use the size of the device
	 */
	eoff = nblks ? soff + (nblks - (startblk - lowblk) << SCTRSHFT) :
	    vattr.va_size;

	SWAP_PRINT(SW_CTL, "swapadd: va_size %ld soff %ld eoff %ld\n",
	    vattr.va_size, soff, eoff, 0, 0);

	if (eoff > vattr.va_size) {
		error = EINVAL;
		goto out;
	}

	/*
	 * The starting and ending offsets must be page aligned.
	 * Round soff up to next page boundary, round eoff
	 * down to previous page boundary.
	 */
	soff = ptob(btopr(soff));
	eoff = ptob(btop(eoff));
	if (soff >= eoff) {
		SWAP_PRINT(SW_CTL, "swapadd: soff %ld >= eoff %ld\n",
		    soff, eoff, 0, 0, 0);
		error = EINVAL;
		goto out;
	}

	pages = btop(eoff - soff);

	/* Allocate and partially set up the new swapinfo */
	nsip = kmem_zalloc(sizeof (struct swapinfo), KM_SLEEP);
	nsip->si_vp = cvp;

	nsip->si_soff = soff;
	nsip->si_eoff = eoff;
	nsip->si_hint = 0;
	nsip->si_checkcnt = nsip->si_alloccnt = 0;

	nsip->si_pnamelen = (int)strlen(swapname) + 1;
	nsip->si_pname = (char *)kmem_zalloc(nsip->si_pnamelen, KM_SLEEP);
	bcopy(swapname, nsip->si_pname, nsip->si_pnamelen - 1);
	SWAP_PRINT(SW_CTL, "swapadd: allocating swapinfo for %s, %ld pages\n",
	    swapname, pages, 0, 0, 0);
	/*
	 * Size of swapslots map in bytes
	 */
	nsip->si_mapsize = P2ROUNDUP(pages, NBBW) / NBBY;
	nsip->si_swapslots = kmem_zalloc(nsip->si_mapsize, KM_SLEEP);

	/*
	 * Permanently set the bits that can't ever be allocated,
	 * i.e. those from the ending offset to the round up slot for the
	 * swapslots bit map.
	 */
	start = pages;
	end = P2ROUNDUP(pages, NBBW);
	for (i = start; i < end; i++) {
		SWAP_PRINT(SW_CTL, "swapadd: set bit for page %ld\n", i,
		    0, 0, 0, 0);
		SETBIT(nsip->si_swapslots, i);
	}
	nsip->si_npgs = nsip->si_nfpgs = pages;
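
	/*
	 * Worked example for the map sizing above (illustrative, with
	 * NBBW == 32): for pages == 100, P2ROUNDUP(100, 32) == 128, so
	 * si_mapsize == 128 / 8 == 16 bytes, and the loop permanently
	 * sets the 28 unusable tail bits 100..127.
	 */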
	/*
	 * Now check to see if we can add it. We wait until now to check
	 * because we need the swapinfo_lock and we don't want to sleep with
	 * it held (e.g., during kmem_alloc()) while we're setting up the
	 * swapinfo.
	 */
	mutex_enter(&swapinfo_lock);
	for (sipp = &swapinfo; (esip = *sipp) != NULL; sipp = &esip->si_next) {
		if (esip->si_vp == cvp) {
			if (esip->si_soff == soff && esip->si_npgs == pages &&
			    (esip->si_flags & ST_DOINGDEL)) {
				/*
				 * We are adding a device that we are in the
				 * middle of deleting. Just clear the
				 * ST_DOINGDEL flag to signal this and
				 * the deletion routine will eventually notice
				 * it and add it back.
				 */
				esip->si_flags &= ~ST_DOINGDEL;
				mutex_exit(&swapinfo_lock);
				goto out;
			}
			/* disallow overlapping swap files */
			if ((soff < esip->si_eoff) && (eoff > esip->si_soff)) {
				error = EEXIST;
				mutex_exit(&swapinfo_lock);
				goto out;
			}
		}
	}

	nswapfiles++;

	/*
	 * Add the new swap device to the list and shift allocations to it
	 * before updating the anoninfo counters.
	 */
	*sipp = nsip;
	silast = nsip;

	/*
	 * Update the total amount of reservable swap space,
	 * accounting properly for swap space from physical memory.
	 */
	/* New swap device soaks up currently reserved memory swap */
	mutex_enter(&anoninfo_lock);

	ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);

	k_anoninfo.ani_max += pages;
	ANI_ADD(pages);
	if (k_anoninfo.ani_mem_resv > k_anoninfo.ani_locked_swap) {
		returned_mem = MIN(k_anoninfo.ani_mem_resv -
		    k_anoninfo.ani_locked_swap,
		    k_anoninfo.ani_max - k_anoninfo.ani_phys_resv);

		ANI_ADD(-returned_mem);
		k_anoninfo.ani_free -= returned_mem;
		k_anoninfo.ani_mem_resv -= returned_mem;
		k_anoninfo.ani_phys_resv += returned_mem;

		mutex_enter(&freemem_lock);
		availrmem += returned_mem;
		mutex_exit(&freemem_lock);
	}
	/*
	 * At boot time, to permit booting small memory machines using
	 * only physical memory as swap space, we allowed a dangerously
	 * large amount of memory to be used as swap space; now that
	 * more physical backing store is available bump down the amount
	 * we can get from memory to a safer size.
	 */
	if (swapfs_minfree < swapfs_desfree) {
		mutex_enter(&freemem_lock);
		if (availrmem > swapfs_desfree || !k_anoninfo.ani_mem_resv)
			swapfs_minfree = swapfs_desfree;
		mutex_exit(&freemem_lock);
	}

	SWAP_PRINT(SW_CTL, "swapadd: ani_max %ld ani_free %ld\n",
	    k_anoninfo.ani_max, k_anoninfo.ani_free, 0, 0, 0);

	mutex_exit(&anoninfo_lock);

	mutex_exit(&swapinfo_lock);

	/* Initialize the dump device */
	mutex_enter(&dump_lock);
	if (dumpvp == NULL)
		(void) dumpinit(vp, swapname, 0);
	mutex_exit(&dump_lock);

	VN_HOLD(cvp);
out:
	if (error || esip) {
		SWAP_PRINT(SW_CTL, "swapadd: error (%d)\n", error, 0, 0, 0, 0);

		if (!wasswap) {
			mutex_enter(&cvp->v_lock);
			cvp->v_flag &= ~VISSWAP;
			mutex_exit(&cvp->v_lock);
		}
		if (nsip) {
			kmem_free(nsip->si_swapslots, (size_t)nsip->si_mapsize);
			kmem_free(nsip->si_pname, nsip->si_pnamelen);
			kmem_free(nsip, sizeof (*nsip));
		}
		mutex_enter(&swap_lock);
		(void) VOP_CLOSE(cvp, FREAD|FWRITE, 1, (offset_t)0, CRED(),
		    NULL);
		mutex_exit(&swap_lock);
	}
	return (error);
}
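
/*
 * Illustrative trace of the memory-swap handoff in swapadd() (numbers
 * are invented): suppose 50 pages of memory swap are reserved
 * (ani_mem_resv == 50), 10 of them locked (ani_locked_swap == 10), and
 * a 100-page device arrives with no physical reservations outstanding.
 * Then returned_mem == MIN(50 - 10, 100) == 40, so 40 reservations
 * migrate from memory swap to the new device and availrmem grows by
 * 40 pages.
 */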

/*
 * Delete a swap file.
 */
static int
swapdel(
	struct vnode *vp,
	ulong_t lowblk)	/* Low block number of area to delete. */
{
	struct swapinfo **sipp, *osip = NULL;
	struct vnode *cvp;
	u_offset_t soff;
	int error = 0;
	u_offset_t toff = 0;
	struct vnode *tvp = NULL;
	spgcnt_t pages;
	struct anon **app, *ap;
	kmutex_t *ahm;
	pgcnt_t adjust_swap = 0;

	/* Find the swap file entry for the file to be deleted */
	cvp = common_specvp(vp);


	lowblk = lowblk ? lowblk : 1;	/* Skip first page (disk label) */
	soff = ptob(btopr(lowblk << SCTRSHFT)); /* must be page aligned */

	mutex_enter(&swapinfo_lock);
	for (sipp = &swapinfo; (osip = *sipp) != NULL; sipp = &osip->si_next) {
		if ((osip->si_vp == cvp) &&
		    (osip->si_soff == soff) && (osip->si_flags == 0))
			break;
	}

	/* If the file was not found, error. */
	if (osip == NULL) {
		error = EINVAL;
		mutex_exit(&swapinfo_lock);
		goto out;
	}

	pages = osip->si_npgs;

	/*
	 * Do not delete if we will be low on swap pages.
	 */
	mutex_enter(&anoninfo_lock);

	ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);

	mutex_enter(&freemem_lock);
	if (((k_anoninfo.ani_max - k_anoninfo.ani_phys_resv) +
	    MAX((spgcnt_t)(availrmem - swapfs_minfree), 0)) < pages) {
		mutex_exit(&freemem_lock);
		mutex_exit(&anoninfo_lock);
		error = ENOMEM;
		cmn_err(CE_WARN, "swapdel - too few free pages");
		mutex_exit(&swapinfo_lock);
		goto out;
	}
	mutex_exit(&freemem_lock);

	k_anoninfo.ani_max -= pages;

	/* If needed, reserve memory swap to replace old device */
	if (k_anoninfo.ani_phys_resv > k_anoninfo.ani_max) {
		adjust_swap = k_anoninfo.ani_phys_resv - k_anoninfo.ani_max;
		k_anoninfo.ani_phys_resv -= adjust_swap;
		k_anoninfo.ani_mem_resv += adjust_swap;
		mutex_enter(&freemem_lock);
		availrmem -= adjust_swap;
		mutex_exit(&freemem_lock);
		ANI_ADD(adjust_swap);
	}
	ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
	mutex_exit(&anoninfo_lock);

	ANI_ADD(-pages);

	/*
	 * Set the delete flag. This prevents anyone from allocating more
	 * pages from this file. Also set ST_DOINGDEL. Someone who wants to
	 * add the file back while we're deleting it will signify by clearing
	 * this flag.
	 */
	osip->si_flags |= ST_INDEL|ST_DOINGDEL;
	mutex_exit(&swapinfo_lock);

	/*
	 * Free all the allocated physical slots for this file. We do this
	 * by walking through the entire anon hash array, because we need
	 * to update all the anon slots that have physical swap slots on
	 * this file, and this is the only way to find them all. We go back
	 * to the beginning of a bucket after each slot is freed because the
	 * anonhash_lock is not held during the free and thus the hash table
	 * may change under us.
	 */
	for (app = anon_hash; app < &anon_hash[ANON_HASH_SIZE]; app++) {
		ahm = &anonhash_lock[(app - anon_hash) & (AH_LOCK_SIZE - 1)];
		mutex_enter(ahm);
top:
		for (ap = *app; ap != NULL; ap = ap->an_hash) {
			if (ap->an_pvp == cvp &&
			    ap->an_poff >= osip->si_soff &&
			    ap->an_poff < osip->si_eoff) {
				ASSERT(TESTBIT(osip->si_swapslots,
				    btop((size_t)(ap->an_poff -
				    osip->si_soff))));
				tvp = ap->an_vp;
				toff = ap->an_off;
				VN_HOLD(tvp);
				mutex_exit(ahm);

				error = swapslot_free(tvp, toff, osip);

				VN_RELE(tvp);
				mutex_enter(ahm);
				if (!error && (osip->si_flags & ST_DOINGDEL)) {
					goto top;
				} else {
					if (error) {
						cmn_err(CE_WARN,
						    "swapslot_free failed %d",
						    error);
					}

					/*
					 * Add device back before making it
					 * visible.
					 */
					mutex_enter(&swapinfo_lock);
					osip->si_flags &=
					    ~(ST_INDEL | ST_DOINGDEL);
					mutex_exit(&swapinfo_lock);

					/*
					 * Update the anon space available
					 */
					mutex_enter(&anoninfo_lock);

					k_anoninfo.ani_phys_resv += adjust_swap;
					k_anoninfo.ani_mem_resv -= adjust_swap;
					k_anoninfo.ani_max += pages;

					mutex_enter(&freemem_lock);
					availrmem += adjust_swap;
					mutex_exit(&freemem_lock);

					mutex_exit(&anoninfo_lock);

					ANI_ADD(pages);

					mutex_exit(ahm);
					goto out;
				}
			}
		}
		mutex_exit(ahm);
	}

	/* All done, they'd better all be free! */
	mutex_enter(&swapinfo_lock);
	ASSERT(osip->si_nfpgs == osip->si_npgs);

	/* Now remove it from the swapinfo list */
	for (sipp = &swapinfo; *sipp != NULL; sipp = &(*sipp)->si_next) {
		if (*sipp == osip)
			break;
	}
	ASSERT(*sipp);
	*sipp = osip->si_next;
	if (silast == osip)
		if ((silast = osip->si_next) == NULL)
			silast = swapinfo;
	nswapfiles--;
	mutex_exit(&swapinfo_lock);

	kmem_free(osip->si_swapslots, osip->si_mapsize);
	kmem_free(osip->si_pname, osip->si_pnamelen);
	kmem_free(osip, sizeof (*osip));

	mutex_enter(&dump_lock);
	if (cvp == dumpvp)
		dumpfini();
	mutex_exit(&dump_lock);

	/* Release the vnode */

	mutex_enter(&swap_lock);
	(void) VOP_CLOSE(cvp, FREAD|FWRITE, 1, (offset_t)0, CRED(), NULL);
	mutex_enter(&cvp->v_lock);
	cvp->v_flag &= ~VISSWAP;
	mutex_exit(&cvp->v_lock);
	VN_RELE(cvp);
	mutex_exit(&swap_lock);
out:
	return (error);
}
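
/*
 * Illustrative arithmetic for swapdel()'s ENOMEM guard (numbers are
 * invented): deleting a 1000-page device when the remaining devices
 * have 300 unreserved pages (ani_max - ani_phys_resv) and memory can
 * contribute 500 pages (availrmem - swapfs_minfree) fails, since
 * 300 + 500 < 1000 and the departing device's reservations could not
 * be rehoused.
 */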

/*
 * Free up a physical swap slot on swapinfo sip, currently in use by the
 * anonymous page whose name is (vp, off).
 */
static int
swapslot_free(
	struct vnode *vp,
	u_offset_t off,
	struct swapinfo *sip)
{
	struct page *pp = NULL;
	struct anon *ap = NULL;
	int error = 0;
	kmutex_t *ahm;
	struct vnode *pvp = NULL;
	u_offset_t poff;
	int alloc_pg = 0;

	ASSERT(sip->si_vp != NULL);
	/*
	 * Get the page for the old swap slot if it exists, or create
	 * a new one.
	 */
again:
	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
		pp = page_create_va(vp, off, PAGESIZE, PG_WAIT | PG_EXCL,
		    segkmap, NULL);
		if (pp == NULL)
			goto again;
		alloc_pg = 1;

		error = swap_getphysname(vp, off, &pvp, &poff);
		if (error || pvp != sip->si_vp || poff < sip->si_soff ||
		    poff >= sip->si_eoff) {
			page_io_unlock(pp);
			/*LINTED: constant in conditional context*/
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
			return (0);
		}

		error = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
		    CRED(), NULL);
		if (error) {
			page_io_unlock(pp);
			if (error == EFAULT)
				error = 0;
			/*LINTED: constant in conditional context*/
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
			return (error);
		}
	}

	/*
	 * The anon could have been removed by anon_decref* and/or
	 * reallocated by the anon layer (an_pvp == NULL) with the same
	 * vp, off. In this case the page which has been allocated needs
	 * to be freed.
	 */
	if (!alloc_pg)
		page_io_lock(pp);
	ahm = &anonhash_lock[AH_LOCK(vp, off)];
	mutex_enter(ahm);
	ap = swap_anon(vp, off);
	if ((ap == NULL || ap->an_pvp == NULL) && alloc_pg) {
		mutex_exit(ahm);
		page_io_unlock(pp);
		/*LINTED: constant in conditional context*/
		VN_DISPOSE(pp, B_INVAL, 0, kcred);
		return (0);
	}

	/*
	 * Free the physical slot. It may have been freed up and replaced with
	 * another one while we were getting the page so we have to re-verify
	 * that this is really one we want. If we do free the slot we have
	 * to mark the page modified, as its backing store is now gone.
	 */
	if ((ap != NULL) && (ap->an_pvp == sip->si_vp && ap->an_poff >=
	    sip->si_soff && ap->an_poff < sip->si_eoff)) {
		swap_phys_free(ap->an_pvp, ap->an_poff, PAGESIZE);
		ap->an_pvp = NULL;
		ap->an_poff = 0;
		mutex_exit(ahm);
		hat_setmod(pp);
	} else {
		mutex_exit(ahm);
	}
	page_io_unlock(pp);
	page_unlock(pp);
	return (0);
}
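
/*
 * In short, the protocol above (a summary of the existing code, not new
 * behavior): make sure the slot's current contents live in the page
 * cache, then free the physical slot and hat_setmod() the page, so a
 * later pageout rewrites it to some remaining swap device rather than
 * the one being deleted.
 */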

/*
 * Get contig physical backing store for vp, in the range
 * [*offp, *offp + *lenp). It may back a subrange of this, but must
 * always include the requested offset or fail. Returns the offsets
 * backed as [*offp, *offp + *lenp) and the physical offsets used to
 * back them from *pvpp in the range [*pstartp, *pstartp + *lenp).
 * Returns	0 for success
 *		SE_NOANON -- no anon slot for requested page
 *		SE_NOSWAP -- no physical swap space available
 */
int
swap_newphysname(
	struct vnode *vp,
	u_offset_t offset,
	u_offset_t *offp,
	size_t *lenp,
	struct vnode **pvpp,
	u_offset_t *poffp)
{
	struct anon *ap = NULL;		/* anon slot for vp, off */
	int error = 0;
	struct vnode *pvp;
	u_offset_t poff, pstart, prem;
	size_t plen;
	u_offset_t off, start;
	kmutex_t *ahm;

	ASSERT(*offp <= offset && offset < *offp + *lenp);

	/* Get new physical swap slots. */
	plen = *lenp;
	if (!swap_phys_alloc(&pvp, &pstart, &plen, 0)) {
		/*
		 * No swap available so return error unless requested
		 * offset is already backed in which case return that.
		 */
		ahm = &anonhash_lock[AH_LOCK(vp, offset)];
		mutex_enter(ahm);
		if ((ap = swap_anon(vp, offset)) == NULL) {
			error = SE_NOANON;
			mutex_exit(ahm);
			return (error);
		}
		error = (ap->an_pvp ? 0 : SE_NOSWAP);
		*offp = offset;
		*lenp = PAGESIZE;
		*pvpp = ap->an_pvp;
		*poffp = ap->an_poff;
		mutex_exit(ahm);
		return (error);
	}

	/*
	 * We got plen (<= *lenp) contig slots. Use these to back a
	 * subrange of [*offp, *offp + *lenp) which includes offset.
	 * For now we just put offset at the end of the kluster.
	 * Clearly there are other possible choices - which is best?
	 */
	start = MAX(*offp,
	    (offset + PAGESIZE > plen) ? (offset + PAGESIZE - plen) : 0);
	ASSERT(start + plen <= *offp + *lenp);

	for (off = start, poff = pstart; poff < pstart + plen;
	    off += PAGESIZE, poff += PAGESIZE) {
		ahm = &anonhash_lock[AH_LOCK(vp, off)];
		mutex_enter(ahm);
		if ((ap = swap_anon(vp, off)) != NULL) {
			/* Free old slot if any, and assign new one */
			if (ap->an_pvp)
				swap_phys_free(ap->an_pvp, ap->an_poff,
				    PAGESIZE);
			ap->an_pvp = pvp;
			ap->an_poff = poff;
		} else {	/* No anon slot for a klustered page, quit. */
			prem = (pstart + plen) - poff;
			/* Already did requested page, do partial kluster */
			if (off > offset) {
				plen = poff - pstart;
				error = 0;
			/* Fail on requested page, error */
			} else if (off == offset) {
				error = SE_NOANON;
			/* Fail on prior page, fail on requested page, error */
			} else if ((ap = swap_anon(vp, offset)) == NULL) {
				error = SE_NOANON;
			/* Fail on prior page, got requested page, do only it */
			} else {
				/* Free old slot if any, and assign new one */
				if (ap->an_pvp)
					swap_phys_free(ap->an_pvp, ap->an_poff,
					    PAGESIZE);
				ap->an_pvp = pvp;
				ap->an_poff = poff;
				/* One page kluster */
				start = offset;
				plen = PAGESIZE;
				pstart = poff;
				poff += PAGESIZE;
				prem -= PAGESIZE;
			}
			/* Free unassigned slots */
			swap_phys_free(pvp, poff, prem);
			mutex_exit(ahm);
			break;
		}
		mutex_exit(ahm);
	}
	ASSERT(*offp <= start && start + plen <= *offp + *lenp);
	ASSERT(start <= offset && offset < start + plen);
	*offp = start;
	*lenp = plen;
	*pvpp = pvp;
	*poffp = pstart;
	return (error);
}
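
/*
 * Worked example of the kluster placement above (illustrative values,
 * assuming a 4K PAGESIZE): with *offp == 0x10000, *lenp == 0x8000,
 * offset == 0x14000 and plen == 0x3000 (three slots granted),
 * start == MAX(0x10000, 0x14000 + 0x1000 - 0x3000) == 0x12000, so the
 * slots back [0x12000, 0x15000) and offset lands in the last slot.
 */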

/*
 * Get the physical swap backing store location for a given anonymous page
 * named (vp, off). The backing store name is returned in (*pvpp, *poffp).
 * Returns	0 on success
 *		EIDRM -- no anon slot (page is not allocated)
 */
int
swap_getphysname(
	struct vnode *vp,
	u_offset_t off,
	struct vnode **pvpp,
	u_offset_t *poffp)
{
	struct anon *ap;
	int error = 0;
	kmutex_t *ahm;

	ahm = &anonhash_lock[AH_LOCK(vp, off)];
	mutex_enter(ahm);

	/* Get anon slot for vp, off */
	ap = swap_anon(vp, off);
	if (ap == NULL) {
		error = EIDRM;
		goto out;
	}
	*pvpp = ap->an_pvp;
	*poffp = ap->an_poff;
out:
	mutex_exit(ahm);
	return (error);
}