1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/sysmacros.h> 31 #include <sys/conf.h> 32 #include <sys/fssnap_if.h> 33 #include <sys/fs/ufs_inode.h> 34 #include <sys/fs/ufs_lockfs.h> 35 #include <sys/fs/ufs_log.h> 36 #include <sys/fs/ufs_trans.h> 37 #include <sys/cmn_err.h> 38 #include <vm/pvn.h> 39 #include <vm/seg_map.h> 40 #include <sys/fdbuffer.h> 41 42 #ifdef DEBUG 43 int evn_ufs_debug = 0; 44 #define DEBUGF(args) { if (evn_ufs_debug) cmn_err args; } 45 #else 46 #define DEBUGF(args) 47 #endif 48 49 /* 50 * ufs_rdwr_data - supports reading or writing data when 51 * no changes are permitted in file size or space allocation. 52 * 53 * Inputs: 54 * fdb - The mandatory fdbuffer supports 55 * the read or write operation. 56 * flags - defaults (zero value) to synchronous write 57 * B_READ - indicates read operation 58 * B_ASYNC - indicates perform operation asynchronously 59 */ 60 /*ARGSUSED*/ 61 int 62 ufs_rdwr_data( 63 vnode_t *vnodep, 64 u_offset_t offset, 65 size_t len, 66 fdbuffer_t *fdbp, 67 int flags, 68 cred_t *credp) 69 { 70 struct inode *ip = VTOI(vnodep); 71 struct fs *fs; 72 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 73 struct buf *bp; 74 krw_t rwtype = RW_READER; 75 u_offset_t offset1 = offset; /* Initial offset */ 76 size_t iolen; 77 int curlen = 0; 78 int pplen; 79 daddr_t bn; 80 int contig = 0; 81 int error = 0; 82 int nbytes; /* Number bytes this IO */ 83 int offsetn; /* Start point this IO */ 84 int iswrite = flags & B_WRITE; 85 int io_started = 0; /* No IO started */ 86 struct ulockfs *ulp; 87 uint_t protp = PROT_ALL; 88 89 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, !iswrite, 90 &protp); 91 if (error) { 92 if (flags & B_ASYNC) { 93 fdb_ioerrdone(fdbp, error); 94 } 95 return (error); 96 } 97 fs = ufsvfsp->vfs_fs; 98 iolen = len; 99 100 DEBUGF((CE_CONT, "?ufs_rdwr: %s vp: %p pages:%p off %llx len %lx" 101 " isize: %llx fdb: %p\n", 102 flags & B_READ ? "READ" : "WRITE", (void *)vnodep, 103 (void *)vnodep->v_pages, offset1, iolen, ip->i_size, (void *)fdbp)); 104 105 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER); 106 rw_enter(&ip->i_contents, rwtype); 107 108 ASSERT(offset1 < ip->i_size); 109 110 if ((offset1 + iolen) > ip->i_size) { 111 iolen = ip->i_size - offset1; 112 } 113 while (!error && curlen < iolen) { 114 115 contig = 0; 116 117 if ((error = bmap_read(ip, offset1, &bn, &contig)) != 0) { 118 break; 119 } 120 ASSERT(!(bn == UFS_HOLE && iswrite)); 121 if (bn == UFS_HOLE) { 122 /* 123 * If the above assertion is true, 124 * then the following if statement can never be true. 125 */ 126 if (iswrite && (rwtype == RW_READER)) { 127 rwtype = RW_WRITER; 128 if (!rw_tryupgrade(&ip->i_contents)) { 129 rw_exit(&ip->i_contents); 130 rw_enter(&ip->i_contents, rwtype); 131 continue; 132 } 133 } 134 offsetn = blkoff(fs, offset1); 135 pplen = P2ROUNDUP(len, PAGESIZE); 136 nbytes = MIN((pplen - curlen), 137 (fs->fs_bsize - offsetn)); 138 ASSERT(nbytes > 0); 139 140 /* 141 * We may be reading or writing. 142 */ 143 DEBUGF((CE_CONT, "?ufs_rdwr_data: hole %llx - %lx\n", 144 offset1, (iolen - curlen))); 145 146 if (iswrite) { 147 printf("**WARNING: ignoring hole in write\n"); 148 error = ENOSPC; 149 } else { 150 fdb_add_hole(fdbp, offset1 - offset, nbytes); 151 } 152 offset1 += nbytes; 153 curlen += nbytes; 154 continue; 155 156 } 157 ASSERT(contig > 0); 158 pplen = P2ROUNDUP(len, PAGESIZE); 159 160 contig = MIN(contig, len - curlen); 161 contig = P2ROUNDUP(contig, DEV_BSIZE); 162 163 bp = fdb_iosetup(fdbp, offset1 - offset, contig, vnodep, flags); 164 165 bp->b_edev = ip->i_dev; 166 bp->b_dev = cmpdev(ip->i_dev); 167 bp->b_blkno = bn; 168 bp->b_file = ip->i_vnode; 169 bp->b_offset = (offset_t)offset1; 170 171 if (ufsvfsp->vfs_snapshot) { 172 fssnap_strategy(&ufsvfsp->vfs_snapshot, bp); 173 } else { 174 (void) bdev_strategy(bp); 175 } 176 io_started = 1; 177 178 offset1 += contig; 179 curlen += contig; 180 if (iswrite) 181 lwp_stat_update(LWP_STAT_OUBLK, 1); 182 else 183 lwp_stat_update(LWP_STAT_INBLK, 1); 184 185 if ((flags & B_ASYNC) == 0) { 186 error = biowait(bp); 187 fdb_iodone(bp); 188 } 189 190 DEBUGF((CE_CONT, "?loop ufs_rdwr_data.. off %llx len %lx\n", 191 offset1, (iolen - curlen))); 192 } 193 194 DEBUGF((CE_CONT, "?ufs_rdwr_data: off %llx len %lx pages: %p ------\n", 195 offset1, (iolen - curlen), (void *)vnodep->v_pages)); 196 197 rw_exit(&ip->i_contents); 198 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock); 199 200 if (flags & B_ASYNC) { 201 /* 202 * Show that no more asynchronous IO will be added 203 */ 204 fdb_ioerrdone(fdbp, error); 205 } 206 if (ulp) { 207 ufs_lockfs_end(ulp); 208 } 209 if (io_started && flags & B_ASYNC) { 210 return (0); 211 } else { 212 return (error); 213 } 214 } 215 216 /* 217 * ufs_alloc_data - supports allocating space and reads or writes 218 * that involve changes to file length or space allocation. 219 * 220 * This function is more expensive, because of the UFS log transaction, 221 * so ufs_rdwr_data() should be used when space or file length changes 222 * will not occur. 223 * 224 * Inputs: 225 * fdb - A null pointer instructs this function to only allocate 226 * space for the specified offset and length. 227 * An actual fdbuffer instructs this function to perform 228 * the read or write operation. 229 * flags - defaults (zero value) to synchronous write 230 * B_READ - indicates read operation 231 * B_ASYNC - indicates perform operation asynchronously 232 */ 233 int 234 ufs_alloc_data( 235 vnode_t *vnodep, 236 u_offset_t offset, 237 size_t *len, 238 fdbuffer_t *fdbp, 239 int flags, 240 cred_t *credp) 241 { 242 struct inode *ip = VTOI(vnodep); 243 size_t done_len, io_len; 244 int contig; 245 u_offset_t uoff, io_off; 246 int error = 0; /* No error occurred */ 247 int offsetn; /* Start point this IO */ 248 int nbytes; /* Number bytes in this IO */ 249 daddr_t bn; 250 struct fs *fs; 251 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 252 int i_size_changed = 0; 253 u_offset_t old_i_size; 254 struct ulockfs *ulp; 255 int trans_size; 256 int issync; /* UFS Log transaction */ 257 /* synchronous when non-zero */ 258 259 int io_started = 0; /* No IO started */ 260 uint_t protp = PROT_ALL; 261 262 ASSERT((flags & B_WRITE) == 0); 263 264 /* 265 * Obey the lockfs protocol 266 */ 267 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp); 268 if (error) { 269 if ((fdbp != NULL) && (flags & B_ASYNC)) { 270 fdb_ioerrdone(fdbp, error); 271 } 272 return (error); 273 } 274 if (ulp) { 275 /* 276 * Try to begin a UFS log transaction 277 */ 278 trans_size = TOP_GETPAGE_SIZE(ip); 279 TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE, 280 trans_size, error); 281 if (error == EWOULDBLOCK) { 282 ufs_lockfs_end(ulp); 283 if ((fdbp != NULL) && (flags & B_ASYNC)) { 284 fdb_ioerrdone(fdbp, EDEADLK); 285 } 286 return (EDEADLK); 287 } 288 } 289 290 uoff = offset; 291 io_off = offset; 292 io_len = *len; 293 done_len = 0; 294 295 DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n", 296 uoff, (io_len - done_len), ip->i_size, (void *)fdbp)); 297 298 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER); 299 rw_enter(&ip->i_contents, RW_WRITER); 300 301 ASSERT((ip->i_mode & IFMT) == IFREG); 302 303 fs = ip->i_fs; 304 305 while (error == 0 && done_len < io_len) { 306 uoff = (u_offset_t)(io_off + done_len); 307 offsetn = (int)blkoff(fs, uoff); 308 nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len); 309 310 DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n", 311 uoff, nbytes)); 312 313 if (uoff + nbytes > ip->i_size) { 314 /* 315 * We are extending the length of the file. 316 * bmap is used so that we are sure that 317 * if we need to allocate new blocks, that it 318 * is done here before we up the file size. 319 */ 320 DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n", 321 ip->i_size, uoff + nbytes)); 322 323 error = bmap_write(ip, uoff, (offsetn + nbytes), 324 BI_ALLOC_ONLY, NULL, credp); 325 if (ip->i_flag & (ICHG|IUPD)) 326 ip->i_seq++; 327 if (error) { 328 DEBUGF((CE_CONT, "?ufs_alloc_data: grow " 329 "failed err: %d\n", error)); 330 break; 331 } 332 if (fdbp != NULL) { 333 if (uoff >= ip->i_size) { 334 /* 335 * Desired offset is past end of bytes 336 * in file, so we have a hole. 337 */ 338 fdb_add_hole(fdbp, uoff - offset, 339 nbytes); 340 } else { 341 int contig; 342 buf_t *bp; 343 344 error = bmap_read(ip, uoff, &bn, 345 &contig); 346 if (error) { 347 break; 348 } 349 350 contig = ip->i_size - uoff; 351 contig = P2ROUNDUP(contig, DEV_BSIZE); 352 353 bp = fdb_iosetup(fdbp, uoff - offset, 354 contig, vnodep, flags); 355 356 bp->b_edev = ip->i_dev; 357 bp->b_dev = cmpdev(ip->i_dev); 358 bp->b_blkno = bn; 359 bp->b_file = ip->i_vnode; 360 bp->b_offset = (offset_t)uoff; 361 362 if (ufsvfsp->vfs_snapshot) { 363 fssnap_strategy( 364 &ufsvfsp->vfs_snapshot, bp); 365 } else { 366 (void) bdev_strategy(bp); 367 } 368 io_started = 1; 369 370 lwp_stat_update(LWP_STAT_OUBLK, 1); 371 372 if ((flags & B_ASYNC) == 0) { 373 error = biowait(bp); 374 fdb_iodone(bp); 375 if (error) { 376 break; 377 } 378 } 379 if (contig > (ip->i_size - uoff)) { 380 contig -= ip->i_size - uoff; 381 382 fdb_add_hole(fdbp, 383 ip->i_size - offset, 384 contig); 385 } 386 } 387 } 388 389 i_size_changed = 1; 390 old_i_size = ip->i_size; 391 UFS_SET_ISIZE(uoff + nbytes, ip); 392 TRANS_INODE(ip->i_ufsvfs, ip); 393 /* 394 * file has grown larger than 2GB. Set flag 395 * in superblock to indicate this, if it 396 * is not already set. 397 */ 398 if ((ip->i_size > MAXOFF32_T) && 399 !(fs->fs_flags & FSLARGEFILES)) { 400 ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES); 401 mutex_enter(&ufsvfsp->vfs_lock); 402 fs->fs_flags |= FSLARGEFILES; 403 ufs_sbwrite(ufsvfsp); 404 mutex_exit(&ufsvfsp->vfs_lock); 405 } 406 } else { 407 /* 408 * The file length is not being extended. 409 */ 410 error = bmap_read(ip, uoff, &bn, &contig); 411 if (error) { 412 DEBUGF((CE_CONT, "?ufs_alloc_data: " 413 "bmap_read err: %d\n", error)); 414 break; 415 } 416 417 if (bn != UFS_HOLE) { 418 /* 419 * Did not map a hole in the file 420 */ 421 int contig = P2ROUNDUP(nbytes, DEV_BSIZE); 422 buf_t *bp; 423 424 if (fdbp != NULL) { 425 bp = fdb_iosetup(fdbp, uoff - offset, 426 contig, vnodep, flags); 427 428 bp->b_edev = ip->i_dev; 429 bp->b_dev = cmpdev(ip->i_dev); 430 bp->b_blkno = bn; 431 bp->b_file = ip->i_vnode; 432 bp->b_offset = (offset_t)uoff; 433 434 if (ufsvfsp->vfs_snapshot) { 435 fssnap_strategy( 436 &ufsvfsp->vfs_snapshot, bp); 437 } else { 438 (void) bdev_strategy(bp); 439 } 440 io_started = 1; 441 442 lwp_stat_update(LWP_STAT_OUBLK, 1); 443 444 if ((flags & B_ASYNC) == 0) { 445 error = biowait(bp); 446 fdb_iodone(bp); 447 if (error) { 448 break; 449 } 450 } 451 } 452 } else { 453 /* 454 * We read a hole in the file. 455 * We have to allocate blocks for the hole. 456 */ 457 error = bmap_write(ip, uoff, (offsetn + nbytes), 458 BI_ALLOC_ONLY, NULL, credp); 459 if (ip->i_flag & (ICHG|IUPD)) 460 ip->i_seq++; 461 if (error) { 462 DEBUGF((CE_CONT, "?ufs_alloc_data: fill" 463 " hole failed error: %d\n", error)); 464 break; 465 } 466 if (fdbp != NULL) { 467 fdb_add_hole(fdbp, uoff - offset, 468 nbytes); 469 } 470 } 471 } 472 done_len += nbytes; 473 } 474 475 if (error) { 476 if (i_size_changed) { 477 /* 478 * Allocation of the blocks for the file failed. 479 * So truncate the file size back to its original size. 480 */ 481 (void) ufs_itrunc(ip, old_i_size, 0, credp); 482 } 483 } 484 485 DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n", 486 uoff, (io_len - done_len))); 487 488 if ((offset + *len) < (NDADDR * fs->fs_bsize)) { 489 *len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset); 490 } else { 491 *len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset); 492 } 493 494 /* 495 * Flush cached pages. 496 * 497 * XXX - There should be no pages involved, since the I/O was performed 498 * through the device strategy routine and the page cache was bypassed. 499 * However, testing has demonstrated that this VOP_PUTPAGE is 500 * necessary. Without this, data might not always be read back as it 501 * was written. 502 * 503 */ 504 (void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp, NULL); 505 506 rw_exit(&ip->i_contents); 507 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock); 508 509 if ((fdbp != NULL) && (flags & B_ASYNC)) { 510 /* 511 * Show that no more asynchronous IO will be added 512 */ 513 fdb_ioerrdone(fdbp, error); 514 } 515 if (ulp) { 516 /* 517 * End the UFS Log transaction 518 */ 519 TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE, 520 trans_size); 521 ufs_lockfs_end(ulp); 522 } 523 if (io_started && (flags & B_ASYNC)) { 524 return (0); 525 } else { 526 return (error); 527 } 528 } 529