1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/param.h> 28 #include <sys/sysmacros.h> 29 #include <sys/conf.h> 30 #include <sys/fssnap_if.h> 31 #include <sys/fs/ufs_inode.h> 32 #include <sys/fs/ufs_lockfs.h> 33 #include <sys/fs/ufs_log.h> 34 #include <sys/fs/ufs_trans.h> 35 #include <sys/cmn_err.h> 36 #include <vm/pvn.h> 37 #include <vm/seg_map.h> 38 #include <sys/fdbuffer.h> 39 40 #ifdef DEBUG 41 int evn_ufs_debug = 0; 42 #define DEBUGF(args) { if (evn_ufs_debug) cmn_err args; } 43 #else 44 #define DEBUGF(args) 45 #endif 46 47 /* 48 * ufs_rdwr_data - supports reading or writing data when 49 * no changes are permitted in file size or space allocation. 50 * 51 * Inputs: 52 * fdb - The mandatory fdbuffer supports 53 * the read or write operation. 54 * flags - defaults (zero value) to synchronous write 55 * B_READ - indicates read operation 56 * B_ASYNC - indicates perform operation asynchronously 57 */ 58 /*ARGSUSED*/ 59 int 60 ufs_rdwr_data( 61 vnode_t *vnodep, 62 u_offset_t offset, 63 size_t len, 64 fdbuffer_t *fdbp, 65 int flags, 66 cred_t *credp) 67 { 68 struct inode *ip = VTOI(vnodep); 69 struct fs *fs; 70 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 71 struct buf *bp; 72 krw_t rwtype = RW_READER; 73 u_offset_t offset1 = offset; /* Initial offset */ 74 size_t iolen; 75 int curlen = 0; 76 int pplen; 77 daddr_t bn; 78 int contig = 0; 79 int error = 0; 80 int nbytes; /* Number bytes this IO */ 81 int offsetn; /* Start point this IO */ 82 int iswrite = flags & B_WRITE; 83 int io_started = 0; /* No IO started */ 84 struct ulockfs *ulp; 85 uint_t protp = PROT_ALL; 86 87 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, !iswrite, 88 &protp); 89 if (error) { 90 if (flags & B_ASYNC) { 91 fdb_ioerrdone(fdbp, error); 92 } 93 return (error); 94 } 95 fs = ufsvfsp->vfs_fs; 96 iolen = len; 97 98 DEBUGF((CE_CONT, "?ufs_rdwr: %s vp: %p pages:%p off %llx len %lx" 99 " isize: %llx fdb: %p\n", 100 flags & B_READ ? "READ" : "WRITE", (void *)vnodep, 101 (void *)vnodep->v_pages, offset1, iolen, ip->i_size, (void *)fdbp)); 102 103 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER); 104 rw_enter(&ip->i_contents, rwtype); 105 106 ASSERT(offset1 < ip->i_size); 107 108 if ((offset1 + iolen) > ip->i_size) { 109 iolen = ip->i_size - offset1; 110 } 111 while (!error && curlen < iolen) { 112 113 contig = 0; 114 115 if ((error = bmap_read(ip, offset1, &bn, &contig)) != 0) { 116 break; 117 } 118 ASSERT(!(bn == UFS_HOLE && iswrite)); 119 if (bn == UFS_HOLE) { 120 /* 121 * If the above assertion is true, 122 * then the following if statement can never be true. 123 */ 124 if (iswrite && (rwtype == RW_READER)) { 125 rwtype = RW_WRITER; 126 if (!rw_tryupgrade(&ip->i_contents)) { 127 rw_exit(&ip->i_contents); 128 rw_enter(&ip->i_contents, rwtype); 129 continue; 130 } 131 } 132 offsetn = blkoff(fs, offset1); 133 pplen = P2ROUNDUP(len, PAGESIZE); 134 nbytes = MIN((pplen - curlen), 135 (fs->fs_bsize - offsetn)); 136 ASSERT(nbytes > 0); 137 138 /* 139 * We may be reading or writing. 140 */ 141 DEBUGF((CE_CONT, "?ufs_rdwr_data: hole %llx - %lx\n", 142 offset1, (iolen - curlen))); 143 144 if (iswrite) { 145 printf("**WARNING: ignoring hole in write\n"); 146 error = ENOSPC; 147 } else { 148 fdb_add_hole(fdbp, offset1 - offset, nbytes); 149 } 150 offset1 += nbytes; 151 curlen += nbytes; 152 continue; 153 154 } 155 ASSERT(contig > 0); 156 pplen = P2ROUNDUP(len, PAGESIZE); 157 158 contig = MIN(contig, len - curlen); 159 contig = P2ROUNDUP(contig, DEV_BSIZE); 160 161 bp = fdb_iosetup(fdbp, offset1 - offset, contig, vnodep, flags); 162 163 bp->b_edev = ip->i_dev; 164 bp->b_dev = cmpdev(ip->i_dev); 165 bp->b_blkno = bn; 166 bp->b_file = ip->i_vnode; 167 bp->b_offset = (offset_t)offset1; 168 169 if (ufsvfsp->vfs_snapshot) { 170 fssnap_strategy(&ufsvfsp->vfs_snapshot, bp); 171 } else { 172 (void) bdev_strategy(bp); 173 } 174 io_started = 1; 175 176 offset1 += contig; 177 curlen += contig; 178 if (iswrite) 179 lwp_stat_update(LWP_STAT_OUBLK, 1); 180 else 181 lwp_stat_update(LWP_STAT_INBLK, 1); 182 183 if ((flags & B_ASYNC) == 0) { 184 error = biowait(bp); 185 fdb_iodone(bp); 186 } 187 188 DEBUGF((CE_CONT, "?loop ufs_rdwr_data.. off %llx len %lx\n", 189 offset1, (iolen - curlen))); 190 } 191 192 DEBUGF((CE_CONT, "?ufs_rdwr_data: off %llx len %lx pages: %p ------\n", 193 offset1, (iolen - curlen), (void *)vnodep->v_pages)); 194 195 rw_exit(&ip->i_contents); 196 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock); 197 198 if (flags & B_ASYNC) { 199 /* 200 * Show that no more asynchronous IO will be added 201 */ 202 fdb_ioerrdone(fdbp, error); 203 } 204 if (ulp) { 205 ufs_lockfs_end(ulp); 206 } 207 if (io_started && flags & B_ASYNC) { 208 return (0); 209 } else { 210 return (error); 211 } 212 } 213 214 /* 215 * ufs_alloc_data - supports allocating space and reads or writes 216 * that involve changes to file length or space allocation. 217 * 218 * This function is more expensive, because of the UFS log transaction, 219 * so ufs_rdwr_data() should be used when space or file length changes 220 * will not occur. 221 * 222 * Inputs: 223 * fdb - A null pointer instructs this function to only allocate 224 * space for the specified offset and length. 225 * An actual fdbuffer instructs this function to perform 226 * the read or write operation. 227 * flags - defaults (zero value) to synchronous write 228 * B_READ - indicates read operation 229 * B_ASYNC - indicates perform operation asynchronously 230 */ 231 int 232 ufs_alloc_data( 233 vnode_t *vnodep, 234 u_offset_t offset, 235 size_t *len, 236 fdbuffer_t *fdbp, 237 int flags, 238 cred_t *credp) 239 { 240 struct inode *ip = VTOI(vnodep); 241 size_t done_len, io_len; 242 int contig; 243 u_offset_t uoff, io_off; 244 int error = 0; /* No error occurred */ 245 int offsetn; /* Start point this IO */ 246 int nbytes; /* Number bytes in this IO */ 247 daddr_t bn; 248 struct fs *fs; 249 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 250 int i_size_changed = 0; 251 u_offset_t old_i_size; 252 struct ulockfs *ulp; 253 int trans_size; 254 int issync; /* UFS Log transaction */ 255 /* synchronous when non-zero */ 256 257 int io_started = 0; /* No IO started */ 258 uint_t protp = PROT_ALL; 259 260 ASSERT((flags & B_WRITE) == 0); 261 262 /* 263 * Obey the lockfs protocol 264 */ 265 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp); 266 if (error) { 267 if ((fdbp != NULL) && (flags & B_ASYNC)) { 268 fdb_ioerrdone(fdbp, error); 269 } 270 return (error); 271 } 272 if (ulp) { 273 /* 274 * Try to begin a UFS log transaction 275 */ 276 trans_size = TOP_GETPAGE_SIZE(ip); 277 TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE, 278 trans_size, error); 279 if (error == EWOULDBLOCK) { 280 ufs_lockfs_end(ulp); 281 if ((fdbp != NULL) && (flags & B_ASYNC)) { 282 fdb_ioerrdone(fdbp, EDEADLK); 283 } 284 return (EDEADLK); 285 } 286 } 287 288 uoff = offset; 289 io_off = offset; 290 io_len = *len; 291 done_len = 0; 292 293 DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n", 294 uoff, (io_len - done_len), ip->i_size, (void *)fdbp)); 295 296 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER); 297 rw_enter(&ip->i_contents, RW_WRITER); 298 299 ASSERT((ip->i_mode & IFMT) == IFREG); 300 301 fs = ip->i_fs; 302 303 while (error == 0 && done_len < io_len) { 304 uoff = (u_offset_t)(io_off + done_len); 305 offsetn = (int)blkoff(fs, uoff); 306 nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len); 307 308 DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n", 309 uoff, nbytes)); 310 311 if (uoff + nbytes > ip->i_size) { 312 /* 313 * We are extending the length of the file. 314 * bmap is used so that we are sure that 315 * if we need to allocate new blocks, that it 316 * is done here before we up the file size. 317 */ 318 DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n", 319 ip->i_size, uoff + nbytes)); 320 321 error = bmap_write(ip, uoff, (offsetn + nbytes), 322 BI_ALLOC_ONLY, NULL, credp); 323 if (ip->i_flag & (ICHG|IUPD)) 324 ip->i_seq++; 325 if (error) { 326 DEBUGF((CE_CONT, "?ufs_alloc_data: grow " 327 "failed err: %d\n", error)); 328 break; 329 } 330 if (fdbp != NULL) { 331 if (uoff >= ip->i_size) { 332 /* 333 * Desired offset is past end of bytes 334 * in file, so we have a hole. 335 */ 336 fdb_add_hole(fdbp, uoff - offset, 337 nbytes); 338 } else { 339 int contig; 340 buf_t *bp; 341 342 error = bmap_read(ip, uoff, &bn, 343 &contig); 344 if (error) { 345 break; 346 } 347 348 contig = ip->i_size - uoff; 349 contig = P2ROUNDUP(contig, DEV_BSIZE); 350 351 bp = fdb_iosetup(fdbp, uoff - offset, 352 contig, vnodep, flags); 353 354 bp->b_edev = ip->i_dev; 355 bp->b_dev = cmpdev(ip->i_dev); 356 bp->b_blkno = bn; 357 bp->b_file = ip->i_vnode; 358 bp->b_offset = (offset_t)uoff; 359 360 if (ufsvfsp->vfs_snapshot) { 361 fssnap_strategy( 362 &ufsvfsp->vfs_snapshot, bp); 363 } else { 364 (void) bdev_strategy(bp); 365 } 366 io_started = 1; 367 368 lwp_stat_update(LWP_STAT_OUBLK, 1); 369 370 if ((flags & B_ASYNC) == 0) { 371 error = biowait(bp); 372 fdb_iodone(bp); 373 if (error) { 374 break; 375 } 376 } 377 if (contig > (ip->i_size - uoff)) { 378 contig -= ip->i_size - uoff; 379 380 fdb_add_hole(fdbp, 381 ip->i_size - offset, 382 contig); 383 } 384 } 385 } 386 387 i_size_changed = 1; 388 old_i_size = ip->i_size; 389 UFS_SET_ISIZE(uoff + nbytes, ip); 390 TRANS_INODE(ip->i_ufsvfs, ip); 391 /* 392 * file has grown larger than 2GB. Set flag 393 * in superblock to indicate this, if it 394 * is not already set. 395 */ 396 if ((ip->i_size > MAXOFF32_T) && 397 !(fs->fs_flags & FSLARGEFILES)) { 398 ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES); 399 mutex_enter(&ufsvfsp->vfs_lock); 400 fs->fs_flags |= FSLARGEFILES; 401 ufs_sbwrite(ufsvfsp); 402 mutex_exit(&ufsvfsp->vfs_lock); 403 } 404 } else { 405 /* 406 * The file length is not being extended. 407 */ 408 error = bmap_read(ip, uoff, &bn, &contig); 409 if (error) { 410 DEBUGF((CE_CONT, "?ufs_alloc_data: " 411 "bmap_read err: %d\n", error)); 412 break; 413 } 414 415 if (bn != UFS_HOLE) { 416 /* 417 * Did not map a hole in the file 418 */ 419 int contig = P2ROUNDUP(nbytes, DEV_BSIZE); 420 buf_t *bp; 421 422 if (fdbp != NULL) { 423 bp = fdb_iosetup(fdbp, uoff - offset, 424 contig, vnodep, flags); 425 426 bp->b_edev = ip->i_dev; 427 bp->b_dev = cmpdev(ip->i_dev); 428 bp->b_blkno = bn; 429 bp->b_file = ip->i_vnode; 430 bp->b_offset = (offset_t)uoff; 431 432 if (ufsvfsp->vfs_snapshot) { 433 fssnap_strategy( 434 &ufsvfsp->vfs_snapshot, bp); 435 } else { 436 (void) bdev_strategy(bp); 437 } 438 io_started = 1; 439 440 lwp_stat_update(LWP_STAT_OUBLK, 1); 441 442 if ((flags & B_ASYNC) == 0) { 443 error = biowait(bp); 444 fdb_iodone(bp); 445 if (error) { 446 break; 447 } 448 } 449 } 450 } else { 451 /* 452 * We read a hole in the file. 453 * We have to allocate blocks for the hole. 454 */ 455 error = bmap_write(ip, uoff, (offsetn + nbytes), 456 BI_ALLOC_ONLY, NULL, credp); 457 if (ip->i_flag & (ICHG|IUPD)) 458 ip->i_seq++; 459 if (error) { 460 DEBUGF((CE_CONT, "?ufs_alloc_data: fill" 461 " hole failed error: %d\n", error)); 462 break; 463 } 464 if (fdbp != NULL) { 465 fdb_add_hole(fdbp, uoff - offset, 466 nbytes); 467 } 468 } 469 } 470 done_len += nbytes; 471 } 472 473 if (error) { 474 if (i_size_changed) { 475 /* 476 * Allocation of the blocks for the file failed. 477 * So truncate the file size back to its original size. 478 */ 479 (void) ufs_itrunc(ip, old_i_size, 0, credp); 480 } 481 } 482 483 DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n", 484 uoff, (io_len - done_len))); 485 486 if ((offset + *len) < (NDADDR * fs->fs_bsize)) { 487 *len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset); 488 } else { 489 *len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset); 490 } 491 492 /* 493 * Flush cached pages. 494 * 495 * XXX - There should be no pages involved, since the I/O was performed 496 * through the device strategy routine and the page cache was bypassed. 497 * However, testing has demonstrated that this VOP_PUTPAGE is 498 * necessary. Without this, data might not always be read back as it 499 * was written. 500 * 501 */ 502 (void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp, NULL); 503 504 rw_exit(&ip->i_contents); 505 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock); 506 507 if ((fdbp != NULL) && (flags & B_ASYNC)) { 508 /* 509 * Show that no more asynchronous IO will be added 510 */ 511 fdb_ioerrdone(fdbp, error); 512 } 513 if (ulp) { 514 /* 515 * End the UFS Log transaction 516 */ 517 TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE, 518 trans_size); 519 ufs_lockfs_end(ulp); 520 } 521 if (io_started && (flags & B_ASYNC)) { 522 return (0); 523 } else { 524 return (error); 525 } 526 } 527