1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/sysmacros.h> 32 #include <sys/conf.h> 33 #include <sys/fssnap_if.h> 34 #include <sys/fs/ufs_inode.h> 35 #include <sys/fs/ufs_lockfs.h> 36 #include <sys/fs/ufs_log.h> 37 #include <sys/fs/ufs_trans.h> 38 #include <sys/cmn_err.h> 39 #include <vm/pvn.h> 40 #include <vm/seg_map.h> 41 #include <sys/fdbuffer.h> 42 43 #ifdef DEBUG 44 int evn_ufs_debug = 0; 45 #define DEBUGF(args) { if (evn_ufs_debug) cmn_err args; } 46 #else 47 #define DEBUGF(args) 48 #endif 49 50 /* 51 * ufs_rdwr_data - supports reading or writing data when 52 * no changes are permitted in file size or space allocation. 53 * 54 * Inputs: 55 * fdb - The mandatory fdbuffer supports 56 * the read or write operation. 57 * flags - defaults (zero value) to synchronous write 58 * B_READ - indicates read operation 59 * B_ASYNC - indicates perform operation asynchronously 60 */ 61 /*ARGSUSED*/ 62 int 63 ufs_rdwr_data( 64 vnode_t *vnodep, 65 u_offset_t offset, 66 size_t len, 67 fdbuffer_t *fdbp, 68 int flags, 69 cred_t *credp) 70 { 71 struct inode *ip = VTOI(vnodep); 72 struct fs *fs; 73 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 74 struct buf *bp; 75 krw_t rwtype = RW_READER; 76 u_offset_t offset1 = offset; /* Initial offset */ 77 size_t iolen; 78 int curlen = 0; 79 int pplen; 80 daddr_t bn; 81 int contig = 0; 82 int error = 0; 83 int nbytes; /* Number bytes this IO */ 84 int offsetn; /* Start point this IO */ 85 int iswrite = flags & B_WRITE; 86 int io_started = 0; /* No IO started */ 87 struct ulockfs *ulp; 88 uint_t protp = PROT_ALL; 89 90 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, !iswrite, 91 &protp); 92 if (error) { 93 if (flags & B_ASYNC) { 94 fdb_ioerrdone(fdbp, error); 95 } 96 return (error); 97 } 98 fs = ufsvfsp->vfs_fs; 99 iolen = len; 100 101 DEBUGF((CE_CONT, "?ufs_rdwr: %s vp: %p pages:%p off %llx len %lx" 102 " isize: %llx fdb: %p\n", 103 flags & B_READ ? "READ" : "WRITE", (void *)vnodep, 104 (void *)vnodep->v_pages, offset1, iolen, ip->i_size, (void *)fdbp)); 105 106 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER); 107 rw_enter(&ip->i_contents, rwtype); 108 109 ASSERT(offset1 < ip->i_size); 110 111 if ((offset1 + iolen) > ip->i_size) { 112 iolen = ip->i_size - offset1; 113 } 114 while (!error && curlen < iolen) { 115 116 contig = 0; 117 118 if ((error = bmap_read(ip, offset1, &bn, &contig)) != 0) { 119 break; 120 } 121 ASSERT(!(bn == UFS_HOLE && iswrite)); 122 if (bn == UFS_HOLE) { 123 /* 124 * If the above assertion is true, 125 * then the following if statement can never be true. 126 */ 127 if (iswrite && (rwtype == RW_READER)) { 128 rwtype = RW_WRITER; 129 if (!rw_tryupgrade(&ip->i_contents)) { 130 rw_exit(&ip->i_contents); 131 rw_enter(&ip->i_contents, rwtype); 132 continue; 133 } 134 } 135 offsetn = blkoff(fs, offset1); 136 pplen = P2ROUNDUP(len, PAGESIZE); 137 nbytes = MIN((pplen - curlen), 138 (fs->fs_bsize - offsetn)); 139 ASSERT(nbytes > 0); 140 141 /* 142 * We may be reading or writing. 143 */ 144 DEBUGF((CE_CONT, "?ufs_rdwr_data: hole %llx - %lx\n", 145 offset1, (iolen - curlen))); 146 147 if (iswrite) { 148 printf("**WARNING: ignoring hole in write\n"); 149 error = ENOSPC; 150 } else { 151 fdb_add_hole(fdbp, offset1 - offset, nbytes); 152 } 153 offset1 += nbytes; 154 curlen += nbytes; 155 continue; 156 157 } 158 ASSERT(contig > 0); 159 pplen = P2ROUNDUP(len, PAGESIZE); 160 161 contig = MIN(contig, len - curlen); 162 contig = P2ROUNDUP(contig, DEV_BSIZE); 163 164 bp = fdb_iosetup(fdbp, offset1 - offset, contig, vnodep, flags); 165 166 bp->b_edev = ip->i_dev; 167 bp->b_dev = cmpdev(ip->i_dev); 168 bp->b_blkno = bn; 169 bp->b_file = ip->i_vnode; 170 bp->b_offset = (offset_t)offset1; 171 172 if (ufsvfsp->vfs_snapshot) { 173 fssnap_strategy(&ufsvfsp->vfs_snapshot, bp); 174 } else { 175 (void) bdev_strategy(bp); 176 } 177 io_started = 1; 178 179 offset1 += contig; 180 curlen += contig; 181 if (iswrite) 182 lwp_stat_update(LWP_STAT_OUBLK, 1); 183 else 184 lwp_stat_update(LWP_STAT_INBLK, 1); 185 186 if ((flags & B_ASYNC) == 0) { 187 error = biowait(bp); 188 fdb_iodone(bp); 189 } 190 191 DEBUGF((CE_CONT, "?loop ufs_rdwr_data.. off %llx len %lx\n", 192 offset1, (iolen - curlen))); 193 } 194 195 DEBUGF((CE_CONT, "?ufs_rdwr_data: off %llx len %lx pages: %p ------\n", 196 offset1, (iolen - curlen), (void *)vnodep->v_pages)); 197 198 rw_exit(&ip->i_contents); 199 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock); 200 201 if (flags & B_ASYNC) { 202 /* 203 * Show that no more asynchronous IO will be added 204 */ 205 fdb_ioerrdone(fdbp, error); 206 } 207 if (ulp) { 208 ufs_lockfs_end(ulp); 209 } 210 if (io_started && flags & B_ASYNC) { 211 return (0); 212 } else { 213 return (error); 214 } 215 } 216 217 /* 218 * ufs_alloc_data - supports allocating space and reads or writes 219 * that involve changes to file length or space allocation. 220 * 221 * This function is more expensive, because of the UFS log transaction, 222 * so ufs_rdwr_data() should be used when space or file length changes 223 * will not occur. 224 * 225 * Inputs: 226 * fdb - A null pointer instructs this function to only allocate 227 * space for the specified offset and length. 228 * An actual fdbuffer instructs this function to perform 229 * the read or write operation. 230 * flags - defaults (zero value) to synchronous write 231 * B_READ - indicates read operation 232 * B_ASYNC - indicates perform operation asynchronously 233 */ 234 int 235 ufs_alloc_data( 236 vnode_t *vnodep, 237 u_offset_t offset, 238 size_t *len, 239 fdbuffer_t *fdbp, 240 int flags, 241 cred_t *credp) 242 { 243 struct inode *ip = VTOI(vnodep); 244 size_t done_len, io_len; 245 int contig; 246 u_offset_t uoff, io_off; 247 int error = 0; /* No error occured */ 248 int offsetn; /* Start point this IO */ 249 int nbytes; /* Number bytes in this IO */ 250 daddr_t bn; 251 struct fs *fs; 252 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 253 int i_size_changed = 0; 254 u_offset_t old_i_size; 255 struct ulockfs *ulp; 256 int trans_size; 257 int issync; /* UFS Log transaction */ 258 /* synchronous when non-zero */ 259 260 int io_started = 0; /* No IO started */ 261 uint_t protp = PROT_ALL; 262 263 ASSERT((flags & B_WRITE) == 0); 264 265 /* 266 * Obey the lockfs protocol 267 */ 268 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp); 269 if (error) { 270 if ((fdbp != NULL) && (flags & B_ASYNC)) { 271 fdb_ioerrdone(fdbp, error); 272 } 273 return (error); 274 } 275 if (ulp) { 276 /* 277 * Try to begin a UFS log transaction 278 */ 279 trans_size = TOP_GETPAGE_SIZE(ip); 280 TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE, 281 trans_size, error); 282 if (error == EWOULDBLOCK) { 283 ufs_lockfs_end(ulp); 284 if ((fdbp != NULL) && (flags & B_ASYNC)) { 285 fdb_ioerrdone(fdbp, EDEADLK); 286 } 287 return (EDEADLK); 288 } 289 } 290 291 uoff = offset; 292 io_off = offset; 293 io_len = *len; 294 done_len = 0; 295 296 DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n", 297 uoff, (io_len - done_len), ip->i_size, (void *)fdbp)); 298 299 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER); 300 rw_enter(&ip->i_contents, RW_WRITER); 301 302 ASSERT((ip->i_mode & IFMT) == IFREG); 303 304 fs = ip->i_fs; 305 306 while (error == 0 && done_len < io_len) { 307 uoff = (u_offset_t)(io_off + done_len); 308 offsetn = (int)blkoff(fs, uoff); 309 nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len); 310 311 DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n", 312 uoff, nbytes)); 313 314 if (uoff + nbytes > ip->i_size) { 315 /* 316 * We are extending the length of the file. 317 * bmap is used so that we are sure that 318 * if we need to allocate new blocks, that it 319 * is done here before we up the file size. 320 */ 321 DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n", 322 ip->i_size, uoff + nbytes)); 323 324 error = bmap_write(ip, uoff, (offsetn + nbytes), 325 BI_ALLOC_ONLY, NULL, credp); 326 if (ip->i_flag & (ICHG|IUPD)) 327 ip->i_seq++; 328 if (error) { 329 DEBUGF((CE_CONT, "?ufs_alloc_data: grow " 330 "failed err: %d\n", error)); 331 break; 332 } 333 if (fdbp != NULL) { 334 if (uoff >= ip->i_size) { 335 /* 336 * Desired offset is past end of bytes 337 * in file, so we have a hole. 338 */ 339 fdb_add_hole(fdbp, uoff - offset, 340 nbytes); 341 } else { 342 int contig; 343 buf_t *bp; 344 345 error = bmap_read(ip, uoff, &bn, 346 &contig); 347 if (error) { 348 break; 349 } 350 351 contig = ip->i_size - uoff; 352 contig = P2ROUNDUP(contig, DEV_BSIZE); 353 354 bp = fdb_iosetup(fdbp, uoff - offset, 355 contig, vnodep, flags); 356 357 bp->b_edev = ip->i_dev; 358 bp->b_dev = cmpdev(ip->i_dev); 359 bp->b_blkno = bn; 360 bp->b_file = ip->i_vnode; 361 bp->b_offset = (offset_t)uoff; 362 363 if (ufsvfsp->vfs_snapshot) { 364 fssnap_strategy( 365 &ufsvfsp->vfs_snapshot, bp); 366 } else { 367 (void) bdev_strategy(bp); 368 } 369 io_started = 1; 370 371 lwp_stat_update(LWP_STAT_OUBLK, 1); 372 373 if ((flags & B_ASYNC) == 0) { 374 error = biowait(bp); 375 fdb_iodone(bp); 376 if (error) { 377 break; 378 } 379 } 380 if (contig > (ip->i_size - uoff)) { 381 contig -= ip->i_size - uoff; 382 383 fdb_add_hole(fdbp, 384 ip->i_size - offset, 385 contig); 386 } 387 } 388 } 389 390 i_size_changed = 1; 391 old_i_size = ip->i_size; 392 UFS_SET_ISIZE(uoff + nbytes, ip); 393 TRANS_INODE(ip->i_ufsvfs, ip); 394 /* 395 * file has grown larger than 2GB. Set flag 396 * in superblock to indicate this, if it 397 * is not already set. 398 */ 399 if ((ip->i_size > MAXOFF32_T) && 400 !(fs->fs_flags & FSLARGEFILES)) { 401 ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES); 402 mutex_enter(&ufsvfsp->vfs_lock); 403 fs->fs_flags |= FSLARGEFILES; 404 ufs_sbwrite(ufsvfsp); 405 mutex_exit(&ufsvfsp->vfs_lock); 406 } 407 } else { 408 /* 409 * The file length is not being extended. 410 */ 411 error = bmap_read(ip, uoff, &bn, &contig); 412 if (error) { 413 DEBUGF((CE_CONT, "?ufs_alloc_data: " 414 "bmap_read err: %d\n", error)); 415 break; 416 } 417 418 if (bn != UFS_HOLE) { 419 /* 420 * Did not map a hole in the file 421 */ 422 int contig = P2ROUNDUP(nbytes, DEV_BSIZE); 423 buf_t *bp; 424 425 if (fdbp != NULL) { 426 bp = fdb_iosetup(fdbp, uoff - offset, 427 contig, vnodep, flags); 428 429 bp->b_edev = ip->i_dev; 430 bp->b_dev = cmpdev(ip->i_dev); 431 bp->b_blkno = bn; 432 bp->b_file = ip->i_vnode; 433 bp->b_offset = (offset_t)uoff; 434 435 if (ufsvfsp->vfs_snapshot) { 436 fssnap_strategy( 437 &ufsvfsp->vfs_snapshot, bp); 438 } else { 439 (void) bdev_strategy(bp); 440 } 441 io_started = 1; 442 443 lwp_stat_update(LWP_STAT_OUBLK, 1); 444 445 if ((flags & B_ASYNC) == 0) { 446 error = biowait(bp); 447 fdb_iodone(bp); 448 if (error) { 449 break; 450 } 451 } 452 } 453 } else { 454 /* 455 * We read a hole in the file. 456 * We have to allocate blocks for the hole. 457 */ 458 error = bmap_write(ip, uoff, (offsetn + nbytes), 459 BI_ALLOC_ONLY, NULL, credp); 460 if (ip->i_flag & (ICHG|IUPD)) 461 ip->i_seq++; 462 if (error) { 463 DEBUGF((CE_CONT, "?ufs_alloc_data: fill" 464 " hole failed error: %d\n", error)); 465 break; 466 } 467 if (fdbp != NULL) { 468 fdb_add_hole(fdbp, uoff - offset, 469 nbytes); 470 } 471 } 472 } 473 done_len += nbytes; 474 } 475 476 if (error) { 477 if (i_size_changed) { 478 /* 479 * Allocation of the blocks for the file failed. 480 * So truncate the file size back to its original size. 481 */ 482 (void) ufs_itrunc(ip, old_i_size, 0, credp); 483 } 484 } 485 486 DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n", 487 uoff, (io_len - done_len))); 488 489 if ((offset + *len) < (NDADDR * fs->fs_bsize)) { 490 *len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset); 491 } else { 492 *len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset); 493 } 494 495 /* 496 * Flush cached pages. 497 * 498 * XXX - There should be no pages involved, since the I/O was performed 499 * through the device strategy routine and the page cache was bypassed. 500 * However, testing has demonstrated that this VOP_PUTPAGE is 501 * necessary. Without this, data might not always be read back as it 502 * was written. 503 * 504 */ 505 (void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp); 506 507 rw_exit(&ip->i_contents); 508 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock); 509 510 if ((fdbp != NULL) && (flags & B_ASYNC)) { 511 /* 512 * Show that no more asynchronous IO will be added 513 */ 514 fdb_ioerrdone(fdbp, error); 515 } 516 if (ulp) { 517 /* 518 * End the UFS Log transaction 519 */ 520 TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE, 521 trans_size); 522 ufs_lockfs_end(ulp); 523 } 524 if (io_started && (flags & B_ASYNC)) { 525 return (0); 526 } else { 527 return (error); 528 } 529 } 530