1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/conf.h>
32 #include <sys/fssnap_if.h>
33 #include <sys/fs/ufs_inode.h>
34 #include <sys/fs/ufs_lockfs.h>
35 #include <sys/fs/ufs_log.h>
36 #include <sys/fs/ufs_trans.h>
37 #include <sys/cmn_err.h>
38 #include <vm/pvn.h>
39 #include <vm/seg_map.h>
40 #include <sys/fdbuffer.h>
41
/*
 * Debug tracing for this file.
 *
 * On DEBUG builds, DEBUGF((level, fmt, ...)) forwards its parenthesized
 * argument list to cmn_err() when evn_ufs_debug is non-zero.  On other
 * builds it expands to nothing.
 *
 * The DEBUG expansion is wrapped in do { } while (0) so that
 * "DEBUGF((...));" is a single statement and stays safe inside an
 * unbraced if/else.
 */
#ifdef DEBUG
int evn_ufs_debug = 0;
#define	DEBUGF(args)	do { if (evn_ufs_debug) cmn_err args; } while (0)
#else
#define	DEBUGF(args)
#endif
48
49 /*
50 * ufs_rdwr_data - supports reading or writing data when
51 * no changes are permitted in file size or space allocation.
52 *
53 * Inputs:
54 * fdb - The mandatory fdbuffer supports
55 * the read or write operation.
56 * flags - defaults (zero value) to synchronous write
57 * B_READ - indicates read operation
58 * B_ASYNC - indicates perform operation asynchronously
59 */
60 /*ARGSUSED*/
61 int
ufs_rdwr_data(vnode_t * vnodep,u_offset_t offset,size_t len,fdbuffer_t * fdbp,int flags,cred_t * credp)62 ufs_rdwr_data(
63 vnode_t *vnodep,
64 u_offset_t offset,
65 size_t len,
66 fdbuffer_t *fdbp,
67 int flags,
68 cred_t *credp)
69 {
70 struct inode *ip = VTOI(vnodep);
71 struct fs *fs;
72 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
73 struct buf *bp;
74 krw_t rwtype = RW_READER;
75 u_offset_t offset1 = offset; /* Initial offset */
76 size_t iolen;
77 int curlen = 0;
78 int pplen;
79 daddr_t bn;
80 int contig = 0;
81 int error = 0;
82 int nbytes; /* Number bytes this IO */
83 int offsetn; /* Start point this IO */
84 int iswrite = flags & B_WRITE;
85 int io_started = 0; /* No IO started */
86 struct ulockfs *ulp;
87 uint_t protp = PROT_ALL;
88
89 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, !iswrite,
90 &protp);
91 if (error) {
92 if (flags & B_ASYNC) {
93 fdb_ioerrdone(fdbp, error);
94 }
95 return (error);
96 }
97 fs = ufsvfsp->vfs_fs;
98 iolen = len;
99
100 DEBUGF((CE_CONT, "?ufs_rdwr: %s vp: %p pages:%p off %llx len %lx"
101 " isize: %llx fdb: %p\n",
102 flags & B_READ ? "READ" : "WRITE", (void *)vnodep,
103 (void *)vnodep->v_pages, offset1, iolen, ip->i_size, (void *)fdbp));
104
105 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
106 rw_enter(&ip->i_contents, rwtype);
107
108 ASSERT(offset1 < ip->i_size);
109
110 if ((offset1 + iolen) > ip->i_size) {
111 iolen = ip->i_size - offset1;
112 }
113 while (!error && curlen < iolen) {
114
115 contig = 0;
116
117 if ((error = bmap_read(ip, offset1, &bn, &contig)) != 0) {
118 break;
119 }
120 ASSERT(!(bn == UFS_HOLE && iswrite));
121 if (bn == UFS_HOLE) {
122 /*
123 * If the above assertion is true,
124 * then the following if statement can never be true.
125 */
126 if (iswrite && (rwtype == RW_READER)) {
127 rwtype = RW_WRITER;
128 if (!rw_tryupgrade(&ip->i_contents)) {
129 rw_exit(&ip->i_contents);
130 rw_enter(&ip->i_contents, rwtype);
131 continue;
132 }
133 }
134 offsetn = blkoff(fs, offset1);
135 pplen = P2ROUNDUP(len, PAGESIZE);
136 nbytes = MIN((pplen - curlen),
137 (fs->fs_bsize - offsetn));
138 ASSERT(nbytes > 0);
139
140 /*
141 * We may be reading or writing.
142 */
143 DEBUGF((CE_CONT, "?ufs_rdwr_data: hole %llx - %lx\n",
144 offset1, (iolen - curlen)));
145
146 if (iswrite) {
147 printf("**WARNING: ignoring hole in write\n");
148 error = ENOSPC;
149 } else {
150 fdb_add_hole(fdbp, offset1 - offset, nbytes);
151 }
152 offset1 += nbytes;
153 curlen += nbytes;
154 continue;
155
156 }
157 ASSERT(contig > 0);
158 pplen = P2ROUNDUP(len, PAGESIZE);
159
160 contig = MIN(contig, len - curlen);
161 contig = P2ROUNDUP(contig, DEV_BSIZE);
162
163 bp = fdb_iosetup(fdbp, offset1 - offset, contig, vnodep, flags);
164
165 bp->b_edev = ip->i_dev;
166 bp->b_dev = cmpdev(ip->i_dev);
167 bp->b_blkno = bn;
168 bp->b_file = ip->i_vnode;
169 bp->b_offset = (offset_t)offset1;
170
171 if (ufsvfsp->vfs_snapshot) {
172 fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
173 } else {
174 (void) bdev_strategy(bp);
175 }
176 io_started = 1;
177
178 offset1 += contig;
179 curlen += contig;
180 if (iswrite)
181 lwp_stat_update(LWP_STAT_OUBLK, 1);
182 else
183 lwp_stat_update(LWP_STAT_INBLK, 1);
184
185 if ((flags & B_ASYNC) == 0) {
186 error = biowait(bp);
187 fdb_iodone(bp);
188 }
189
190 DEBUGF((CE_CONT, "?loop ufs_rdwr_data.. off %llx len %lx\n",
191 offset1, (iolen - curlen)));
192 }
193
194 DEBUGF((CE_CONT, "?ufs_rdwr_data: off %llx len %lx pages: %p ------\n",
195 offset1, (iolen - curlen), (void *)vnodep->v_pages));
196
197 rw_exit(&ip->i_contents);
198 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);
199
200 if (flags & B_ASYNC) {
201 /*
202 * Show that no more asynchronous IO will be added
203 */
204 fdb_ioerrdone(fdbp, error);
205 }
206 if (ulp) {
207 ufs_lockfs_end(ulp);
208 }
209 if (io_started && flags & B_ASYNC) {
210 return (0);
211 } else {
212 return (error);
213 }
214 }
215
216 /*
217 * ufs_alloc_data - supports allocating space and reads or writes
218 * that involve changes to file length or space allocation.
219 *
220 * This function is more expensive, because of the UFS log transaction,
221 * so ufs_rdwr_data() should be used when space or file length changes
222 * will not occur.
223 *
224 * Inputs:
225 * fdb - A null pointer instructs this function to only allocate
226 * space for the specified offset and length.
227 * An actual fdbuffer instructs this function to perform
228 * the read or write operation.
229 * flags - defaults (zero value) to synchronous write
230 * B_READ - indicates read operation
231 * B_ASYNC - indicates perform operation asynchronously
232 */
233 int
ufs_alloc_data(vnode_t * vnodep,u_offset_t offset,size_t * len,fdbuffer_t * fdbp,int flags,cred_t * credp)234 ufs_alloc_data(
235 vnode_t *vnodep,
236 u_offset_t offset,
237 size_t *len,
238 fdbuffer_t *fdbp,
239 int flags,
240 cred_t *credp)
241 {
242 struct inode *ip = VTOI(vnodep);
243 size_t done_len, io_len;
244 int contig;
245 u_offset_t uoff, io_off;
246 int error = 0; /* No error occurred */
247 int offsetn; /* Start point this IO */
248 int nbytes; /* Number bytes in this IO */
249 daddr_t bn;
250 struct fs *fs;
251 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
252 int i_size_changed = 0;
253 u_offset_t old_i_size;
254 struct ulockfs *ulp;
255 int trans_size;
256 int issync; /* UFS Log transaction */
257 /* synchronous when non-zero */
258
259 int io_started = 0; /* No IO started */
260 uint_t protp = PROT_ALL;
261
262 ASSERT((flags & B_WRITE) == 0);
263
264 /*
265 * Obey the lockfs protocol
266 */
267 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp);
268 if (error) {
269 if ((fdbp != NULL) && (flags & B_ASYNC)) {
270 fdb_ioerrdone(fdbp, error);
271 }
272 return (error);
273 }
274 if (ulp) {
275 /*
276 * Try to begin a UFS log transaction
277 */
278 trans_size = TOP_GETPAGE_SIZE(ip);
279 TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE,
280 trans_size, error);
281 if (error == EWOULDBLOCK) {
282 ufs_lockfs_end(ulp);
283 if ((fdbp != NULL) && (flags & B_ASYNC)) {
284 fdb_ioerrdone(fdbp, EDEADLK);
285 }
286 return (EDEADLK);
287 }
288 }
289
290 uoff = offset;
291 io_off = offset;
292 io_len = *len;
293 done_len = 0;
294
295 DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n",
296 uoff, (io_len - done_len), ip->i_size, (void *)fdbp));
297
298 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
299 rw_enter(&ip->i_contents, RW_WRITER);
300
301 ASSERT((ip->i_mode & IFMT) == IFREG);
302
303 fs = ip->i_fs;
304
305 while (error == 0 && done_len < io_len) {
306 uoff = (u_offset_t)(io_off + done_len);
307 offsetn = (int)blkoff(fs, uoff);
308 nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len);
309
310 DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n",
311 uoff, nbytes));
312
313 if (uoff + nbytes > ip->i_size) {
314 /*
315 * We are extending the length of the file.
316 * bmap is used so that we are sure that
317 * if we need to allocate new blocks, that it
318 * is done here before we up the file size.
319 */
320 DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n",
321 ip->i_size, uoff + nbytes));
322
323 error = bmap_write(ip, uoff, (offsetn + nbytes),
324 BI_ALLOC_ONLY, NULL, credp);
325 if (ip->i_flag & (ICHG|IUPD))
326 ip->i_seq++;
327 if (error) {
328 DEBUGF((CE_CONT, "?ufs_alloc_data: grow "
329 "failed err: %d\n", error));
330 break;
331 }
332 if (fdbp != NULL) {
333 if (uoff >= ip->i_size) {
334 /*
335 * Desired offset is past end of bytes
336 * in file, so we have a hole.
337 */
338 fdb_add_hole(fdbp, uoff - offset,
339 nbytes);
340 } else {
341 int contig;
342 buf_t *bp;
343
344 error = bmap_read(ip, uoff, &bn,
345 &contig);
346 if (error) {
347 break;
348 }
349
350 contig = ip->i_size - uoff;
351 contig = P2ROUNDUP(contig, DEV_BSIZE);
352
353 bp = fdb_iosetup(fdbp, uoff - offset,
354 contig, vnodep, flags);
355
356 bp->b_edev = ip->i_dev;
357 bp->b_dev = cmpdev(ip->i_dev);
358 bp->b_blkno = bn;
359 bp->b_file = ip->i_vnode;
360 bp->b_offset = (offset_t)uoff;
361
362 if (ufsvfsp->vfs_snapshot) {
363 fssnap_strategy(
364 &ufsvfsp->vfs_snapshot, bp);
365 } else {
366 (void) bdev_strategy(bp);
367 }
368 io_started = 1;
369
370 lwp_stat_update(LWP_STAT_OUBLK, 1);
371
372 if ((flags & B_ASYNC) == 0) {
373 error = biowait(bp);
374 fdb_iodone(bp);
375 if (error) {
376 break;
377 }
378 }
379 if (contig > (ip->i_size - uoff)) {
380 contig -= ip->i_size - uoff;
381
382 fdb_add_hole(fdbp,
383 ip->i_size - offset,
384 contig);
385 }
386 }
387 }
388
389 i_size_changed = 1;
390 old_i_size = ip->i_size;
391 UFS_SET_ISIZE(uoff + nbytes, ip);
392 TRANS_INODE(ip->i_ufsvfs, ip);
393 /*
394 * file has grown larger than 2GB. Set flag
395 * in superblock to indicate this, if it
396 * is not already set.
397 */
398 if ((ip->i_size > MAXOFF32_T) &&
399 !(fs->fs_flags & FSLARGEFILES)) {
400 ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
401 mutex_enter(&ufsvfsp->vfs_lock);
402 fs->fs_flags |= FSLARGEFILES;
403 ufs_sbwrite(ufsvfsp);
404 mutex_exit(&ufsvfsp->vfs_lock);
405 }
406 } else {
407 /*
408 * The file length is not being extended.
409 */
410 error = bmap_read(ip, uoff, &bn, &contig);
411 if (error) {
412 DEBUGF((CE_CONT, "?ufs_alloc_data: "
413 "bmap_read err: %d\n", error));
414 break;
415 }
416
417 if (bn != UFS_HOLE) {
418 /*
419 * Did not map a hole in the file
420 */
421 int contig = P2ROUNDUP(nbytes, DEV_BSIZE);
422 buf_t *bp;
423
424 if (fdbp != NULL) {
425 bp = fdb_iosetup(fdbp, uoff - offset,
426 contig, vnodep, flags);
427
428 bp->b_edev = ip->i_dev;
429 bp->b_dev = cmpdev(ip->i_dev);
430 bp->b_blkno = bn;
431 bp->b_file = ip->i_vnode;
432 bp->b_offset = (offset_t)uoff;
433
434 if (ufsvfsp->vfs_snapshot) {
435 fssnap_strategy(
436 &ufsvfsp->vfs_snapshot, bp);
437 } else {
438 (void) bdev_strategy(bp);
439 }
440 io_started = 1;
441
442 lwp_stat_update(LWP_STAT_OUBLK, 1);
443
444 if ((flags & B_ASYNC) == 0) {
445 error = biowait(bp);
446 fdb_iodone(bp);
447 if (error) {
448 break;
449 }
450 }
451 }
452 } else {
453 /*
454 * We read a hole in the file.
455 * We have to allocate blocks for the hole.
456 */
457 error = bmap_write(ip, uoff, (offsetn + nbytes),
458 BI_ALLOC_ONLY, NULL, credp);
459 if (ip->i_flag & (ICHG|IUPD))
460 ip->i_seq++;
461 if (error) {
462 DEBUGF((CE_CONT, "?ufs_alloc_data: fill"
463 " hole failed error: %d\n", error));
464 break;
465 }
466 if (fdbp != NULL) {
467 fdb_add_hole(fdbp, uoff - offset,
468 nbytes);
469 }
470 }
471 }
472 done_len += nbytes;
473 }
474
475 if (error) {
476 if (i_size_changed) {
477 /*
478 * Allocation of the blocks for the file failed.
479 * So truncate the file size back to its original size.
480 */
481 (void) ufs_itrunc(ip, old_i_size, 0, credp);
482 }
483 }
484
485 DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n",
486 uoff, (io_len - done_len)));
487
488 if ((offset + *len) < (NDADDR * fs->fs_bsize)) {
489 *len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset);
490 } else {
491 *len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset);
492 }
493
494 /*
495 * Flush cached pages.
496 *
497 * XXX - There should be no pages involved, since the I/O was performed
498 * through the device strategy routine and the page cache was bypassed.
499 * However, testing has demonstrated that this VOP_PUTPAGE is
500 * necessary. Without this, data might not always be read back as it
501 * was written.
502 *
503 */
504 (void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp, NULL);
505
506 rw_exit(&ip->i_contents);
507 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);
508
509 if ((fdbp != NULL) && (flags & B_ASYNC)) {
510 /*
511 * Show that no more asynchronous IO will be added
512 */
513 fdb_ioerrdone(fdbp, error);
514 }
515 if (ulp) {
516 /*
517 * End the UFS Log transaction
518 */
519 TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE,
520 trans_size);
521 ufs_lockfs_end(ulp);
522 }
523 if (io_started && (flags & B_ASYNC)) {
524 return (0);
525 } else {
526 return (error);
527 }
528 }
529