1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/sysmacros.h>
29 #include <sys/conf.h>
30 #include <sys/fssnap_if.h>
31 #include <sys/fs/ufs_inode.h>
32 #include <sys/fs/ufs_lockfs.h>
33 #include <sys/fs/ufs_log.h>
34 #include <sys/fs/ufs_trans.h>
35 #include <sys/cmn_err.h>
36 #include <vm/pvn.h>
37 #include <vm/seg_map.h>
38 #include <sys/fdbuffer.h>
39
#ifdef DEBUG
/* Non-zero enables the DEBUGF() trace messages in this file. */
int evn_ufs_debug = 0;

/*
 * DEBUGF((level, fmt, ...)) - hand the parenthesised argument list to
 * cmn_err() when evn_ufs_debug is non-zero.
 *
 * Both variants expand to exactly one statement that needs a trailing
 * semicolon, so the macro is safe as the body of an unbraced if/else.
 */
#define	DEBUGF(args)	do { if (evn_ufs_debug) cmn_err args; } while (0)
#else
#define	DEBUGF(args)	((void)0)
#endif
46
47 /*
48 * ufs_rdwr_data - supports reading or writing data when
49 * no changes are permitted in file size or space allocation.
50 *
51 * Inputs:
52 * fdb - The mandatory fdbuffer supports
53 * the read or write operation.
54 * flags - defaults (zero value) to synchronous write
55 * B_READ - indicates read operation
56 * B_ASYNC - indicates perform operation asynchronously
57 */
58 /*ARGSUSED*/
59 int
ufs_rdwr_data(vnode_t * vnodep,u_offset_t offset,size_t len,fdbuffer_t * fdbp,int flags,cred_t * credp)60 ufs_rdwr_data(
61 vnode_t *vnodep,
62 u_offset_t offset,
63 size_t len,
64 fdbuffer_t *fdbp,
65 int flags,
66 cred_t *credp)
67 {
68 struct inode *ip = VTOI(vnodep);
69 struct fs *fs;
70 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
71 struct buf *bp;
72 krw_t rwtype = RW_READER;
73 u_offset_t offset1 = offset; /* Initial offset */
74 size_t iolen;
75 int curlen = 0;
76 int pplen;
77 daddr_t bn;
78 int contig = 0;
79 int error = 0;
80 int nbytes; /* Number bytes this IO */
81 int offsetn; /* Start point this IO */
82 int iswrite = flags & B_WRITE;
83 int io_started = 0; /* No IO started */
84 struct ulockfs *ulp;
85 uint_t protp = PROT_ALL;
86
87 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, !iswrite,
88 &protp);
89 if (error) {
90 if (flags & B_ASYNC) {
91 fdb_ioerrdone(fdbp, error);
92 }
93 return (error);
94 }
95 fs = ufsvfsp->vfs_fs;
96 iolen = len;
97
98 DEBUGF((CE_CONT, "?ufs_rdwr: %s vp: %p pages:%p off %llx len %lx"
99 " isize: %llx fdb: %p\n",
100 flags & B_READ ? "READ" : "WRITE", (void *)vnodep,
101 (void *)vnodep->v_pages, offset1, iolen, ip->i_size, (void *)fdbp));
102
103 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
104 rw_enter(&ip->i_contents, rwtype);
105
106 ASSERT(offset1 < ip->i_size);
107
108 if ((offset1 + iolen) > ip->i_size) {
109 iolen = ip->i_size - offset1;
110 }
111 while (!error && curlen < iolen) {
112
113 contig = 0;
114
115 if ((error = bmap_read(ip, offset1, &bn, &contig)) != 0) {
116 break;
117 }
118 ASSERT(!(bn == UFS_HOLE && iswrite));
119 if (bn == UFS_HOLE) {
120 /*
121 * If the above assertion is true,
122 * then the following if statement can never be true.
123 */
124 if (iswrite && (rwtype == RW_READER)) {
125 rwtype = RW_WRITER;
126 if (!rw_tryupgrade(&ip->i_contents)) {
127 rw_exit(&ip->i_contents);
128 rw_enter(&ip->i_contents, rwtype);
129 continue;
130 }
131 }
132 offsetn = blkoff(fs, offset1);
133 pplen = P2ROUNDUP(len, PAGESIZE);
134 nbytes = MIN((pplen - curlen),
135 (fs->fs_bsize - offsetn));
136 ASSERT(nbytes > 0);
137
138 /*
139 * We may be reading or writing.
140 */
141 DEBUGF((CE_CONT, "?ufs_rdwr_data: hole %llx - %lx\n",
142 offset1, (iolen - curlen)));
143
144 if (iswrite) {
145 printf("**WARNING: ignoring hole in write\n");
146 error = ENOSPC;
147 } else {
148 fdb_add_hole(fdbp, offset1 - offset, nbytes);
149 }
150 offset1 += nbytes;
151 curlen += nbytes;
152 continue;
153
154 }
155 ASSERT(contig > 0);
156 pplen = P2ROUNDUP(len, PAGESIZE);
157
158 contig = MIN(contig, len - curlen);
159 contig = P2ROUNDUP(contig, DEV_BSIZE);
160
161 bp = fdb_iosetup(fdbp, offset1 - offset, contig, vnodep, flags);
162
163 bp->b_edev = ip->i_dev;
164 bp->b_dev = cmpdev(ip->i_dev);
165 bp->b_blkno = bn;
166 bp->b_file = ip->i_vnode;
167 bp->b_offset = (offset_t)offset1;
168
169 if (ufsvfsp->vfs_snapshot) {
170 fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
171 } else {
172 (void) bdev_strategy(bp);
173 }
174 io_started = 1;
175
176 offset1 += contig;
177 curlen += contig;
178 if (iswrite)
179 lwp_stat_update(LWP_STAT_OUBLK, 1);
180 else
181 lwp_stat_update(LWP_STAT_INBLK, 1);
182
183 if ((flags & B_ASYNC) == 0) {
184 error = biowait(bp);
185 fdb_iodone(bp);
186 }
187
188 DEBUGF((CE_CONT, "?loop ufs_rdwr_data.. off %llx len %lx\n",
189 offset1, (iolen - curlen)));
190 }
191
192 DEBUGF((CE_CONT, "?ufs_rdwr_data: off %llx len %lx pages: %p ------\n",
193 offset1, (iolen - curlen), (void *)vnodep->v_pages));
194
195 rw_exit(&ip->i_contents);
196 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);
197
198 if (flags & B_ASYNC) {
199 /*
200 * Show that no more asynchronous IO will be added
201 */
202 fdb_ioerrdone(fdbp, error);
203 }
204 if (ulp) {
205 ufs_lockfs_end(ulp);
206 }
207 if (io_started && flags & B_ASYNC) {
208 return (0);
209 } else {
210 return (error);
211 }
212 }
213
214 /*
215 * ufs_alloc_data - supports allocating space and reads or writes
216 * that involve changes to file length or space allocation.
217 *
218 * This function is more expensive, because of the UFS log transaction,
219 * so ufs_rdwr_data() should be used when space or file length changes
220 * will not occur.
221 *
222 * Inputs:
223 * fdb - A null pointer instructs this function to only allocate
224 * space for the specified offset and length.
225 * An actual fdbuffer instructs this function to perform
226 * the read or write operation.
227 * flags - defaults (zero value) to synchronous write
228 * B_READ - indicates read operation
229 * B_ASYNC - indicates perform operation asynchronously
230 */
231 int
ufs_alloc_data(vnode_t * vnodep,u_offset_t offset,size_t * len,fdbuffer_t * fdbp,int flags,cred_t * credp)232 ufs_alloc_data(
233 vnode_t *vnodep,
234 u_offset_t offset,
235 size_t *len,
236 fdbuffer_t *fdbp,
237 int flags,
238 cred_t *credp)
239 {
240 struct inode *ip = VTOI(vnodep);
241 size_t done_len, io_len;
242 int contig;
243 u_offset_t uoff, io_off;
244 int error = 0; /* No error occurred */
245 int offsetn; /* Start point this IO */
246 int nbytes; /* Number bytes in this IO */
247 daddr_t bn;
248 struct fs *fs;
249 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
250 int i_size_changed = 0;
251 u_offset_t old_i_size;
252 struct ulockfs *ulp;
253 int trans_size;
254 int issync; /* UFS Log transaction */
255 /* synchronous when non-zero */
256
257 int io_started = 0; /* No IO started */
258 uint_t protp = PROT_ALL;
259
260 ASSERT((flags & B_WRITE) == 0);
261
262 /*
263 * Obey the lockfs protocol
264 */
265 error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp);
266 if (error) {
267 if ((fdbp != NULL) && (flags & B_ASYNC)) {
268 fdb_ioerrdone(fdbp, error);
269 }
270 return (error);
271 }
272 if (ulp) {
273 /*
274 * Try to begin a UFS log transaction
275 */
276 trans_size = TOP_GETPAGE_SIZE(ip);
277 TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE,
278 trans_size, error);
279 if (error == EWOULDBLOCK) {
280 ufs_lockfs_end(ulp);
281 if ((fdbp != NULL) && (flags & B_ASYNC)) {
282 fdb_ioerrdone(fdbp, EDEADLK);
283 }
284 return (EDEADLK);
285 }
286 }
287
288 uoff = offset;
289 io_off = offset;
290 io_len = *len;
291 done_len = 0;
292
293 DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n",
294 uoff, (io_len - done_len), ip->i_size, (void *)fdbp));
295
296 rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
297 rw_enter(&ip->i_contents, RW_WRITER);
298
299 ASSERT((ip->i_mode & IFMT) == IFREG);
300
301 fs = ip->i_fs;
302
303 while (error == 0 && done_len < io_len) {
304 uoff = (u_offset_t)(io_off + done_len);
305 offsetn = (int)blkoff(fs, uoff);
306 nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len);
307
308 DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n",
309 uoff, nbytes));
310
311 if (uoff + nbytes > ip->i_size) {
312 /*
313 * We are extending the length of the file.
314 * bmap is used so that we are sure that
315 * if we need to allocate new blocks, that it
316 * is done here before we up the file size.
317 */
318 DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n",
319 ip->i_size, uoff + nbytes));
320
321 error = bmap_write(ip, uoff, (offsetn + nbytes),
322 BI_ALLOC_ONLY, NULL, credp);
323 if (ip->i_flag & (ICHG|IUPD))
324 ip->i_seq++;
325 if (error) {
326 DEBUGF((CE_CONT, "?ufs_alloc_data: grow "
327 "failed err: %d\n", error));
328 break;
329 }
330 if (fdbp != NULL) {
331 if (uoff >= ip->i_size) {
332 /*
333 * Desired offset is past end of bytes
334 * in file, so we have a hole.
335 */
336 fdb_add_hole(fdbp, uoff - offset,
337 nbytes);
338 } else {
339 int contig;
340 buf_t *bp;
341
342 error = bmap_read(ip, uoff, &bn,
343 &contig);
344 if (error) {
345 break;
346 }
347
348 contig = ip->i_size - uoff;
349 contig = P2ROUNDUP(contig, DEV_BSIZE);
350
351 bp = fdb_iosetup(fdbp, uoff - offset,
352 contig, vnodep, flags);
353
354 bp->b_edev = ip->i_dev;
355 bp->b_dev = cmpdev(ip->i_dev);
356 bp->b_blkno = bn;
357 bp->b_file = ip->i_vnode;
358 bp->b_offset = (offset_t)uoff;
359
360 if (ufsvfsp->vfs_snapshot) {
361 fssnap_strategy(
362 &ufsvfsp->vfs_snapshot, bp);
363 } else {
364 (void) bdev_strategy(bp);
365 }
366 io_started = 1;
367
368 lwp_stat_update(LWP_STAT_OUBLK, 1);
369
370 if ((flags & B_ASYNC) == 0) {
371 error = biowait(bp);
372 fdb_iodone(bp);
373 if (error) {
374 break;
375 }
376 }
377 if (contig > (ip->i_size - uoff)) {
378 contig -= ip->i_size - uoff;
379
380 fdb_add_hole(fdbp,
381 ip->i_size - offset,
382 contig);
383 }
384 }
385 }
386
387 i_size_changed = 1;
388 old_i_size = ip->i_size;
389 UFS_SET_ISIZE(uoff + nbytes, ip);
390 TRANS_INODE(ip->i_ufsvfs, ip);
391 /*
392 * file has grown larger than 2GB. Set flag
393 * in superblock to indicate this, if it
394 * is not already set.
395 */
396 if ((ip->i_size > MAXOFF32_T) &&
397 !(fs->fs_flags & FSLARGEFILES)) {
398 ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
399 mutex_enter(&ufsvfsp->vfs_lock);
400 fs->fs_flags |= FSLARGEFILES;
401 ufs_sbwrite(ufsvfsp);
402 mutex_exit(&ufsvfsp->vfs_lock);
403 }
404 } else {
405 /*
406 * The file length is not being extended.
407 */
408 error = bmap_read(ip, uoff, &bn, &contig);
409 if (error) {
410 DEBUGF((CE_CONT, "?ufs_alloc_data: "
411 "bmap_read err: %d\n", error));
412 break;
413 }
414
415 if (bn != UFS_HOLE) {
416 /*
417 * Did not map a hole in the file
418 */
419 int contig = P2ROUNDUP(nbytes, DEV_BSIZE);
420 buf_t *bp;
421
422 if (fdbp != NULL) {
423 bp = fdb_iosetup(fdbp, uoff - offset,
424 contig, vnodep, flags);
425
426 bp->b_edev = ip->i_dev;
427 bp->b_dev = cmpdev(ip->i_dev);
428 bp->b_blkno = bn;
429 bp->b_file = ip->i_vnode;
430 bp->b_offset = (offset_t)uoff;
431
432 if (ufsvfsp->vfs_snapshot) {
433 fssnap_strategy(
434 &ufsvfsp->vfs_snapshot, bp);
435 } else {
436 (void) bdev_strategy(bp);
437 }
438 io_started = 1;
439
440 lwp_stat_update(LWP_STAT_OUBLK, 1);
441
442 if ((flags & B_ASYNC) == 0) {
443 error = biowait(bp);
444 fdb_iodone(bp);
445 if (error) {
446 break;
447 }
448 }
449 }
450 } else {
451 /*
452 * We read a hole in the file.
453 * We have to allocate blocks for the hole.
454 */
455 error = bmap_write(ip, uoff, (offsetn + nbytes),
456 BI_ALLOC_ONLY, NULL, credp);
457 if (ip->i_flag & (ICHG|IUPD))
458 ip->i_seq++;
459 if (error) {
460 DEBUGF((CE_CONT, "?ufs_alloc_data: fill"
461 " hole failed error: %d\n", error));
462 break;
463 }
464 if (fdbp != NULL) {
465 fdb_add_hole(fdbp, uoff - offset,
466 nbytes);
467 }
468 }
469 }
470 done_len += nbytes;
471 }
472
473 if (error) {
474 if (i_size_changed) {
475 /*
476 * Allocation of the blocks for the file failed.
477 * So truncate the file size back to its original size.
478 */
479 (void) ufs_itrunc(ip, old_i_size, 0, credp);
480 }
481 }
482
483 DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n",
484 uoff, (io_len - done_len)));
485
486 if ((offset + *len) < (NDADDR * fs->fs_bsize)) {
487 *len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset);
488 } else {
489 *len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset);
490 }
491
492 /*
493 * Flush cached pages.
494 *
495 * XXX - There should be no pages involved, since the I/O was performed
496 * through the device strategy routine and the page cache was bypassed.
497 * However, testing has demonstrated that this VOP_PUTPAGE is
498 * necessary. Without this, data might not always be read back as it
499 * was written.
500 *
501 */
502 (void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp, NULL);
503
504 rw_exit(&ip->i_contents);
505 rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);
506
507 if ((fdbp != NULL) && (flags & B_ASYNC)) {
508 /*
509 * Show that no more asynchronous IO will be added
510 */
511 fdb_ioerrdone(fdbp, error);
512 }
513 if (ulp) {
514 /*
515 * End the UFS Log transaction
516 */
517 TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE,
518 trans_size);
519 ufs_lockfs_end(ulp);
520 }
521 if (io_started && (flags & B_ASYNC)) {
522 return (0);
523 } else {
524 return (error);
525 }
526 }
527