1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
28 */
29
30 #include <sys/types.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/time.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/resource.h>
37 #include <sys/signal.h>
38 #include <sys/cred.h>
39 #include <sys/user.h>
40 #include <sys/buf.h>
41 #include <sys/vfs.h>
42 #include <sys/vfs_opreg.h>
43 #include <sys/stat.h>
44 #include <sys/vnode.h>
45 #include <sys/mode.h>
46 #include <sys/proc.h>
47 #include <sys/disp.h>
48 #include <sys/file.h>
49 #include <sys/fcntl.h>
50 #include <sys/flock.h>
51 #include <sys/kmem.h>
52 #include <sys/uio.h>
53 #include <sys/dnlc.h>
54 #include <sys/conf.h>
55 #include <sys/errno.h>
56 #include <sys/mman.h>
57 #include <sys/fbuf.h>
58 #include <sys/pathname.h>
59 #include <sys/debug.h>
60 #include <sys/vmsystm.h>
61 #include <sys/cmn_err.h>
62 #include <sys/dirent.h>
63 #include <sys/errno.h>
64 #include <sys/modctl.h>
65 #include <sys/statvfs.h>
66 #include <sys/mount.h>
67 #include <sys/sunddi.h>
68 #include <sys/bootconf.h>
69 #include <sys/policy.h>
70
71 #include <vm/hat.h>
72 #include <vm/page.h>
73 #include <vm/pvn.h>
74 #include <vm/as.h>
75 #include <vm/seg.h>
76 #include <vm/seg_map.h>
77 #include <vm/seg_kmem.h>
78 #include <vm/seg_vn.h>
79 #include <vm/rm.h>
80 #include <vm/page.h>
81 #include <sys/swap.h>
82
83 #include <fs/fs_subr.h>
84
85 #include <sys/fs/udf_volume.h>
86 #include <sys/fs/udf_inode.h>
87
88 static int32_t udf_open(struct vnode **,
89 int32_t, struct cred *, caller_context_t *);
90 static int32_t udf_close(struct vnode *,
91 int32_t, int32_t, offset_t, struct cred *, caller_context_t *);
92 static int32_t udf_read(struct vnode *,
93 struct uio *, int32_t, struct cred *, caller_context_t *);
94 static int32_t udf_write(struct vnode *,
95 struct uio *, int32_t, struct cred *, caller_context_t *);
96 static int32_t udf_ioctl(struct vnode *,
97 int32_t, intptr_t, int32_t, struct cred *, int32_t *,
98 caller_context_t *);
99 static int32_t udf_getattr(struct vnode *,
100 struct vattr *, int32_t, struct cred *, caller_context_t *);
101 static int32_t udf_setattr(struct vnode *,
102 struct vattr *, int32_t, struct cred *, caller_context_t *);
103 static int32_t udf_access(struct vnode *,
104 int32_t, int32_t, struct cred *, caller_context_t *);
105 static int32_t udf_lookup(struct vnode *,
106 char *, struct vnode **, struct pathname *,
107 int32_t, struct vnode *, struct cred *,
108 caller_context_t *, int *, pathname_t *);
109 static int32_t udf_create(struct vnode *,
110 char *, struct vattr *, enum vcexcl,
111 int32_t, struct vnode **, struct cred *, int32_t,
112 caller_context_t *, vsecattr_t *);
113 static int32_t udf_remove(struct vnode *,
114 char *, struct cred *, caller_context_t *, int);
115 static int32_t udf_link(struct vnode *,
116 struct vnode *, char *, struct cred *, caller_context_t *, int);
117 static int32_t udf_rename(struct vnode *,
118 char *, struct vnode *, char *, struct cred *, caller_context_t *, int);
119 static int32_t udf_mkdir(struct vnode *,
120 char *, struct vattr *, struct vnode **, struct cred *,
121 caller_context_t *, int, vsecattr_t *);
122 static int32_t udf_rmdir(struct vnode *,
123 char *, struct vnode *, struct cred *, caller_context_t *, int);
124 static int32_t udf_readdir(struct vnode *,
125 struct uio *, struct cred *, int32_t *, caller_context_t *, int);
126 static int32_t udf_symlink(struct vnode *,
127 char *, struct vattr *, char *, struct cred *, caller_context_t *, int);
128 static int32_t udf_readlink(struct vnode *,
129 struct uio *, struct cred *, caller_context_t *);
130 static int32_t udf_fsync(struct vnode *,
131 int32_t, struct cred *, caller_context_t *);
132 static void udf_inactive(struct vnode *,
133 struct cred *, caller_context_t *);
134 static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *);
135 static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
136 static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
137 static int32_t udf_seek(struct vnode *, offset_t, offset_t *,
138 caller_context_t *);
139 static int32_t udf_frlock(struct vnode *, int32_t,
140 struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *,
141 caller_context_t *);
142 static int32_t udf_space(struct vnode *, int32_t,
143 struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
144 static int32_t udf_getpage(struct vnode *, offset_t,
145 size_t, uint32_t *, struct page **, size_t,
146 struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *);
147 static int32_t udf_putpage(struct vnode *, offset_t,
148 size_t, int32_t, struct cred *, caller_context_t *);
149 static int32_t udf_map(struct vnode *, offset_t, struct as *,
150 caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
151 caller_context_t *);
152 static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
153 caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
154 caller_context_t *);
155 static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
156 caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *,
157 caller_context_t *);
158 static int32_t udf_l_pathconf(struct vnode *, int32_t,
159 ulong_t *, struct cred *, caller_context_t *);
160 static int32_t udf_pageio(struct vnode *, struct page *,
161 u_offset_t, size_t, int32_t, struct cred *, caller_context_t *);
162
163 int32_t ud_getpage_miss(struct vnode *, u_offset_t,
164 size_t, struct seg *, caddr_t, page_t *pl[],
165 size_t, enum seg_rw, int32_t);
166 void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
167 int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
168 int32_t ud_page_fill(struct ud_inode *, page_t *,
169 u_offset_t, uint32_t, u_offset_t *);
170 int32_t ud_iodone(struct buf *);
171 int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
172 int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
173 int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
174 int32_t ud_slave_done(struct buf *);
175
176 /*
177 * Structures to control multiple IO operations to get or put pages
178 * that are backed by discontiguous blocks. The master struct is
179 * a dummy that holds the original bp from pageio_setup. The
180 * slave struct holds the working bp's to do the actual IO. Once
181 * all the slave IOs complete. The master is processed as if a single
182 * IO op has completed.
183 */
184 uint32_t master_index = 0;
185 typedef struct mio_master {
186 kmutex_t mm_mutex; /* protect the fields below */
187 int32_t mm_size;
188 buf_t *mm_bp; /* original bp */
189 int32_t mm_resid; /* bytes remaining to transfer */
190 int32_t mm_error; /* accumulated error from slaves */
191 int32_t mm_index; /* XXX debugging */
192 } mio_master_t;
193
194 typedef struct mio_slave {
195 buf_t ms_buf; /* working buffer for this IO chunk */
196 mio_master_t *ms_ptr; /* pointer to master */
197 } mio_slave_t;
198
199 struct vnodeops *udf_vnodeops;
200
201 const fs_operation_def_t udf_vnodeops_template[] = {
202 VOPNAME_OPEN, { .vop_open = udf_open },
203 VOPNAME_CLOSE, { .vop_close = udf_close },
204 VOPNAME_READ, { .vop_read = udf_read },
205 VOPNAME_WRITE, { .vop_write = udf_write },
206 VOPNAME_IOCTL, { .vop_ioctl = udf_ioctl },
207 VOPNAME_GETATTR, { .vop_getattr = udf_getattr },
208 VOPNAME_SETATTR, { .vop_setattr = udf_setattr },
209 VOPNAME_ACCESS, { .vop_access = udf_access },
210 VOPNAME_LOOKUP, { .vop_lookup = udf_lookup },
211 VOPNAME_CREATE, { .vop_create = udf_create },
212 VOPNAME_REMOVE, { .vop_remove = udf_remove },
213 VOPNAME_LINK, { .vop_link = udf_link },
214 VOPNAME_RENAME, { .vop_rename = udf_rename },
215 VOPNAME_MKDIR, { .vop_mkdir = udf_mkdir },
216 VOPNAME_RMDIR, { .vop_rmdir = udf_rmdir },
217 VOPNAME_READDIR, { .vop_readdir = udf_readdir },
218 VOPNAME_SYMLINK, { .vop_symlink = udf_symlink },
219 VOPNAME_READLINK, { .vop_readlink = udf_readlink },
220 VOPNAME_FSYNC, { .vop_fsync = udf_fsync },
221 VOPNAME_INACTIVE, { .vop_inactive = udf_inactive },
222 VOPNAME_FID, { .vop_fid = udf_fid },
223 VOPNAME_RWLOCK, { .vop_rwlock = udf_rwlock },
224 VOPNAME_RWUNLOCK, { .vop_rwunlock = udf_rwunlock },
225 VOPNAME_SEEK, { .vop_seek = udf_seek },
226 VOPNAME_FRLOCK, { .vop_frlock = udf_frlock },
227 VOPNAME_SPACE, { .vop_space = udf_space },
228 VOPNAME_GETPAGE, { .vop_getpage = udf_getpage },
229 VOPNAME_PUTPAGE, { .vop_putpage = udf_putpage },
230 VOPNAME_MAP, { .vop_map = udf_map },
231 VOPNAME_ADDMAP, { .vop_addmap = udf_addmap },
232 VOPNAME_DELMAP, { .vop_delmap = udf_delmap },
233 VOPNAME_PATHCONF, { .vop_pathconf = udf_l_pathconf },
234 VOPNAME_PAGEIO, { .vop_pageio = udf_pageio },
235 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
236 NULL, NULL
237 };
238
239 /* ARGSUSED */
240 static int32_t
udf_open(struct vnode ** vpp,int32_t flag,struct cred * cr,caller_context_t * ct)241 udf_open(
242 struct vnode **vpp,
243 int32_t flag,
244 struct cred *cr,
245 caller_context_t *ct)
246 {
247 ud_printf("udf_open\n");
248
249 return (0);
250 }
251
252 /* ARGSUSED */
253 static int32_t
udf_close(struct vnode * vp,int32_t flag,int32_t count,offset_t offset,struct cred * cr,caller_context_t * ct)254 udf_close(
255 struct vnode *vp,
256 int32_t flag,
257 int32_t count,
258 offset_t offset,
259 struct cred *cr,
260 caller_context_t *ct)
261 {
262 struct ud_inode *ip = VTOI(vp);
263
264 ud_printf("udf_close\n");
265
266 ITIMES(ip);
267
268 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
269 cleanshares(vp, ttoproc(curthread)->p_pid);
270
271 /*
272 * Push partially filled cluster at last close.
273 * ``last close'' is approximated because the dnlc
274 * may have a hold on the vnode.
275 */
276 if (vp->v_count <= 2 && vp->v_type != VBAD) {
277 struct ud_inode *ip = VTOI(vp);
278 if (ip->i_delaylen) {
279 (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
280 B_ASYNC | B_FREE, cr);
281 ip->i_delaylen = 0;
282 }
283 }
284
285 return (0);
286 }
287
288 /* ARGSUSED */
289 static int32_t
udf_read(struct vnode * vp,struct uio * uiop,int32_t ioflag,struct cred * cr,caller_context_t * ct)290 udf_read(
291 struct vnode *vp,
292 struct uio *uiop,
293 int32_t ioflag,
294 struct cred *cr,
295 caller_context_t *ct)
296 {
297 struct ud_inode *ip = VTOI(vp);
298 int32_t error;
299
300 ud_printf("udf_read\n");
301
302 #ifdef __lock_lint
303 rw_enter(&ip->i_rwlock, RW_READER);
304 #endif
305
306 ASSERT(RW_READ_HELD(&ip->i_rwlock));
307
308 if (MANDLOCK(vp, ip->i_char)) {
309 /*
310 * udf_getattr ends up being called by chklock
311 */
312 error = chklock(vp, FREAD, uiop->uio_loffset,
313 uiop->uio_resid, uiop->uio_fmode, ct);
314 if (error) {
315 goto end;
316 }
317 }
318
319 rw_enter(&ip->i_contents, RW_READER);
320 error = ud_rdip(ip, uiop, ioflag, cr);
321 rw_exit(&ip->i_contents);
322
323 end:
324 #ifdef __lock_lint
325 rw_exit(&ip->i_rwlock);
326 #endif
327
328 return (error);
329 }
330
331
332 int32_t ud_WRITES = 1;
333 int32_t ud_HW = 96 * 1024;
334 int32_t ud_LW = 64 * 1024;
335 int32_t ud_throttles = 0;
336
337 /* ARGSUSED */
338 static int32_t
udf_write(struct vnode * vp,struct uio * uiop,int32_t ioflag,struct cred * cr,caller_context_t * ct)339 udf_write(
340 struct vnode *vp,
341 struct uio *uiop,
342 int32_t ioflag,
343 struct cred *cr,
344 caller_context_t *ct)
345 {
346 struct ud_inode *ip = VTOI(vp);
347 int32_t error = 0;
348
349 ud_printf("udf_write\n");
350
351 #ifdef __lock_lint
352 rw_enter(&ip->i_rwlock, RW_WRITER);
353 #endif
354
355 ASSERT(RW_WRITE_HELD(&ip->i_rwlock));
356
357 if (MANDLOCK(vp, ip->i_char)) {
358 /*
359 * ud_getattr ends up being called by chklock
360 */
361 error = chklock(vp, FWRITE, uiop->uio_loffset,
362 uiop->uio_resid, uiop->uio_fmode, ct);
363 if (error) {
364 goto end;
365 }
366 }
367 /*
368 * Throttle writes.
369 */
370 mutex_enter(&ip->i_tlock);
371 if (ud_WRITES && (ip->i_writes > ud_HW)) {
372 while (ip->i_writes > ud_HW) {
373 ud_throttles++;
374 cv_wait(&ip->i_wrcv, &ip->i_tlock);
375 }
376 }
377 mutex_exit(&ip->i_tlock);
378
379 /*
380 * Write to the file
381 */
382 rw_enter(&ip->i_contents, RW_WRITER);
383 if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
384 /*
385 * In append mode start at end of file.
386 */
387 uiop->uio_loffset = ip->i_size;
388 }
389 error = ud_wrip(ip, uiop, ioflag, cr);
390 rw_exit(&ip->i_contents);
391
392 end:
393 #ifdef __lock_lint
394 rw_exit(&ip->i_rwlock);
395 #endif
396
397 return (error);
398 }
399
400 /* ARGSUSED */
401 static int32_t
udf_ioctl(struct vnode * vp,int32_t cmd,intptr_t arg,int32_t flag,struct cred * cr,int32_t * rvalp,caller_context_t * ct)402 udf_ioctl(
403 struct vnode *vp,
404 int32_t cmd,
405 intptr_t arg,
406 int32_t flag,
407 struct cred *cr,
408 int32_t *rvalp,
409 caller_context_t *ct)
410 {
411 return (ENOTTY);
412 }
413
414 /* ARGSUSED */
415 static int32_t
udf_getattr(struct vnode * vp,struct vattr * vap,int32_t flags,struct cred * cr,caller_context_t * ct)416 udf_getattr(
417 struct vnode *vp,
418 struct vattr *vap,
419 int32_t flags,
420 struct cred *cr,
421 caller_context_t *ct)
422 {
423 struct ud_inode *ip = VTOI(vp);
424
425 ud_printf("udf_getattr\n");
426
427 if (vap->va_mask == AT_SIZE) {
428 /*
429 * for performance, if only the size is requested don't bother
430 * with anything else.
431 */
432 vap->va_size = ip->i_size;
433 return (0);
434 }
435
436 rw_enter(&ip->i_contents, RW_READER);
437
438 vap->va_type = vp->v_type;
439 vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
440
441 vap->va_uid = ip->i_uid;
442 vap->va_gid = ip->i_gid;
443 vap->va_fsid = ip->i_dev;
444 vap->va_nodeid = ip->i_icb_lbano;
445 vap->va_nlink = ip->i_nlink;
446 vap->va_size = ip->i_size;
447 vap->va_seq = ip->i_seq;
448 if (vp->v_type == VCHR || vp->v_type == VBLK) {
449 vap->va_rdev = ip->i_rdev;
450 } else {
451 vap->va_rdev = 0;
452 }
453
454 mutex_enter(&ip->i_tlock);
455 ITIMES_NOLOCK(ip); /* mark correct time in inode */
456 vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
457 vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
458 vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
459 vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
460 vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
461 vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
462 mutex_exit(&ip->i_tlock);
463
464 switch (ip->i_type) {
465 case VBLK:
466 vap->va_blksize = MAXBSIZE;
467 break;
468 case VCHR:
469 vap->va_blksize = MAXBSIZE;
470 break;
471 default:
472 vap->va_blksize = ip->i_udf->udf_lbsize;
473 break;
474 }
475 vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;
476
477 rw_exit(&ip->i_contents);
478
479 return (0);
480 }
481
/*
 * ud_iaccess_vmode - access-check callback taking a vnode-style mode.
 *
 * Converts mode with UD_UPERM2DPERM() and hands it to ud_iaccess() with
 * a final argument of 0.  udf_setattr() passes this function to
 * secpolicy_vnode_setattr() together with the inode as the opaque ip.
 */
static int
ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
{
	return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 0));
}
487
488 /*ARGSUSED4*/
489 static int32_t
udf_setattr(struct vnode * vp,struct vattr * vap,int32_t flags,struct cred * cr,caller_context_t * ct)490 udf_setattr(
491 struct vnode *vp,
492 struct vattr *vap,
493 int32_t flags,
494 struct cred *cr,
495 caller_context_t *ct)
496 {
497 int32_t error = 0;
498 uint32_t mask = vap->va_mask;
499 struct ud_inode *ip;
500 timestruc_t now;
501 struct vattr ovap;
502
503 ud_printf("udf_setattr\n");
504
505 ip = VTOI(vp);
506
507 /*
508 * not updates allowed to 4096 files
509 */
510 if (ip->i_astrat == STRAT_TYPE4096) {
511 return (EINVAL);
512 }
513
514 /*
515 * Cannot set these attributes
516 */
517 if (mask & AT_NOSET) {
518 return (EINVAL);
519 }
520
521 rw_enter(&ip->i_rwlock, RW_WRITER);
522 rw_enter(&ip->i_contents, RW_WRITER);
523
524 ovap.va_uid = ip->i_uid;
525 ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
526 error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
527 ud_iaccess_vmode, ip);
528 if (error)
529 goto update_inode;
530
531 mask = vap->va_mask;
532 /*
533 * Change file access modes.
534 */
535 if (mask & AT_MODE) {
536 ip->i_perm = VA2UD_PERM(vap->va_mode);
537 ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
538 mutex_enter(&ip->i_tlock);
539 ip->i_flag |= ICHG;
540 mutex_exit(&ip->i_tlock);
541 }
542 if (mask & (AT_UID|AT_GID)) {
543 if (mask & AT_UID) {
544 ip->i_uid = vap->va_uid;
545 }
546 if (mask & AT_GID) {
547 ip->i_gid = vap->va_gid;
548 }
549 mutex_enter(&ip->i_tlock);
550 ip->i_flag |= ICHG;
551 mutex_exit(&ip->i_tlock);
552 }
553 /*
554 * Truncate file. Must have write permission and not be a directory.
555 */
556 if (mask & AT_SIZE) {
557 if (vp->v_type == VDIR) {
558 error = EISDIR;
559 goto update_inode;
560 }
561 if (error = ud_iaccess(ip, IWRITE, cr, 0)) {
562 goto update_inode;
563 }
564 if (vap->va_size > MAXOFFSET_T) {
565 error = EFBIG;
566 goto update_inode;
567 }
568 if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
569 goto update_inode;
570 }
571
572 if (vap->va_size == 0)
573 vnevent_truncate(vp, ct);
574 }
575 /*
576 * Change file access or modified times.
577 */
578 if (mask & (AT_ATIME|AT_MTIME)) {
579 mutex_enter(&ip->i_tlock);
580 if (mask & AT_ATIME) {
581 ip->i_atime.tv_sec = vap->va_atime.tv_sec;
582 ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
583 ip->i_flag &= ~IACC;
584 }
585 if (mask & AT_MTIME) {
586 ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
587 ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
588 gethrestime(&now);
589 ip->i_ctime.tv_sec = now.tv_sec;
590 ip->i_ctime.tv_nsec = now.tv_nsec;
591 ip->i_flag &= ~(IUPD|ICHG);
592 ip->i_flag |= IMODTIME;
593 }
594 ip->i_flag |= IMOD;
595 mutex_exit(&ip->i_tlock);
596 }
597
598 update_inode:
599 if (curthread->t_flag & T_DONTPEND) {
600 ud_iupdat(ip, 1);
601 } else {
602 ITIMES_NOLOCK(ip);
603 }
604 rw_exit(&ip->i_contents);
605 rw_exit(&ip->i_rwlock);
606
607 return (error);
608 }
609
610 /* ARGSUSED */
611 static int32_t
udf_access(struct vnode * vp,int32_t mode,int32_t flags,struct cred * cr,caller_context_t * ct)612 udf_access(
613 struct vnode *vp,
614 int32_t mode,
615 int32_t flags,
616 struct cred *cr,
617 caller_context_t *ct)
618 {
619 struct ud_inode *ip = VTOI(vp);
620
621 ud_printf("udf_access\n");
622
623 if (ip->i_udf == NULL) {
624 return (EIO);
625 }
626
627 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1));
628 }
629
630 int32_t udfs_stickyhack = 1;
631
632 /* ARGSUSED */
633 static int32_t
udf_lookup(struct vnode * dvp,char * nm,struct vnode ** vpp,struct pathname * pnp,int32_t flags,struct vnode * rdir,struct cred * cr,caller_context_t * ct,int * direntflags,pathname_t * realpnp)634 udf_lookup(
635 struct vnode *dvp,
636 char *nm,
637 struct vnode **vpp,
638 struct pathname *pnp,
639 int32_t flags,
640 struct vnode *rdir,
641 struct cred *cr,
642 caller_context_t *ct,
643 int *direntflags,
644 pathname_t *realpnp)
645 {
646 int32_t error;
647 struct vnode *vp;
648 struct ud_inode *ip, *xip;
649
650 ud_printf("udf_lookup\n");
651 /*
652 * Null component name is a synonym for directory being searched.
653 */
654 if (*nm == '\0') {
655 VN_HOLD(dvp);
656 *vpp = dvp;
657 error = 0;
658 goto out;
659 }
660
661 /*
662 * Fast path: Check the directory name lookup cache.
663 */
664 ip = VTOI(dvp);
665 if (vp = dnlc_lookup(dvp, nm)) {
666 /*
667 * Check accessibility of directory.
668 */
669 if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) {
670 VN_RELE(vp);
671 }
672 xip = VTOI(vp);
673 } else {
674 error = ud_dirlook(ip, nm, &xip, cr, 1);
675 ITIMES(ip);
676 }
677
678 if (error == 0) {
679 ip = xip;
680 *vpp = ITOV(ip);
681 if ((ip->i_type != VDIR) &&
682 (ip->i_char & ISVTX) &&
683 ((ip->i_perm & IEXEC) == 0) &&
684 udfs_stickyhack) {
685 mutex_enter(&(*vpp)->v_lock);
686 (*vpp)->v_flag |= VISSWAP;
687 mutex_exit(&(*vpp)->v_lock);
688 }
689 ITIMES(ip);
690 /*
691 * If vnode is a device return special vnode instead.
692 */
693 if (IS_DEVVP(*vpp)) {
694 struct vnode *newvp;
695 newvp = specvp(*vpp, (*vpp)->v_rdev,
696 (*vpp)->v_type, cr);
697 VN_RELE(*vpp);
698 if (newvp == NULL) {
699 error = ENOSYS;
700 } else {
701 *vpp = newvp;
702 }
703 }
704 }
705 out:
706 return (error);
707 }
708
709 /* ARGSUSED */
710 static int32_t
udf_create(struct vnode * dvp,char * name,struct vattr * vap,enum vcexcl excl,int32_t mode,struct vnode ** vpp,struct cred * cr,int32_t flag,caller_context_t * ct,vsecattr_t * vsecp)711 udf_create(
712 struct vnode *dvp,
713 char *name,
714 struct vattr *vap,
715 enum vcexcl excl,
716 int32_t mode,
717 struct vnode **vpp,
718 struct cred *cr,
719 int32_t flag,
720 caller_context_t *ct,
721 vsecattr_t *vsecp)
722 {
723 int32_t error;
724 struct ud_inode *ip = VTOI(dvp), *xip;
725
726 ud_printf("udf_create\n");
727
728 if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
729 vap->va_mode &= ~VSVTX;
730
731 if (*name == '\0') {
732 /*
733 * Null component name refers to the directory itself.
734 */
735 VN_HOLD(dvp);
736 ITIMES(ip);
737 error = EEXIST;
738 } else {
739 xip = NULL;
740 rw_enter(&ip->i_rwlock, RW_WRITER);
741 error = ud_direnter(ip, name, DE_CREATE,
742 (struct ud_inode *)0, (struct ud_inode *)0,
743 vap, &xip, cr, ct);
744 rw_exit(&ip->i_rwlock);
745 ITIMES(ip);
746 ip = xip;
747 }
748 #ifdef __lock_lint
749 rw_enter(&ip->i_contents, RW_WRITER);
750 #else
751 if (ip != NULL) {
752 rw_enter(&ip->i_contents, RW_WRITER);
753 }
754 #endif
755
756 /*
757 * If the file already exists and this is a non-exclusive create,
758 * check permissions and allow access for non-directories.
759 * Read-only create of an existing directory is also allowed.
760 * We fail an exclusive create of anything which already exists.
761 */
762 if (error == EEXIST) {
763 if (excl == NONEXCL) {
764 if ((ip->i_type == VDIR) && (mode & VWRITE)) {
765 error = EISDIR;
766 } else if (mode) {
767 error = ud_iaccess(ip,
768 UD_UPERM2DPERM(mode), cr, 0);
769 } else {
770 error = 0;
771 }
772 }
773 if (error) {
774 rw_exit(&ip->i_contents);
775 VN_RELE(ITOV(ip));
776 goto out;
777 } else if ((ip->i_type == VREG) &&
778 (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
779 /*
780 * Truncate regular files, if requested by caller.
781 * Grab i_rwlock to make sure no one else is
782 * currently writing to the file (we promised
783 * bmap we would do this).
784 * Must get the locks in the correct order.
785 */
786 if (ip->i_size == 0) {
787 ip->i_flag |= ICHG | IUPD;
788 } else {
789 rw_exit(&ip->i_contents);
790 rw_enter(&ip->i_rwlock, RW_WRITER);
791 rw_enter(&ip->i_contents, RW_WRITER);
792 (void) ud_itrunc(ip, 0, 0, cr);
793 rw_exit(&ip->i_rwlock);
794 }
795 vnevent_create(ITOV(ip), ct);
796 }
797 }
798
799 if (error == 0) {
800 *vpp = ITOV(ip);
801 ITIMES(ip);
802 }
803 #ifdef __lock_lint
804 rw_exit(&ip->i_contents);
805 #else
806 if (ip != NULL) {
807 rw_exit(&ip->i_contents);
808 }
809 #endif
810 if (error) {
811 goto out;
812 }
813
814 /*
815 * If vnode is a device return special vnode instead.
816 */
817 if (!error && IS_DEVVP(*vpp)) {
818 struct vnode *newvp;
819
820 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
821 VN_RELE(*vpp);
822 if (newvp == NULL) {
823 error = ENOSYS;
824 goto out;
825 }
826 *vpp = newvp;
827 }
828 out:
829 return (error);
830 }
831
832 /* ARGSUSED */
833 static int32_t
udf_remove(struct vnode * vp,char * nm,struct cred * cr,caller_context_t * ct,int flags)834 udf_remove(
835 struct vnode *vp,
836 char *nm,
837 struct cred *cr,
838 caller_context_t *ct,
839 int flags)
840 {
841 int32_t error;
842 struct ud_inode *ip = VTOI(vp);
843
844 ud_printf("udf_remove\n");
845
846 rw_enter(&ip->i_rwlock, RW_WRITER);
847 error = ud_dirremove(ip, nm,
848 (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
849 rw_exit(&ip->i_rwlock);
850 ITIMES(ip);
851
852 return (error);
853 }
854
855 /* ARGSUSED */
856 static int32_t
udf_link(struct vnode * tdvp,struct vnode * svp,char * tnm,struct cred * cr,caller_context_t * ct,int flags)857 udf_link(
858 struct vnode *tdvp,
859 struct vnode *svp,
860 char *tnm,
861 struct cred *cr,
862 caller_context_t *ct,
863 int flags)
864 {
865 int32_t error;
866 struct vnode *realvp;
867 struct ud_inode *sip;
868 struct ud_inode *tdp;
869
870 ud_printf("udf_link\n");
871 if (VOP_REALVP(svp, &realvp, ct) == 0) {
872 svp = realvp;
873 }
874
875 /*
876 * Do not allow links to directories
877 */
878 if (svp->v_type == VDIR) {
879 return (EPERM);
880 }
881
882 sip = VTOI(svp);
883
884 if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
885 return (EPERM);
886
887 tdp = VTOI(tdvp);
888
889 rw_enter(&tdp->i_rwlock, RW_WRITER);
890 error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
891 sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
892 rw_exit(&tdp->i_rwlock);
893 ITIMES(sip);
894 ITIMES(tdp);
895
896 if (error == 0) {
897 vnevent_link(svp, ct);
898 }
899
900 return (error);
901 }
902
903 /* ARGSUSED */
904 static int32_t
udf_rename(struct vnode * sdvp,char * snm,struct vnode * tdvp,char * tnm,struct cred * cr,caller_context_t * ct,int flags)905 udf_rename(
906 struct vnode *sdvp,
907 char *snm,
908 struct vnode *tdvp,
909 char *tnm,
910 struct cred *cr,
911 caller_context_t *ct,
912 int flags)
913 {
914 int32_t error = 0;
915 struct udf_vfs *udf_vfsp;
916 struct ud_inode *sip; /* source inode */
917 struct ud_inode *sdp, *tdp; /* source and target parent inode */
918 struct vnode *realvp;
919
920 ud_printf("udf_rename\n");
921
922 if (VOP_REALVP(tdvp, &realvp, ct) == 0) {
923 tdvp = realvp;
924 }
925
926 sdp = VTOI(sdvp);
927 tdp = VTOI(tdvp);
928
929 udf_vfsp = sdp->i_udf;
930
931 mutex_enter(&udf_vfsp->udf_rename_lck);
932 /*
933 * Look up inode of file we're supposed to rename.
934 */
935 if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
936 mutex_exit(&udf_vfsp->udf_rename_lck);
937 return (error);
938 }
939 /*
940 * be sure this is not a directory with another file system mounted
941 * over it. If it is just give up the locks, and return with
942 * EBUSY
943 */
944 if (vn_mountedvfs(ITOV(sip)) != NULL) {
945 error = EBUSY;
946 goto errout;
947 }
948 /*
949 * Make sure we can delete the source entry. This requires
950 * write permission on the containing directory. If that
951 * directory is "sticky" it further requires (except for
952 * privileged users) that the user own the directory or the
953 * source entry, or else have permission to write the source
954 * entry.
955 */
956 rw_enter(&sdp->i_contents, RW_READER);
957 rw_enter(&sip->i_contents, RW_READER);
958 if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 ||
959 (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
960 rw_exit(&sip->i_contents);
961 rw_exit(&sdp->i_contents);
962 ITIMES(sip);
963 goto errout;
964 }
965
966 /*
967 * Check for renaming '.' or '..' or alias of '.'
968 */
969 if ((strcmp(snm, ".") == 0) ||
970 (strcmp(snm, "..") == 0) ||
971 (sdp == sip)) {
972 error = EINVAL;
973 rw_exit(&sip->i_contents);
974 rw_exit(&sdp->i_contents);
975 goto errout;
976 }
977 rw_exit(&sip->i_contents);
978 rw_exit(&sdp->i_contents);
979
980
981 /*
982 * Link source to the target.
983 */
984 rw_enter(&tdp->i_rwlock, RW_WRITER);
985 if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip,
986 (struct vattr *)0, (struct ud_inode **)0, cr, ct)) {
987 /*
988 * ESAME isn't really an error; it indicates that the
989 * operation should not be done because the source and target
990 * are the same file, but that no error should be reported.
991 */
992 if (error == ESAME) {
993 error = 0;
994 }
995 rw_exit(&tdp->i_rwlock);
996 goto errout;
997 }
998 vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
999 rw_exit(&tdp->i_rwlock);
1000
1001 rw_enter(&sdp->i_rwlock, RW_WRITER);
1002 /*
1003 * Unlink the source.
1004 * Remove the source entry. ud_dirremove() checks that the entry
1005 * still reflects sip, and returns an error if it doesn't.
1006 * If the entry has changed just forget about it. Release
1007 * the source inode.
1008 */
1009 if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
1010 DR_RENAME, cr, ct)) == ENOENT) {
1011 error = 0;
1012 }
1013 rw_exit(&sdp->i_rwlock);
1014 errout:
1015 ITIMES(sdp);
1016 ITIMES(tdp);
1017 VN_RELE(ITOV(sip));
1018 mutex_exit(&udf_vfsp->udf_rename_lck);
1019
1020 return (error);
1021 }
1022
1023 /* ARGSUSED */
1024 static int32_t
udf_mkdir(struct vnode * dvp,char * dirname,struct vattr * vap,struct vnode ** vpp,struct cred * cr,caller_context_t * ct,int flags,vsecattr_t * vsecp)1025 udf_mkdir(
1026 struct vnode *dvp,
1027 char *dirname,
1028 struct vattr *vap,
1029 struct vnode **vpp,
1030 struct cred *cr,
1031 caller_context_t *ct,
1032 int flags,
1033 vsecattr_t *vsecp)
1034 {
1035 int32_t error;
1036 struct ud_inode *ip;
1037 struct ud_inode *xip;
1038
1039 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1040
1041 ud_printf("udf_mkdir\n");
1042
1043 ip = VTOI(dvp);
1044 rw_enter(&ip->i_rwlock, RW_WRITER);
1045 error = ud_direnter(ip, dirname, DE_MKDIR,
1046 (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
1047 rw_exit(&ip->i_rwlock);
1048 ITIMES(ip);
1049 if (error == 0) {
1050 ip = xip;
1051 *vpp = ITOV(ip);
1052 ITIMES(ip);
1053 } else if (error == EEXIST) {
1054 ITIMES(xip);
1055 VN_RELE(ITOV(xip));
1056 }
1057
1058 return (error);
1059 }
1060
1061 /* ARGSUSED */
1062 static int32_t
udf_rmdir(struct vnode * vp,char * nm,struct vnode * cdir,struct cred * cr,caller_context_t * ct,int flags)1063 udf_rmdir(
1064 struct vnode *vp,
1065 char *nm,
1066 struct vnode *cdir,
1067 struct cred *cr,
1068 caller_context_t *ct,
1069 int flags)
1070 {
1071 int32_t error;
1072 struct ud_inode *ip = VTOI(vp);
1073
1074 ud_printf("udf_rmdir\n");
1075
1076 rw_enter(&ip->i_rwlock, RW_WRITER);
1077 error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR,
1078 cr, ct);
1079 rw_exit(&ip->i_rwlock);
1080 ITIMES(ip);
1081
1082 return (error);
1083 }
1084
1085 /* ARGSUSED */
1086 static int32_t
udf_readdir(struct vnode * vp,struct uio * uiop,struct cred * cr,int32_t * eofp,caller_context_t * ct,int flags)1087 udf_readdir(
1088 struct vnode *vp,
1089 struct uio *uiop,
1090 struct cred *cr,
1091 int32_t *eofp,
1092 caller_context_t *ct,
1093 int flags)
1094 {
1095 struct ud_inode *ip;
1096 struct dirent64 *nd;
1097 struct udf_vfs *udf_vfsp;
1098 int32_t error = 0, len, outcount = 0;
1099 uint32_t dirsiz, offset;
1100 uint32_t bufsize, ndlen, dummy;
1101 caddr_t outbuf;
1102 caddr_t outb, end_outb;
1103 struct iovec *iovp;
1104
1105 uint8_t *dname;
1106 int32_t length;
1107
1108 uint8_t *buf = NULL;
1109
1110 struct fbuf *fbp = NULL;
1111 struct file_id *fid;
1112 uint8_t *name;
1113
1114
1115 ud_printf("udf_readdir\n");
1116
1117 ip = VTOI(vp);
1118 udf_vfsp = ip->i_udf;
1119
1120 dirsiz = ip->i_size;
1121 if ((uiop->uio_offset >= dirsiz) ||
1122 (ip->i_nlink <= 0)) {
1123 if (eofp) {
1124 *eofp = 1;
1125 }
1126 return (0);
1127 }
1128
1129 offset = uiop->uio_offset;
1130 iovp = uiop->uio_iov;
1131 bufsize = iovp->iov_len;
1132
1133 outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP);
1134 end_outb = outb + bufsize;
1135 nd = (struct dirent64 *)outbuf;
1136
1137 dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
1138 buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);
1139
1140 if (offset == 0) {
1141 len = DIRENT64_RECLEN(1);
1142 if (((caddr_t)nd + len) >= end_outb) {
1143 error = EINVAL;
1144 goto end;
1145 }
1146 nd->d_ino = ip->i_icb_lbano;
1147 nd->d_reclen = (uint16_t)len;
1148 nd->d_off = 0x10;
1149 nd->d_name[0] = '.';
1150 bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1);
1151 nd = (struct dirent64 *)((char *)nd + nd->d_reclen);
1152 outcount++;
1153 } else if (offset == 0x10) {
1154 offset = 0;
1155 }
1156
1157 while (offset < dirsiz) {
1158 error = ud_get_next_fid(ip, &fbp,
1159 offset, &fid, &name, buf);
1160 if (error != 0) {
1161 break;
1162 }
1163
1164 if ((fid->fid_flags & FID_DELETED) == 0) {
1165 if (fid->fid_flags & FID_PARENT) {
1166
1167 len = DIRENT64_RECLEN(2);
1168 if (((caddr_t)nd + len) >= end_outb) {
1169 error = EINVAL;
1170 break;
1171 }
1172
1173 nd->d_ino = ip->i_icb_lbano;
1174 nd->d_reclen = (uint16_t)len;
1175 nd->d_off = offset + FID_LEN(fid);
1176 nd->d_name[0] = '.';
1177 nd->d_name[1] = '.';
1178 bzero(&nd->d_name[2],
1179 DIRENT64_NAMELEN(len) - 2);
1180 nd = (struct dirent64 *)
1181 ((char *)nd + nd->d_reclen);
1182 } else {
1183 if ((error = ud_uncompress(fid->fid_idlen,
1184 &length, name, dname)) != 0) {
1185 break;
1186 }
1187 if (length == 0) {
1188 offset += FID_LEN(fid);
1189 continue;
1190 }
1191 len = DIRENT64_RECLEN(length);
1192 if (((caddr_t)nd + len) >= end_outb) {
1193 if (!outcount) {
1194 error = EINVAL;
1195 }
1196 break;
1197 }
1198 (void) strncpy(nd->d_name,
1199 (caddr_t)dname, length);
1200 bzero(&nd->d_name[length],
1201 DIRENT64_NAMELEN(len) - length);
1202 nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
1203 SWAP_16(fid->fid_icb.lad_ext_prn),
1204 SWAP_32(fid->fid_icb.lad_ext_loc), 1,
1205 &dummy);
1206 nd->d_reclen = (uint16_t)len;
1207 nd->d_off = offset + FID_LEN(fid);
1208 nd = (struct dirent64 *)
1209 ((char *)nd + nd->d_reclen);
1210 }
1211 outcount++;
1212 }
1213
1214 offset += FID_LEN(fid);
1215 }
1216
1217 end:
1218 if (fbp != NULL) {
1219 fbrelse(fbp, S_OTHER);
1220 }
1221 ndlen = ((char *)nd - outbuf);
1222 /*
1223 * In case of error do not call uiomove.
1224 * Return the error to the caller.
1225 */
1226 if ((error == 0) && (ndlen != 0)) {
1227 error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop);
1228 uiop->uio_offset = offset;
1229 }
1230 kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize);
1231 kmem_free((caddr_t)dname, 1024);
1232 kmem_free(outbuf, (uint32_t)bufsize);
1233 if (eofp && error == 0) {
1234 *eofp = (uiop->uio_offset >= dirsiz);
1235 }
1236 return (error);
1237 }
1238
1239 /* ARGSUSED */
1240 static int32_t
udf_symlink(struct vnode * dvp,char * linkname,struct vattr * vap,char * target,struct cred * cr,caller_context_t * ct,int flags)1241 udf_symlink(
1242 struct vnode *dvp,
1243 char *linkname,
1244 struct vattr *vap,
1245 char *target,
1246 struct cred *cr,
1247 caller_context_t *ct,
1248 int flags)
1249 {
1250 int32_t error = 0, outlen;
1251 uint32_t ioflag = 0;
1252 struct ud_inode *ip, *dip = VTOI(dvp);
1253
1254 struct path_comp *pc;
1255 int8_t *dname = NULL, *uname = NULL, *sp;
1256
1257 ud_printf("udf_symlink\n");
1258
1259 ip = (struct ud_inode *)0;
1260 vap->va_type = VLNK;
1261 vap->va_rdev = 0;
1262
1263 rw_enter(&dip->i_rwlock, RW_WRITER);
1264 error = ud_direnter(dip, linkname, DE_CREATE,
1265 (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
1266 rw_exit(&dip->i_rwlock);
1267 if (error == 0) {
1268 dname = kmem_zalloc(1024, KM_SLEEP);
1269 uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1270
1271 pc = (struct path_comp *)uname;
1272 /*
1273 * If the first character in target is "/"
1274 * then skip it and create entry for it
1275 */
1276 if (*target == '/') {
1277 pc->pc_type = 2;
1278 pc->pc_len = 0;
1279 pc = (struct path_comp *)(((char *)pc) + 4);
1280 while (*target == '/') {
1281 target++;
1282 }
1283 }
1284
1285 while (*target != NULL) {
1286 sp = target;
1287 while ((*target != '/') && (*target != '\0')) {
1288 target ++;
1289 }
1290 /*
1291 * We got the next component of the
1292 * path name. Create path_comp of
1293 * appropriate type
1294 */
1295 if (((target - sp) == 1) && (*sp == '.')) {
1296 /*
1297 * Dot entry.
1298 */
1299 pc->pc_type = 4;
1300 pc = (struct path_comp *)(((char *)pc) + 4);
1301 } else if (((target - sp) == 2) &&
1302 (*sp == '.') && ((*(sp + 1)) == '.')) {
1303 /*
1304 * DotDot entry.
1305 */
1306 pc->pc_type = 3;
1307 pc = (struct path_comp *)(((char *)pc) + 4);
1308 } else {
1309 /*
1310 * convert the user given name
1311 * into appropriate form to be put
1312 * on the media
1313 */
1314 outlen = 1024; /* set to size of dname */
1315 if (error = ud_compress(target - sp, &outlen,
1316 (uint8_t *)sp, (uint8_t *)dname)) {
1317 break;
1318 }
1319 pc->pc_type = 5;
1320 /* LINTED */
1321 pc->pc_len = outlen;
1322 dname[outlen] = '\0';
1323 (void) strcpy((char *)pc->pc_id, dname);
1324 pc = (struct path_comp *)
1325 (((char *)pc) + 4 + outlen);
1326 }
1327 while (*target == '/') {
1328 target++;
1329 }
1330 if (*target == NULL) {
1331 break;
1332 }
1333 }
1334
1335 rw_enter(&ip->i_contents, RW_WRITER);
1336 if (error == 0) {
1337 ioflag = FWRITE;
1338 if (curthread->t_flag & T_DONTPEND) {
1339 ioflag |= FDSYNC;
1340 }
1341 error = ud_rdwri(UIO_WRITE, ioflag, ip,
1342 uname, ((int8_t *)pc) - uname,
1343 (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
1344 }
1345 if (error) {
1346 ud_idrop(ip);
1347 rw_exit(&ip->i_contents);
1348 rw_enter(&dip->i_rwlock, RW_WRITER);
1349 (void) ud_dirremove(dip, linkname, (struct ud_inode *)0,
1350 (struct vnode *)0, DR_REMOVE, cr, ct);
1351 rw_exit(&dip->i_rwlock);
1352 goto update_inode;
1353 }
1354 rw_exit(&ip->i_contents);
1355 }
1356
1357 if ((error == 0) || (error == EEXIST)) {
1358 VN_RELE(ITOV(ip));
1359 }
1360
1361 update_inode:
1362 ITIMES(VTOI(dvp));
1363 if (uname != NULL) {
1364 kmem_free(uname, PAGESIZE);
1365 }
1366 if (dname != NULL) {
1367 kmem_free(dname, 1024);
1368 }
1369
1370 return (error);
1371 }
1372
1373 /* ARGSUSED */
1374 static int32_t
udf_readlink(struct vnode * vp,struct uio * uiop,struct cred * cr,caller_context_t * ct)1375 udf_readlink(
1376 struct vnode *vp,
1377 struct uio *uiop,
1378 struct cred *cr,
1379 caller_context_t *ct)
1380 {
1381 int32_t error = 0, off, id_len, size, len;
1382 int8_t *dname = NULL, *uname = NULL;
1383 struct ud_inode *ip;
1384 struct fbuf *fbp = NULL;
1385 struct path_comp *pc;
1386
1387 ud_printf("udf_readlink\n");
1388
1389 if (vp->v_type != VLNK) {
1390 return (EINVAL);
1391 }
1392
1393 ip = VTOI(vp);
1394 size = ip->i_size;
1395 if (size > PAGESIZE) {
1396 return (EIO);
1397 }
1398
1399 if (size == 0) {
1400 return (0);
1401 }
1402
1403 dname = kmem_zalloc(1024, KM_SLEEP);
1404 uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1405
1406 rw_enter(&ip->i_contents, RW_READER);
1407
1408 if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) {
1409 goto end;
1410 }
1411
1412 off = 0;
1413
1414 while (off < size) {
1415 pc = (struct path_comp *)(fbp->fb_addr + off);
1416 switch (pc->pc_type) {
1417 case 1 :
1418 (void) strcpy(uname, ip->i_udf->udf_fsmnt);
1419 (void) strcat(uname, "/");
1420 break;
1421 case 2 :
1422 if (pc->pc_len != 0) {
1423 goto end;
1424 }
1425 uname[0] = '/';
1426 uname[1] = '\0';
1427 break;
1428 case 3 :
1429 (void) strcat(uname, "../");
1430 break;
1431 case 4 :
1432 (void) strcat(uname, "./");
1433 break;
1434 case 5 :
1435 if ((error = ud_uncompress(pc->pc_len, &id_len,
1436 pc->pc_id, (uint8_t *)dname)) != 0) {
1437 break;
1438 }
1439 dname[id_len] = '\0';
1440 (void) strcat(uname, dname);
1441 (void) strcat(uname, "/");
1442 break;
1443 default :
1444 error = EINVAL;
1445 goto end;
1446 }
1447 off += 4 + pc->pc_len;
1448 }
1449 len = strlen(uname) - 1;
1450 if (uname[len] == '/') {
1451 if (len == 0) {
1452 /*
1453 * special case link to /
1454 */
1455 len = 1;
1456 } else {
1457 uname[len] = '\0';
1458 }
1459 }
1460
1461 error = uiomove(uname, len, UIO_READ, uiop);
1462
1463 ITIMES(ip);
1464
1465 end:
1466 if (fbp != NULL) {
1467 fbrelse(fbp, S_OTHER);
1468 }
1469 rw_exit(&ip->i_contents);
1470 if (uname != NULL) {
1471 kmem_free(uname, PAGESIZE);
1472 }
1473 if (dname != NULL) {
1474 kmem_free(dname, 1024);
1475 }
1476 return (error);
1477 }
1478
1479 /* ARGSUSED */
1480 static int32_t
udf_fsync(struct vnode * vp,int32_t syncflag,struct cred * cr,caller_context_t * ct)1481 udf_fsync(
1482 struct vnode *vp,
1483 int32_t syncflag,
1484 struct cred *cr,
1485 caller_context_t *ct)
1486 {
1487 int32_t error = 0;
1488 struct ud_inode *ip = VTOI(vp);
1489
1490 ud_printf("udf_fsync\n");
1491
1492 rw_enter(&ip->i_contents, RW_WRITER);
1493 if (!(IS_SWAPVP(vp))) {
1494 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */
1495 }
1496 if (error == 0) {
1497 error = ud_sync_indir(ip);
1498 }
1499 ITIMES(ip); /* XXX: is this necessary ??? */
1500 rw_exit(&ip->i_contents);
1501
1502 return (error);
1503 }
1504
1505 /* ARGSUSED */
1506 static void
udf_inactive(struct vnode * vp,struct cred * cr,caller_context_t * ct)1507 udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
1508 {
1509 ud_printf("udf_iinactive\n");
1510
1511 ud_iinactive(VTOI(vp), cr);
1512 }
1513
1514 /* ARGSUSED */
1515 static int32_t
udf_fid(struct vnode * vp,struct fid * fidp,caller_context_t * ct)1516 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1517 {
1518 struct udf_fid *udfidp;
1519 struct ud_inode *ip = VTOI(vp);
1520
1521 ud_printf("udf_fid\n");
1522
1523 if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) {
1524 fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1525 return (ENOSPC);
1526 }
1527
1528 udfidp = (struct udf_fid *)fidp;
1529 bzero((char *)udfidp, sizeof (struct udf_fid));
1530 rw_enter(&ip->i_contents, RW_READER);
1531 udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1532 udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff;
1533 udfidp->udfid_prn = ip->i_icb_prn;
1534 udfidp->udfid_icb_lbn = ip->i_icb_block;
1535 rw_exit(&ip->i_contents);
1536
1537 return (0);
1538 }
1539
1540 /* ARGSUSED2 */
1541 static int
udf_rwlock(struct vnode * vp,int32_t write_lock,caller_context_t * ctp)1542 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1543 {
1544 struct ud_inode *ip = VTOI(vp);
1545
1546 ud_printf("udf_rwlock\n");
1547
1548 if (write_lock) {
1549 rw_enter(&ip->i_rwlock, RW_WRITER);
1550 } else {
1551 rw_enter(&ip->i_rwlock, RW_READER);
1552 }
1553 #ifdef __lock_lint
1554 rw_exit(&ip->i_rwlock);
1555 #endif
1556 return (write_lock);
1557 }
1558
1559 /* ARGSUSED */
1560 static void
udf_rwunlock(struct vnode * vp,int32_t write_lock,caller_context_t * ctp)1561 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1562 {
1563 struct ud_inode *ip = VTOI(vp);
1564
1565 ud_printf("udf_rwunlock\n");
1566
1567 #ifdef __lock_lint
1568 rw_enter(&ip->i_rwlock, RW_WRITER);
1569 #endif
1570
1571 rw_exit(&ip->i_rwlock);
1572
1573 }
1574
1575 /* ARGSUSED */
1576 static int32_t
udf_seek(struct vnode * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)1577 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1578 {
1579 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
1580 }
1581
1582 static int32_t
udf_frlock(struct vnode * vp,int32_t cmd,struct flock64 * bfp,int32_t flag,offset_t offset,struct flk_callback * flk_cbp,cred_t * cr,caller_context_t * ct)1583 udf_frlock(
1584 struct vnode *vp,
1585 int32_t cmd,
1586 struct flock64 *bfp,
1587 int32_t flag,
1588 offset_t offset,
1589 struct flk_callback *flk_cbp,
1590 cred_t *cr,
1591 caller_context_t *ct)
1592 {
1593 struct ud_inode *ip = VTOI(vp);
1594
1595 ud_printf("udf_frlock\n");
1596
1597 /*
1598 * If file is being mapped, disallow frlock.
1599 * XXX I am not holding tlock while checking i_mapcnt because the
1600 * current locking strategy drops all locks before calling fs_frlock.
1601 * So, mapcnt could change before we enter fs_frlock making is
1602 * meaningless to have held tlock in the first place.
1603 */
1604 if ((ip->i_mapcnt > 0) &&
1605 (MANDLOCK(vp, ip->i_char))) {
1606 return (EAGAIN);
1607 }
1608
1609 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1610 }
1611
1612 /*ARGSUSED6*/
1613 static int32_t
udf_space(struct vnode * vp,int32_t cmd,struct flock64 * bfp,int32_t flag,offset_t offset,cred_t * cr,caller_context_t * ct)1614 udf_space(
1615 struct vnode *vp,
1616 int32_t cmd,
1617 struct flock64 *bfp,
1618 int32_t flag,
1619 offset_t offset,
1620 cred_t *cr,
1621 caller_context_t *ct)
1622 {
1623 int32_t error = 0;
1624
1625 ud_printf("udf_space\n");
1626
1627 if (cmd != F_FREESP) {
1628 error = EINVAL;
1629 } else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
1630 error = ud_freesp(vp, bfp, flag, cr);
1631
1632 if (error == 0 && bfp->l_start == 0)
1633 vnevent_truncate(vp, ct);
1634 }
1635
1636 return (error);
1637 }
1638
1639 /* ARGSUSED */
1640 static int32_t
udf_getpage(struct vnode * vp,offset_t off,size_t len,uint32_t * protp,struct page ** plarr,size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr,caller_context_t * ct)1641 udf_getpage(
1642 struct vnode *vp,
1643 offset_t off,
1644 size_t len,
1645 uint32_t *protp,
1646 struct page **plarr,
1647 size_t plsz,
1648 struct seg *seg,
1649 caddr_t addr,
1650 enum seg_rw rw,
1651 struct cred *cr,
1652 caller_context_t *ct)
1653 {
1654 struct ud_inode *ip = VTOI(vp);
1655 int32_t error, has_holes, beyond_eof, seqmode, dolock;
1656 int32_t pgsize = PAGESIZE;
1657 struct udf_vfs *udf_vfsp = ip->i_udf;
1658 page_t **pl;
1659 u_offset_t pgoff, eoff, uoff;
1660 krw_t rwtype;
1661 caddr_t pgaddr;
1662
1663 ud_printf("udf_getpage\n");
1664
1665 uoff = (u_offset_t)off; /* type conversion */
1666 if (protp) {
1667 *protp = PROT_ALL;
1668 }
1669 if (vp->v_flag & VNOMAP) {
1670 return (ENOSYS);
1671 }
1672 seqmode = ip->i_nextr == uoff && rw != S_CREATE;
1673
1674 rwtype = RW_READER;
1675 dolock = (rw_owner(&ip->i_contents) != curthread);
1676 retrylock:
1677 #ifdef __lock_lint
1678 rw_enter(&ip->i_contents, rwtype);
1679 #else
1680 if (dolock) {
1681 rw_enter(&ip->i_contents, rwtype);
1682 }
1683 #endif
1684
1685 /*
1686 * We may be getting called as a side effect of a bmap using
1687 * fbread() when the blocks might be being allocated and the
1688 * size has not yet been up'ed. In this case we want to be
1689 * able to return zero pages if we get back UDF_HOLE from
1690 * calling bmap for a non write case here. We also might have
1691 * to read some frags from the disk into a page if we are
1692 * extending the number of frags for a given lbn in bmap().
1693 */
1694 beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
1695 if (beyond_eof && seg != segkmap) {
1696 #ifdef __lock_lint
1697 rw_exit(&ip->i_contents);
1698 #else
1699 if (dolock) {
1700 rw_exit(&ip->i_contents);
1701 }
1702 #endif
1703 return (EFAULT);
1704 }
1705
1706 /*
1707 * Must hold i_contents lock throughout the call to pvn_getpages
1708 * since locked pages are returned from each call to ud_getapage.
1709 * Must *not* return locked pages and then try for contents lock
1710 * due to lock ordering requirements (inode > page)
1711 */
1712
1713 has_holes = ud_bmap_has_holes(ip);
1714
1715 if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
1716 int32_t blk_size, count;
1717 u_offset_t offset;
1718
1719 /*
1720 * We must acquire the RW_WRITER lock in order to
1721 * call bmap_write().
1722 */
1723 if (dolock && rwtype == RW_READER) {
1724 rwtype = RW_WRITER;
1725
1726 if (!rw_tryupgrade(&ip->i_contents)) {
1727
1728 rw_exit(&ip->i_contents);
1729
1730 goto retrylock;
1731 }
1732 }
1733
1734 /*
1735 * May be allocating disk blocks for holes here as
1736 * a result of mmap faults. write(2) does the bmap_write
1737 * in rdip/wrip, not here. We are not dealing with frags
1738 * in this case.
1739 */
1740 offset = uoff;
1741 while ((offset < uoff + len) &&
1742 (offset < ip->i_size)) {
1743 /*
1744 * the variable "bnp" is to simplify the expression for
1745 * the compiler; * just passing in &bn to bmap_write
1746 * causes a compiler "loop"
1747 */
1748
1749 blk_size = udf_vfsp->udf_lbsize;
1750 if ((offset + blk_size) > ip->i_size) {
1751 count = ip->i_size - offset;
1752 } else {
1753 count = blk_size;
1754 }
1755 error = ud_bmap_write(ip, offset, count, 0, cr);
1756 if (error) {
1757 goto update_inode;
1758 }
1759 offset += count; /* XXX - make this contig */
1760 }
1761 }
1762
1763 /*
1764 * Can be a reader from now on.
1765 */
1766 #ifdef __lock_lint
1767 if (rwtype == RW_WRITER) {
1768 rw_downgrade(&ip->i_contents);
1769 }
1770 #else
1771 if (dolock && rwtype == RW_WRITER) {
1772 rw_downgrade(&ip->i_contents);
1773 }
1774 #endif
1775
1776 /*
1777 * We remove PROT_WRITE in cases when the file has UDF holes
1778 * because we don't want to call bmap_read() to check each
1779 * page if it is backed with a disk block.
1780 */
1781 if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
1782 *protp &= ~PROT_WRITE;
1783 }
1784
1785 error = 0;
1786
1787 /*
1788 * The loop looks up pages in the range <off, off + len).
1789 * For each page, we first check if we should initiate an asynchronous
1790 * read ahead before we call page_lookup (we may sleep in page_lookup
1791 * for a previously initiated disk read).
1792 */
1793 eoff = (uoff + len);
1794 for (pgoff = uoff, pgaddr = addr, pl = plarr;
1795 pgoff < eoff; /* empty */) {
1796 page_t *pp;
1797 u_offset_t nextrio;
1798 se_t se;
1799
1800 se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);
1801
1802 /*
1803 * Handle async getpage (faultahead)
1804 */
1805 if (plarr == NULL) {
1806 ip->i_nextrio = pgoff;
1807 ud_getpage_ra(vp, pgoff, seg, pgaddr);
1808 pgoff += pgsize;
1809 pgaddr += pgsize;
1810 continue;
1811 }
1812
1813 /*
1814 * Check if we should initiate read ahead of next cluster.
1815 * We call page_exists only when we need to confirm that
1816 * we have the current page before we initiate the read ahead.
1817 */
1818 nextrio = ip->i_nextrio;
1819 if (seqmode &&
1820 pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
1821 nextrio < ip->i_size && page_exists(vp, pgoff))
1822 ud_getpage_ra(vp, pgoff, seg, pgaddr);
1823
1824 if ((pp = page_lookup(vp, pgoff, se)) != NULL) {
1825
1826 /*
1827 * We found the page in the page cache.
1828 */
1829 *pl++ = pp;
1830 pgoff += pgsize;
1831 pgaddr += pgsize;
1832 len -= pgsize;
1833 plsz -= pgsize;
1834 } else {
1835
1836 /*
1837 * We have to create the page, or read it from disk.
1838 */
1839 if (error = ud_getpage_miss(vp, pgoff, len,
1840 seg, pgaddr, pl, plsz, rw, seqmode)) {
1841 goto error_out;
1842 }
1843
1844 while (*pl != NULL) {
1845 pl++;
1846 pgoff += pgsize;
1847 pgaddr += pgsize;
1848 len -= pgsize;
1849 plsz -= pgsize;
1850 }
1851 }
1852 }
1853
1854 /*
1855 * Return pages up to plsz if they are in the page cache.
1856 * We cannot return pages if there is a chance that they are
1857 * backed with a UDF hole and rw is S_WRITE or S_CREATE.
1858 */
1859 if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {
1860
1861 ASSERT((protp == NULL) ||
1862 !(has_holes && (*protp & PROT_WRITE)));
1863
1864 eoff = pgoff + plsz;
1865 while (pgoff < eoff) {
1866 page_t *pp;
1867
1868 if ((pp = page_lookup_nowait(vp, pgoff,
1869 SE_SHARED)) == NULL)
1870 break;
1871
1872 *pl++ = pp;
1873 pgoff += pgsize;
1874 plsz -= pgsize;
1875 }
1876 }
1877
1878 if (plarr)
1879 *pl = NULL; /* Terminate page list */
1880 ip->i_nextr = pgoff;
1881
1882 error_out:
1883 if (error && plarr) {
1884 /*
1885 * Release any pages we have locked.
1886 */
1887 while (pl > &plarr[0])
1888 page_unlock(*--pl);
1889
1890 plarr[0] = NULL;
1891 }
1892
1893 update_inode:
1894 #ifdef __lock_lint
1895 rw_exit(&ip->i_contents);
1896 #else
1897 if (dolock) {
1898 rw_exit(&ip->i_contents);
1899 }
1900 #endif
1901
1902 /*
1903 * If the inode is not already marked for IACC (in rwip() for read)
1904 * and the inode is not marked for no access time update (in rwip()
1905 * for write) then update the inode access time and mod time now.
1906 */
1907 mutex_enter(&ip->i_tlock);
1908 if ((ip->i_flag & (IACC | INOACC)) == 0) {
1909 if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
1910 ip->i_flag |= IACC;
1911 }
1912 if (rw == S_WRITE) {
1913 ip->i_flag |= IUPD;
1914 }
1915 ITIMES_NOLOCK(ip);
1916 }
1917 mutex_exit(&ip->i_tlock);
1918
1919 return (error);
1920 }
1921
1922 int32_t ud_delay = 1;
1923
1924 /* ARGSUSED */
1925 static int32_t
udf_putpage(struct vnode * vp,offset_t off,size_t len,int32_t flags,struct cred * cr,caller_context_t * ct)1926 udf_putpage(
1927 struct vnode *vp,
1928 offset_t off,
1929 size_t len,
1930 int32_t flags,
1931 struct cred *cr,
1932 caller_context_t *ct)
1933 {
1934 struct ud_inode *ip;
1935 int32_t error = 0;
1936
1937 ud_printf("udf_putpage\n");
1938
1939 ip = VTOI(vp);
1940 #ifdef __lock_lint
1941 rw_enter(&ip->i_contents, RW_WRITER);
1942 #endif
1943
1944 if (vp->v_count == 0) {
1945 cmn_err(CE_WARN, "ud_putpage : bad v_count");
1946 error = EINVAL;
1947 goto out;
1948 }
1949
1950 if (vp->v_flag & VNOMAP) {
1951 error = ENOSYS;
1952 goto out;
1953 }
1954
1955 if (flags & B_ASYNC) {
1956 if (ud_delay && len &&
1957 (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
1958 mutex_enter(&ip->i_tlock);
1959
1960 /*
1961 * If nobody stalled, start a new cluster.
1962 */
1963 if (ip->i_delaylen == 0) {
1964 ip->i_delayoff = off;
1965 ip->i_delaylen = len;
1966 mutex_exit(&ip->i_tlock);
1967 goto out;
1968 }
1969
1970 /*
1971 * If we have a full cluster or they are not contig,
1972 * then push last cluster and start over.
1973 */
1974 if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
1975 ip->i_delayoff + ip->i_delaylen != off) {
1976 u_offset_t doff;
1977 size_t dlen;
1978
1979 doff = ip->i_delayoff;
1980 dlen = ip->i_delaylen;
1981 ip->i_delayoff = off;
1982 ip->i_delaylen = len;
1983 mutex_exit(&ip->i_tlock);
1984 error = ud_putpages(vp, doff, dlen, flags, cr);
1985 /* LMXXX - flags are new val, not old */
1986 goto out;
1987 }
1988
1989 /*
1990 * There is something there, it's not full, and
1991 * it is contig.
1992 */
1993 ip->i_delaylen += len;
1994 mutex_exit(&ip->i_tlock);
1995 goto out;
1996 }
1997
1998 /*
1999 * Must have weird flags or we are not clustering.
2000 */
2001 }
2002
2003 error = ud_putpages(vp, off, len, flags, cr);
2004
2005 out:
2006 #ifdef __lock_lint
2007 rw_exit(&ip->i_contents);
2008 #endif
2009 return (error);
2010 }
2011
2012 /* ARGSUSED */
2013 static int32_t
udf_map(struct vnode * vp,offset_t off,struct as * as,caddr_t * addrp,size_t len,uint8_t prot,uint8_t maxprot,uint32_t flags,struct cred * cr,caller_context_t * ct)2014 udf_map(
2015 struct vnode *vp,
2016 offset_t off,
2017 struct as *as,
2018 caddr_t *addrp,
2019 size_t len,
2020 uint8_t prot,
2021 uint8_t maxprot,
2022 uint32_t flags,
2023 struct cred *cr,
2024 caller_context_t *ct)
2025 {
2026 struct segvn_crargs vn_a;
2027 int32_t error = 0;
2028
2029 ud_printf("udf_map\n");
2030
2031 if (vp->v_flag & VNOMAP) {
2032 error = ENOSYS;
2033 goto end;
2034 }
2035
2036 if ((off < (offset_t)0) ||
2037 ((off + len) < (offset_t)0)) {
2038 error = EINVAL;
2039 goto end;
2040 }
2041
2042 if (vp->v_type != VREG) {
2043 error = ENODEV;
2044 goto end;
2045 }
2046
2047 /*
2048 * If file is being locked, disallow mapping.
2049 */
2050 if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) {
2051 error = EAGAIN;
2052 goto end;
2053 }
2054
2055 as_rangelock(as);
2056 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
2057 if (error != 0) {
2058 as_rangeunlock(as);
2059 goto end;
2060 }
2061
2062 vn_a.vp = vp;
2063 vn_a.offset = off;
2064 vn_a.type = flags & MAP_TYPE;
2065 vn_a.prot = prot;
2066 vn_a.maxprot = maxprot;
2067 vn_a.cred = cr;
2068 vn_a.amp = NULL;
2069 vn_a.flags = flags & ~MAP_TYPE;
2070 vn_a.szc = 0;
2071 vn_a.lgrp_mem_policy_flags = 0;
2072
2073 error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a);
2074 as_rangeunlock(as);
2075
2076 end:
2077 return (error);
2078 }
2079
2080 /* ARGSUSED */
2081 static int32_t
udf_addmap(struct vnode * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uint8_t prot,uint8_t maxprot,uint32_t flags,struct cred * cr,caller_context_t * ct)2082 udf_addmap(struct vnode *vp,
2083 offset_t off,
2084 struct as *as,
2085 caddr_t addr,
2086 size_t len,
2087 uint8_t prot,
2088 uint8_t maxprot,
2089 uint32_t flags,
2090 struct cred *cr,
2091 caller_context_t *ct)
2092 {
2093 struct ud_inode *ip = VTOI(vp);
2094
2095 ud_printf("udf_addmap\n");
2096
2097 if (vp->v_flag & VNOMAP) {
2098 return (ENOSYS);
2099 }
2100
2101 mutex_enter(&ip->i_tlock);
2102 ip->i_mapcnt += btopr(len);
2103 mutex_exit(&ip->i_tlock);
2104
2105 return (0);
2106 }
2107
2108 /* ARGSUSED */
2109 static int32_t
udf_delmap(struct vnode * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uint32_t prot,uint32_t maxprot,uint32_t flags,struct cred * cr,caller_context_t * ct)2110 udf_delmap(
2111 struct vnode *vp, offset_t off,
2112 struct as *as,
2113 caddr_t addr,
2114 size_t len,
2115 uint32_t prot,
2116 uint32_t maxprot,
2117 uint32_t flags,
2118 struct cred *cr,
2119 caller_context_t *ct)
2120 {
2121 struct ud_inode *ip = VTOI(vp);
2122
2123 ud_printf("udf_delmap\n");
2124
2125 if (vp->v_flag & VNOMAP) {
2126 return (ENOSYS);
2127 }
2128
2129 mutex_enter(&ip->i_tlock);
2130 ip->i_mapcnt -= btopr(len); /* Count released mappings */
2131 ASSERT(ip->i_mapcnt >= 0);
2132 mutex_exit(&ip->i_tlock);
2133
2134 return (0);
2135 }
2136
2137 /* ARGSUSED */
2138 static int32_t
udf_l_pathconf(struct vnode * vp,int32_t cmd,ulong_t * valp,struct cred * cr,caller_context_t * ct)2139 udf_l_pathconf(
2140 struct vnode *vp,
2141 int32_t cmd,
2142 ulong_t *valp,
2143 struct cred *cr,
2144 caller_context_t *ct)
2145 {
2146 int32_t error = 0;
2147
2148 ud_printf("udf_l_pathconf\n");
2149
2150 if (cmd == _PC_FILESIZEBITS) {
2151 /*
2152 * udf supports 64 bits as file size
2153 * but there are several other restrictions
2154 * it only supports 32-bit block numbers and
2155 * daddr32_t is only and int32_t so taking these
2156 * into account we can stay just as where ufs is
2157 */
2158 *valp = 41;
2159 } else if (cmd == _PC_TIMESTAMP_RESOLUTION) {
2160 /* nanosecond timestamp resolution */
2161 *valp = 1L;
2162 } else {
2163 error = fs_pathconf(vp, cmd, valp, cr, ct);
2164 }
2165
2166 return (error);
2167 }
2168
2169 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0;
2170 #ifndef __lint
2171 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads))
2172 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes))
2173 #endif
2174 /*
2175 * Assumption is that there will not be a pageio request
2176 * to a enbedded file
2177 */
2178 /* ARGSUSED */
2179 static int32_t
udf_pageio(struct vnode * vp,struct page * pp,u_offset_t io_off,size_t io_len,int32_t flags,struct cred * cr,caller_context_t * ct)2180 udf_pageio(
2181 struct vnode *vp,
2182 struct page *pp,
2183 u_offset_t io_off,
2184 size_t io_len,
2185 int32_t flags,
2186 struct cred *cr,
2187 caller_context_t *ct)
2188 {
2189 daddr_t bn;
2190 struct buf *bp;
2191 struct ud_inode *ip = VTOI(vp);
2192 int32_t dolock, error = 0, contig, multi_io;
2193 size_t done_len = 0, cur_len = 0;
2194 page_t *npp = NULL, *opp = NULL, *cpp = pp;
2195
2196 if (pp == NULL) {
2197 return (EINVAL);
2198 }
2199
2200 dolock = (rw_owner(&ip->i_contents) != curthread);
2201
2202 /*
2203 * We need a better check. Ideally, we would use another
2204 * vnodeops so that hlocked and forcibly unmounted file
2205 * systems would return EIO where appropriate and w/o the
2206 * need for these checks.
2207 */
2208 if (ip->i_udf == NULL) {
2209 return (EIO);
2210 }
2211
2212 #ifdef __lock_lint
2213 rw_enter(&ip->i_contents, RW_READER);
2214 #else
2215 if (dolock) {
2216 rw_enter(&ip->i_contents, RW_READER);
2217 }
2218 #endif
2219
2220 /*
2221 * Break the io request into chunks, one for each contiguous
2222 * stretch of disk blocks in the target file.
2223 */
2224 while (done_len < io_len) {
2225 ASSERT(cpp);
2226 bp = NULL;
2227 contig = 0;
2228 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
2229 &bn, &contig)) {
2230 break;
2231 }
2232
2233 if (bn == UDF_HOLE) { /* No holey swapfiles */
2234 cmn_err(CE_WARN, "SWAP file has HOLES");
2235 error = EINVAL;
2236 break;
2237 }
2238
2239 cur_len = MIN(io_len - done_len, contig);
2240
2241 /*
2242 * Check if more than one I/O is
2243 * required to complete the given
2244 * I/O operation
2245 */
2246 if (ip->i_udf->udf_lbsize < PAGESIZE) {
2247 if (cur_len >= PAGESIZE) {
2248 multi_io = 0;
2249 cur_len &= PAGEMASK;
2250 } else {
2251 multi_io = 1;
2252 cur_len = MIN(io_len - done_len, PAGESIZE);
2253 }
2254 }
2255 page_list_break(&cpp, &npp, btop(cur_len));
2256
2257 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags);
2258 ASSERT(bp != NULL);
2259
2260 bp->b_edev = ip->i_dev;
2261 bp->b_dev = cmpdev(ip->i_dev);
2262 bp->b_blkno = bn;
2263 bp->b_un.b_addr = (caddr_t)0;
2264 bp->b_file = vp;
2265 bp->b_offset = (offset_t)(io_off + done_len);
2266
2267 /*
2268 * ub.ub_pageios.value.ul++;
2269 */
2270 if (multi_io == 0) {
2271 (void) bdev_strategy(bp);
2272 } else {
2273 error = ud_multi_strat(ip, cpp, bp,
2274 (u_offset_t)(io_off + done_len));
2275 if (error != 0) {
2276 pageio_done(bp);
2277 break;
2278 }
2279 }
2280 if (flags & B_READ) {
2281 ud_pageio_reads++;
2282 } else {
2283 ud_pageio_writes++;
2284 }
2285
2286 /*
2287 * If the request is not B_ASYNC, wait for i/o to complete
2288 * and re-assemble the page list to return to the caller.
2289 * If it is B_ASYNC we leave the page list in pieces and
2290 * cleanup() will dispose of them.
2291 */
2292 if ((flags & B_ASYNC) == 0) {
2293 error = biowait(bp);
2294 pageio_done(bp);
2295 if (error) {
2296 break;
2297 }
2298 page_list_concat(&opp, &cpp);
2299 }
2300 cpp = npp;
2301 npp = NULL;
2302 done_len += cur_len;
2303 }
2304
2305 ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len));
2306 if (error) {
2307 if (flags & B_ASYNC) {
2308 /* Cleanup unprocessed parts of list */
2309 page_list_concat(&cpp, &npp);
2310 if (flags & B_READ) {
2311 pvn_read_done(cpp, B_ERROR);
2312 } else {
2313 pvn_write_done(cpp, B_ERROR);
2314 }
2315 } else {
2316 /* Re-assemble list and let caller clean up */
2317 page_list_concat(&opp, &cpp);
2318 page_list_concat(&opp, &npp);
2319 }
2320 }
2321
2322 #ifdef __lock_lint
2323 rw_exit(&ip->i_contents);
2324 #else
2325 if (dolock) {
2326 rw_exit(&ip->i_contents);
2327 }
2328 #endif
2329 return (error);
2330 }
2331
2332
2333
2334
2335 /* -------------------- local functions --------------------------- */
2336
2337
2338
2339 int32_t
ud_rdwri(enum uio_rw rw,int32_t ioflag,struct ud_inode * ip,caddr_t base,int32_t len,offset_t offset,enum uio_seg seg,int32_t * aresid,struct cred * cr)2340 ud_rdwri(enum uio_rw rw, int32_t ioflag,
2341 struct ud_inode *ip, caddr_t base, int32_t len,
2342 offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr)
2343 {
2344 int32_t error;
2345 struct uio auio;
2346 struct iovec aiov;
2347
2348 ud_printf("ud_rdwri\n");
2349
2350 bzero((caddr_t)&auio, sizeof (uio_t));
2351 bzero((caddr_t)&aiov, sizeof (iovec_t));
2352
2353 aiov.iov_base = base;
2354 aiov.iov_len = len;
2355 auio.uio_iov = &aiov;
2356 auio.uio_iovcnt = 1;
2357 auio.uio_loffset = offset;
2358 auio.uio_segflg = (int16_t)seg;
2359 auio.uio_resid = len;
2360
2361 if (rw == UIO_WRITE) {
2362 auio.uio_fmode = FWRITE;
2363 auio.uio_extflg = UIO_COPY_DEFAULT;
2364 auio.uio_llimit = curproc->p_fsz_ctl;
2365 error = ud_wrip(ip, &auio, ioflag, cr);
2366 } else {
2367 auio.uio_fmode = FREAD;
2368 auio.uio_extflg = UIO_COPY_CACHED;
2369 auio.uio_llimit = MAXOFFSET_T;
2370 error = ud_rdip(ip, &auio, ioflag, cr);
2371 }
2372
2373 if (aresid) {
2374 *aresid = auio.uio_resid;
2375 } else if (auio.uio_resid) {
2376 error = EIO;
2377 }
2378 return (error);
2379 }
2380
2381 /*
2382 * Free behind hacks. The pager is busted.
2383 * XXX - need to pass the information down to writedone() in a flag like B_SEQ
2384 * or B_FREE_IF_TIGHT_ON_MEMORY.
2385 */
/* Non-zero enables the free-behind check that ud_rdip() makes per chunk. */
int32_t ud_freebehind = 1;
/* ud_rdip() considers free-behind only at mapping offsets above this. */
int32_t ud_smallfile = 32 * 1024;
2388
2389 /* ARGSUSED */
2390 int32_t
ud_getpage_miss(struct vnode * vp,u_offset_t off,size_t len,struct seg * seg,caddr_t addr,page_t * pl[],size_t plsz,enum seg_rw rw,int32_t seq)2391 ud_getpage_miss(struct vnode *vp, u_offset_t off,
2392 size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
2393 size_t plsz, enum seg_rw rw, int32_t seq)
2394 {
2395 struct ud_inode *ip = VTOI(vp);
2396 int32_t err = 0;
2397 size_t io_len;
2398 u_offset_t io_off;
2399 u_offset_t pgoff;
2400 page_t *pp;
2401
2402 pl[0] = NULL;
2403
2404 /*
2405 * Figure out whether the page can be created, or must be
2406 * read from the disk
2407 */
2408 if (rw == S_CREATE) {
2409 if ((pp = page_create_va(vp, off,
2410 PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
2411 cmn_err(CE_WARN, "ud_getpage_miss: page_create");
2412 return (EINVAL);
2413 }
2414 io_len = PAGESIZE;
2415 } else {
2416 pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
2417 &io_len, off, PAGESIZE, 0);
2418
2419 /*
2420 * Some other thread has entered the page.
2421 * ud_getpage will retry page_lookup.
2422 */
2423 if (pp == NULL) {
2424 return (0);
2425 }
2426
2427 /*
2428 * Fill the page with as much data as we can from the file.
2429 */
2430 err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
2431 if (err) {
2432 pvn_read_done(pp, B_ERROR);
2433 return (err);
2434 }
2435
2436 /*
2437 * XXX ??? ufs has io_len instead of pgoff below
2438 */
2439 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2440
2441 /*
2442 * If the file access is sequential, initiate read ahead
2443 * of the next cluster.
2444 */
2445 if (seq && ip->i_nextrio < ip->i_size) {
2446 ud_getpage_ra(vp, off, seg, addr);
2447 }
2448 }
2449
2450 outmiss:
2451 pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw);
2452 return (err);
2453 }
2454
2455 /* ARGSUSED */
2456 void
ud_getpage_ra(struct vnode * vp,u_offset_t off,struct seg * seg,caddr_t addr)2457 ud_getpage_ra(struct vnode *vp,
2458 u_offset_t off, struct seg *seg, caddr_t addr)
2459 {
2460 page_t *pp;
2461 size_t io_len;
2462 struct ud_inode *ip = VTOI(vp);
2463 u_offset_t io_off = ip->i_nextrio, pgoff;
2464 caddr_t addr2 = addr + (io_off - off);
2465 daddr_t bn;
2466 int32_t contig = 0;
2467
2468 /*
2469 * Is this test needed?
2470 */
2471
2472 if (addr2 >= seg->s_base + seg->s_size) {
2473 return;
2474 }
2475
2476 contig = 0;
2477 if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) {
2478 return;
2479 }
2480
2481 pp = pvn_read_kluster(vp, io_off, seg, addr2,
2482 &io_off, &io_len, io_off, PAGESIZE, 1);
2483
2484 /*
2485 * Some other thread has entered the page.
2486 * So no read head done here (ie we will have to and wait
2487 * for the read when needed).
2488 */
2489
2490 if (pp == NULL) {
2491 return;
2492 }
2493
2494 (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff);
2495 ip->i_nextrio = io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2496 }
2497
2498 int
ud_page_fill(struct ud_inode * ip,page_t * pp,u_offset_t off,uint32_t bflgs,u_offset_t * pg_off)2499 ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
2500 uint32_t bflgs, u_offset_t *pg_off)
2501 {
2502 daddr_t bn;
2503 struct buf *bp;
2504 caddr_t kaddr, caddr;
2505 int32_t error = 0, contig = 0, multi_io = 0;
2506 int32_t lbsize = ip->i_udf->udf_lbsize;
2507 int32_t lbmask = ip->i_udf->udf_lbmask;
2508 uint64_t isize;
2509
2510 isize = (ip->i_size + lbmask) & (~lbmask);
2511 if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2512
2513 /*
2514 * Embedded file read file_entry
2515 * from buffer cache and copy the required
2516 * portions
2517 */
2518 bp = ud_bread(ip->i_dev,
2519 ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
2520 if ((bp->b_error == 0) &&
2521 (bp->b_resid == 0)) {
2522
2523 caddr = bp->b_un.b_addr + ip->i_data_off;
2524
2525 /*
2526 * mapin to kvm
2527 */
2528 kaddr = (caddr_t)ppmapin(pp,
2529 PROT_READ | PROT_WRITE, (caddr_t)-1);
2530 (void) kcopy(caddr, kaddr, ip->i_size);
2531
2532 /*
2533 * mapout of kvm
2534 */
2535 ppmapout(kaddr);
2536 }
2537 brelse(bp);
2538 contig = ip->i_size;
2539 } else {
2540
2541 /*
2542 * Get the continuous size and block number
2543 * at offset "off"
2544 */
2545 if (error = ud_bmap_read(ip, off, &bn, &contig))
2546 goto out;
2547 contig = MIN(contig, PAGESIZE);
2548 contig = (contig + lbmask) & (~lbmask);
2549
2550 /*
2551 * Zero part of the page which we are not
2552 * going to read from the disk.
2553 */
2554
2555 if (bn == UDF_HOLE) {
2556
2557 /*
2558 * This is a HOLE. Just zero out
2559 * the page
2560 */
2561 if (((off + contig) == isize) ||
2562 (contig == PAGESIZE)) {
2563 pagezero(pp->p_prev, 0, PAGESIZE);
2564 goto out;
2565 }
2566 }
2567
2568 if (contig < PAGESIZE) {
2569 uint64_t count;
2570
2571 count = isize - off;
2572 if (contig != count) {
2573 multi_io = 1;
2574 contig = (int32_t)(MIN(count, PAGESIZE));
2575 } else {
2576 pagezero(pp->p_prev, contig, PAGESIZE - contig);
2577 }
2578 }
2579
2580 /*
2581 * Get a bp and initialize it
2582 */
2583 bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
2584 ASSERT(bp != NULL);
2585
2586 bp->b_edev = ip->i_dev;
2587 bp->b_dev = cmpdev(ip->i_dev);
2588 bp->b_blkno = bn;
2589 bp->b_un.b_addr = 0;
2590 bp->b_file = ip->i_vnode;
2591
2592 /*
2593 * Start I/O
2594 */
2595 if (multi_io == 0) {
2596
2597 /*
2598 * Single I/O is sufficient for this page
2599 */
2600 (void) bdev_strategy(bp);
2601 } else {
2602
2603 /*
2604 * We need to do the I/O in
2605 * piece's
2606 */
2607 error = ud_multi_strat(ip, pp, bp, off);
2608 if (error != 0) {
2609 goto out;
2610 }
2611 }
2612 if ((bflgs & B_ASYNC) == 0) {
2613
2614 /*
2615 * Wait for i/o to complete.
2616 */
2617
2618 error = biowait(bp);
2619 pageio_done(bp);
2620 if (error) {
2621 goto out;
2622 }
2623 }
2624 }
2625 if ((off + contig) >= ip->i_size) {
2626 contig = ip->i_size - off;
2627 }
2628
2629 out:
2630 *pg_off = contig;
2631 return (error);
2632 }
2633
2634 int32_t
ud_putpages(struct vnode * vp,offset_t off,size_t len,int32_t flags,struct cred * cr)2635 ud_putpages(struct vnode *vp, offset_t off,
2636 size_t len, int32_t flags, struct cred *cr)
2637 {
2638 struct ud_inode *ip;
2639 page_t *pp;
2640 u_offset_t io_off;
2641 size_t io_len;
2642 u_offset_t eoff;
2643 int32_t err = 0;
2644 int32_t dolock;
2645
2646 ud_printf("ud_putpages\n");
2647
2648 if (vp->v_count == 0) {
2649 cmn_err(CE_WARN, "ud_putpages: bad v_count");
2650 return (EINVAL);
2651 }
2652
2653 ip = VTOI(vp);
2654
2655 /*
2656 * Acquire the readers/write inode lock before locking
2657 * any pages in this inode.
2658 * The inode lock is held during i/o.
2659 */
2660 if (len == 0) {
2661 mutex_enter(&ip->i_tlock);
2662 ip->i_delayoff = ip->i_delaylen = 0;
2663 mutex_exit(&ip->i_tlock);
2664 }
2665 #ifdef __lock_lint
2666 rw_enter(&ip->i_contents, RW_READER);
2667 #else
2668 dolock = (rw_owner(&ip->i_contents) != curthread);
2669 if (dolock) {
2670 rw_enter(&ip->i_contents, RW_READER);
2671 }
2672 #endif
2673
2674 if (!vn_has_cached_data(vp)) {
2675 #ifdef __lock_lint
2676 rw_exit(&ip->i_contents);
2677 #else
2678 if (dolock) {
2679 rw_exit(&ip->i_contents);
2680 }
2681 #endif
2682 return (0);
2683 }
2684
2685 if (len == 0) {
2686 /*
2687 * Search the entire vp list for pages >= off.
2688 */
2689 err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
2690 flags, cr);
2691 } else {
2692 /*
2693 * Loop over all offsets in the range looking for
2694 * pages to deal with.
2695 */
2696 if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
2697 eoff = MIN(off + len, eoff);
2698 } else {
2699 eoff = off + len;
2700 }
2701
2702 for (io_off = off; io_off < eoff; io_off += io_len) {
2703 /*
2704 * If we are not invalidating, synchronously
2705 * freeing or writing pages, use the routine
2706 * page_lookup_nowait() to prevent reclaiming
2707 * them from the free list.
2708 */
2709 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
2710 pp = page_lookup(vp, io_off,
2711 (flags & (B_INVAL | B_FREE)) ?
2712 SE_EXCL : SE_SHARED);
2713 } else {
2714 pp = page_lookup_nowait(vp, io_off,
2715 (flags & B_FREE) ? SE_EXCL : SE_SHARED);
2716 }
2717
2718 if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
2719 io_len = PAGESIZE;
2720 } else {
2721
2722 err = ud_putapage(vp, pp,
2723 &io_off, &io_len, flags, cr);
2724 if (err != 0) {
2725 break;
2726 }
2727 /*
2728 * "io_off" and "io_len" are returned as
2729 * the range of pages we actually wrote.
2730 * This allows us to skip ahead more quickly
2731 * since several pages may've been dealt
2732 * with by this iteration of the loop.
2733 */
2734 }
2735 }
2736 }
2737 if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
2738 /*
2739 * We have just sync'ed back all the pages on
2740 * the inode, turn off the IMODTIME flag.
2741 */
2742 mutex_enter(&ip->i_tlock);
2743 ip->i_flag &= ~IMODTIME;
2744 mutex_exit(&ip->i_tlock);
2745 }
2746 #ifdef __lock_lint
2747 rw_exit(&ip->i_contents);
2748 #else
2749 if (dolock) {
2750 rw_exit(&ip->i_contents);
2751 }
2752 #endif
2753 return (err);
2754 }
2755
2756 /* ARGSUSED */
2757 int32_t
ud_putapage(struct vnode * vp,page_t * pp,u_offset_t * offp,size_t * lenp,int32_t flags,struct cred * cr)2758 ud_putapage(struct vnode *vp,
2759 page_t *pp, u_offset_t *offp,
2760 size_t *lenp, int32_t flags, struct cred *cr)
2761 {
2762 daddr_t bn;
2763 size_t io_len;
2764 struct ud_inode *ip;
2765 int32_t error = 0, contig, multi_io = 0;
2766 struct udf_vfs *udf_vfsp;
2767 u_offset_t off, io_off;
2768 caddr_t kaddr, caddr;
2769 struct buf *bp = NULL;
2770 int32_t lbmask;
2771 uint64_t isize;
2772 uint16_t crc_len;
2773 struct file_entry *fe;
2774
2775 ud_printf("ud_putapage\n");
2776
2777 ip = VTOI(vp);
2778 ASSERT(ip);
2779 ASSERT(RW_LOCK_HELD(&ip->i_contents));
2780 lbmask = ip->i_udf->udf_lbmask;
2781 isize = (ip->i_size + lbmask) & (~lbmask);
2782
2783 udf_vfsp = ip->i_udf;
2784 ASSERT(udf_vfsp->udf_flags & UDF_FL_RW);
2785
2786 /*
2787 * If the modified time on the inode has not already been
2788 * set elsewhere (e.g. for write/setattr) we set the time now.
2789 * This gives us approximate modified times for mmap'ed files
2790 * which are modified via stores in the user address space.
2791 */
2792 if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) {
2793 mutex_enter(&ip->i_tlock);
2794 ip->i_flag |= IUPD;
2795 ITIMES_NOLOCK(ip);
2796 mutex_exit(&ip->i_tlock);
2797 }
2798
2799
2800 /*
2801 * Align the request to a block boundry (for old file systems),
2802 * and go ask bmap() how contiguous things are for this file.
2803 */
2804 off = pp->p_offset & ~(offset_t)lbmask;
2805 /* block align it */
2806
2807
2808 if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2809 ASSERT(ip->i_size <= ip->i_max_emb);
2810
2811 pp = pvn_write_kluster(vp, pp, &io_off,
2812 &io_len, off, PAGESIZE, flags);
2813 if (io_len == 0) {
2814 io_len = PAGESIZE;
2815 }
2816
2817 bp = ud_bread(ip->i_dev,
2818 ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
2819 udf_vfsp->udf_lbsize);
2820 fe = (struct file_entry *)bp->b_un.b_addr;
2821 if ((bp->b_flags & B_ERROR) ||
2822 (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
2823 ip->i_icb_block,
2824 1, udf_vfsp->udf_lbsize) != 0)) {
2825 if (pp != NULL)
2826 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2827 if (bp->b_flags & B_ERROR) {
2828 error = EIO;
2829 } else {
2830 error = EINVAL;
2831 }
2832 brelse(bp);
2833 return (error);
2834 }
2835 if ((bp->b_error == 0) &&
2836 (bp->b_resid == 0)) {
2837
2838 caddr = bp->b_un.b_addr + ip->i_data_off;
2839 kaddr = (caddr_t)ppmapin(pp,
2840 PROT_READ | PROT_WRITE, (caddr_t)-1);
2841 (void) kcopy(kaddr, caddr, ip->i_size);
2842 ppmapout(kaddr);
2843 }
2844 crc_len = offsetof(struct file_entry, fe_spec) +
2845 SWAP_32(fe->fe_len_ear);
2846 crc_len += ip->i_size;
2847 ud_make_tag(ip->i_udf, &fe->fe_tag,
2848 UD_FILE_ENTRY, ip->i_icb_block, crc_len);
2849
2850 bwrite(bp);
2851
2852 if (flags & B_ASYNC) {
2853 pvn_write_done(pp, flags);
2854 }
2855 contig = ip->i_size;
2856 } else {
2857
2858 if (error = ud_bmap_read(ip, off, &bn, &contig)) {
2859 goto out;
2860 }
2861 contig = MIN(contig, PAGESIZE);
2862 contig = (contig + lbmask) & (~lbmask);
2863
2864 if (contig < PAGESIZE) {
2865 uint64_t count;
2866
2867 count = isize - off;
2868 if (contig != count) {
2869 multi_io = 1;
2870 contig = (int32_t)(MIN(count, PAGESIZE));
2871 }
2872 }
2873
2874 if ((off + contig) > isize) {
2875 contig = isize - off;
2876 }
2877
2878 if (contig > PAGESIZE) {
2879 if (contig & PAGEOFFSET) {
2880 contig &= PAGEMASK;
2881 }
2882 }
2883
2884 pp = pvn_write_kluster(vp, pp, &io_off,
2885 &io_len, off, contig, flags);
2886 if (io_len == 0) {
2887 io_len = PAGESIZE;
2888 }
2889
2890 bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags);
2891 ASSERT(bp != NULL);
2892
2893 bp->b_edev = ip->i_dev;
2894 bp->b_dev = cmpdev(ip->i_dev);
2895 bp->b_blkno = bn;
2896 bp->b_un.b_addr = 0;
2897 bp->b_file = vp;
2898 bp->b_offset = (offset_t)off;
2899
2900
2901 /*
2902 * write throttle
2903 */
2904 ASSERT(bp->b_iodone == NULL);
2905 bp->b_iodone = ud_iodone;
2906 mutex_enter(&ip->i_tlock);
2907 ip->i_writes += bp->b_bcount;
2908 mutex_exit(&ip->i_tlock);
2909
2910 if (multi_io == 0) {
2911
2912 (void) bdev_strategy(bp);
2913 } else {
2914 error = ud_multi_strat(ip, pp, bp, off);
2915 if (error != 0) {
2916 goto out;
2917 }
2918 }
2919
2920 if ((flags & B_ASYNC) == 0) {
2921 /*
2922 * Wait for i/o to complete.
2923 */
2924 error = biowait(bp);
2925 pageio_done(bp);
2926 }
2927 }
2928
2929 if ((flags & B_ASYNC) == 0) {
2930 pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
2931 }
2932
2933 pp = NULL;
2934
2935 out:
2936 if (error != 0 && pp != NULL) {
2937 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2938 }
2939
2940 if (offp) {
2941 *offp = io_off;
2942 }
2943 if (lenp) {
2944 *lenp = io_len;
2945 }
2946
2947 return (error);
2948 }
2949
2950
2951 int32_t
ud_iodone(struct buf * bp)2952 ud_iodone(struct buf *bp)
2953 {
2954 struct ud_inode *ip;
2955
2956 ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ));
2957
2958 bp->b_iodone = NULL;
2959
2960 ip = VTOI(bp->b_pages->p_vnode);
2961
2962 mutex_enter(&ip->i_tlock);
2963 if (ip->i_writes >= ud_LW) {
2964 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) {
2965 if (ud_WRITES) {
2966 cv_broadcast(&ip->i_wrcv); /* wake all up */
2967 }
2968 }
2969 } else {
2970 ip->i_writes -= bp->b_bcount;
2971 }
2972 mutex_exit(&ip->i_tlock);
2973 iodone(bp);
2974 return (0);
2975 }
2976
2977 /* ARGSUSED3 */
2978 int32_t
ud_rdip(struct ud_inode * ip,struct uio * uio,int32_t ioflag,cred_t * cr)2979 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
2980 {
2981 struct vnode *vp;
2982 struct udf_vfs *udf_vfsp;
2983 krw_t rwtype;
2984 caddr_t base;
2985 uint32_t flags;
2986 int32_t error, n, on, mapon, dofree;
2987 u_offset_t off;
2988 long oresid = uio->uio_resid;
2989
2990 ASSERT(RW_LOCK_HELD(&ip->i_contents));
2991 if ((ip->i_type != VREG) &&
2992 (ip->i_type != VDIR) &&
2993 (ip->i_type != VLNK)) {
2994 return (EIO);
2995 }
2996
2997 if (uio->uio_loffset > MAXOFFSET_T) {
2998 return (0);
2999 }
3000
3001 if ((uio->uio_loffset < (offset_t)0) ||
3002 ((uio->uio_loffset + uio->uio_resid) < 0)) {
3003 return (EINVAL);
3004 }
3005 if (uio->uio_resid == 0) {
3006 return (0);
3007 }
3008
3009 vp = ITOV(ip);
3010 udf_vfsp = ip->i_udf;
3011 mutex_enter(&ip->i_tlock);
3012 ip->i_flag |= IACC;
3013 mutex_exit(&ip->i_tlock);
3014
3015 rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER);
3016
3017 do {
3018 offset_t diff;
3019 u_offset_t uoff = uio->uio_loffset;
3020 off = uoff & (offset_t)MAXBMASK;
3021 mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3022 on = (int)blkoff(udf_vfsp, uoff);
3023 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3024
3025 diff = ip->i_size - uoff;
3026
3027 if (diff <= (offset_t)0) {
3028 error = 0;
3029 goto out;
3030 }
3031 if (diff < (offset_t)n) {
3032 n = (int)diff;
3033 }
3034 dofree = ud_freebehind &&
3035 ip->i_nextr == (off & PAGEMASK) &&
3036 off > ud_smallfile;
3037
3038 #ifndef __lock_lint
3039 if (rwtype == RW_READER) {
3040 rw_exit(&ip->i_contents);
3041 }
3042 #endif
3043
3044 base = segmap_getmapflt(segkmap, vp, (off + mapon),
3045 (uint32_t)n, 1, S_READ);
3046 error = uiomove(base + mapon, (long)n, UIO_READ, uio);
3047
3048 flags = 0;
3049 if (!error) {
3050 /*
3051 * If read a whole block, or read to eof,
3052 * won't need this buffer again soon.
3053 */
3054 if (n + on == MAXBSIZE && ud_freebehind && dofree &&
3055 freemem < lotsfree + pages_before_pager) {
3056 flags = SM_FREE | SM_DONTNEED |SM_ASYNC;
3057 }
3058 /*
3059 * In POSIX SYNC (FSYNC and FDSYNC) read mode,
3060 * we want to make sure that the page which has
3061 * been read, is written on disk if it is dirty.
3062 * And corresponding indirect blocks should also
3063 * be flushed out.
3064 */
3065 if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) {
3066 flags &= ~SM_ASYNC;
3067 flags |= SM_WRITE;
3068 }
3069 error = segmap_release(segkmap, base, flags);
3070 } else {
3071 (void) segmap_release(segkmap, base, flags);
3072 }
3073
3074 #ifndef __lock_lint
3075 if (rwtype == RW_READER) {
3076 rw_enter(&ip->i_contents, rwtype);
3077 }
3078 #endif
3079 } while (error == 0 && uio->uio_resid > 0 && n != 0);
3080 out:
3081 /*
3082 * Inode is updated according to this table if FRSYNC is set.
3083 *
3084 * FSYNC FDSYNC(posix.4)
3085 * --------------------------
3086 * always IATTCHG|IBDWRITE
3087 */
3088 if (ioflag & FRSYNC) {
3089 if ((ioflag & FSYNC) ||
3090 ((ioflag & FDSYNC) &&
3091 (ip->i_flag & (IATTCHG|IBDWRITE)))) {
3092 rw_exit(&ip->i_contents);
3093 rw_enter(&ip->i_contents, RW_WRITER);
3094 ud_iupdat(ip, 1);
3095 }
3096 }
3097 /*
3098 * If we've already done a partial read, terminate
3099 * the read but return no error.
3100 */
3101 if (oresid != uio->uio_resid) {
3102 error = 0;
3103 }
3104 ITIMES(ip);
3105
3106 return (error);
3107 }
3108
3109 int32_t
ud_wrip(struct ud_inode * ip,struct uio * uio,int ioflag,struct cred * cr)3110 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
3111 {
3112 caddr_t base;
3113 struct vnode *vp;
3114 struct udf_vfs *udf_vfsp;
3115 uint32_t flags;
3116 int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0;
3117 int32_t pagecreate, newpage;
3118 uint64_t old_i_size;
3119 u_offset_t off;
3120 long start_resid = uio->uio_resid, premove_resid;
3121 rlim64_t limit = uio->uio_limit;
3122
3123
3124 ASSERT(RW_WRITE_HELD(&ip->i_contents));
3125 if ((ip->i_type != VREG) &&
3126 (ip->i_type != VDIR) &&
3127 (ip->i_type != VLNK)) {
3128 return (EIO);
3129 }
3130
3131 if (uio->uio_loffset >= MAXOFFSET_T) {
3132 return (EFBIG);
3133 }
3134 /*
3135 * see udf_l_pathconf
3136 */
3137 if (limit > (((uint64_t)1 << 40) - 1)) {
3138 limit = ((uint64_t)1 << 40) - 1;
3139 }
3140 if (uio->uio_loffset >= limit) {
3141 proc_t *p = ttoproc(curthread);
3142
3143 mutex_enter(&p->p_lock);
3144 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
3145 p, RCA_UNSAFE_SIGINFO);
3146 mutex_exit(&p->p_lock);
3147 return (EFBIG);
3148 }
3149 if ((uio->uio_loffset < (offset_t)0) ||
3150 ((uio->uio_loffset + uio->uio_resid) < 0)) {
3151 return (EINVAL);
3152 }
3153 if (uio->uio_resid == 0) {
3154 return (0);
3155 }
3156
3157 mutex_enter(&ip->i_tlock);
3158 ip->i_flag |= INOACC;
3159
3160 if (ioflag & (FSYNC | FDSYNC)) {
3161 ip->i_flag |= ISYNC;
3162 iupdat_flag = 1;
3163 }
3164 mutex_exit(&ip->i_tlock);
3165
3166 udf_vfsp = ip->i_udf;
3167 vp = ITOV(ip);
3168
3169 do {
3170 u_offset_t uoff = uio->uio_loffset;
3171 off = uoff & (offset_t)MAXBMASK;
3172 mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3173 on = (int)blkoff(udf_vfsp, uoff);
3174 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3175
3176 if (ip->i_type == VREG && uoff + n >= limit) {
3177 if (uoff >= limit) {
3178 error = EFBIG;
3179 goto out;
3180 }
3181 n = (int)(limit - (rlim64_t)uoff);
3182 }
3183 if (uoff + n > ip->i_size) {
3184 /*
3185 * We are extending the length of the file.
3186 * bmap is used so that we are sure that
3187 * if we need to allocate new blocks, that it
3188 * is done here before we up the file size.
3189 */
3190 error = ud_bmap_write(ip, uoff,
3191 (int)(on + n), mapon == 0, cr);
3192 if (error) {
3193 break;
3194 }
3195 i_size_changed = 1;
3196 old_i_size = ip->i_size;
3197 ip->i_size = uoff + n;
3198 /*
3199 * If we are writing from the beginning of
3200 * the mapping, we can just create the
3201 * pages without having to read them.
3202 */
3203 pagecreate = (mapon == 0);
3204 } else if (n == MAXBSIZE) {
3205 /*
3206 * Going to do a whole mappings worth,
3207 * so we can just create the pages w/o
3208 * having to read them in. But before
3209 * we do that, we need to make sure any
3210 * needed blocks are allocated first.
3211 */
3212 error = ud_bmap_write(ip, uoff,
3213 (int)(on + n), 1, cr);
3214 if (error) {
3215 break;
3216 }
3217 pagecreate = 1;
3218 } else {
3219 pagecreate = 0;
3220 }
3221
3222 rw_exit(&ip->i_contents);
3223
3224 /*
3225 * Touch the page and fault it in if it is not in
3226 * core before segmap_getmapflt can lock it. This
3227 * is to avoid the deadlock if the buffer is mapped
3228 * to the same file through mmap which we want to
3229 * write to.
3230 */
3231 uio_prefaultpages((long)n, uio);
3232
3233 base = segmap_getmapflt(segkmap, vp, (off + mapon),
3234 (uint32_t)n, !pagecreate, S_WRITE);
3235
3236 /*
3237 * segmap_pagecreate() returns 1 if it calls
3238 * page_create_va() to allocate any pages.
3239 */
3240 newpage = 0;
3241 if (pagecreate) {
3242 newpage = segmap_pagecreate(segkmap, base,
3243 (size_t)n, 0);
3244 }
3245
3246 premove_resid = uio->uio_resid;
3247 error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
3248
3249 if (pagecreate &&
3250 uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
3251 /*
3252 * We created pages w/o initializing them completely,
3253 * thus we need to zero the part that wasn't set up.
3254 * This happens on most EOF write cases and if
3255 * we had some sort of error during the uiomove.
3256 */
3257 int nzero, nmoved;
3258
3259 nmoved = (int)(uio->uio_loffset - (off + mapon));
3260 ASSERT(nmoved >= 0 && nmoved <= n);
3261 nzero = roundup(on + n, PAGESIZE) - nmoved;
3262 ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE);
3263 (void) kzero(base + mapon + nmoved, (uint32_t)nzero);
3264 }
3265
3266 /*
3267 * Unlock the pages allocated by page_create_va()
3268 * in segmap_pagecreate()
3269 */
3270 if (newpage) {
3271 segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
3272 }
3273
3274 if (error) {
3275 /*
3276 * If we failed on a write, we may have already
3277 * allocated file blocks as well as pages. It's
3278 * hard to undo the block allocation, but we must
3279 * be sure to invalidate any pages that may have
3280 * been allocated.
3281 */
3282 (void) segmap_release(segkmap, base, SM_INVAL);
3283 } else {
3284 flags = 0;
3285 /*
3286 * Force write back for synchronous write cases.
3287 */
3288 if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) {
3289 /*
3290 * If the sticky bit is set but the
3291 * execute bit is not set, we do a
3292 * synchronous write back and free
3293 * the page when done. We set up swap
3294 * files to be handled this way to
3295 * prevent servers from keeping around
3296 * the client's swap pages too long.
3297 * XXX - there ought to be a better way.
3298 */
3299 if (IS_SWAPVP(vp)) {
3300 flags = SM_WRITE | SM_FREE |
3301 SM_DONTNEED;
3302 iupdat_flag = 0;
3303 } else {
3304 flags = SM_WRITE;
3305 }
3306 } else if (((mapon + n) == MAXBSIZE) ||
3307 IS_SWAPVP(vp)) {
3308 /*
3309 * Have written a whole block.
3310 * Start an asynchronous write and
3311 * mark the buffer to indicate that
3312 * it won't be needed again soon.
3313 */
3314 flags = SM_WRITE |SM_ASYNC | SM_DONTNEED;
3315 }
3316 error = segmap_release(segkmap, base, flags);
3317
3318 /*
3319 * If the operation failed and is synchronous,
3320 * then we need to unwind what uiomove() last
3321 * did so we can potentially return an error to
3322 * the caller. If this write operation was
3323 * done in two pieces and the first succeeded,
3324 * then we won't return an error for the second
3325 * piece that failed. However, we only want to
3326 * return a resid value that reflects what was
3327 * really done.
3328 *
3329 * Failures for non-synchronous operations can
3330 * be ignored since the page subsystem will
3331 * retry the operation until it succeeds or the
3332 * file system is unmounted.
3333 */
3334 if (error) {
3335 if ((ioflag & (FSYNC | FDSYNC)) ||
3336 ip->i_type == VDIR) {
3337 uio->uio_resid = premove_resid;
3338 } else {
3339 error = 0;
3340 }
3341 }
3342 }
3343
3344 /*
3345 * Re-acquire contents lock.
3346 */
3347 rw_enter(&ip->i_contents, RW_WRITER);
3348 /*
3349 * If the uiomove() failed or if a synchronous
3350 * page push failed, fix up i_size.
3351 */
3352 if (error) {
3353 if (i_size_changed) {
3354 /*
3355 * The uiomove failed, and we
3356 * allocated blocks,so get rid
3357 * of them.
3358 */
3359 (void) ud_itrunc(ip, old_i_size, 0, cr);
3360 }
3361 } else {
3362 /*
3363 * XXX - Can this be out of the loop?
3364 */
3365 ip->i_flag |= IUPD | ICHG;
3366 if (i_size_changed) {
3367 ip->i_flag |= IATTCHG;
3368 }
3369 if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
3370 (IEXEC >> 10))) != 0 &&
3371 (ip->i_char & (ISUID | ISGID)) != 0 &&
3372 secpolicy_vnode_setid_retain(cr,
3373 (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
3374 /*
3375 * Clear Set-UID & Set-GID bits on
3376 * successful write if not privileged
3377 * and at least one of the execute bits
3378 * is set. If we always clear Set-GID,
3379 * mandatory file and record locking is
3380 * unuseable.
3381 */
3382 ip->i_char &= ~(ISUID | ISGID);
3383 }
3384 }
3385 } while (error == 0 && uio->uio_resid > 0 && n != 0);
3386
3387 out:
3388 /*
3389 * Inode is updated according to this table -
3390 *
3391 * FSYNC FDSYNC(posix.4)
3392 * --------------------------
3393 * always@ IATTCHG|IBDWRITE
3394 *
3395 * @ - If we are doing synchronous write the only time we should
3396 * not be sync'ing the ip here is if we have the stickyhack
3397 * activated, the file is marked with the sticky bit and
3398 * no exec bit, the file length has not been changed and
3399 * no new blocks have been allocated during this write.
3400 */
3401 if ((ip->i_flag & ISYNC) != 0) {
3402 /*
3403 * we have eliminated nosync
3404 */
3405 if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
3406 ((ioflag & FSYNC) && iupdat_flag)) {
3407 ud_iupdat(ip, 1);
3408 }
3409 }
3410
3411 /*
3412 * If we've already done a partial-write, terminate
3413 * the write but return no error.
3414 */
3415 if (start_resid != uio->uio_resid) {
3416 error = 0;
3417 }
3418 ip->i_flag &= ~(INOACC | ISYNC);
3419 ITIMES_NOLOCK(ip);
3420
3421 return (error);
3422 }
3423
3424 int32_t
ud_multi_strat(struct ud_inode * ip,page_t * pp,struct buf * bp,u_offset_t start)3425 ud_multi_strat(struct ud_inode *ip,
3426 page_t *pp, struct buf *bp, u_offset_t start)
3427 {
3428 daddr_t bn;
3429 int32_t error = 0, io_count, contig, alloc_sz, i;
3430 uint32_t io_off;
3431 mio_master_t *mm = NULL;
3432 mio_slave_t *ms = NULL;
3433 struct buf *rbp;
3434
3435 ASSERT(!(start & PAGEOFFSET));
3436
3437 /*
3438 * Figure out how many buffers to allocate
3439 */
3440 io_count = 0;
3441 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3442 contig = 0;
3443 if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
3444 &bn, &contig)) {
3445 goto end;
3446 }
3447 if (contig == 0) {
3448 goto end;
3449 }
3450 contig = MIN(contig, PAGESIZE - io_off);
3451 if (bn != UDF_HOLE) {
3452 io_count ++;
3453 } else {
3454 /*
3455 * HOLE
3456 */
3457 if (bp->b_flags & B_READ) {
3458
3459 /*
3460 * This is a hole and is read
3461 * it should be filled with 0's
3462 */
3463 pagezero(pp, io_off, contig);
3464 }
3465 }
3466 }
3467
3468
3469 if (io_count != 0) {
3470
3471 /*
3472 * Allocate memory for all the
3473 * required number of buffers
3474 */
3475 alloc_sz = sizeof (mio_master_t) +
3476 (sizeof (mio_slave_t) * io_count);
3477 mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
3478 if (mm == NULL) {
3479 error = ENOMEM;
3480 goto end;
3481 }
3482
3483 /*
3484 * initialize master
3485 */
3486 mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL);
3487 mm->mm_size = alloc_sz;
3488 mm->mm_bp = bp;
3489 mm->mm_resid = 0;
3490 mm->mm_error = 0;
3491 mm->mm_index = master_index++;
3492
3493 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3494
3495 /*
3496 * Initialize buffers
3497 */
3498 io_count = 0;
3499 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3500 contig = 0;
3501 if (error = ud_bmap_read(ip,
3502 (u_offset_t)(start + io_off),
3503 &bn, &contig)) {
3504 goto end;
3505 }
3506 ASSERT(contig);
3507 if ((io_off + contig) > bp->b_bcount) {
3508 contig = bp->b_bcount - io_off;
3509 }
3510 if (bn != UDF_HOLE) {
3511 /*
3512 * Clone the buffer
3513 * and prepare to start I/O
3514 */
3515 ms->ms_ptr = mm;
3516 bioinit(&ms->ms_buf);
3517 rbp = bioclone(bp, io_off, (size_t)contig,
3518 bp->b_edev, bn, ud_slave_done,
3519 &ms->ms_buf, KM_NOSLEEP);
3520 ASSERT(rbp == &ms->ms_buf);
3521 mm->mm_resid += contig;
3522 io_count++;
3523 ms ++;
3524 }
3525 }
3526
3527 /*
3528 * Start I/O's
3529 */
3530 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3531 for (i = 0; i < io_count; i++) {
3532 (void) bdev_strategy(&ms->ms_buf);
3533 ms ++;
3534 }
3535 }
3536
3537 end:
3538 if (error != 0) {
3539 bp->b_flags |= B_ERROR;
3540 bp->b_error = error;
3541 if (mm != NULL) {
3542 mutex_destroy(&mm->mm_mutex);
3543 kmem_free(mm, mm->mm_size);
3544 }
3545 }
3546 return (error);
3547 }
3548
3549 int32_t
ud_slave_done(struct buf * bp)3550 ud_slave_done(struct buf *bp)
3551 {
3552 mio_master_t *mm;
3553 int32_t resid;
3554
3555 ASSERT(SEMA_HELD(&bp->b_sem));
3556 ASSERT((bp->b_flags & B_DONE) == 0);
3557
3558 mm = ((mio_slave_t *)bp)->ms_ptr;
3559
3560 /*
3561 * Propagate error and byte count info from slave struct to
3562 * the master struct
3563 */
3564 mutex_enter(&mm->mm_mutex);
3565 if (bp->b_flags & B_ERROR) {
3566
3567 /*
3568 * If multiple slave buffers get
3569 * error we forget the old errors
3570 * this is ok because we any way
3571 * cannot return multiple errors
3572 */
3573 mm->mm_error = bp->b_error;
3574 }
3575 mm->mm_resid -= bp->b_bcount;
3576 resid = mm->mm_resid;
3577 mutex_exit(&mm->mm_mutex);
3578
3579 /*
3580 * free up the resources allocated to cloned buffers.
3581 */
3582 bp_mapout(bp);
3583 biofini(bp);
3584
3585 if (resid == 0) {
3586
3587 /*
3588 * This is the last I/O operation
3589 * clean up and return the original buffer
3590 */
3591 if (mm->mm_error) {
3592 mm->mm_bp->b_flags |= B_ERROR;
3593 mm->mm_bp->b_error = mm->mm_error;
3594 }
3595 biodone(mm->mm_bp);
3596 mutex_destroy(&mm->mm_mutex);
3597 kmem_free(mm, mm->mm_size);
3598 }
3599 return (0);
3600 }
3601