xref: /titanic_51/usr/src/uts/common/fs/udfs/udf_vnops.c (revision 80e2ca8596e3435bc3b76f3c597833ea0a87f85e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/t_lock.h>
28 #include <sys/param.h>
29 #include <sys/time.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/resource.h>
33 #include <sys/signal.h>
34 #include <sys/cred.h>
35 #include <sys/user.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vfs_opreg.h>
39 #include <sys/stat.h>
40 #include <sys/vnode.h>
41 #include <sys/mode.h>
42 #include <sys/proc.h>
43 #include <sys/disp.h>
44 #include <sys/file.h>
45 #include <sys/fcntl.h>
46 #include <sys/flock.h>
47 #include <sys/kmem.h>
48 #include <sys/uio.h>
49 #include <sys/dnlc.h>
50 #include <sys/conf.h>
51 #include <sys/errno.h>
52 #include <sys/mman.h>
53 #include <sys/fbuf.h>
54 #include <sys/pathname.h>
55 #include <sys/debug.h>
56 #include <sys/vmsystm.h>
57 #include <sys/cmn_err.h>
58 #include <sys/dirent.h>
59 #include <sys/errno.h>
60 #include <sys/modctl.h>
61 #include <sys/statvfs.h>
62 #include <sys/mount.h>
63 #include <sys/sunddi.h>
64 #include <sys/bootconf.h>
65 #include <sys/policy.h>
66 
67 #include <vm/hat.h>
68 #include <vm/page.h>
69 #include <vm/pvn.h>
70 #include <vm/as.h>
71 #include <vm/seg.h>
72 #include <vm/seg_map.h>
73 #include <vm/seg_kmem.h>
74 #include <vm/seg_vn.h>
75 #include <vm/rm.h>
76 #include <vm/page.h>
77 #include <sys/swap.h>
78 
79 #include <fs/fs_subr.h>
80 
81 #include <sys/fs/udf_volume.h>
82 #include <sys/fs/udf_inode.h>
83 
84 static int32_t udf_open(struct vnode **,
85 	int32_t, struct cred *, caller_context_t *);
86 static int32_t udf_close(struct vnode *,
87 	int32_t, int32_t, offset_t, struct cred *, caller_context_t *);
88 static int32_t udf_read(struct vnode *,
89 	struct uio *, int32_t, struct cred *, caller_context_t *);
90 static int32_t udf_write(struct vnode *,
91 	struct uio *, int32_t, struct cred *, caller_context_t *);
92 static int32_t udf_ioctl(struct vnode *,
93 	int32_t, intptr_t, int32_t, struct cred *, int32_t *,
94 	caller_context_t *);
95 static int32_t udf_getattr(struct vnode *,
96 	struct vattr *, int32_t, struct cred *, caller_context_t *);
97 static int32_t udf_setattr(struct vnode *,
98 	struct vattr *, int32_t, struct cred *, caller_context_t *);
99 static int32_t udf_access(struct vnode *,
100 	int32_t, int32_t, struct cred *, caller_context_t *);
101 static int32_t udf_lookup(struct vnode *,
102 	char *, struct vnode **, struct pathname *,
103 	int32_t, struct vnode *, struct cred *,
104 	caller_context_t *, int *, pathname_t *);
105 static int32_t udf_create(struct vnode *,
106 	char *, struct vattr *, enum vcexcl,
107 	int32_t, struct vnode **, struct cred *, int32_t,
108 	caller_context_t *, vsecattr_t *);
109 static int32_t udf_remove(struct vnode *,
110 	char *, struct cred *, caller_context_t *, int);
111 static int32_t udf_link(struct vnode *,
112 	struct vnode *, char *, struct cred *, caller_context_t *, int);
113 static int32_t udf_rename(struct vnode *,
114 	char *, struct vnode *, char *, struct cred *, caller_context_t *, int);
115 static int32_t udf_mkdir(struct vnode *,
116 	char *, struct vattr *, struct vnode **, struct cred *,
117 	caller_context_t *, int, vsecattr_t *);
118 static int32_t udf_rmdir(struct vnode *,
119 	char *, struct vnode *, struct cred *, caller_context_t *, int);
120 static int32_t udf_readdir(struct vnode *,
121 	struct uio *, struct cred *, int32_t *, caller_context_t *, int);
122 static int32_t udf_symlink(struct vnode *,
123 	char *, struct vattr *, char *, struct cred *, caller_context_t *, int);
124 static int32_t udf_readlink(struct vnode *,
125 	struct uio *, struct cred *, caller_context_t *);
126 static int32_t udf_fsync(struct vnode *,
127 	int32_t, struct cred *, caller_context_t *);
128 static void udf_inactive(struct vnode *,
129 	struct cred *, caller_context_t *);
130 static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *);
131 static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
132 static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
133 static int32_t udf_seek(struct vnode *, offset_t, offset_t *,
134 	caller_context_t *);
135 static int32_t udf_frlock(struct vnode *, int32_t,
136 	struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *,
137 	caller_context_t *);
138 static int32_t udf_space(struct vnode *, int32_t,
139 	struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
140 static int32_t udf_getpage(struct vnode *, offset_t,
141 	size_t, uint32_t *, struct page **, size_t,
142 	struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *);
143 static int32_t udf_putpage(struct vnode *, offset_t,
144 	size_t, int32_t, struct cred *, caller_context_t *);
145 static int32_t udf_map(struct vnode *, offset_t, struct as *,
146 	caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
147 	caller_context_t *);
148 static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
149 	caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
150 	caller_context_t *);
151 static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
152 	caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *,
153 	caller_context_t *);
154 static int32_t udf_l_pathconf(struct vnode *, int32_t,
155 	ulong_t *, struct cred *, caller_context_t *);
156 static int32_t udf_pageio(struct vnode *, struct page *,
157 	u_offset_t, size_t, int32_t, struct cred *, caller_context_t *);
158 
159 int32_t ud_getpage_miss(struct vnode *, u_offset_t,
160 	size_t, struct seg *, caddr_t, page_t *pl[],
161 	size_t, enum seg_rw, int32_t);
162 void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
163 int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
164 int32_t ud_page_fill(struct ud_inode *, page_t *,
165 	u_offset_t, uint32_t, u_offset_t *);
166 int32_t ud_iodone(struct buf *);
167 int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
168 int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
169 int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
170 int32_t ud_slave_done(struct buf *);
171 
/*
 * Structures to control multiple IO operations to get or put pages
 * that are backed by discontiguous blocks. The master struct is
 * a dummy that holds the original bp from pageio_setup. The
 * slave struct holds the working bp's to do the actual IO. Once
 * all the slave IOs complete, the master is processed as if a single
 * IO op has completed.
 */
180 uint32_t master_index = 0;
181 typedef struct mio_master {
182 	kmutex_t	mm_mutex;	/* protect the fields below */
183 	int32_t		mm_size;
184 	buf_t		*mm_bp;		/* original bp */
185 	int32_t		mm_resid;	/* bytes remaining to transfer */
186 	int32_t		mm_error;	/* accumulated error from slaves */
187 	int32_t		mm_index;	/* XXX debugging */
188 } mio_master_t;
189 
190 typedef struct mio_slave {
191 	buf_t		ms_buf;		/* working buffer for this IO chunk */
192 	mio_master_t	*ms_ptr;	/* pointer to master */
193 } mio_slave_t;
194 
195 struct vnodeops *udf_vnodeops;
196 
197 const fs_operation_def_t udf_vnodeops_template[] = {
198 	VOPNAME_OPEN,		{ .vop_open = udf_open },
199 	VOPNAME_CLOSE,		{ .vop_close = udf_close },
200 	VOPNAME_READ,		{ .vop_read = udf_read },
201 	VOPNAME_WRITE,		{ .vop_write = udf_write },
202 	VOPNAME_IOCTL,		{ .vop_ioctl = udf_ioctl },
203 	VOPNAME_GETATTR,	{ .vop_getattr = udf_getattr },
204 	VOPNAME_SETATTR,	{ .vop_setattr = udf_setattr },
205 	VOPNAME_ACCESS,		{ .vop_access = udf_access },
206 	VOPNAME_LOOKUP,		{ .vop_lookup = udf_lookup },
207 	VOPNAME_CREATE,		{ .vop_create = udf_create },
208 	VOPNAME_REMOVE,		{ .vop_remove = udf_remove },
209 	VOPNAME_LINK,		{ .vop_link = udf_link },
210 	VOPNAME_RENAME,		{ .vop_rename = udf_rename },
211 	VOPNAME_MKDIR,		{ .vop_mkdir = udf_mkdir },
212 	VOPNAME_RMDIR,		{ .vop_rmdir = udf_rmdir },
213 	VOPNAME_READDIR,	{ .vop_readdir = udf_readdir },
214 	VOPNAME_SYMLINK,	{ .vop_symlink = udf_symlink },
215 	VOPNAME_READLINK,	{ .vop_readlink = udf_readlink },
216 	VOPNAME_FSYNC,		{ .vop_fsync = udf_fsync },
217 	VOPNAME_INACTIVE,	{ .vop_inactive = udf_inactive },
218 	VOPNAME_FID,		{ .vop_fid = udf_fid },
219 	VOPNAME_RWLOCK,		{ .vop_rwlock = udf_rwlock },
220 	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = udf_rwunlock },
221 	VOPNAME_SEEK,		{ .vop_seek = udf_seek },
222 	VOPNAME_FRLOCK,		{ .vop_frlock = udf_frlock },
223 	VOPNAME_SPACE,		{ .vop_space = udf_space },
224 	VOPNAME_GETPAGE,	{ .vop_getpage = udf_getpage },
225 	VOPNAME_PUTPAGE,	{ .vop_putpage = udf_putpage },
226 	VOPNAME_MAP,		{ .vop_map = udf_map },
227 	VOPNAME_ADDMAP,		{ .vop_addmap = udf_addmap },
228 	VOPNAME_DELMAP,		{ .vop_delmap = udf_delmap },
229 	VOPNAME_PATHCONF,	{ .vop_pathconf = udf_l_pathconf },
230 	VOPNAME_PAGEIO,		{ .vop_pageio = udf_pageio },
231 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
232 	NULL,			NULL
233 };
234 
235 /* ARGSUSED */
236 static int32_t
237 udf_open(
238 	struct vnode **vpp,
239 	int32_t flag,
240 	struct cred *cr,
241 	caller_context_t *ct)
242 {
243 	ud_printf("udf_open\n");
244 
245 	return (0);
246 }
247 
248 /* ARGSUSED */
249 static int32_t
250 udf_close(
251 	struct vnode *vp,
252 	int32_t flag,
253 	int32_t count,
254 	offset_t offset,
255 	struct cred *cr,
256 	caller_context_t *ct)
257 {
258 	struct ud_inode *ip = VTOI(vp);
259 
260 	ud_printf("udf_close\n");
261 
262 	ITIMES(ip);
263 
264 	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
265 	cleanshares(vp, ttoproc(curthread)->p_pid);
266 
267 	/*
268 	 * Push partially filled cluster at last close.
269 	 * ``last close'' is approximated because the dnlc
270 	 * may have a hold on the vnode.
271 	 */
272 	if (vp->v_count <= 2 && vp->v_type != VBAD) {
273 		struct ud_inode *ip = VTOI(vp);
274 		if (ip->i_delaylen) {
275 			(void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
276 			    B_ASYNC | B_FREE, cr);
277 			ip->i_delaylen = 0;
278 		}
279 	}
280 
281 	return (0);
282 }
283 
284 /* ARGSUSED */
285 static int32_t
286 udf_read(
287 	struct vnode *vp,
288 	struct uio *uiop,
289 	int32_t ioflag,
290 	struct cred *cr,
291 	caller_context_t *ct)
292 {
293 	struct ud_inode *ip = VTOI(vp);
294 	int32_t error;
295 
296 	ud_printf("udf_read\n");
297 
298 #ifdef	__lock_lint
299 	rw_enter(&ip->i_rwlock, RW_READER);
300 #endif
301 
302 	ASSERT(RW_READ_HELD(&ip->i_rwlock));
303 
304 	if (MANDLOCK(vp, ip->i_char)) {
305 		/*
306 		 * udf_getattr ends up being called by chklock
307 		 */
308 		error = chklock(vp, FREAD, uiop->uio_loffset,
309 		    uiop->uio_resid, uiop->uio_fmode, ct);
310 		if (error) {
311 			goto end;
312 		}
313 	}
314 
315 	rw_enter(&ip->i_contents, RW_READER);
316 	error = ud_rdip(ip, uiop, ioflag, cr);
317 	rw_exit(&ip->i_contents);
318 
319 end:
320 #ifdef	__lock_lint
321 	rw_exit(&ip->i_rwlock);
322 #endif
323 
324 	return (error);
325 }
326 
327 
328 int32_t ud_WRITES = 1;
329 int32_t ud_HW = 96 * 1024;
330 int32_t ud_LW = 64 * 1024;
331 int32_t ud_throttles = 0;
332 
333 /* ARGSUSED */
334 static int32_t
335 udf_write(
336 	struct vnode *vp,
337 	struct uio *uiop,
338 	int32_t ioflag,
339 	struct cred *cr,
340 	caller_context_t *ct)
341 {
342 	struct ud_inode *ip = VTOI(vp);
343 	int32_t error = 0;
344 
345 	ud_printf("udf_write\n");
346 
347 #ifdef	__lock_lint
348 	rw_enter(&ip->i_rwlock, RW_WRITER);
349 #endif
350 
351 	ASSERT(RW_WRITE_HELD(&ip->i_rwlock));
352 
353 	if (MANDLOCK(vp, ip->i_char)) {
354 		/*
355 		 * ud_getattr ends up being called by chklock
356 		 */
357 		error = chklock(vp, FWRITE, uiop->uio_loffset,
358 		    uiop->uio_resid, uiop->uio_fmode, ct);
359 		if (error) {
360 			goto end;
361 		}
362 	}
363 	/*
364 	 * Throttle writes.
365 	 */
366 	mutex_enter(&ip->i_tlock);
367 	if (ud_WRITES && (ip->i_writes > ud_HW)) {
368 		while (ip->i_writes > ud_HW) {
369 			ud_throttles++;
370 			cv_wait(&ip->i_wrcv, &ip->i_tlock);
371 		}
372 	}
373 	mutex_exit(&ip->i_tlock);
374 
375 	/*
376 	 * Write to the file
377 	 */
378 	rw_enter(&ip->i_contents, RW_WRITER);
379 	if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
380 		/*
381 		 * In append mode start at end of file.
382 		 */
383 		uiop->uio_loffset = ip->i_size;
384 	}
385 	error = ud_wrip(ip, uiop, ioflag, cr);
386 	rw_exit(&ip->i_contents);
387 
388 end:
389 #ifdef	__lock_lint
390 	rw_exit(&ip->i_rwlock);
391 #endif
392 
393 	return (error);
394 }
395 
396 /* ARGSUSED */
397 static int32_t
398 udf_ioctl(
399 	struct vnode *vp,
400 	int32_t cmd,
401 	intptr_t arg,
402 	int32_t flag,
403 	struct cred *cr,
404 	int32_t *rvalp,
405 	caller_context_t *ct)
406 {
407 	return (ENOTTY);
408 }
409 
410 /* ARGSUSED */
411 static int32_t
412 udf_getattr(
413 	struct vnode *vp,
414 	struct vattr *vap,
415 	int32_t flags,
416 	struct cred *cr,
417 	caller_context_t *ct)
418 {
419 	struct ud_inode *ip = VTOI(vp);
420 
421 	ud_printf("udf_getattr\n");
422 
423 	if (vap->va_mask == AT_SIZE) {
424 		/*
425 		 * for performance, if only the size is requested don't bother
426 		 * with anything else.
427 		 */
428 		vap->va_size = ip->i_size;
429 		return (0);
430 	}
431 
432 	rw_enter(&ip->i_contents, RW_READER);
433 
434 	vap->va_type = vp->v_type;
435 	vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
436 
437 	vap->va_uid = ip->i_uid;
438 	vap->va_gid = ip->i_gid;
439 	vap->va_fsid = ip->i_dev;
440 	vap->va_nodeid = ip->i_icb_lbano;
441 	vap->va_nlink = ip->i_nlink;
442 	vap->va_size = ip->i_size;
443 	vap->va_seq = ip->i_seq;
444 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
445 		vap->va_rdev = ip->i_rdev;
446 	} else {
447 		vap->va_rdev = 0;
448 	}
449 
450 	mutex_enter(&ip->i_tlock);
451 	ITIMES_NOLOCK(ip);	/* mark correct time in inode */
452 	vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
453 	vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
454 	vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
455 	vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
456 	vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
457 	vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
458 	mutex_exit(&ip->i_tlock);
459 
460 	switch (ip->i_type) {
461 		case VBLK:
462 			vap->va_blksize = MAXBSIZE;
463 			break;
464 		case VCHR:
465 			vap->va_blksize = MAXBSIZE;
466 			break;
467 		default:
468 			vap->va_blksize = ip->i_udf->udf_lbsize;
469 			break;
470 	}
471 	vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;
472 
473 	rw_exit(&ip->i_contents);
474 
475 	return (0);
476 }
477 
/*
 * ud_iaccess_vmode - access-check callback handed to
 * secpolicy_vnode_setattr() by udf_setattr.
 *
 * Converts the vnode-style mode with UD_UPERM2DPERM() and forwards the
 * request to ud_iaccess() with a final argument of 0.
 */
static int
ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
{
	return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 0));
}
483 
484 /*ARGSUSED4*/
485 static int32_t
486 udf_setattr(
487 	struct vnode *vp,
488 	struct vattr *vap,
489 	int32_t flags,
490 	struct cred *cr,
491 	caller_context_t *ct)
492 {
493 	int32_t error = 0;
494 	uint32_t mask = vap->va_mask;
495 	struct ud_inode *ip;
496 	timestruc_t now;
497 	struct vattr ovap;
498 
499 	ud_printf("udf_setattr\n");
500 
501 	ip = VTOI(vp);
502 
503 	/*
504 	 * not updates allowed to 4096 files
505 	 */
506 	if (ip->i_astrat == STRAT_TYPE4096) {
507 		return (EINVAL);
508 	}
509 
510 	/*
511 	 * Cannot set these attributes
512 	 */
513 	if (mask & AT_NOSET) {
514 		return (EINVAL);
515 	}
516 
517 	rw_enter(&ip->i_rwlock, RW_WRITER);
518 	rw_enter(&ip->i_contents, RW_WRITER);
519 
520 	ovap.va_uid = ip->i_uid;
521 	ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
522 	error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
523 	    ud_iaccess_vmode, ip);
524 	if (error)
525 		goto update_inode;
526 
527 	mask = vap->va_mask;
528 	/*
529 	 * Change file access modes.
530 	 */
531 	if (mask & AT_MODE) {
532 		ip->i_perm = VA2UD_PERM(vap->va_mode);
533 		ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
534 		mutex_enter(&ip->i_tlock);
535 		ip->i_flag |= ICHG;
536 		mutex_exit(&ip->i_tlock);
537 	}
538 	if (mask & (AT_UID|AT_GID)) {
539 		if (mask & AT_UID) {
540 			ip->i_uid = vap->va_uid;
541 		}
542 		if (mask & AT_GID) {
543 			ip->i_gid = vap->va_gid;
544 		}
545 		mutex_enter(&ip->i_tlock);
546 		ip->i_flag |= ICHG;
547 		mutex_exit(&ip->i_tlock);
548 	}
549 	/*
550 	 * Truncate file.  Must have write permission and not be a directory.
551 	 */
552 	if (mask & AT_SIZE) {
553 		if (vp->v_type == VDIR) {
554 			error = EISDIR;
555 			goto update_inode;
556 		}
557 		if (error = ud_iaccess(ip, IWRITE, cr, 0)) {
558 			goto update_inode;
559 		}
560 		if (vap->va_size > MAXOFFSET_T) {
561 			error = EFBIG;
562 			goto update_inode;
563 		}
564 		if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
565 			goto update_inode;
566 		}
567 	}
568 	/*
569 	 * Change file access or modified times.
570 	 */
571 	if (mask & (AT_ATIME|AT_MTIME)) {
572 		mutex_enter(&ip->i_tlock);
573 		if (mask & AT_ATIME) {
574 			ip->i_atime.tv_sec = vap->va_atime.tv_sec;
575 			ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
576 			ip->i_flag &= ~IACC;
577 		}
578 		if (mask & AT_MTIME) {
579 			ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
580 			ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
581 			gethrestime(&now);
582 			ip->i_ctime.tv_sec = now.tv_sec;
583 			ip->i_ctime.tv_nsec = now.tv_nsec;
584 			ip->i_flag &= ~(IUPD|ICHG);
585 			ip->i_flag |= IMODTIME;
586 		}
587 		ip->i_flag |= IMOD;
588 		mutex_exit(&ip->i_tlock);
589 	}
590 
591 update_inode:
592 	if (curthread->t_flag & T_DONTPEND) {
593 		ud_iupdat(ip, 1);
594 	} else {
595 		ITIMES_NOLOCK(ip);
596 	}
597 	rw_exit(&ip->i_contents);
598 	rw_exit(&ip->i_rwlock);
599 
600 	return (error);
601 }
602 
603 /* ARGSUSED */
604 static int32_t
605 udf_access(
606 	struct vnode *vp,
607 	int32_t mode,
608 	int32_t flags,
609 	struct cred *cr,
610 	caller_context_t *ct)
611 {
612 	struct ud_inode *ip = VTOI(vp);
613 
614 	ud_printf("udf_access\n");
615 
616 	if (ip->i_udf == NULL) {
617 		return (EIO);
618 	}
619 
620 	return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1));
621 }
622 
623 int32_t udfs_stickyhack = 1;
624 
625 /* ARGSUSED */
626 static int32_t
627 udf_lookup(
628 	struct vnode *dvp,
629 	char *nm,
630 	struct vnode **vpp,
631 	struct pathname *pnp,
632 	int32_t flags,
633 	struct vnode *rdir,
634 	struct cred *cr,
635 	caller_context_t *ct,
636 	int *direntflags,
637 	pathname_t *realpnp)
638 {
639 	int32_t error;
640 	struct vnode *vp;
641 	struct ud_inode *ip, *xip;
642 
643 	ud_printf("udf_lookup\n");
644 	/*
645 	 * Null component name is a synonym for directory being searched.
646 	 */
647 	if (*nm == '\0') {
648 		VN_HOLD(dvp);
649 		*vpp = dvp;
650 		error = 0;
651 		goto out;
652 	}
653 
654 	/*
655 	 * Fast path: Check the directory name lookup cache.
656 	 */
657 	ip = VTOI(dvp);
658 	if (vp = dnlc_lookup(dvp, nm)) {
659 		/*
660 		 * Check accessibility of directory.
661 		 */
662 		if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) {
663 			VN_RELE(vp);
664 		}
665 		xip = VTOI(vp);
666 	} else {
667 		error = ud_dirlook(ip, nm, &xip, cr, 1);
668 		ITIMES(ip);
669 	}
670 
671 	if (error == 0) {
672 		ip = xip;
673 		*vpp = ITOV(ip);
674 		if ((ip->i_type != VDIR) &&
675 		    (ip->i_char & ISVTX) &&
676 		    ((ip->i_perm & IEXEC) == 0) &&
677 		    udfs_stickyhack) {
678 			mutex_enter(&(*vpp)->v_lock);
679 			(*vpp)->v_flag |= VISSWAP;
680 			mutex_exit(&(*vpp)->v_lock);
681 		}
682 		ITIMES(ip);
683 		/*
684 		 * If vnode is a device return special vnode instead.
685 		 */
686 		if (IS_DEVVP(*vpp)) {
687 			struct vnode *newvp;
688 			newvp = specvp(*vpp, (*vpp)->v_rdev,
689 			    (*vpp)->v_type, cr);
690 			VN_RELE(*vpp);
691 			if (newvp == NULL) {
692 				error = ENOSYS;
693 			} else {
694 				*vpp = newvp;
695 			}
696 		}
697 	}
698 out:
699 	return (error);
700 }
701 
702 /* ARGSUSED */
703 static int32_t
704 udf_create(
705 	struct vnode *dvp,
706 	char *name,
707 	struct vattr *vap,
708 	enum vcexcl excl,
709 	int32_t mode,
710 	struct vnode **vpp,
711 	struct cred *cr,
712 	int32_t flag,
713 	caller_context_t *ct,
714 	vsecattr_t *vsecp)
715 {
716 	int32_t error;
717 	struct ud_inode *ip = VTOI(dvp), *xip;
718 
719 	ud_printf("udf_create\n");
720 
721 	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
722 		vap->va_mode &= ~VSVTX;
723 
724 	if (*name == '\0') {
725 		/*
726 		 * Null component name refers to the directory itself.
727 		 */
728 		VN_HOLD(dvp);
729 		ITIMES(ip);
730 		error = EEXIST;
731 	} else {
732 		xip = NULL;
733 		rw_enter(&ip->i_rwlock, RW_WRITER);
734 		error = ud_direnter(ip, name, DE_CREATE,
735 		    (struct ud_inode *)0, (struct ud_inode *)0,
736 		    vap, &xip, cr, ct);
737 		rw_exit(&ip->i_rwlock);
738 		ITIMES(ip);
739 		ip = xip;
740 	}
741 #ifdef	__lock_lint
742 	rw_enter(&ip->i_contents, RW_WRITER);
743 #else
744 	if (ip != NULL) {
745 		rw_enter(&ip->i_contents, RW_WRITER);
746 	}
747 #endif
748 
749 	/*
750 	 * If the file already exists and this is a non-exclusive create,
751 	 * check permissions and allow access for non-directories.
752 	 * Read-only create of an existing directory is also allowed.
753 	 * We fail an exclusive create of anything which already exists.
754 	 */
755 	if (error == EEXIST) {
756 		if (excl == NONEXCL) {
757 			if ((ip->i_type == VDIR) && (mode & VWRITE)) {
758 				error = EISDIR;
759 			} else if (mode) {
760 				error = ud_iaccess(ip,
761 				    UD_UPERM2DPERM(mode), cr, 0);
762 			} else {
763 				error = 0;
764 			}
765 		}
766 		if (error) {
767 			rw_exit(&ip->i_contents);
768 			VN_RELE(ITOV(ip));
769 			goto out;
770 		} else if ((ip->i_type == VREG) &&
771 		    (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
772 			/*
773 			 * Truncate regular files, if requested by caller.
774 			 * Grab i_rwlock to make sure no one else is
775 			 * currently writing to the file (we promised
776 			 * bmap we would do this).
777 			 * Must get the locks in the correct order.
778 			 */
779 			if (ip->i_size == 0) {
780 				ip->i_flag |= ICHG | IUPD;
781 			} else {
782 				rw_exit(&ip->i_contents);
783 				rw_enter(&ip->i_rwlock, RW_WRITER);
784 				rw_enter(&ip->i_contents, RW_WRITER);
785 				(void) ud_itrunc(ip, 0, 0, cr);
786 				rw_exit(&ip->i_rwlock);
787 			}
788 			vnevent_create(ITOV(ip), ct);
789 		}
790 	}
791 
792 	if (error == 0) {
793 		*vpp = ITOV(ip);
794 		ITIMES(ip);
795 	}
796 #ifdef	__lock_lint
797 	rw_exit(&ip->i_contents);
798 #else
799 	if (ip != NULL) {
800 		rw_exit(&ip->i_contents);
801 	}
802 #endif
803 	if (error) {
804 		goto out;
805 	}
806 
807 	/*
808 	 * If vnode is a device return special vnode instead.
809 	 */
810 	if (!error && IS_DEVVP(*vpp)) {
811 		struct vnode *newvp;
812 
813 		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
814 		VN_RELE(*vpp);
815 		if (newvp == NULL) {
816 			error = ENOSYS;
817 			goto out;
818 		}
819 		*vpp = newvp;
820 	}
821 out:
822 	return (error);
823 }
824 
825 /* ARGSUSED */
826 static int32_t
827 udf_remove(
828 	struct vnode *vp,
829 	char *nm,
830 	struct cred *cr,
831 	caller_context_t *ct,
832 	int flags)
833 {
834 	int32_t error;
835 	struct ud_inode *ip = VTOI(vp);
836 
837 	ud_printf("udf_remove\n");
838 
839 	rw_enter(&ip->i_rwlock, RW_WRITER);
840 	error = ud_dirremove(ip, nm,
841 	    (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
842 	rw_exit(&ip->i_rwlock);
843 	ITIMES(ip);
844 
845 	return (error);
846 }
847 
848 /* ARGSUSED */
849 static int32_t
850 udf_link(
851 	struct vnode *tdvp,
852 	struct vnode *svp,
853 	char *tnm,
854 	struct cred *cr,
855 	caller_context_t *ct,
856 	int flags)
857 {
858 	int32_t error;
859 	struct vnode *realvp;
860 	struct ud_inode *sip;
861 	struct ud_inode *tdp;
862 
863 	ud_printf("udf_link\n");
864 	if (VOP_REALVP(svp, &realvp, ct) == 0) {
865 		svp = realvp;
866 	}
867 
868 	/*
869 	 * Do not allow links to directories
870 	 */
871 	if (svp->v_type == VDIR) {
872 		return (EPERM);
873 	}
874 
875 	sip = VTOI(svp);
876 
877 	if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
878 		return (EPERM);
879 
880 	tdp = VTOI(tdvp);
881 
882 	rw_enter(&tdp->i_rwlock, RW_WRITER);
883 	error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
884 	    sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
885 	rw_exit(&tdp->i_rwlock);
886 	ITIMES(sip);
887 	ITIMES(tdp);
888 
889 	if (error == 0) {
890 		vnevent_link(svp, ct);
891 	}
892 
893 	return (error);
894 }
895 
896 /* ARGSUSED */
897 static int32_t
898 udf_rename(
899 	struct vnode *sdvp,
900 	char *snm,
901 	struct vnode *tdvp,
902 	char *tnm,
903 	struct cred *cr,
904 	caller_context_t *ct,
905 	int flags)
906 {
907 	int32_t error = 0;
908 	struct udf_vfs *udf_vfsp;
909 	struct ud_inode *sip;		/* source inode */
910 	struct ud_inode *sdp, *tdp;	/* source and target parent inode */
911 	struct vnode *realvp;
912 
913 	ud_printf("udf_rename\n");
914 
915 	if (VOP_REALVP(tdvp, &realvp, ct) == 0) {
916 		tdvp = realvp;
917 	}
918 
919 	sdp = VTOI(sdvp);
920 	tdp = VTOI(tdvp);
921 
922 	udf_vfsp = sdp->i_udf;
923 
924 	mutex_enter(&udf_vfsp->udf_rename_lck);
925 	/*
926 	 * Look up inode of file we're supposed to rename.
927 	 */
928 	if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
929 		mutex_exit(&udf_vfsp->udf_rename_lck);
930 		return (error);
931 	}
932 	/*
933 	 * be sure this is not a directory with another file system mounted
934 	 * over it.  If it is just give up the locks, and return with
935 	 * EBUSY
936 	 */
937 	if (vn_mountedvfs(ITOV(sip)) != NULL) {
938 		error = EBUSY;
939 		goto errout;
940 	}
941 	/*
942 	 * Make sure we can delete the source entry.  This requires
943 	 * write permission on the containing directory.  If that
944 	 * directory is "sticky" it further requires (except for
945 	 * privileged users) that the user own the directory or the
946 	 * source entry, or else have permission to write the source
947 	 * entry.
948 	 */
949 	rw_enter(&sdp->i_contents, RW_READER);
950 	rw_enter(&sip->i_contents, RW_READER);
951 	if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 ||
952 	    (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
953 		rw_exit(&sip->i_contents);
954 		rw_exit(&sdp->i_contents);
955 		ITIMES(sip);
956 		goto errout;
957 	}
958 
959 	/*
960 	 * Check for renaming '.' or '..' or alias of '.'
961 	 */
962 	if ((strcmp(snm, ".") == 0) ||
963 	    (strcmp(snm, "..") == 0) ||
964 	    (sdp == sip)) {
965 		error = EINVAL;
966 		rw_exit(&sip->i_contents);
967 		rw_exit(&sdp->i_contents);
968 		goto errout;
969 	}
970 	rw_exit(&sip->i_contents);
971 	rw_exit(&sdp->i_contents);
972 
973 
974 	/*
975 	 * Link source to the target.
976 	 */
977 	rw_enter(&tdp->i_rwlock, RW_WRITER);
978 	if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip,
979 	    (struct vattr *)0, (struct ud_inode **)0, cr, ct)) {
980 		/*
981 		 * ESAME isn't really an error; it indicates that the
982 		 * operation should not be done because the source and target
983 		 * are the same file, but that no error should be reported.
984 		 */
985 		if (error == ESAME) {
986 			error = 0;
987 		}
988 		rw_exit(&tdp->i_rwlock);
989 		goto errout;
990 	}
991 	vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
992 	rw_exit(&tdp->i_rwlock);
993 
994 	rw_enter(&sdp->i_rwlock, RW_WRITER);
995 	/*
996 	 * Unlink the source.
997 	 * Remove the source entry.  ud_dirremove() checks that the entry
998 	 * still reflects sip, and returns an error if it doesn't.
999 	 * If the entry has changed just forget about it.  Release
1000 	 * the source inode.
1001 	 */
1002 	if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
1003 	    DR_RENAME, cr, ct)) == ENOENT) {
1004 		error = 0;
1005 	}
1006 	rw_exit(&sdp->i_rwlock);
1007 errout:
1008 	ITIMES(sdp);
1009 	ITIMES(tdp);
1010 	VN_RELE(ITOV(sip));
1011 	mutex_exit(&udf_vfsp->udf_rename_lck);
1012 
1013 	return (error);
1014 }
1015 
1016 /* ARGSUSED */
1017 static int32_t
1018 udf_mkdir(
1019 	struct vnode *dvp,
1020 	char *dirname,
1021 	struct vattr *vap,
1022 	struct vnode **vpp,
1023 	struct cred *cr,
1024 	caller_context_t *ct,
1025 	int flags,
1026 	vsecattr_t *vsecp)
1027 {
1028 	int32_t error;
1029 	struct ud_inode *ip;
1030 	struct ud_inode *xip;
1031 
1032 	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1033 
1034 	ud_printf("udf_mkdir\n");
1035 
1036 	ip = VTOI(dvp);
1037 	rw_enter(&ip->i_rwlock, RW_WRITER);
1038 	error = ud_direnter(ip, dirname, DE_MKDIR,
1039 	    (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
1040 	rw_exit(&ip->i_rwlock);
1041 	ITIMES(ip);
1042 	if (error == 0) {
1043 		ip = xip;
1044 		*vpp = ITOV(ip);
1045 		ITIMES(ip);
1046 	} else if (error == EEXIST) {
1047 		ITIMES(xip);
1048 		VN_RELE(ITOV(xip));
1049 	}
1050 
1051 	return (error);
1052 }
1053 
1054 /* ARGSUSED */
1055 static int32_t
1056 udf_rmdir(
1057 	struct vnode *vp,
1058 	char *nm,
1059 	struct vnode *cdir,
1060 	struct cred *cr,
1061 	caller_context_t *ct,
1062 	int flags)
1063 {
1064 	int32_t error;
1065 	struct ud_inode *ip = VTOI(vp);
1066 
1067 	ud_printf("udf_rmdir\n");
1068 
1069 	rw_enter(&ip->i_rwlock, RW_WRITER);
1070 	error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR,
1071 	    cr, ct);
1072 	rw_exit(&ip->i_rwlock);
1073 	ITIMES(ip);
1074 
1075 	return (error);
1076 }
1077 
1078 /* ARGSUSED */
1079 static int32_t
1080 udf_readdir(
1081 	struct vnode *vp,
1082 	struct uio *uiop,
1083 	struct cred *cr,
1084 	int32_t *eofp,
1085 	caller_context_t *ct,
1086 	int flags)
1087 {
1088 	struct ud_inode *ip;
1089 	struct dirent64 *nd;
1090 	struct udf_vfs *udf_vfsp;
1091 	int32_t error = 0, len, outcount = 0;
1092 	uint32_t dirsiz, offset;
1093 	uint32_t bufsize, ndlen, dummy;
1094 	caddr_t outbuf;
1095 	caddr_t outb, end_outb;
1096 	struct iovec *iovp;
1097 
1098 	uint8_t *dname;
1099 	int32_t length;
1100 
1101 	uint8_t *buf = NULL;
1102 
1103 	struct fbuf *fbp = NULL;
1104 	struct file_id *fid;
1105 	uint8_t *name;
1106 
1107 
1108 	ud_printf("udf_readdir\n");
1109 
1110 	ip = VTOI(vp);
1111 	udf_vfsp = ip->i_udf;
1112 
1113 	dirsiz = ip->i_size;
1114 	if ((uiop->uio_offset >= dirsiz) ||
1115 	    (ip->i_nlink <= 0)) {
1116 		if (eofp) {
1117 			*eofp = 1;
1118 		}
1119 		return (0);
1120 	}
1121 
1122 	offset = uiop->uio_offset;
1123 	iovp = uiop->uio_iov;
1124 	bufsize = iovp->iov_len;
1125 
1126 	outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP);
1127 	end_outb = outb + bufsize;
1128 	nd = (struct dirent64 *)outbuf;
1129 
1130 	dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
1131 	buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);
1132 
1133 	if (offset == 0) {
1134 		len = DIRENT64_RECLEN(1);
1135 		if (((caddr_t)nd + len) >= end_outb) {
1136 			error = EINVAL;
1137 			goto end;
1138 		}
1139 		nd->d_ino = ip->i_icb_lbano;
1140 		nd->d_reclen = (uint16_t)len;
1141 		nd->d_off = 0x10;
1142 		nd->d_name[0] = '.';
1143 		bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1);
1144 		nd = (struct dirent64 *)((char *)nd + nd->d_reclen);
1145 		outcount++;
1146 	} else if (offset == 0x10) {
1147 		offset = 0;
1148 	}
1149 
1150 	while (offset < dirsiz) {
1151 		error = ud_get_next_fid(ip, &fbp,
1152 		    offset, &fid, &name, buf);
1153 		if (error != 0) {
1154 			break;
1155 		}
1156 
1157 		if ((fid->fid_flags & FID_DELETED) == 0) {
1158 			if (fid->fid_flags & FID_PARENT) {
1159 
1160 				len = DIRENT64_RECLEN(2);
1161 				if (((caddr_t)nd + len) >= end_outb) {
1162 					error = EINVAL;
1163 					break;
1164 				}
1165 
1166 				nd->d_ino = ip->i_icb_lbano;
1167 				nd->d_reclen = (uint16_t)len;
1168 				nd->d_off = offset + FID_LEN(fid);
1169 				nd->d_name[0] = '.';
1170 				nd->d_name[1] = '.';
1171 				bzero(&nd->d_name[2],
1172 				    DIRENT64_NAMELEN(len) - 2);
1173 				nd = (struct dirent64 *)
1174 				    ((char *)nd + nd->d_reclen);
1175 			} else {
1176 				if ((error = ud_uncompress(fid->fid_idlen,
1177 				    &length, name, dname)) != 0) {
1178 					break;
1179 				}
1180 				if (length == 0) {
1181 					offset += FID_LEN(fid);
1182 					continue;
1183 				}
1184 				len = DIRENT64_RECLEN(length);
1185 				if (((caddr_t)nd + len) >= end_outb) {
1186 					if (!outcount) {
1187 						error = EINVAL;
1188 					}
1189 					break;
1190 				}
1191 				(void) strncpy(nd->d_name,
1192 				    (caddr_t)dname, length);
1193 				bzero(&nd->d_name[length],
1194 				    DIRENT64_NAMELEN(len) - length);
1195 				nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
1196 				    SWAP_16(fid->fid_icb.lad_ext_prn),
1197 				    SWAP_32(fid->fid_icb.lad_ext_loc), 1,
1198 				    &dummy);
1199 				nd->d_reclen = (uint16_t)len;
1200 				nd->d_off = offset + FID_LEN(fid);
1201 				nd = (struct dirent64 *)
1202 				    ((char *)nd + nd->d_reclen);
1203 			}
1204 			outcount++;
1205 		}
1206 
1207 		offset += FID_LEN(fid);
1208 	}
1209 
1210 end:
1211 	if (fbp != NULL) {
1212 		fbrelse(fbp, S_OTHER);
1213 	}
1214 	ndlen = ((char *)nd - outbuf);
1215 	/*
1216 	 * In case of error do not call uiomove.
1217 	 * Return the error to the caller.
1218 	 */
1219 	if ((error == 0) && (ndlen != 0)) {
1220 		error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop);
1221 		uiop->uio_offset = offset;
1222 	}
1223 	kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize);
1224 	kmem_free((caddr_t)dname, 1024);
1225 	kmem_free(outbuf, (uint32_t)bufsize);
1226 	if (eofp && error == 0) {
1227 		*eofp = (uiop->uio_offset >= dirsiz);
1228 	}
1229 	return (error);
1230 }
1231 
1232 /* ARGSUSED */
1233 static int32_t
1234 udf_symlink(
1235 	struct vnode *dvp,
1236 	char *linkname,
1237 	struct vattr *vap,
1238 	char *target,
1239 	struct cred *cr,
1240 	caller_context_t *ct,
1241 	int flags)
1242 {
1243 	int32_t error = 0, outlen;
1244 	uint32_t ioflag = 0;
1245 	struct ud_inode *ip, *dip = VTOI(dvp);
1246 
1247 	struct path_comp *pc;
1248 	int8_t *dname = NULL, *uname = NULL, *sp;
1249 
1250 	ud_printf("udf_symlink\n");
1251 
1252 	ip = (struct ud_inode *)0;
1253 	vap->va_type = VLNK;
1254 	vap->va_rdev = 0;
1255 
1256 	rw_enter(&dip->i_rwlock, RW_WRITER);
1257 	error = ud_direnter(dip, linkname, DE_CREATE,
1258 	    (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
1259 	rw_exit(&dip->i_rwlock);
1260 	if (error == 0) {
1261 		dname = kmem_zalloc(1024, KM_SLEEP);
1262 		uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1263 
1264 		pc = (struct path_comp *)uname;
1265 		/*
1266 		 * If the first character in target is "/"
1267 		 * then skip it and create entry for it
1268 		 */
1269 		if (*target == '/') {
1270 			pc->pc_type = 2;
1271 			pc->pc_len = 0;
1272 			pc = (struct path_comp *)(((char *)pc) + 4);
1273 			while (*target == '/') {
1274 				target++;
1275 			}
1276 		}
1277 
1278 		while (*target != NULL) {
1279 			sp = target;
1280 			while ((*target != '/') && (*target != '\0')) {
1281 				target ++;
1282 			}
1283 			/*
1284 			 * We got the next component of the
1285 			 * path name. Create path_comp of
1286 			 * appropriate type
1287 			 */
1288 			if (((target - sp) == 1) && (*sp == '.')) {
1289 				/*
1290 				 * Dot entry.
1291 				 */
1292 				pc->pc_type = 4;
1293 				pc = (struct path_comp *)(((char *)pc) + 4);
1294 			} else if (((target - sp) == 2) &&
1295 			    (*sp == '.') && ((*(sp + 1)) == '.')) {
1296 				/*
1297 				 * DotDot entry.
1298 				 */
1299 				pc->pc_type = 3;
1300 				pc = (struct path_comp *)(((char *)pc) + 4);
1301 			} else {
1302 				/*
1303 				 * convert the user given name
1304 				 * into appropriate form to be put
1305 				 * on the media
1306 				 */
1307 				outlen = 1024;	/* set to size of dname */
1308 				if (error = ud_compress(target - sp, &outlen,
1309 				    (uint8_t *)sp, (uint8_t *)dname)) {
1310 					break;
1311 				}
1312 				pc->pc_type = 5;
1313 				/* LINTED */
1314 				pc->pc_len = outlen;
1315 				dname[outlen] = '\0';
1316 				(void) strcpy((char *)pc->pc_id, dname);
1317 				pc = (struct path_comp *)
1318 				    (((char *)pc) + 4 + outlen);
1319 			}
1320 			while (*target == '/') {
1321 				target++;
1322 			}
1323 			if (*target == NULL) {
1324 				break;
1325 			}
1326 		}
1327 
1328 		rw_enter(&ip->i_contents, RW_WRITER);
1329 		if (error == 0) {
1330 			ioflag = FWRITE;
1331 			if (curthread->t_flag & T_DONTPEND) {
1332 				ioflag |= FDSYNC;
1333 			}
1334 			error = ud_rdwri(UIO_WRITE, ioflag, ip,
1335 			    uname, ((int8_t *)pc) - uname,
1336 			    (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
1337 		}
1338 		if (error) {
1339 			ud_idrop(ip);
1340 			rw_exit(&ip->i_contents);
1341 			rw_enter(&dip->i_rwlock, RW_WRITER);
1342 			(void) ud_dirremove(dip, linkname, (struct ud_inode *)0,
1343 			    (struct vnode *)0, DR_REMOVE, cr, ct);
1344 			rw_exit(&dip->i_rwlock);
1345 			goto update_inode;
1346 		}
1347 		rw_exit(&ip->i_contents);
1348 	}
1349 
1350 	if ((error == 0) || (error == EEXIST)) {
1351 		VN_RELE(ITOV(ip));
1352 	}
1353 
1354 update_inode:
1355 	ITIMES(VTOI(dvp));
1356 	if (uname != NULL) {
1357 		kmem_free(uname, PAGESIZE);
1358 	}
1359 	if (dname != NULL) {
1360 		kmem_free(dname, 1024);
1361 	}
1362 
1363 	return (error);
1364 }
1365 
1366 /* ARGSUSED */
1367 static int32_t
1368 udf_readlink(
1369 	struct vnode *vp,
1370 	struct uio *uiop,
1371 	struct cred *cr,
1372 	caller_context_t *ct)
1373 {
1374 	int32_t error = 0, off, id_len, size, len;
1375 	int8_t *dname = NULL, *uname = NULL;
1376 	struct ud_inode *ip;
1377 	struct fbuf *fbp = NULL;
1378 	struct path_comp *pc;
1379 
1380 	ud_printf("udf_readlink\n");
1381 
1382 	if (vp->v_type != VLNK) {
1383 		return (EINVAL);
1384 	}
1385 
1386 	ip = VTOI(vp);
1387 	size = ip->i_size;
1388 	if (size > PAGESIZE) {
1389 		return (EIO);
1390 	}
1391 
1392 	if (size == 0) {
1393 		return (0);
1394 	}
1395 
1396 	dname = kmem_zalloc(1024, KM_SLEEP);
1397 	uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1398 
1399 	rw_enter(&ip->i_contents, RW_READER);
1400 
1401 	if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) {
1402 		goto end;
1403 	}
1404 
1405 	off = 0;
1406 
1407 	while (off < size) {
1408 		pc = (struct path_comp *)(fbp->fb_addr + off);
1409 		switch (pc->pc_type) {
1410 			case 1 :
1411 				(void) strcpy(uname, ip->i_udf->udf_fsmnt);
1412 				(void) strcat(uname, "/");
1413 				break;
1414 			case 2 :
1415 				if (pc->pc_len != 0) {
1416 					goto end;
1417 				}
1418 				uname[0] = '/';
1419 				uname[1] = '\0';
1420 				break;
1421 			case 3 :
1422 				(void) strcat(uname, "../");
1423 				break;
1424 			case 4 :
1425 				(void) strcat(uname, "./");
1426 				break;
1427 			case 5 :
1428 				if ((error = ud_uncompress(pc->pc_len, &id_len,
1429 				    pc->pc_id, (uint8_t *)dname)) != 0) {
1430 					break;
1431 				}
1432 				dname[id_len] = '\0';
1433 				(void) strcat(uname, dname);
1434 				(void) strcat(uname, "/");
1435 				break;
1436 			default :
1437 				error = EINVAL;
1438 				goto end;
1439 		}
1440 		off += 4 + pc->pc_len;
1441 	}
1442 	len = strlen(uname) - 1;
1443 	if (uname[len] == '/') {
1444 		if (len == 0) {
1445 			/*
1446 			 * special case link to /
1447 			 */
1448 			len = 1;
1449 		} else {
1450 			uname[len] = '\0';
1451 		}
1452 	}
1453 
1454 	error = uiomove(uname, len, UIO_READ, uiop);
1455 
1456 	ITIMES(ip);
1457 
1458 end:
1459 	if (fbp != NULL) {
1460 		fbrelse(fbp, S_OTHER);
1461 	}
1462 	rw_exit(&ip->i_contents);
1463 	if (uname != NULL) {
1464 		kmem_free(uname, PAGESIZE);
1465 	}
1466 	if (dname != NULL) {
1467 		kmem_free(dname, 1024);
1468 	}
1469 	return (error);
1470 }
1471 
1472 /* ARGSUSED */
1473 static int32_t
1474 udf_fsync(
1475 	struct vnode *vp,
1476 	int32_t syncflag,
1477 	struct cred *cr,
1478 	caller_context_t *ct)
1479 {
1480 	int32_t error = 0;
1481 	struct ud_inode *ip = VTOI(vp);
1482 
1483 	ud_printf("udf_fsync\n");
1484 
1485 	rw_enter(&ip->i_contents, RW_WRITER);
1486 	if (!(IS_SWAPVP(vp))) {
1487 		error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */
1488 	}
1489 	if (error == 0) {
1490 		error = ud_sync_indir(ip);
1491 	}
1492 	ITIMES(ip);		/* XXX: is this necessary ??? */
1493 	rw_exit(&ip->i_contents);
1494 
1495 	return (error);
1496 }
1497 
1498 /* ARGSUSED */
1499 static void
1500 udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
1501 {
1502 	ud_printf("udf_iinactive\n");
1503 
1504 	ud_iinactive(VTOI(vp), cr);
1505 }
1506 
1507 /* ARGSUSED */
1508 static int32_t
1509 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1510 {
1511 	struct udf_fid *udfidp;
1512 	struct ud_inode *ip = VTOI(vp);
1513 
1514 	ud_printf("udf_fid\n");
1515 
1516 	if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) {
1517 		fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1518 		return (ENOSPC);
1519 	}
1520 
1521 	udfidp = (struct udf_fid *)fidp;
1522 	bzero((char *)udfidp, sizeof (struct udf_fid));
1523 	rw_enter(&ip->i_contents, RW_READER);
1524 	udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1525 	udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff;
1526 	udfidp->udfid_prn = ip->i_icb_prn;
1527 	udfidp->udfid_icb_lbn = ip->i_icb_block;
1528 	rw_exit(&ip->i_contents);
1529 
1530 	return (0);
1531 }
1532 
1533 /* ARGSUSED2 */
1534 static int
1535 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1536 {
1537 	struct ud_inode *ip = VTOI(vp);
1538 
1539 	ud_printf("udf_rwlock\n");
1540 
1541 	if (write_lock) {
1542 		rw_enter(&ip->i_rwlock, RW_WRITER);
1543 	} else {
1544 		rw_enter(&ip->i_rwlock, RW_READER);
1545 	}
1546 #ifdef	__lock_lint
1547 	rw_exit(&ip->i_rwlock);
1548 #endif
1549 	return (write_lock);
1550 }
1551 
1552 /* ARGSUSED */
1553 static void
1554 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1555 {
1556 	struct ud_inode *ip = VTOI(vp);
1557 
1558 	ud_printf("udf_rwunlock\n");
1559 
1560 #ifdef	__lock_lint
1561 	rw_enter(&ip->i_rwlock, RW_WRITER);
1562 #endif
1563 
1564 	rw_exit(&ip->i_rwlock);
1565 
1566 }
1567 
1568 /* ARGSUSED */
1569 static int32_t
1570 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1571 {
1572 	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
1573 }
1574 
1575 static int32_t
1576 udf_frlock(
1577 	struct vnode *vp,
1578 	int32_t cmd,
1579 	struct flock64 *bfp,
1580 	int32_t flag,
1581 	offset_t offset,
1582 	struct flk_callback *flk_cbp,
1583 	cred_t *cr,
1584 	caller_context_t *ct)
1585 {
1586 	struct ud_inode *ip = VTOI(vp);
1587 
1588 	ud_printf("udf_frlock\n");
1589 
1590 	/*
1591 	 * If file is being mapped, disallow frlock.
1592 	 * XXX I am not holding tlock while checking i_mapcnt because the
1593 	 * current locking strategy drops all locks before calling fs_frlock.
1594 	 * So, mapcnt could change before we enter fs_frlock making is
1595 	 * meaningless to have held tlock in the first place.
1596 	 */
1597 	if ((ip->i_mapcnt > 0) &&
1598 	    (MANDLOCK(vp, ip->i_char))) {
1599 		return (EAGAIN);
1600 	}
1601 
1602 	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1603 }
1604 
1605 /*ARGSUSED6*/
1606 static int32_t
1607 udf_space(
1608 	struct vnode *vp,
1609 	int32_t cmd,
1610 	struct flock64 *bfp,
1611 	int32_t flag,
1612 	offset_t offset,
1613 	cred_t *cr,
1614 	caller_context_t *ct)
1615 {
1616 	int32_t error = 0;
1617 
1618 	ud_printf("udf_space\n");
1619 
1620 	if (cmd != F_FREESP) {
1621 		error =  EINVAL;
1622 	} else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
1623 		error = ud_freesp(vp, bfp, flag, cr);
1624 	}
1625 
1626 	return (error);
1627 }
1628 
/*
 * udf_getpage - return the pages covering <off, off + len) of vp.
 *
 *	protp	if non-NULL, receives the permitted protections; starts as
 *		PROT_ALL and loses PROT_WRITE for non-write access to a file
 *		that has holes
 *	plarr	output page list, NULL-terminated on success; when NULL the
 *		call only triggers read-ahead for each page in the range
 *	plsz	size in bytes of the space available in plarr
 *	rw	access type; S_WRITE/S_CREATE may allocate blocks for holes
 *
 * i_contents is taken here unless the calling thread already owns it
 * (dolock).  Returns ENOSYS for VNOMAP vnodes, EFAULT for a range beyond
 * EOF (rounded up to a page) unless seg is segkmap, or an error from
 * ud_bmap_write()/ud_getpage_miss().  On error every page already locked
 * into plarr is unlocked again.  Access/modify time flags are updated
 * under i_tlock before returning.
 */
/* ARGSUSED */
static int32_t
udf_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint32_t *protp,
	struct page **plarr,
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error, has_holes, beyond_eof, seqmode, dolock;
	int32_t pgsize = PAGESIZE;
	struct udf_vfs *udf_vfsp = ip->i_udf;
	page_t **pl;
	u_offset_t pgoff, eoff, uoff;
	krw_t rwtype;
	caddr_t pgaddr;

	ud_printf("udf_getpage\n");

	uoff = (u_offset_t)off; /* type conversion */
	if (protp) {
		*protp = PROT_ALL;
	}
	if (vp->v_flag & VNOMAP) {
		return (ENOSYS);
	}
	/* Sequential if this request starts where the previous one ended. */
	seqmode = ip->i_nextr == uoff && rw != S_CREATE;

	rwtype = RW_READER;
	/* Only lock if this thread does not already own i_contents. */
	dolock = (rw_owner(&ip->i_contents) != curthread);
retrylock:
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, rwtype);
#else
	if (dolock) {
		rw_enter(&ip->i_contents, rwtype);
	}
#endif

	/*
	 * We may be getting called as a side effect of a bmap using
	 * fbread() when the blocks might be being allocated and the
	 * size has not yet been up'ed.  In this case we want to be
	 * able to return zero pages if we get back UDF_HOLE from
	 * calling bmap for a non write case here.  We also might have
	 * to read some frags from the disk into a page if we are
	 * extending the number of frags for a given lbn in bmap().
	 */
	beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
	if (beyond_eof && seg != segkmap) {
#ifdef	__lock_lint
		rw_exit(&ip->i_contents);
#else
		if (dolock) {
			rw_exit(&ip->i_contents);
		}
#endif
		return (EFAULT);
	}

	/*
	 * Must hold i_contents lock throughout the call to pvn_getpages
	 * since locked pages are returned from each call to ud_getapage.
	 * Must *not* return locked pages and then try for contents lock
	 * due to lock ordering requirements (inode > page)
	 */

	has_holes = ud_bmap_has_holes(ip);

	if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
		int32_t	blk_size, count;
		u_offset_t offset;

		/*
		 * We must acquire the RW_WRITER lock in order to
		 * call bmap_write().
		 */
		if (dolock && rwtype == RW_READER) {
			rwtype = RW_WRITER;

			if (!rw_tryupgrade(&ip->i_contents)) {

				/*
				 * Upgrade failed: drop the lock and start
				 * over, this time entering as writer.
				 */
				rw_exit(&ip->i_contents);

				goto retrylock;
			}
		}

		/*
		 * May be allocating disk blocks for holes here as
		 * a result of mmap faults. write(2) does the bmap_write
		 * in rdip/wrip, not here. We are not dealing with frags
		 * in this case.
		 */
		offset = uoff;
		while ((offset < uoff + len) &&
		    (offset < ip->i_size)) {
			/*
			 * the variable "bnp" is to simplify the expression for
			 * the compiler; * just passing in &bn to bmap_write
			 * causes a compiler "loop"
			 */

			/* One logical block per pass, clipped at i_size. */
			blk_size = udf_vfsp->udf_lbsize;
			if ((offset + blk_size) > ip->i_size) {
				count = ip->i_size - offset;
			} else {
				count = blk_size;
			}
			error = ud_bmap_write(ip, offset, count, 0, cr);
			if (error) {
				goto update_inode;
			}
			offset += count; /* XXX - make this contig */
		}
	}

	/*
	 * Can be a reader from now on.
	 */
#ifdef	__lock_lint
	if (rwtype == RW_WRITER) {
		rw_downgrade(&ip->i_contents);
	}
#else
	if (dolock && rwtype == RW_WRITER) {
		rw_downgrade(&ip->i_contents);
	}
#endif

	/*
	 * We remove PROT_WRITE in cases when the file has UDF holes
	 * because we don't  want to call bmap_read() to check each
	 * page if it is backed with a disk block.
	 */
	if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
		*protp &= ~PROT_WRITE;
	}

	error = 0;

	/*
	 * The loop looks up pages in the range <off, off + len).
	 * For each page, we first check if we should initiate an asynchronous
	 * read ahead before we call page_lookup (we may sleep in page_lookup
	 * for a previously initiated disk read).
	 */
	eoff = (uoff + len);
	for (pgoff = uoff, pgaddr = addr, pl = plarr;
	    pgoff < eoff; /* empty */) {
		page_t	*pp;
		u_offset_t	nextrio;
		se_t	se;

		/* Page creation needs the page exclusively. */
		se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);

		/*
		 * Handle async getpage (faultahead)
		 */
		if (plarr == NULL) {
			ip->i_nextrio = pgoff;
			ud_getpage_ra(vp, pgoff, seg, pgaddr);
			pgoff += pgsize;
			pgaddr += pgsize;
			continue;
		}

		/*
		 * Check if we should initiate read ahead of next cluster.
		 * We call page_exists only when we need to confirm that
		 * we have the current page before we initiate the read ahead.
		 */
		nextrio = ip->i_nextrio;
		if (seqmode &&
		    pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
		    nextrio < ip->i_size && page_exists(vp, pgoff))
			ud_getpage_ra(vp, pgoff, seg, pgaddr);

		if ((pp = page_lookup(vp, pgoff, se)) != NULL) {

			/*
			 * We found the page in the page cache.
			 */
			*pl++ = pp;
			pgoff += pgsize;
			pgaddr += pgsize;
			len -= pgsize;
			plsz -= pgsize;
		} else  {

			/*
			 * We have to create the page, or read it from disk.
			 */
			if (error = ud_getpage_miss(vp, pgoff, len,
			    seg, pgaddr, pl, plsz, rw, seqmode)) {
				goto error_out;
			}

			/*
			 * Step past every page ud_getpage_miss() stored.
			 * If it stored none (*pl is NULL) nothing advances
			 * and the loop retries the lookup for this offset.
			 */
			while (*pl != NULL) {
				pl++;
				pgoff += pgsize;
				pgaddr += pgsize;
				len -= pgsize;
				plsz -= pgsize;
			}
		}
	}

	/*
	 * Return pages up to plsz if they are in the page cache.
	 * We cannot return pages if there is a chance that they are
	 * backed with a UDF hole and rw is S_WRITE or S_CREATE.
	 */
	if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {

		ASSERT((protp == NULL) ||
		    !(has_holes && (*protp & PROT_WRITE)));

		eoff = pgoff + plsz;
		while (pgoff < eoff) {
			page_t		*pp;

			/* Opportunistic only: stop at the first miss. */
			if ((pp = page_lookup_nowait(vp, pgoff,
			    SE_SHARED)) == NULL)
				break;

			*pl++ = pp;
			pgoff += pgsize;
			plsz -= pgsize;
		}
	}

	if (plarr)
		*pl = NULL;			/* Terminate page list */
	/* Remember where this request ended for the next seqmode test. */
	ip->i_nextr = pgoff;

error_out:
	if (error && plarr) {
		/*
		 * Release any pages we have locked.
		 */
		while (pl > &plarr[0])
			page_unlock(*--pl);

		plarr[0] = NULL;
	}

update_inode:
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#else
	if (dolock) {
		rw_exit(&ip->i_contents);
	}
#endif

	/*
	 * If the inode is not already marked for IACC (in rwip() for read)
	 * and the inode is not marked for no access time update (in rwip()
	 * for write) then update the inode access time and mod time now.
	 */
	mutex_enter(&ip->i_tlock);
	if ((ip->i_flag & (IACC | INOACC)) == 0) {
		if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
			ip->i_flag |= IACC;
		}
		if (rw == S_WRITE) {
			ip->i_flag |= IUPD;
		}
		ITIMES_NOLOCK(ip);
	}
	mutex_exit(&ip->i_tlock);

	return (error);
}
1911 
1912 int32_t ud_delay = 1;
1913 
1914 /* ARGSUSED */
1915 static int32_t
1916 udf_putpage(
1917 	struct vnode *vp,
1918 	offset_t off,
1919 	size_t len,
1920 	int32_t flags,
1921 	struct cred *cr,
1922 	caller_context_t *ct)
1923 {
1924 	struct ud_inode *ip;
1925 	int32_t error = 0;
1926 
1927 	ud_printf("udf_putpage\n");
1928 
1929 	ip = VTOI(vp);
1930 #ifdef	__lock_lint
1931 	rw_enter(&ip->i_contents, RW_WRITER);
1932 #endif
1933 
1934 	if (vp->v_count == 0) {
1935 		cmn_err(CE_WARN, "ud_putpage : bad v_count");
1936 		error = EINVAL;
1937 		goto out;
1938 	}
1939 
1940 	if (vp->v_flag & VNOMAP) {
1941 		error = ENOSYS;
1942 		goto out;
1943 	}
1944 
1945 	if (flags & B_ASYNC) {
1946 		if (ud_delay && len &&
1947 		    (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
1948 			mutex_enter(&ip->i_tlock);
1949 
1950 			/*
1951 			 * If nobody stalled, start a new cluster.
1952 			 */
1953 			if (ip->i_delaylen == 0) {
1954 				ip->i_delayoff = off;
1955 				ip->i_delaylen = len;
1956 				mutex_exit(&ip->i_tlock);
1957 				goto out;
1958 			}
1959 
1960 			/*
1961 			 * If we have a full cluster or they are not contig,
1962 			 * then push last cluster and start over.
1963 			 */
1964 			if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
1965 			    ip->i_delayoff + ip->i_delaylen != off) {
1966 				u_offset_t doff;
1967 				size_t dlen;
1968 
1969 				doff = ip->i_delayoff;
1970 				dlen = ip->i_delaylen;
1971 				ip->i_delayoff = off;
1972 				ip->i_delaylen = len;
1973 				mutex_exit(&ip->i_tlock);
1974 				error = ud_putpages(vp, doff, dlen, flags, cr);
1975 				/* LMXXX - flags are new val, not old */
1976 				goto out;
1977 			}
1978 
1979 			/*
1980 			 * There is something there, it's not full, and
1981 			 * it is contig.
1982 			 */
1983 			ip->i_delaylen += len;
1984 			mutex_exit(&ip->i_tlock);
1985 			goto out;
1986 		}
1987 
1988 		/*
1989 		 * Must have weird flags or we are not clustering.
1990 		 */
1991 	}
1992 
1993 	error = ud_putpages(vp, off, len, flags, cr);
1994 
1995 out:
1996 #ifdef	__lock_lint
1997 	rw_exit(&ip->i_contents);
1998 #endif
1999 	return (error);
2000 }
2001 
2002 /* ARGSUSED */
2003 static int32_t
2004 udf_map(
2005 	struct vnode *vp,
2006 	offset_t off,
2007 	struct as *as,
2008 	caddr_t *addrp,
2009 	size_t len,
2010 	uint8_t prot,
2011 	uint8_t maxprot,
2012 	uint32_t flags,
2013 	struct cred *cr,
2014 	caller_context_t *ct)
2015 {
2016 	struct segvn_crargs vn_a;
2017 	int32_t error = 0;
2018 
2019 	ud_printf("udf_map\n");
2020 
2021 	if (vp->v_flag & VNOMAP) {
2022 		error = ENOSYS;
2023 		goto end;
2024 	}
2025 
2026 	if ((off < (offset_t)0) ||
2027 	    ((off + len) < (offset_t)0)) {
2028 		error = EINVAL;
2029 		goto end;
2030 	}
2031 
2032 	if (vp->v_type != VREG) {
2033 		error = ENODEV;
2034 		goto end;
2035 	}
2036 
2037 	/*
2038 	 * If file is being locked, disallow mapping.
2039 	 */
2040 	if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) {
2041 		error = EAGAIN;
2042 		goto end;
2043 	}
2044 
2045 	as_rangelock(as);
2046 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
2047 	if (error != 0) {
2048 		as_rangeunlock(as);
2049 		goto end;
2050 	}
2051 
2052 	vn_a.vp = vp;
2053 	vn_a.offset = off;
2054 	vn_a.type = flags & MAP_TYPE;
2055 	vn_a.prot = prot;
2056 	vn_a.maxprot = maxprot;
2057 	vn_a.cred = cr;
2058 	vn_a.amp = NULL;
2059 	vn_a.flags = flags & ~MAP_TYPE;
2060 	vn_a.szc = 0;
2061 	vn_a.lgrp_mem_policy_flags = 0;
2062 
2063 	error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a);
2064 	as_rangeunlock(as);
2065 
2066 end:
2067 	return (error);
2068 }
2069 
2070 /* ARGSUSED */
2071 static int32_t
2072 udf_addmap(struct vnode *vp,
2073 	offset_t off,
2074 	struct as *as,
2075 	caddr_t addr,
2076 	size_t len,
2077 	uint8_t prot,
2078 	uint8_t maxprot,
2079 	uint32_t flags,
2080 	struct cred *cr,
2081 	caller_context_t *ct)
2082 {
2083 	struct ud_inode *ip = VTOI(vp);
2084 
2085 	ud_printf("udf_addmap\n");
2086 
2087 	if (vp->v_flag & VNOMAP) {
2088 		return (ENOSYS);
2089 	}
2090 
2091 	mutex_enter(&ip->i_tlock);
2092 	ip->i_mapcnt += btopr(len);
2093 	mutex_exit(&ip->i_tlock);
2094 
2095 	return (0);
2096 }
2097 
2098 /* ARGSUSED */
2099 static int32_t
2100 udf_delmap(
2101 	struct vnode *vp, offset_t off,
2102 	struct as *as,
2103 	caddr_t addr,
2104 	size_t len,
2105 	uint32_t prot,
2106 	uint32_t maxprot,
2107 	uint32_t flags,
2108 	struct cred *cr,
2109 	caller_context_t *ct)
2110 {
2111 	struct ud_inode *ip = VTOI(vp);
2112 
2113 	ud_printf("udf_delmap\n");
2114 
2115 	if (vp->v_flag & VNOMAP) {
2116 		return (ENOSYS);
2117 	}
2118 
2119 	mutex_enter(&ip->i_tlock);
2120 	ip->i_mapcnt -= btopr(len); 	/* Count released mappings */
2121 	ASSERT(ip->i_mapcnt >= 0);
2122 	mutex_exit(&ip->i_tlock);
2123 
2124 	return (0);
2125 }
2126 
2127 /* ARGSUSED */
2128 static int32_t
2129 udf_l_pathconf(
2130 	struct vnode *vp,
2131 	int32_t cmd,
2132 	ulong_t *valp,
2133 	struct cred *cr,
2134 	caller_context_t *ct)
2135 {
2136 	int32_t error = 0;
2137 
2138 	ud_printf("udf_l_pathconf\n");
2139 
2140 	if (cmd == _PC_FILESIZEBITS) {
2141 		/*
2142 		 * udf supports 64 bits as file size
2143 		 * but there are several other restrictions
2144 		 * it only supports 32-bit block numbers and
2145 		 * daddr32_t is only and int32_t so taking these
2146 		 * into account we can stay just as where ufs is
2147 		 */
2148 		*valp = 41;
2149 	} else if (cmd == _PC_TIMESTAMP_RESOLUTION) {
2150 		/* nanosecond timestamp resolution */
2151 		*valp = 1L;
2152 	} else {
2153 		error = fs_pathconf(vp, cmd, valp, cr, ct);
2154 	}
2155 
2156 	return (error);
2157 }
2158 
2159 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0;
2160 #ifndef	__lint
2161 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads))
2162 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes))
2163 #endif
2164 /*
2165  * Assumption is that there will not be a pageio request
2166  * to a enbedded file
2167  */
2168 /* ARGSUSED */
2169 static int32_t
2170 udf_pageio(
2171 	struct vnode *vp,
2172 	struct page *pp,
2173 	u_offset_t io_off,
2174 	size_t io_len,
2175 	int32_t flags,
2176 	struct cred *cr,
2177 	caller_context_t *ct)
2178 {
2179 	daddr_t bn;
2180 	struct buf *bp;
2181 	struct ud_inode *ip = VTOI(vp);
2182 	int32_t dolock, error = 0, contig, multi_io;
2183 	size_t done_len = 0, cur_len = 0;
2184 	page_t *npp = NULL, *opp = NULL, *cpp = pp;
2185 
2186 	if (pp == NULL) {
2187 		return (EINVAL);
2188 	}
2189 
2190 	dolock = (rw_owner(&ip->i_contents) != curthread);
2191 
2192 	/*
2193 	 * We need a better check.  Ideally, we would use another
2194 	 * vnodeops so that hlocked and forcibly unmounted file
2195 	 * systems would return EIO where appropriate and w/o the
2196 	 * need for these checks.
2197 	 */
2198 	if (ip->i_udf == NULL) {
2199 		return (EIO);
2200 	}
2201 
2202 #ifdef	__lock_lint
2203 	rw_enter(&ip->i_contents, RW_READER);
2204 #else
2205 	if (dolock) {
2206 		rw_enter(&ip->i_contents, RW_READER);
2207 	}
2208 #endif
2209 
2210 	/*
2211 	 * Break the io request into chunks, one for each contiguous
2212 	 * stretch of disk blocks in the target file.
2213 	 */
2214 	while (done_len < io_len) {
2215 		ASSERT(cpp);
2216 		bp = NULL;
2217 		contig = 0;
2218 		if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
2219 		    &bn, &contig)) {
2220 			break;
2221 		}
2222 
2223 		if (bn == UDF_HOLE) {   /* No holey swapfiles */
2224 			cmn_err(CE_WARN, "SWAP file has HOLES");
2225 			error = EINVAL;
2226 			break;
2227 		}
2228 
2229 		cur_len = MIN(io_len - done_len, contig);
2230 
2231 		/*
2232 		 * Check if more than one I/O is
2233 		 * required to complete the given
2234 		 * I/O operation
2235 		 */
2236 		if (ip->i_udf->udf_lbsize < PAGESIZE) {
2237 			if (cur_len >= PAGESIZE) {
2238 				multi_io = 0;
2239 				cur_len &= PAGEMASK;
2240 			} else {
2241 				multi_io = 1;
2242 				cur_len = MIN(io_len - done_len, PAGESIZE);
2243 			}
2244 		}
2245 		page_list_break(&cpp, &npp, btop(cur_len));
2246 
2247 		bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags);
2248 		ASSERT(bp != NULL);
2249 
2250 		bp->b_edev = ip->i_dev;
2251 		bp->b_dev = cmpdev(ip->i_dev);
2252 		bp->b_blkno = bn;
2253 		bp->b_un.b_addr = (caddr_t)0;
2254 		bp->b_file = vp;
2255 		bp->b_offset = (offset_t)(io_off + done_len);
2256 
2257 /*
2258  *		ub.ub_pageios.value.ul++;
2259  */
2260 		if (multi_io == 0) {
2261 			(void) bdev_strategy(bp);
2262 		} else {
2263 			error = ud_multi_strat(ip, cpp, bp,
2264 			    (u_offset_t)(io_off + done_len));
2265 			if (error != 0) {
2266 				pageio_done(bp);
2267 				break;
2268 			}
2269 		}
2270 		if (flags & B_READ) {
2271 			ud_pageio_reads++;
2272 		} else {
2273 			ud_pageio_writes++;
2274 		}
2275 
2276 		/*
2277 		 * If the request is not B_ASYNC, wait for i/o to complete
2278 		 * and re-assemble the page list to return to the caller.
2279 		 * If it is B_ASYNC we leave the page list in pieces and
2280 		 * cleanup() will dispose of them.
2281 		 */
2282 		if ((flags & B_ASYNC) == 0) {
2283 			error = biowait(bp);
2284 			pageio_done(bp);
2285 			if (error) {
2286 				break;
2287 			}
2288 			page_list_concat(&opp, &cpp);
2289 		}
2290 		cpp = npp;
2291 		npp = NULL;
2292 		done_len += cur_len;
2293 	}
2294 
2295 	ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len));
2296 	if (error) {
2297 		if (flags & B_ASYNC) {
2298 			/* Cleanup unprocessed parts of list */
2299 			page_list_concat(&cpp, &npp);
2300 			if (flags & B_READ) {
2301 				pvn_read_done(cpp, B_ERROR);
2302 			} else {
2303 				pvn_write_done(cpp, B_ERROR);
2304 			}
2305 		} else {
2306 			/* Re-assemble list and let caller clean up */
2307 			page_list_concat(&opp, &cpp);
2308 			page_list_concat(&opp, &npp);
2309 		}
2310 	}
2311 
2312 #ifdef	__lock_lint
2313 	rw_exit(&ip->i_contents);
2314 #else
2315 	if (dolock) {
2316 		rw_exit(&ip->i_contents);
2317 	}
2318 #endif
2319 	return (error);
2320 }
2321 
2322 
2323 
2324 
2325 /* -------------------- local functions --------------------------- */
2326 
2327 
2328 
2329 int32_t
2330 ud_rdwri(enum uio_rw rw, int32_t ioflag,
2331 	struct ud_inode *ip, caddr_t base, int32_t len,
2332 	offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr)
2333 {
2334 	int32_t error;
2335 	struct uio auio;
2336 	struct iovec aiov;
2337 
2338 	ud_printf("ud_rdwri\n");
2339 
2340 	bzero((caddr_t)&auio, sizeof (uio_t));
2341 	bzero((caddr_t)&aiov, sizeof (iovec_t));
2342 
2343 	aiov.iov_base = base;
2344 	aiov.iov_len = len;
2345 	auio.uio_iov = &aiov;
2346 	auio.uio_iovcnt = 1;
2347 	auio.uio_loffset = offset;
2348 	auio.uio_segflg = (int16_t)seg;
2349 	auio.uio_resid = len;
2350 
2351 	if (rw == UIO_WRITE) {
2352 		auio.uio_fmode = FWRITE;
2353 		auio.uio_extflg = UIO_COPY_DEFAULT;
2354 		auio.uio_llimit = curproc->p_fsz_ctl;
2355 		error = ud_wrip(ip, &auio, ioflag, cr);
2356 	} else {
2357 		auio.uio_fmode = FREAD;
2358 		auio.uio_extflg = UIO_COPY_CACHED;
2359 		auio.uio_llimit = MAXOFFSET_T;
2360 		error = ud_rdip(ip, &auio, ioflag, cr);
2361 	}
2362 
2363 	if (aresid) {
2364 		*aresid = auio.uio_resid;
2365 	} else if (auio.uio_resid) {
2366 		error = EIO;
2367 	}
2368 	return (error);
2369 }
2370 
/*
 * Free behind hacks.  The pager is busted.
 * XXX - need to pass the information down to writedone() in a flag like B_SEQ
 * or B_FREE_IF_TIGHT_ON_MEMORY.
 *
 * NOTE(review): ud_freebehind looks like an on/off switch and ud_smallfile
 * a size threshold in bytes (32K), but neither is referenced in the part
 * of the file reviewed here - confirm against their users.
 */
int32_t ud_freebehind = 1;
int32_t ud_smallfile = 32 * 1024;
2378 
2379 /* ARGSUSED */
2380 int32_t
2381 ud_getpage_miss(struct vnode *vp, u_offset_t off,
2382 	size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
2383 	size_t plsz, enum seg_rw rw, int32_t seq)
2384 {
2385 	struct ud_inode *ip = VTOI(vp);
2386 	int32_t err = 0;
2387 	size_t io_len;
2388 	u_offset_t io_off;
2389 	u_offset_t pgoff;
2390 	page_t *pp;
2391 
2392 	pl[0] = NULL;
2393 
2394 	/*
2395 	 * Figure out whether the page can be created, or must be
2396 	 * read from the disk
2397 	 */
2398 	if (rw == S_CREATE) {
2399 		if ((pp = page_create_va(vp, off,
2400 		    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
2401 			cmn_err(CE_WARN, "ud_getpage_miss: page_create");
2402 			return (EINVAL);
2403 		}
2404 		io_len = PAGESIZE;
2405 	} else {
2406 		pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
2407 		    &io_len, off, PAGESIZE, 0);
2408 
2409 		/*
2410 		 * Some other thread has entered the page.
2411 		 * ud_getpage will retry page_lookup.
2412 		 */
2413 		if (pp == NULL) {
2414 			return (0);
2415 		}
2416 
2417 		/*
2418 		 * Fill the page with as much data as we can from the file.
2419 		 */
2420 		err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
2421 		if (err) {
2422 			pvn_read_done(pp, B_ERROR);
2423 			return (err);
2424 		}
2425 
2426 		/*
2427 		 * XXX ??? ufs has io_len instead of pgoff below
2428 		 */
2429 		ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2430 
2431 		/*
2432 		 * If the file access is sequential, initiate read ahead
2433 		 * of the next cluster.
2434 		 */
2435 		if (seq && ip->i_nextrio < ip->i_size) {
2436 			ud_getpage_ra(vp, off, seg, addr);
2437 		}
2438 	}
2439 
2440 outmiss:
2441 	pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw);
2442 	return (err);
2443 }
2444 
2445 /* ARGSUSED */
2446 void
2447 ud_getpage_ra(struct vnode *vp,
2448 	u_offset_t off, struct seg *seg, caddr_t addr)
2449 {
2450 	page_t *pp;
2451 	size_t io_len;
2452 	struct ud_inode *ip = VTOI(vp);
2453 	u_offset_t io_off = ip->i_nextrio, pgoff;
2454 	caddr_t addr2 = addr + (io_off - off);
2455 	daddr_t bn;
2456 	int32_t contig = 0;
2457 
2458 	/*
2459 	 * Is this test needed?
2460 	 */
2461 
2462 	if (addr2 >= seg->s_base + seg->s_size) {
2463 		return;
2464 	}
2465 
2466 	contig = 0;
2467 	if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) {
2468 		return;
2469 	}
2470 
2471 	pp = pvn_read_kluster(vp, io_off, seg, addr2,
2472 	    &io_off, &io_len, io_off, PAGESIZE, 1);
2473 
2474 	/*
2475 	 * Some other thread has entered the page.
2476 	 * So no read head done here (ie we will have to and wait
2477 	 * for the read when needed).
2478 	 */
2479 
2480 	if (pp == NULL) {
2481 		return;
2482 	}
2483 
2484 	(void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff);
2485 	ip->i_nextrio =  io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2486 }
2487 
2488 int
2489 ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
2490 	uint32_t bflgs, u_offset_t *pg_off)
2491 {
2492 	daddr_t bn;
2493 	struct buf *bp;
2494 	caddr_t kaddr, caddr;
2495 	int32_t error = 0, contig = 0, multi_io = 0;
2496 	int32_t lbsize = ip->i_udf->udf_lbsize;
2497 	int32_t lbmask = ip->i_udf->udf_lbmask;
2498 	uint64_t isize;
2499 
2500 	isize = (ip->i_size + lbmask) & (~lbmask);
2501 	if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2502 
2503 		/*
2504 		 * Embedded file read file_entry
2505 		 * from buffer cache and copy the required
2506 		 * portions
2507 		 */
2508 		bp = ud_bread(ip->i_dev,
2509 		    ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
2510 		if ((bp->b_error == 0) &&
2511 		    (bp->b_resid == 0)) {
2512 
2513 			caddr = bp->b_un.b_addr + ip->i_data_off;
2514 
2515 			/*
2516 			 * mapin to kvm
2517 			 */
2518 			kaddr = (caddr_t)ppmapin(pp,
2519 			    PROT_READ | PROT_WRITE, (caddr_t)-1);
2520 			(void) kcopy(caddr, kaddr, ip->i_size);
2521 
2522 			/*
2523 			 * mapout of kvm
2524 			 */
2525 			ppmapout(kaddr);
2526 		}
2527 		brelse(bp);
2528 		contig = ip->i_size;
2529 	} else {
2530 
2531 		/*
2532 		 * Get the continuous size and block number
2533 		 * at offset "off"
2534 		 */
2535 		if (error = ud_bmap_read(ip, off, &bn, &contig))
2536 			goto out;
2537 		contig = MIN(contig, PAGESIZE);
2538 		contig = (contig + lbmask) & (~lbmask);
2539 
2540 		/*
2541 		 * Zero part of the page which we are not
2542 		 * going to read from the disk.
2543 		 */
2544 
2545 		if (bn == UDF_HOLE) {
2546 
2547 			/*
2548 			 * This is a HOLE. Just zero out
2549 			 * the page
2550 			 */
2551 			if (((off + contig) == isize) ||
2552 			    (contig == PAGESIZE)) {
2553 				pagezero(pp->p_prev, 0, PAGESIZE);
2554 				goto out;
2555 			}
2556 		}
2557 
2558 		if (contig < PAGESIZE) {
2559 			uint64_t count;
2560 
2561 			count = isize - off;
2562 			if (contig != count) {
2563 				multi_io = 1;
2564 				contig = (int32_t)(MIN(count, PAGESIZE));
2565 			} else {
2566 				pagezero(pp->p_prev, contig, PAGESIZE - contig);
2567 			}
2568 		}
2569 
2570 		/*
2571 		 * Get a bp and initialize it
2572 		 */
2573 		bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
2574 		ASSERT(bp != NULL);
2575 
2576 		bp->b_edev = ip->i_dev;
2577 		bp->b_dev = cmpdev(ip->i_dev);
2578 		bp->b_blkno = bn;
2579 		bp->b_un.b_addr = 0;
2580 		bp->b_file = ip->i_vnode;
2581 
2582 		/*
2583 		 * Start I/O
2584 		 */
2585 		if (multi_io == 0) {
2586 
2587 			/*
2588 			 * Single I/O is sufficient for this page
2589 			 */
2590 			(void) bdev_strategy(bp);
2591 		} else {
2592 
2593 			/*
2594 			 * We need to do the I/O in
2595 			 * piece's
2596 			 */
2597 			error = ud_multi_strat(ip, pp, bp, off);
2598 			if (error != 0) {
2599 				goto out;
2600 			}
2601 		}
2602 		if ((bflgs & B_ASYNC) == 0) {
2603 
2604 			/*
2605 			 * Wait for i/o to complete.
2606 			 */
2607 
2608 			error = biowait(bp);
2609 			pageio_done(bp);
2610 			if (error) {
2611 				goto out;
2612 			}
2613 		}
2614 	}
2615 	if ((off + contig) >= ip->i_size) {
2616 		contig = ip->i_size - off;
2617 	}
2618 
2619 out:
2620 	*pg_off = contig;
2621 	return (error);
2622 }
2623 
2624 int32_t
2625 ud_putpages(struct vnode *vp, offset_t off,
2626 	size_t len, int32_t flags, struct cred *cr)
2627 {
2628 	struct ud_inode *ip;
2629 	page_t *pp;
2630 	u_offset_t io_off;
2631 	size_t io_len;
2632 	u_offset_t eoff;
2633 	int32_t err = 0;
2634 	int32_t dolock;
2635 
2636 	ud_printf("ud_putpages\n");
2637 
2638 	if (vp->v_count == 0) {
2639 		cmn_err(CE_WARN, "ud_putpages: bad v_count");
2640 		return (EINVAL);
2641 	}
2642 
2643 	ip = VTOI(vp);
2644 
2645 	/*
2646 	 * Acquire the readers/write inode lock before locking
2647 	 * any pages in this inode.
2648 	 * The inode lock is held during i/o.
2649 	 */
2650 	if (len == 0) {
2651 		mutex_enter(&ip->i_tlock);
2652 		ip->i_delayoff = ip->i_delaylen = 0;
2653 		mutex_exit(&ip->i_tlock);
2654 	}
2655 #ifdef	__lock_lint
2656 	rw_enter(&ip->i_contents, RW_READER);
2657 #else
2658 	dolock = (rw_owner(&ip->i_contents) != curthread);
2659 	if (dolock) {
2660 		rw_enter(&ip->i_contents, RW_READER);
2661 	}
2662 #endif
2663 
2664 	if (!vn_has_cached_data(vp)) {
2665 #ifdef	__lock_lint
2666 		rw_exit(&ip->i_contents);
2667 #else
2668 		if (dolock) {
2669 			rw_exit(&ip->i_contents);
2670 		}
2671 #endif
2672 		return (0);
2673 	}
2674 
2675 	if (len == 0) {
2676 		/*
2677 		 * Search the entire vp list for pages >= off.
2678 		 */
2679 		err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
2680 		    flags, cr);
2681 	} else {
2682 		/*
2683 		 * Loop over all offsets in the range looking for
2684 		 * pages to deal with.
2685 		 */
2686 		if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
2687 			eoff = MIN(off + len, eoff);
2688 		} else {
2689 			eoff = off + len;
2690 		}
2691 
2692 		for (io_off = off; io_off < eoff; io_off += io_len) {
2693 			/*
2694 			 * If we are not invalidating, synchronously
2695 			 * freeing or writing pages, use the routine
2696 			 * page_lookup_nowait() to prevent reclaiming
2697 			 * them from the free list.
2698 			 */
2699 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
2700 				pp = page_lookup(vp, io_off,
2701 				    (flags & (B_INVAL | B_FREE)) ?
2702 				    SE_EXCL : SE_SHARED);
2703 			} else {
2704 				pp = page_lookup_nowait(vp, io_off,
2705 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
2706 			}
2707 
2708 			if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
2709 				io_len = PAGESIZE;
2710 			} else {
2711 
2712 				err = ud_putapage(vp, pp,
2713 				    &io_off, &io_len, flags, cr);
2714 				if (err != 0) {
2715 					break;
2716 				}
2717 				/*
2718 				 * "io_off" and "io_len" are returned as
2719 				 * the range of pages we actually wrote.
2720 				 * This allows us to skip ahead more quickly
2721 				 * since several pages may've been dealt
2722 				 * with by this iteration of the loop.
2723 				 */
2724 			}
2725 		}
2726 	}
2727 	if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
2728 		/*
2729 		 * We have just sync'ed back all the pages on
2730 		 * the inode, turn off the IMODTIME flag.
2731 		 */
2732 		mutex_enter(&ip->i_tlock);
2733 		ip->i_flag &= ~IMODTIME;
2734 		mutex_exit(&ip->i_tlock);
2735 	}
2736 #ifdef	__lock_lint
2737 	rw_exit(&ip->i_contents);
2738 #else
2739 	if (dolock) {
2740 		rw_exit(&ip->i_contents);
2741 	}
2742 #endif
2743 	return (err);
2744 }
2745 
2746 /* ARGSUSED */
2747 int32_t
2748 ud_putapage(struct vnode *vp,
2749 	page_t *pp, u_offset_t *offp,
2750 	size_t *lenp, int32_t flags, struct cred *cr)
2751 {
2752 	daddr_t bn;
2753 	size_t io_len;
2754 	struct ud_inode *ip;
2755 	int32_t error = 0, contig, multi_io = 0;
2756 	struct udf_vfs *udf_vfsp;
2757 	u_offset_t off, io_off;
2758 	caddr_t kaddr, caddr;
2759 	struct buf *bp = NULL;
2760 	int32_t lbmask;
2761 	uint64_t isize;
2762 	int32_t crc_len;
2763 	struct file_entry *fe;
2764 
2765 	ud_printf("ud_putapage\n");
2766 
2767 	ip = VTOI(vp);
2768 	ASSERT(ip);
2769 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
2770 	lbmask = ip->i_udf->udf_lbmask;
2771 	isize = (ip->i_size + lbmask) & (~lbmask);
2772 
2773 	udf_vfsp = ip->i_udf;
2774 	ASSERT(udf_vfsp->udf_flags & UDF_FL_RW);
2775 
2776 	/*
2777 	 * If the modified time on the inode has not already been
2778 	 * set elsewhere (e.g. for write/setattr) we set the time now.
2779 	 * This gives us approximate modified times for mmap'ed files
2780 	 * which are modified via stores in the user address space.
2781 	 */
2782 	if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) {
2783 		mutex_enter(&ip->i_tlock);
2784 		ip->i_flag |= IUPD;
2785 		ITIMES_NOLOCK(ip);
2786 		mutex_exit(&ip->i_tlock);
2787 	}
2788 
2789 
2790 	/*
2791 	 * Align the request to a block boundry (for old file systems),
2792 	 * and go ask bmap() how contiguous things are for this file.
2793 	 */
2794 	off = pp->p_offset & ~(offset_t)lbmask;
2795 				/* block align it */
2796 
2797 
2798 	if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2799 		ASSERT(ip->i_size <= ip->i_max_emb);
2800 
2801 		pp = pvn_write_kluster(vp, pp, &io_off,
2802 		    &io_len, off, PAGESIZE, flags);
2803 		if (io_len == 0) {
2804 			io_len = PAGESIZE;
2805 		}
2806 
2807 		bp = ud_bread(ip->i_dev,
2808 		    ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
2809 		    udf_vfsp->udf_lbsize);
2810 		fe = (struct file_entry *)bp->b_un.b_addr;
2811 		if ((bp->b_flags & B_ERROR) ||
2812 		    (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
2813 		    ip->i_icb_block,
2814 		    1, udf_vfsp->udf_lbsize) != 0)) {
2815 			if (pp != NULL)
2816 				pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2817 			if (bp->b_flags & B_ERROR) {
2818 				error = EIO;
2819 			} else {
2820 				error = EINVAL;
2821 			}
2822 			brelse(bp);
2823 			return (error);
2824 		}
2825 		if ((bp->b_error == 0) &&
2826 		    (bp->b_resid == 0)) {
2827 
2828 			caddr = bp->b_un.b_addr + ip->i_data_off;
2829 			kaddr = (caddr_t)ppmapin(pp,
2830 			    PROT_READ | PROT_WRITE, (caddr_t)-1);
2831 			(void) kcopy(kaddr, caddr, ip->i_size);
2832 			ppmapout(kaddr);
2833 		}
2834 		crc_len = ((uint32_t)&((struct file_entry *)0)->fe_spec) +
2835 		    SWAP_32(fe->fe_len_ear);
2836 		crc_len += ip->i_size;
2837 		ud_make_tag(ip->i_udf, &fe->fe_tag,
2838 		    UD_FILE_ENTRY, ip->i_icb_block, crc_len);
2839 
2840 		bwrite(bp);
2841 
2842 		if (flags & B_ASYNC) {
2843 			pvn_write_done(pp, flags);
2844 		}
2845 		contig = ip->i_size;
2846 	} else {
2847 
2848 		if (error = ud_bmap_read(ip, off, &bn, &contig)) {
2849 			goto out;
2850 		}
2851 		contig = MIN(contig, PAGESIZE);
2852 		contig = (contig + lbmask) & (~lbmask);
2853 
2854 		if (contig < PAGESIZE) {
2855 			uint64_t count;
2856 
2857 			count = isize - off;
2858 			if (contig != count) {
2859 				multi_io = 1;
2860 				contig = (int32_t)(MIN(count, PAGESIZE));
2861 			}
2862 		}
2863 
2864 		if ((off + contig) > isize) {
2865 			contig = isize - off;
2866 		}
2867 
2868 		if (contig > PAGESIZE) {
2869 			if (contig & PAGEOFFSET) {
2870 				contig &= PAGEMASK;
2871 			}
2872 		}
2873 
2874 		pp = pvn_write_kluster(vp, pp, &io_off,
2875 		    &io_len, off, contig, flags);
2876 		if (io_len == 0) {
2877 			io_len = PAGESIZE;
2878 		}
2879 
2880 		bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags);
2881 		ASSERT(bp != NULL);
2882 
2883 		bp->b_edev = ip->i_dev;
2884 		bp->b_dev = cmpdev(ip->i_dev);
2885 		bp->b_blkno = bn;
2886 		bp->b_un.b_addr = 0;
2887 		bp->b_file = vp;
2888 		bp->b_offset = (offset_t)off;
2889 
2890 
2891 		/*
2892 		 * write throttle
2893 		 */
2894 		ASSERT(bp->b_iodone == NULL);
2895 		bp->b_iodone = ud_iodone;
2896 		mutex_enter(&ip->i_tlock);
2897 		ip->i_writes += bp->b_bcount;
2898 		mutex_exit(&ip->i_tlock);
2899 
2900 		if (multi_io == 0) {
2901 
2902 			(void) bdev_strategy(bp);
2903 		} else {
2904 			error = ud_multi_strat(ip, pp, bp, off);
2905 			if (error != 0) {
2906 				goto out;
2907 			}
2908 		}
2909 
2910 		if ((flags & B_ASYNC) == 0) {
2911 			/*
2912 			 * Wait for i/o to complete.
2913 			 */
2914 			error = biowait(bp);
2915 			pageio_done(bp);
2916 		}
2917 	}
2918 
2919 	if ((flags & B_ASYNC) == 0) {
2920 		pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
2921 	}
2922 
2923 	pp = NULL;
2924 
2925 out:
2926 	if (error != 0 && pp != NULL) {
2927 		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2928 	}
2929 
2930 	if (offp) {
2931 		*offp = io_off;
2932 	}
2933 	if (lenp) {
2934 		*lenp = io_len;
2935 	}
2936 
2937 	return (error);
2938 }
2939 
2940 
2941 int32_t
2942 ud_iodone(struct buf *bp)
2943 {
2944 	struct ud_inode *ip;
2945 
2946 	ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ));
2947 
2948 	bp->b_iodone = NULL;
2949 
2950 	ip = VTOI(bp->b_pages->p_vnode);
2951 
2952 	mutex_enter(&ip->i_tlock);
2953 	if (ip->i_writes >= ud_LW) {
2954 		if ((ip->i_writes -= bp->b_bcount) <= ud_LW) {
2955 			if (ud_WRITES) {
2956 				cv_broadcast(&ip->i_wrcv); /* wake all up */
2957 			}
2958 		}
2959 	} else {
2960 		ip->i_writes -= bp->b_bcount;
2961 	}
2962 	mutex_exit(&ip->i_tlock);
2963 	iodone(bp);
2964 	return (0);
2965 }
2966 
2967 /* ARGSUSED3 */
2968 int32_t
2969 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
2970 {
2971 	struct vnode *vp;
2972 	struct udf_vfs *udf_vfsp;
2973 	krw_t rwtype;
2974 	caddr_t base;
2975 	uint32_t flags;
2976 	int32_t error, n, on, mapon, dofree;
2977 	u_offset_t off;
2978 	long oresid = uio->uio_resid;
2979 
2980 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
2981 	if ((ip->i_type != VREG) &&
2982 	    (ip->i_type != VDIR) &&
2983 	    (ip->i_type != VLNK)) {
2984 		return (EIO);
2985 	}
2986 
2987 	if (uio->uio_loffset > MAXOFFSET_T) {
2988 		return (0);
2989 	}
2990 
2991 	if ((uio->uio_loffset < (offset_t)0) ||
2992 	    ((uio->uio_loffset + uio->uio_resid) < 0)) {
2993 		return (EINVAL);
2994 	}
2995 	if (uio->uio_resid == 0) {
2996 		return (0);
2997 	}
2998 
2999 	vp = ITOV(ip);
3000 	udf_vfsp = ip->i_udf;
3001 	mutex_enter(&ip->i_tlock);
3002 	ip->i_flag |= IACC;
3003 	mutex_exit(&ip->i_tlock);
3004 
3005 	rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER);
3006 
3007 	do {
3008 		offset_t diff;
3009 		u_offset_t uoff = uio->uio_loffset;
3010 		off = uoff & (offset_t)MAXBMASK;
3011 		mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3012 		on = (int)blkoff(udf_vfsp, uoff);
3013 		n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3014 
3015 		diff = ip->i_size - uoff;
3016 
3017 		if (diff <= (offset_t)0) {
3018 			error = 0;
3019 			goto out;
3020 		}
3021 		if (diff < (offset_t)n) {
3022 			n = (int)diff;
3023 		}
3024 		dofree = ud_freebehind &&
3025 		    ip->i_nextr == (off & PAGEMASK) &&
3026 		    off > ud_smallfile;
3027 
3028 #ifndef	__lock_lint
3029 		if (rwtype == RW_READER) {
3030 			rw_exit(&ip->i_contents);
3031 		}
3032 #endif
3033 
3034 		base = segmap_getmapflt(segkmap, vp, (off + mapon),
3035 		    (uint32_t)n, 1, S_READ);
3036 		error = uiomove(base + mapon, (long)n, UIO_READ, uio);
3037 
3038 		flags = 0;
3039 		if (!error) {
3040 			/*
3041 			 * If read a whole block, or read to eof,
3042 			 * won't need this buffer again soon.
3043 			 */
3044 			if (n + on == MAXBSIZE && ud_freebehind && dofree &&
3045 			    freemem < lotsfree + pages_before_pager) {
3046 				flags = SM_FREE | SM_DONTNEED |SM_ASYNC;
3047 			}
3048 			/*
3049 			 * In POSIX SYNC (FSYNC and FDSYNC) read mode,
3050 			 * we want to make sure that the page which has
3051 			 * been read, is written on disk if it is dirty.
3052 			 * And corresponding indirect blocks should also
3053 			 * be flushed out.
3054 			 */
3055 			if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) {
3056 				flags &= ~SM_ASYNC;
3057 				flags |= SM_WRITE;
3058 			}
3059 			error = segmap_release(segkmap, base, flags);
3060 		} else    {
3061 			(void) segmap_release(segkmap, base, flags);
3062 		}
3063 
3064 #ifndef __lock_lint
3065 		if (rwtype == RW_READER) {
3066 			rw_enter(&ip->i_contents, rwtype);
3067 		}
3068 #endif
3069 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
3070 out:
3071 	/*
3072 	 * Inode is updated according to this table if FRSYNC is set.
3073 	 *
3074 	 *	FSYNC	FDSYNC(posix.4)
3075 	 *	--------------------------
3076 	 *	always	IATTCHG|IBDWRITE
3077 	 */
3078 	if (ioflag & FRSYNC) {
3079 		if ((ioflag & FSYNC) ||
3080 		    ((ioflag & FDSYNC) &&
3081 		    (ip->i_flag & (IATTCHG|IBDWRITE)))) {
3082 		rw_exit(&ip->i_contents);
3083 		rw_enter(&ip->i_contents, RW_WRITER);
3084 		ud_iupdat(ip, 1);
3085 		}
3086 	}
3087 	/*
3088 	 * If we've already done a partial read, terminate
3089 	 * the read but return no error.
3090 	 */
3091 	if (oresid != uio->uio_resid) {
3092 		error = 0;
3093 	}
3094 	ITIMES(ip);
3095 
3096 	return (error);
3097 }
3098 
3099 int32_t
3100 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
3101 {
3102 	caddr_t base;
3103 	struct vnode *vp;
3104 	struct udf_vfs *udf_vfsp;
3105 	uint32_t flags;
3106 	int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0;
3107 	int32_t pagecreate, newpage;
3108 	uint64_t old_i_size;
3109 	u_offset_t off;
3110 	long start_resid = uio->uio_resid, premove_resid;
3111 	rlim64_t limit = uio->uio_limit;
3112 
3113 
3114 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
3115 	if ((ip->i_type != VREG) &&
3116 	    (ip->i_type != VDIR) &&
3117 	    (ip->i_type != VLNK)) {
3118 		return (EIO);
3119 	}
3120 
3121 	if (uio->uio_loffset >= MAXOFFSET_T) {
3122 		return (EFBIG);
3123 	}
3124 	/*
3125 	 * see udf_l_pathconf
3126 	 */
3127 	if (limit > (((uint64_t)1 << 40) - 1)) {
3128 		limit = ((uint64_t)1 << 40) - 1;
3129 	}
3130 	if (uio->uio_loffset >= limit) {
3131 		proc_t *p = ttoproc(curthread);
3132 
3133 		mutex_enter(&p->p_lock);
3134 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
3135 		    p, RCA_UNSAFE_SIGINFO);
3136 		mutex_exit(&p->p_lock);
3137 		return (EFBIG);
3138 	}
3139 	if ((uio->uio_loffset < (offset_t)0) ||
3140 	    ((uio->uio_loffset + uio->uio_resid) < 0)) {
3141 		return (EINVAL);
3142 	}
3143 	if (uio->uio_resid == 0) {
3144 		return (0);
3145 	}
3146 
3147 	mutex_enter(&ip->i_tlock);
3148 	ip->i_flag |= INOACC;
3149 
3150 	if (ioflag & (FSYNC | FDSYNC)) {
3151 		ip->i_flag |= ISYNC;
3152 		iupdat_flag = 1;
3153 	}
3154 	mutex_exit(&ip->i_tlock);
3155 
3156 	udf_vfsp = ip->i_udf;
3157 	vp = ITOV(ip);
3158 
3159 	do {
3160 		u_offset_t uoff = uio->uio_loffset;
3161 		off = uoff & (offset_t)MAXBMASK;
3162 		mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3163 		on = (int)blkoff(udf_vfsp, uoff);
3164 		n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3165 
3166 		if (ip->i_type == VREG && uoff + n >= limit) {
3167 			if (uoff >= limit) {
3168 				error = EFBIG;
3169 				goto out;
3170 			}
3171 			n = (int)(limit - (rlim64_t)uoff);
3172 		}
3173 		if (uoff + n > ip->i_size) {
3174 			/*
3175 			 * We are extending the length of the file.
3176 			 * bmap is used so that we are sure that
3177 			 * if we need to allocate new blocks, that it
3178 			 * is done here before we up the file size.
3179 			 */
3180 			error = ud_bmap_write(ip, uoff,
3181 			    (int)(on + n), mapon == 0, cr);
3182 			if (error) {
3183 				break;
3184 			}
3185 			i_size_changed = 1;
3186 			old_i_size = ip->i_size;
3187 			ip->i_size = uoff + n;
3188 			/*
3189 			 * If we are writing from the beginning of
3190 			 * the mapping, we can just create the
3191 			 * pages without having to read them.
3192 			 */
3193 			pagecreate = (mapon == 0);
3194 		} else if (n == MAXBSIZE) {
3195 			/*
3196 			 * Going to do a whole mappings worth,
3197 			 * so we can just create the pages w/o
3198 			 * having to read them in.  But before
3199 			 * we do that, we need to make sure any
3200 			 * needed blocks are allocated first.
3201 			 */
3202 			error = ud_bmap_write(ip, uoff,
3203 			    (int)(on + n), 1, cr);
3204 			if (error) {
3205 				break;
3206 			}
3207 			pagecreate = 1;
3208 		} else {
3209 			pagecreate = 0;
3210 		}
3211 
3212 		rw_exit(&ip->i_contents);
3213 
3214 		/*
3215 		 * Touch the page and fault it in if it is not in
3216 		 * core before segmap_getmapflt can lock it. This
3217 		 * is to avoid the deadlock if the buffer is mapped
3218 		 * to the same file through mmap which we want to
3219 		 * write to.
3220 		 */
3221 		uio_prefaultpages((long)n, uio);
3222 
3223 		base = segmap_getmapflt(segkmap, vp, (off + mapon),
3224 		    (uint32_t)n, !pagecreate, S_WRITE);
3225 
3226 		/*
3227 		 * segmap_pagecreate() returns 1 if it calls
3228 		 * page_create_va() to allocate any pages.
3229 		 */
3230 		newpage = 0;
3231 		if (pagecreate) {
3232 			newpage = segmap_pagecreate(segkmap, base,
3233 			    (size_t)n, 0);
3234 		}
3235 
3236 		premove_resid = uio->uio_resid;
3237 		error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
3238 
3239 		if (pagecreate &&
3240 		    uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
3241 			/*
3242 			 * We created pages w/o initializing them completely,
3243 			 * thus we need to zero the part that wasn't set up.
3244 			 * This happens on most EOF write cases and if
3245 			 * we had some sort of error during the uiomove.
3246 			 */
3247 			int nzero, nmoved;
3248 
3249 			nmoved = (int)(uio->uio_loffset - (off + mapon));
3250 			ASSERT(nmoved >= 0 && nmoved <= n);
3251 			nzero = roundup(on + n, PAGESIZE) - nmoved;
3252 			ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE);
3253 			(void) kzero(base + mapon + nmoved, (uint32_t)nzero);
3254 		}
3255 
3256 		/*
3257 		 * Unlock the pages allocated by page_create_va()
3258 		 * in segmap_pagecreate()
3259 		 */
3260 		if (newpage) {
3261 			segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
3262 		}
3263 
3264 		if (error) {
3265 			/*
3266 			 * If we failed on a write, we may have already
3267 			 * allocated file blocks as well as pages.  It's
3268 			 * hard to undo the block allocation, but we must
3269 			 * be sure to invalidate any pages that may have
3270 			 * been allocated.
3271 			 */
3272 			(void) segmap_release(segkmap, base, SM_INVAL);
3273 		} else {
3274 			flags = 0;
3275 			/*
3276 			 * Force write back for synchronous write cases.
3277 			 */
3278 			if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) {
3279 				/*
3280 				 * If the sticky bit is set but the
3281 				 * execute bit is not set, we do a
3282 				 * synchronous write back and free
3283 				 * the page when done.  We set up swap
3284 				 * files to be handled this way to
3285 				 * prevent servers from keeping around
3286 				 * the client's swap pages too long.
3287 				 * XXX - there ought to be a better way.
3288 				 */
3289 				if (IS_SWAPVP(vp)) {
3290 					flags = SM_WRITE | SM_FREE |
3291 					    SM_DONTNEED;
3292 					iupdat_flag = 0;
3293 				} else {
3294 					flags = SM_WRITE;
3295 				}
3296 			} else if (((mapon + n) == MAXBSIZE) ||
3297 			    IS_SWAPVP(vp)) {
3298 				/*
3299 				 * Have written a whole block.
3300 				 * Start an asynchronous write and
3301 				 * mark the buffer to indicate that
3302 				 * it won't be needed again soon.
3303 				 */
3304 				flags = SM_WRITE |SM_ASYNC | SM_DONTNEED;
3305 			}
3306 			error = segmap_release(segkmap, base, flags);
3307 
3308 			/*
3309 			 * If the operation failed and is synchronous,
3310 			 * then we need to unwind what uiomove() last
3311 			 * did so we can potentially return an error to
3312 			 * the caller.  If this write operation was
3313 			 * done in two pieces and the first succeeded,
3314 			 * then we won't return an error for the second
3315 			 * piece that failed.  However, we only want to
3316 			 * return a resid value that reflects what was
3317 			 * really done.
3318 			 *
3319 			 * Failures for non-synchronous operations can
3320 			 * be ignored since the page subsystem will
3321 			 * retry the operation until it succeeds or the
3322 			 * file system is unmounted.
3323 			 */
3324 			if (error) {
3325 				if ((ioflag & (FSYNC | FDSYNC)) ||
3326 				    ip->i_type == VDIR) {
3327 					uio->uio_resid = premove_resid;
3328 				} else {
3329 					error = 0;
3330 				}
3331 			}
3332 		}
3333 
3334 		/*
3335 		 * Re-acquire contents lock.
3336 		 */
3337 		rw_enter(&ip->i_contents, RW_WRITER);
3338 		/*
3339 		 * If the uiomove() failed or if a synchronous
3340 		 * page push failed, fix up i_size.
3341 		 */
3342 		if (error) {
3343 			if (i_size_changed) {
3344 				/*
3345 				 * The uiomove failed, and we
3346 				 * allocated blocks,so get rid
3347 				 * of them.
3348 				 */
3349 				(void) ud_itrunc(ip, old_i_size, 0, cr);
3350 			}
3351 		} else {
3352 			/*
3353 			 * XXX - Can this be out of the loop?
3354 			 */
3355 			ip->i_flag |= IUPD | ICHG;
3356 			if (i_size_changed) {
3357 				ip->i_flag |= IATTCHG;
3358 			}
3359 			if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
3360 			    (IEXEC >> 10))) != 0 &&
3361 			    (ip->i_char & (ISUID | ISGID)) != 0 &&
3362 			    secpolicy_vnode_setid_retain(cr,
3363 			    (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
3364 				/*
3365 				 * Clear Set-UID & Set-GID bits on
3366 				 * successful write if not privileged
3367 				 * and at least one of the execute bits
3368 				 * is set.  If we always clear Set-GID,
3369 				 * mandatory file and record locking is
3370 				 * unuseable.
3371 				 */
3372 				ip->i_char &= ~(ISUID | ISGID);
3373 			}
3374 		}
3375 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
3376 
3377 out:
3378 	/*
3379 	 * Inode is updated according to this table -
3380 	 *
3381 	 *	FSYNC	FDSYNC(posix.4)
3382 	 *	--------------------------
3383 	 *	always@	IATTCHG|IBDWRITE
3384 	 *
3385 	 * @ -  If we are doing synchronous write the only time we should
3386 	 *	not be sync'ing the ip here is if we have the stickyhack
3387 	 *	activated, the file is marked with the sticky bit and
3388 	 *	no exec bit, the file length has not been changed and
3389 	 *	no new blocks have been allocated during this write.
3390 	 */
3391 	if ((ip->i_flag & ISYNC) != 0) {
3392 		/*
3393 		 * we have eliminated nosync
3394 		 */
3395 		if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
3396 		    ((ioflag & FSYNC) && iupdat_flag)) {
3397 			ud_iupdat(ip, 1);
3398 		}
3399 	}
3400 
3401 	/*
3402 	 * If we've already done a partial-write, terminate
3403 	 * the write but return no error.
3404 	 */
3405 	if (start_resid != uio->uio_resid) {
3406 		error = 0;
3407 	}
3408 	ip->i_flag &= ~(INOACC | ISYNC);
3409 	ITIMES_NOLOCK(ip);
3410 
3411 	return (error);
3412 }
3413 
3414 int32_t
3415 ud_multi_strat(struct ud_inode *ip,
3416 	page_t *pp, struct buf *bp, u_offset_t start)
3417 {
3418 	daddr_t bn;
3419 	int32_t error = 0, io_count, contig, alloc_sz, i;
3420 	uint32_t io_off;
3421 	mio_master_t *mm = NULL;
3422 	mio_slave_t *ms = NULL;
3423 	struct buf *rbp;
3424 
3425 	ASSERT(!(start & PAGEOFFSET));
3426 
3427 	/*
3428 	 * Figure out how many buffers to allocate
3429 	 */
3430 	io_count = 0;
3431 	for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3432 		contig = 0;
3433 		if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
3434 		    &bn, &contig)) {
3435 			goto end;
3436 		}
3437 		if (contig == 0) {
3438 			goto end;
3439 		}
3440 		contig = MIN(contig, PAGESIZE - io_off);
3441 		if (bn != UDF_HOLE) {
3442 			io_count ++;
3443 		} else {
3444 			/*
3445 			 * HOLE
3446 			 */
3447 			if (bp->b_flags & B_READ) {
3448 
3449 				/*
3450 				 * This is a hole and is read
3451 				 * it should be filled with 0's
3452 				 */
3453 				pagezero(pp, io_off, contig);
3454 			}
3455 		}
3456 	}
3457 
3458 
3459 	if (io_count != 0) {
3460 
3461 		/*
3462 		 * Allocate memory for all the
3463 		 * required number of buffers
3464 		 */
3465 		alloc_sz = sizeof (mio_master_t) +
3466 		    (sizeof (mio_slave_t) * io_count);
3467 		mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
3468 		if (mm == NULL) {
3469 			error = ENOMEM;
3470 			goto end;
3471 		}
3472 
3473 		/*
3474 		 * initialize master
3475 		 */
3476 		mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL);
3477 		mm->mm_size = alloc_sz;
3478 		mm->mm_bp = bp;
3479 		mm->mm_resid = 0;
3480 		mm->mm_error = 0;
3481 		mm->mm_index = master_index++;
3482 
3483 		ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3484 
3485 		/*
3486 		 * Initialize buffers
3487 		 */
3488 		io_count = 0;
3489 		for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3490 			contig = 0;
3491 			if (error = ud_bmap_read(ip,
3492 			    (u_offset_t)(start + io_off),
3493 			    &bn, &contig)) {
3494 				goto end;
3495 			}
3496 			ASSERT(contig);
3497 			if ((io_off + contig) > bp->b_bcount) {
3498 				contig = bp->b_bcount - io_off;
3499 			}
3500 			if (bn != UDF_HOLE) {
3501 				/*
3502 				 * Clone the buffer
3503 				 * and prepare to start I/O
3504 				 */
3505 				ms->ms_ptr = mm;
3506 				bioinit(&ms->ms_buf);
3507 				rbp = bioclone(bp, io_off, (size_t)contig,
3508 				    bp->b_edev, bn, ud_slave_done,
3509 				    &ms->ms_buf, KM_NOSLEEP);
3510 				ASSERT(rbp == &ms->ms_buf);
3511 				mm->mm_resid += contig;
3512 				io_count++;
3513 				ms ++;
3514 			}
3515 		}
3516 
3517 		/*
3518 		 * Start I/O's
3519 		 */
3520 		ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3521 		for (i = 0; i < io_count; i++) {
3522 			(void) bdev_strategy(&ms->ms_buf);
3523 			ms ++;
3524 		}
3525 	}
3526 
3527 end:
3528 	if (error != 0) {
3529 		bp->b_flags |= B_ERROR;
3530 		bp->b_error = error;
3531 		if (mm != NULL) {
3532 			mutex_destroy(&mm->mm_mutex);
3533 			kmem_free(mm, mm->mm_size);
3534 		}
3535 	}
3536 	return (error);
3537 }
3538 
3539 int32_t
3540 ud_slave_done(struct buf *bp)
3541 {
3542 	mio_master_t *mm;
3543 	int32_t resid;
3544 
3545 	ASSERT(SEMA_HELD(&bp->b_sem));
3546 	ASSERT((bp->b_flags & B_DONE) == 0);
3547 
3548 	mm = ((mio_slave_t *)bp)->ms_ptr;
3549 
3550 	/*
3551 	 * Propagate error and byte count info from slave struct to
3552 	 * the master struct
3553 	 */
3554 	mutex_enter(&mm->mm_mutex);
3555 	if (bp->b_flags & B_ERROR) {
3556 
3557 		/*
3558 		 * If multiple slave buffers get
3559 		 * error we forget the old errors
3560 		 * this is ok because we any way
3561 		 * cannot return multiple errors
3562 		 */
3563 		mm->mm_error = bp->b_error;
3564 	}
3565 	mm->mm_resid -= bp->b_bcount;
3566 	resid = mm->mm_resid;
3567 	mutex_exit(&mm->mm_mutex);
3568 
3569 	/*
3570 	 * free up the resources allocated to cloned buffers.
3571 	 */
3572 	bp_mapout(bp);
3573 	biofini(bp);
3574 
3575 	if (resid == 0) {
3576 
3577 		/*
3578 		 * This is the last I/O operation
3579 		 * clean up and return the original buffer
3580 		 */
3581 		if (mm->mm_error) {
3582 			mm->mm_bp->b_flags |= B_ERROR;
3583 			mm->mm_bp->b_error = mm->mm_error;
3584 		}
3585 		biodone(mm->mm_bp);
3586 		mutex_destroy(&mm->mm_mutex);
3587 		kmem_free(mm, mm->mm_size);
3588 	}
3589 	return (0);
3590 }
3591