xref: /titanic_52/usr/src/uts/common/fs/udfs/udf_vnops.c (revision 69112edd987c28fa551d4f8d9362a84a45365f17)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/t_lock.h>
29 #include <sys/param.h>
30 #include <sys/time.h>
31 #include <sys/systm.h>
32 #include <sys/sysmacros.h>
33 #include <sys/resource.h>
34 #include <sys/signal.h>
35 #include <sys/cred.h>
36 #include <sys/user.h>
37 #include <sys/buf.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/stat.h>
41 #include <sys/vnode.h>
42 #include <sys/mode.h>
43 #include <sys/proc.h>
44 #include <sys/disp.h>
45 #include <sys/file.h>
46 #include <sys/fcntl.h>
47 #include <sys/flock.h>
48 #include <sys/kmem.h>
49 #include <sys/uio.h>
50 #include <sys/dnlc.h>
51 #include <sys/conf.h>
52 #include <sys/errno.h>
53 #include <sys/mman.h>
54 #include <sys/fbuf.h>
55 #include <sys/pathname.h>
56 #include <sys/debug.h>
57 #include <sys/vmsystm.h>
58 #include <sys/cmn_err.h>
59 #include <sys/dirent.h>
60 #include <sys/errno.h>
61 #include <sys/modctl.h>
62 #include <sys/statvfs.h>
63 #include <sys/mount.h>
64 #include <sys/sunddi.h>
65 #include <sys/bootconf.h>
66 #include <sys/policy.h>
67 
68 #include <vm/hat.h>
69 #include <vm/page.h>
70 #include <vm/pvn.h>
71 #include <vm/as.h>
72 #include <vm/seg.h>
73 #include <vm/seg_map.h>
74 #include <vm/seg_kmem.h>
75 #include <vm/seg_vn.h>
76 #include <vm/rm.h>
77 #include <vm/page.h>
78 #include <sys/swap.h>
79 
80 #include <fs/fs_subr.h>
81 
82 #include <sys/fs/udf_volume.h>
83 #include <sys/fs/udf_inode.h>
84 
85 static int32_t udf_open(struct vnode **,
86 	int32_t, struct cred *, caller_context_t *);
87 static int32_t udf_close(struct vnode *,
88 	int32_t, int32_t, offset_t, struct cred *, caller_context_t *);
89 static int32_t udf_read(struct vnode *,
90 	struct uio *, int32_t, struct cred *, caller_context_t *);
91 static int32_t udf_write(struct vnode *,
92 	struct uio *, int32_t, struct cred *, caller_context_t *);
93 static int32_t udf_ioctl(struct vnode *,
94 	int32_t, intptr_t, int32_t, struct cred *, int32_t *,
95 	caller_context_t *);
96 static int32_t udf_getattr(struct vnode *,
97 	struct vattr *, int32_t, struct cred *, caller_context_t *);
98 static int32_t udf_setattr(struct vnode *,
99 	struct vattr *, int32_t, struct cred *, caller_context_t *);
100 static int32_t udf_access(struct vnode *,
101 	int32_t, int32_t, struct cred *, caller_context_t *);
102 static int32_t udf_lookup(struct vnode *,
103 	char *, struct vnode **, struct pathname *,
104 	int32_t, struct vnode *, struct cred *,
105 	caller_context_t *, int *, pathname_t *);
106 static int32_t udf_create(struct vnode *,
107 	char *, struct vattr *, enum vcexcl,
108 	int32_t, struct vnode **, struct cred *, int32_t,
109 	caller_context_t *, vsecattr_t *);
110 static int32_t udf_remove(struct vnode *,
111 	char *, struct cred *, caller_context_t *, int);
112 static int32_t udf_link(struct vnode *,
113 	struct vnode *, char *, struct cred *, caller_context_t *, int);
114 static int32_t udf_rename(struct vnode *,
115 	char *, struct vnode *, char *, struct cred *, caller_context_t *, int);
116 static int32_t udf_mkdir(struct vnode *,
117 	char *, struct vattr *, struct vnode **, struct cred *,
118 	caller_context_t *, int, vsecattr_t *);
119 static int32_t udf_rmdir(struct vnode *,
120 	char *, struct vnode *, struct cred *, caller_context_t *, int);
121 static int32_t udf_readdir(struct vnode *,
122 	struct uio *, struct cred *, int32_t *, caller_context_t *, int);
123 static int32_t udf_symlink(struct vnode *,
124 	char *, struct vattr *, char *, struct cred *, caller_context_t *, int);
125 static int32_t udf_readlink(struct vnode *,
126 	struct uio *, struct cred *, caller_context_t *);
127 static int32_t udf_fsync(struct vnode *,
128 	int32_t, struct cred *, caller_context_t *);
129 static void udf_inactive(struct vnode *,
130 	struct cred *, caller_context_t *);
131 static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *);
132 static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
133 static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
134 static int32_t udf_seek(struct vnode *, offset_t, offset_t *,
135 	caller_context_t *);
136 static int32_t udf_frlock(struct vnode *, int32_t,
137 	struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *,
138 	caller_context_t *);
139 static int32_t udf_space(struct vnode *, int32_t,
140 	struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
141 static int32_t udf_getpage(struct vnode *, offset_t,
142 	size_t, uint32_t *, struct page **, size_t,
143 	struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *);
144 static int32_t udf_putpage(struct vnode *, offset_t,
145 	size_t, int32_t, struct cred *, caller_context_t *);
146 static int32_t udf_map(struct vnode *, offset_t, struct as *,
147 	caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
148 	caller_context_t *);
149 static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
150 	caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
151 	caller_context_t *);
152 static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
153 	caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *,
154 	caller_context_t *);
155 static int32_t udf_l_pathconf(struct vnode *, int32_t,
156 	ulong_t *, struct cred *, caller_context_t *);
157 static int32_t udf_pageio(struct vnode *, struct page *,
158 	u_offset_t, size_t, int32_t, struct cred *, caller_context_t *);
159 
160 int32_t ud_getpage_miss(struct vnode *, u_offset_t,
161 	size_t, struct seg *, caddr_t, page_t *pl[],
162 	size_t, enum seg_rw, int32_t);
163 void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
164 int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
165 int32_t ud_page_fill(struct ud_inode *, page_t *,
166 	u_offset_t, uint32_t, u_offset_t *);
167 int32_t ud_iodone(struct buf *);
168 int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
169 int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
170 int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
171 int32_t ud_slave_done(struct buf *);
172 
/*
 * Structures to control multiple IO operations to get or put pages
 * that are backed by discontiguous blocks. The master struct is
 * a dummy that holds the original bp from pageio_setup. The
 * slave structs hold the working bp's that do the actual IO. Once
 * all the slave IOs complete, the master is processed as if a single
 * IO op has completed.
 */
181 uint32_t master_index = 0;
182 typedef struct mio_master {
183 	kmutex_t	mm_mutex;	/* protect the fields below */
184 	int32_t		mm_size;
185 	buf_t		*mm_bp;		/* original bp */
186 	int32_t		mm_resid;	/* bytes remaining to transfer */
187 	int32_t		mm_error;	/* accumulated error from slaves */
188 	int32_t		mm_index;	/* XXX debugging */
189 } mio_master_t;
190 
191 typedef struct mio_slave {
192 	buf_t		ms_buf;		/* working buffer for this IO chunk */
193 	mio_master_t	*ms_ptr;	/* pointer to master */
194 } mio_slave_t;
195 
196 struct vnodeops *udf_vnodeops;
197 
198 const fs_operation_def_t udf_vnodeops_template[] = {
199 	VOPNAME_OPEN,		{ .vop_open = udf_open },
200 	VOPNAME_CLOSE,		{ .vop_close = udf_close },
201 	VOPNAME_READ,		{ .vop_read = udf_read },
202 	VOPNAME_WRITE,		{ .vop_write = udf_write },
203 	VOPNAME_IOCTL,		{ .vop_ioctl = udf_ioctl },
204 	VOPNAME_GETATTR,	{ .vop_getattr = udf_getattr },
205 	VOPNAME_SETATTR,	{ .vop_setattr = udf_setattr },
206 	VOPNAME_ACCESS,		{ .vop_access = udf_access },
207 	VOPNAME_LOOKUP,		{ .vop_lookup = udf_lookup },
208 	VOPNAME_CREATE,		{ .vop_create = udf_create },
209 	VOPNAME_REMOVE,		{ .vop_remove = udf_remove },
210 	VOPNAME_LINK,		{ .vop_link = udf_link },
211 	VOPNAME_RENAME,		{ .vop_rename = udf_rename },
212 	VOPNAME_MKDIR,		{ .vop_mkdir = udf_mkdir },
213 	VOPNAME_RMDIR,		{ .vop_rmdir = udf_rmdir },
214 	VOPNAME_READDIR,	{ .vop_readdir = udf_readdir },
215 	VOPNAME_SYMLINK,	{ .vop_symlink = udf_symlink },
216 	VOPNAME_READLINK,	{ .vop_readlink = udf_readlink },
217 	VOPNAME_FSYNC,		{ .vop_fsync = udf_fsync },
218 	VOPNAME_INACTIVE,	{ .vop_inactive = udf_inactive },
219 	VOPNAME_FID,		{ .vop_fid = udf_fid },
220 	VOPNAME_RWLOCK,		{ .vop_rwlock = udf_rwlock },
221 	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = udf_rwunlock },
222 	VOPNAME_SEEK,		{ .vop_seek = udf_seek },
223 	VOPNAME_FRLOCK,		{ .vop_frlock = udf_frlock },
224 	VOPNAME_SPACE,		{ .vop_space = udf_space },
225 	VOPNAME_GETPAGE,	{ .vop_getpage = udf_getpage },
226 	VOPNAME_PUTPAGE,	{ .vop_putpage = udf_putpage },
227 	VOPNAME_MAP,		{ .vop_map = udf_map },
228 	VOPNAME_ADDMAP,		{ .vop_addmap = udf_addmap },
229 	VOPNAME_DELMAP,		{ .vop_delmap = udf_delmap },
230 	VOPNAME_PATHCONF,	{ .vop_pathconf = udf_l_pathconf },
231 	VOPNAME_PAGEIO,		{ .vop_pageio = udf_pageio },
232 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
233 	NULL,			NULL
234 };
235 
236 /* ARGSUSED */
237 static int32_t
238 udf_open(
239 	struct vnode **vpp,
240 	int32_t flag,
241 	struct cred *cr,
242 	caller_context_t *ct)
243 {
244 	ud_printf("udf_open\n");
245 
246 	return (0);
247 }
248 
249 /* ARGSUSED */
250 static int32_t
251 udf_close(
252 	struct vnode *vp,
253 	int32_t flag,
254 	int32_t count,
255 	offset_t offset,
256 	struct cred *cr,
257 	caller_context_t *ct)
258 {
259 	struct ud_inode *ip = VTOI(vp);
260 
261 	ud_printf("udf_close\n");
262 
263 	ITIMES(ip);
264 
265 	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
266 	cleanshares(vp, ttoproc(curthread)->p_pid);
267 
268 	/*
269 	 * Push partially filled cluster at last close.
270 	 * ``last close'' is approximated because the dnlc
271 	 * may have a hold on the vnode.
272 	 */
273 	if (vp->v_count <= 2 && vp->v_type != VBAD) {
274 		struct ud_inode *ip = VTOI(vp);
275 		if (ip->i_delaylen) {
276 			(void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
277 			    B_ASYNC | B_FREE, cr);
278 			ip->i_delaylen = 0;
279 		}
280 	}
281 
282 	return (0);
283 }
284 
285 /* ARGSUSED */
286 static int32_t
287 udf_read(
288 	struct vnode *vp,
289 	struct uio *uiop,
290 	int32_t ioflag,
291 	struct cred *cr,
292 	caller_context_t *ct)
293 {
294 	struct ud_inode *ip = VTOI(vp);
295 	int32_t error;
296 
297 	ud_printf("udf_read\n");
298 
299 #ifdef	__lock_lint
300 	rw_enter(&ip->i_rwlock, RW_READER);
301 #endif
302 
303 	ASSERT(RW_READ_HELD(&ip->i_rwlock));
304 
305 	if (MANDLOCK(vp, ip->i_char)) {
306 		/*
307 		 * udf_getattr ends up being called by chklock
308 		 */
309 		error = chklock(vp, FREAD, uiop->uio_loffset,
310 		    uiop->uio_resid, uiop->uio_fmode, ct);
311 		if (error) {
312 			goto end;
313 		}
314 	}
315 
316 	rw_enter(&ip->i_contents, RW_READER);
317 	error = ud_rdip(ip, uiop, ioflag, cr);
318 	rw_exit(&ip->i_contents);
319 
320 end:
321 #ifdef	__lock_lint
322 	rw_exit(&ip->i_rwlock);
323 #endif
324 
325 	return (error);
326 }
327 
328 
/*
 * Write throttling tunables, consulted by udf_write().
 */
int32_t ud_WRITES = 1;		/* non-zero enables write throttling */
int32_t ud_HW = 96 * 1024;	/* writers wait while i_writes exceeds this */
int32_t ud_LW = 64 * 1024;	/* presumably the low-water mark; not used here */
int32_t ud_throttles = 0;	/* number of times a writer had to wait */
333 
334 /* ARGSUSED */
335 static int32_t
336 udf_write(
337 	struct vnode *vp,
338 	struct uio *uiop,
339 	int32_t ioflag,
340 	struct cred *cr,
341 	caller_context_t *ct)
342 {
343 	struct ud_inode *ip = VTOI(vp);
344 	int32_t error = 0;
345 
346 	ud_printf("udf_write\n");
347 
348 #ifdef	__lock_lint
349 	rw_enter(&ip->i_rwlock, RW_WRITER);
350 #endif
351 
352 	ASSERT(RW_WRITE_HELD(&ip->i_rwlock));
353 
354 	if (MANDLOCK(vp, ip->i_char)) {
355 		/*
356 		 * ud_getattr ends up being called by chklock
357 		 */
358 		error = chklock(vp, FWRITE, uiop->uio_loffset,
359 		    uiop->uio_resid, uiop->uio_fmode, ct);
360 		if (error) {
361 			goto end;
362 		}
363 	}
364 	/*
365 	 * Throttle writes.
366 	 */
367 	mutex_enter(&ip->i_tlock);
368 	if (ud_WRITES && (ip->i_writes > ud_HW)) {
369 		while (ip->i_writes > ud_HW) {
370 			ud_throttles++;
371 			cv_wait(&ip->i_wrcv, &ip->i_tlock);
372 		}
373 	}
374 	mutex_exit(&ip->i_tlock);
375 
376 	/*
377 	 * Write to the file
378 	 */
379 	rw_enter(&ip->i_contents, RW_WRITER);
380 	if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
381 		/*
382 		 * In append mode start at end of file.
383 		 */
384 		uiop->uio_loffset = ip->i_size;
385 	}
386 	error = ud_wrip(ip, uiop, ioflag, cr);
387 	rw_exit(&ip->i_contents);
388 
389 end:
390 #ifdef	__lock_lint
391 	rw_exit(&ip->i_rwlock);
392 #endif
393 
394 	return (error);
395 }
396 
397 /* ARGSUSED */
398 static int32_t
399 udf_ioctl(
400 	struct vnode *vp,
401 	int32_t cmd,
402 	intptr_t arg,
403 	int32_t flag,
404 	struct cred *cr,
405 	int32_t *rvalp,
406 	caller_context_t *ct)
407 {
408 	return (ENOTTY);
409 }
410 
411 /* ARGSUSED */
412 static int32_t
413 udf_getattr(
414 	struct vnode *vp,
415 	struct vattr *vap,
416 	int32_t flags,
417 	struct cred *cr,
418 	caller_context_t *ct)
419 {
420 	struct ud_inode *ip = VTOI(vp);
421 
422 	ud_printf("udf_getattr\n");
423 
424 	if (vap->va_mask == AT_SIZE) {
425 		/*
426 		 * for performance, if only the size is requested don't bother
427 		 * with anything else.
428 		 */
429 		vap->va_size = ip->i_size;
430 		return (0);
431 	}
432 
433 	rw_enter(&ip->i_contents, RW_READER);
434 
435 	vap->va_type = vp->v_type;
436 	vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
437 
438 	vap->va_uid = ip->i_uid;
439 	vap->va_gid = ip->i_gid;
440 	vap->va_fsid = ip->i_dev;
441 	vap->va_nodeid = ip->i_icb_lbano;
442 	vap->va_nlink = ip->i_nlink;
443 	vap->va_size = ip->i_size;
444 	vap->va_seq = ip->i_seq;
445 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
446 		vap->va_rdev = ip->i_rdev;
447 	} else {
448 		vap->va_rdev = 0;
449 	}
450 
451 	mutex_enter(&ip->i_tlock);
452 	ITIMES_NOLOCK(ip);	/* mark correct time in inode */
453 	vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
454 	vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
455 	vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
456 	vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
457 	vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
458 	vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
459 	mutex_exit(&ip->i_tlock);
460 
461 	switch (ip->i_type) {
462 		case VBLK:
463 			vap->va_blksize = MAXBSIZE;
464 			break;
465 		case VCHR:
466 			vap->va_blksize = MAXBSIZE;
467 			break;
468 		default:
469 			vap->va_blksize = ip->i_udf->udf_lbsize;
470 			break;
471 	}
472 	vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;
473 
474 	rw_exit(&ip->i_contents);
475 
476 	return (0);
477 }
478 
/*
 * Access-check callback handed to secpolicy_vnode_setattr(): converts
 * the mode bits with UD_UPERM2DPERM() and forwards to ud_iaccess().
 */
static int
ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
{
	int udperm = UD_UPERM2DPERM(mode);

	return (ud_iaccess(ip, udperm, cr));
}
484 
485 /*ARGSUSED4*/
486 static int32_t
487 udf_setattr(
488 	struct vnode *vp,
489 	struct vattr *vap,
490 	int32_t flags,
491 	struct cred *cr,
492 	caller_context_t *ct)
493 {
494 	int32_t error = 0;
495 	uint32_t mask = vap->va_mask;
496 	struct ud_inode *ip;
497 	timestruc_t now;
498 	struct vattr ovap;
499 
500 	ud_printf("udf_setattr\n");
501 
502 	ip = VTOI(vp);
503 
504 	/*
505 	 * not updates allowed to 4096 files
506 	 */
507 	if (ip->i_astrat == STRAT_TYPE4096) {
508 		return (EINVAL);
509 	}
510 
511 	/*
512 	 * Cannot set these attributes
513 	 */
514 	if (mask & AT_NOSET) {
515 		return (EINVAL);
516 	}
517 
518 	rw_enter(&ip->i_rwlock, RW_WRITER);
519 	rw_enter(&ip->i_contents, RW_WRITER);
520 
521 	ovap.va_uid = ip->i_uid;
522 	ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
523 	error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
524 	    ud_iaccess_vmode, ip);
525 	if (error)
526 		goto update_inode;
527 
528 	mask = vap->va_mask;
529 	/*
530 	 * Change file access modes.
531 	 */
532 	if (mask & AT_MODE) {
533 		ip->i_perm = VA2UD_PERM(vap->va_mode);
534 		ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
535 		mutex_enter(&ip->i_tlock);
536 		ip->i_flag |= ICHG;
537 		mutex_exit(&ip->i_tlock);
538 	}
539 	if (mask & (AT_UID|AT_GID)) {
540 		if (mask & AT_UID) {
541 			ip->i_uid = vap->va_uid;
542 		}
543 		if (mask & AT_GID) {
544 			ip->i_gid = vap->va_gid;
545 		}
546 		mutex_enter(&ip->i_tlock);
547 		ip->i_flag |= ICHG;
548 		mutex_exit(&ip->i_tlock);
549 	}
550 	/*
551 	 * Truncate file.  Must have write permission and not be a directory.
552 	 */
553 	if (mask & AT_SIZE) {
554 		if (vp->v_type == VDIR) {
555 			error = EISDIR;
556 			goto update_inode;
557 		}
558 		if (error = ud_iaccess(ip, IWRITE, cr)) {
559 			goto update_inode;
560 		}
561 		if (vap->va_size > MAXOFFSET_T) {
562 			error = EFBIG;
563 			goto update_inode;
564 		}
565 		if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
566 			goto update_inode;
567 		}
568 	}
569 	/*
570 	 * Change file access or modified times.
571 	 */
572 	if (mask & (AT_ATIME|AT_MTIME)) {
573 		mutex_enter(&ip->i_tlock);
574 		if (mask & AT_ATIME) {
575 			ip->i_atime.tv_sec = vap->va_atime.tv_sec;
576 			ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
577 			ip->i_flag &= ~IACC;
578 		}
579 		if (mask & AT_MTIME) {
580 			ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
581 			ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
582 			gethrestime(&now);
583 			ip->i_ctime.tv_sec = now.tv_sec;
584 			ip->i_ctime.tv_nsec = now.tv_nsec;
585 			ip->i_flag &= ~(IUPD|ICHG);
586 			ip->i_flag |= IMODTIME;
587 		}
588 		ip->i_flag |= IMOD;
589 		mutex_exit(&ip->i_tlock);
590 	}
591 
592 update_inode:
593 	if (curthread->t_flag & T_DONTPEND) {
594 		ud_iupdat(ip, 1);
595 	} else {
596 		ITIMES_NOLOCK(ip);
597 	}
598 	rw_exit(&ip->i_contents);
599 	rw_exit(&ip->i_rwlock);
600 
601 	return (error);
602 }
603 
604 /* ARGSUSED */
605 static int32_t
606 udf_access(
607 	struct vnode *vp,
608 	int32_t mode,
609 	int32_t flags,
610 	struct cred *cr,
611 	caller_context_t *ct)
612 {
613 	struct ud_inode *ip = VTOI(vp);
614 	int32_t error;
615 
616 	ud_printf("udf_access\n");
617 
618 	if (ip->i_udf == NULL) {
619 		return (EIO);
620 	}
621 
622 	error = ud_iaccess(ip, UD_UPERM2DPERM(mode), cr);
623 
624 	return (error);
625 }
626 
/*
 * When non-zero, udf_lookup() sets VISSWAP on non-directory vnodes
 * that have the sticky bit set and no execute permission.
 */
int32_t udfs_stickyhack = 1;
628 
629 /* ARGSUSED */
630 static int32_t
631 udf_lookup(
632 	struct vnode *dvp,
633 	char *nm,
634 	struct vnode **vpp,
635 	struct pathname *pnp,
636 	int32_t flags,
637 	struct vnode *rdir,
638 	struct cred *cr,
639 	caller_context_t *ct,
640 	int *direntflags,
641 	pathname_t *realpnp)
642 {
643 	int32_t error;
644 	struct vnode *vp;
645 	struct ud_inode *ip, *xip;
646 
647 	ud_printf("udf_lookup\n");
648 	/*
649 	 * Null component name is a synonym for directory being searched.
650 	 */
651 	if (*nm == '\0') {
652 		VN_HOLD(dvp);
653 		*vpp = dvp;
654 		error = 0;
655 		goto out;
656 	}
657 
658 	/*
659 	 * Fast path: Check the directory name lookup cache.
660 	 */
661 	ip = VTOI(dvp);
662 	if (vp = dnlc_lookup(dvp, nm)) {
663 		/*
664 		 * Check accessibility of directory.
665 		 */
666 		if ((error = ud_iaccess(ip, IEXEC, cr)) != 0) {
667 			VN_RELE(vp);
668 		}
669 		xip = VTOI(vp);
670 	} else {
671 		error = ud_dirlook(ip, nm, &xip, cr, 1);
672 		ITIMES(ip);
673 	}
674 
675 	if (error == 0) {
676 		ip = xip;
677 		*vpp = ITOV(ip);
678 		if ((ip->i_type != VDIR) &&
679 		    (ip->i_char & ISVTX) &&
680 		    ((ip->i_perm & IEXEC) == 0) &&
681 		    udfs_stickyhack) {
682 			mutex_enter(&(*vpp)->v_lock);
683 			(*vpp)->v_flag |= VISSWAP;
684 			mutex_exit(&(*vpp)->v_lock);
685 		}
686 		ITIMES(ip);
687 		/*
688 		 * If vnode is a device return special vnode instead.
689 		 */
690 		if (IS_DEVVP(*vpp)) {
691 			struct vnode *newvp;
692 			newvp = specvp(*vpp, (*vpp)->v_rdev,
693 			    (*vpp)->v_type, cr);
694 			VN_RELE(*vpp);
695 			if (newvp == NULL) {
696 				error = ENOSYS;
697 			} else {
698 				*vpp = newvp;
699 			}
700 		}
701 	}
702 out:
703 	return (error);
704 }
705 
706 /* ARGSUSED */
707 static int32_t
708 udf_create(
709 	struct vnode *dvp,
710 	char *name,
711 	struct vattr *vap,
712 	enum vcexcl excl,
713 	int32_t mode,
714 	struct vnode **vpp,
715 	struct cred *cr,
716 	int32_t flag,
717 	caller_context_t *ct,
718 	vsecattr_t *vsecp)
719 {
720 	int32_t error;
721 	struct ud_inode *ip = VTOI(dvp), *xip;
722 
723 	ud_printf("udf_create\n");
724 
725 	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
726 		vap->va_mode &= ~VSVTX;
727 
728 	if (*name == '\0') {
729 		/*
730 		 * Null component name refers to the directory itself.
731 		 */
732 		VN_HOLD(dvp);
733 		ITIMES(ip);
734 		error = EEXIST;
735 	} else {
736 		xip = NULL;
737 		rw_enter(&ip->i_rwlock, RW_WRITER);
738 		error = ud_direnter(ip, name, DE_CREATE,
739 		    (struct ud_inode *)0, (struct ud_inode *)0,
740 		    vap, &xip, cr, ct);
741 		rw_exit(&ip->i_rwlock);
742 		ITIMES(ip);
743 		ip = xip;
744 	}
745 #ifdef	__lock_lint
746 	rw_enter(&ip->i_contents, RW_WRITER);
747 #else
748 	if (ip != NULL) {
749 		rw_enter(&ip->i_contents, RW_WRITER);
750 	}
751 #endif
752 
753 	/*
754 	 * If the file already exists and this is a non-exclusive create,
755 	 * check permissions and allow access for non-directories.
756 	 * Read-only create of an existing directory is also allowed.
757 	 * We fail an exclusive create of anything which already exists.
758 	 */
759 	if (error == EEXIST) {
760 		if (excl == NONEXCL) {
761 			if ((ip->i_type == VDIR) && (mode & VWRITE)) {
762 				error = EISDIR;
763 			} else if (mode) {
764 				error = ud_iaccess(ip,
765 				    UD_UPERM2DPERM(mode), cr);
766 			} else {
767 				error = 0;
768 			}
769 		}
770 		if (error) {
771 			rw_exit(&ip->i_contents);
772 			VN_RELE(ITOV(ip));
773 			goto out;
774 		} else if ((ip->i_type == VREG) &&
775 		    (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
776 			/*
777 			 * Truncate regular files, if requested by caller.
778 			 * Grab i_rwlock to make sure no one else is
779 			 * currently writing to the file (we promised
780 			 * bmap we would do this).
781 			 * Must get the locks in the correct order.
782 			 */
783 			if (ip->i_size == 0) {
784 				ip->i_flag |= ICHG | IUPD;
785 			} else {
786 				rw_exit(&ip->i_contents);
787 				rw_enter(&ip->i_rwlock, RW_WRITER);
788 				rw_enter(&ip->i_contents, RW_WRITER);
789 				(void) ud_itrunc(ip, 0, 0, cr);
790 				rw_exit(&ip->i_rwlock);
791 			}
792 			vnevent_create(ITOV(ip), ct);
793 		}
794 	}
795 
796 	if (error == 0) {
797 		*vpp = ITOV(ip);
798 		ITIMES(ip);
799 	}
800 #ifdef	__lock_lint
801 	rw_exit(&ip->i_contents);
802 #else
803 	if (ip != NULL) {
804 		rw_exit(&ip->i_contents);
805 	}
806 #endif
807 	if (error) {
808 		goto out;
809 	}
810 
811 	/*
812 	 * If vnode is a device return special vnode instead.
813 	 */
814 	if (!error && IS_DEVVP(*vpp)) {
815 		struct vnode *newvp;
816 
817 		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
818 		VN_RELE(*vpp);
819 		if (newvp == NULL) {
820 			error = ENOSYS;
821 			goto out;
822 		}
823 		*vpp = newvp;
824 	}
825 out:
826 	return (error);
827 }
828 
829 /* ARGSUSED */
830 static int32_t
831 udf_remove(
832 	struct vnode *vp,
833 	char *nm,
834 	struct cred *cr,
835 	caller_context_t *ct,
836 	int flags)
837 {
838 	int32_t error;
839 	struct ud_inode *ip = VTOI(vp);
840 
841 	ud_printf("udf_remove\n");
842 
843 	rw_enter(&ip->i_rwlock, RW_WRITER);
844 	error = ud_dirremove(ip, nm,
845 	    (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
846 	rw_exit(&ip->i_rwlock);
847 	ITIMES(ip);
848 
849 	return (error);
850 }
851 
852 /* ARGSUSED */
853 static int32_t
854 udf_link(
855 	struct vnode *tdvp,
856 	struct vnode *svp,
857 	char *tnm,
858 	struct cred *cr,
859 	caller_context_t *ct,
860 	int flags)
861 {
862 	int32_t error;
863 	struct vnode *realvp;
864 	struct ud_inode *sip;
865 	struct ud_inode *tdp;
866 
867 	ud_printf("udf_link\n");
868 	if (VOP_REALVP(svp, &realvp, ct) == 0) {
869 		svp = realvp;
870 	}
871 
872 	/*
873 	 * Do not allow links to directories
874 	 */
875 	if (svp->v_type == VDIR) {
876 		return (EPERM);
877 	}
878 
879 	sip = VTOI(svp);
880 
881 	if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
882 		return (EPERM);
883 
884 	tdp = VTOI(tdvp);
885 
886 	rw_enter(&tdp->i_rwlock, RW_WRITER);
887 	error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
888 	    sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
889 	rw_exit(&tdp->i_rwlock);
890 	ITIMES(sip);
891 	ITIMES(tdp);
892 
893 	if (error == 0) {
894 		vnevent_link(svp, ct);
895 	}
896 
897 	return (error);
898 }
899 
900 /* ARGSUSED */
901 static int32_t
902 udf_rename(
903 	struct vnode *sdvp,
904 	char *snm,
905 	struct vnode *tdvp,
906 	char *tnm,
907 	struct cred *cr,
908 	caller_context_t *ct,
909 	int flags)
910 {
911 	int32_t error = 0;
912 	struct udf_vfs *udf_vfsp;
913 	struct ud_inode *sip;		/* source inode */
914 	struct ud_inode *sdp, *tdp;	/* source and target parent inode */
915 	struct vnode *realvp;
916 
917 	ud_printf("udf_rename\n");
918 
919 	if (VOP_REALVP(tdvp, &realvp, ct) == 0) {
920 		tdvp = realvp;
921 	}
922 
923 	sdp = VTOI(sdvp);
924 	tdp = VTOI(tdvp);
925 
926 	udf_vfsp = sdp->i_udf;
927 
928 	mutex_enter(&udf_vfsp->udf_rename_lck);
929 	/*
930 	 * Look up inode of file we're supposed to rename.
931 	 */
932 	if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
933 		mutex_exit(&udf_vfsp->udf_rename_lck);
934 		return (error);
935 	}
936 	/*
937 	 * be sure this is not a directory with another file system mounted
938 	 * over it.  If it is just give up the locks, and return with
939 	 * EBUSY
940 	 */
941 	if (vn_mountedvfs(ITOV(sip)) != NULL) {
942 		error = EBUSY;
943 		goto errout;
944 	}
945 	/*
946 	 * Make sure we can delete the source entry.  This requires
947 	 * write permission on the containing directory.  If that
948 	 * directory is "sticky" it further requires (except for
949 	 * privileged users) that the user own the directory or the
950 	 * source entry, or else have permission to write the source
951 	 * entry.
952 	 */
953 	rw_enter(&sdp->i_contents, RW_READER);
954 	rw_enter(&sip->i_contents, RW_READER);
955 	if ((error = ud_iaccess(sdp, IWRITE, cr)) != 0 ||
956 	    (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
957 		rw_exit(&sip->i_contents);
958 		rw_exit(&sdp->i_contents);
959 		ITIMES(sip);
960 		goto errout;
961 	}
962 
963 	/*
964 	 * Check for renaming '.' or '..' or alias of '.'
965 	 */
966 	if ((strcmp(snm, ".") == 0) ||
967 	    (strcmp(snm, "..") == 0) ||
968 	    (sdp == sip)) {
969 		error = EINVAL;
970 		rw_exit(&sip->i_contents);
971 		rw_exit(&sdp->i_contents);
972 		goto errout;
973 	}
974 	rw_exit(&sip->i_contents);
975 	rw_exit(&sdp->i_contents);
976 
977 
978 	/*
979 	 * Link source to the target.
980 	 */
981 	rw_enter(&tdp->i_rwlock, RW_WRITER);
982 	if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip,
983 	    (struct vattr *)0, (struct ud_inode **)0, cr, ct)) {
984 		/*
985 		 * ESAME isn't really an error; it indicates that the
986 		 * operation should not be done because the source and target
987 		 * are the same file, but that no error should be reported.
988 		 */
989 		if (error == ESAME) {
990 			error = 0;
991 		}
992 		rw_exit(&tdp->i_rwlock);
993 		goto errout;
994 	}
995 	vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
996 	rw_exit(&tdp->i_rwlock);
997 
998 	rw_enter(&sdp->i_rwlock, RW_WRITER);
999 	/*
1000 	 * Unlink the source.
1001 	 * Remove the source entry.  ud_dirremove() checks that the entry
1002 	 * still reflects sip, and returns an error if it doesn't.
1003 	 * If the entry has changed just forget about it.  Release
1004 	 * the source inode.
1005 	 */
1006 	if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
1007 	    DR_RENAME, cr, ct)) == ENOENT) {
1008 		error = 0;
1009 	}
1010 	rw_exit(&sdp->i_rwlock);
1011 errout:
1012 	ITIMES(sdp);
1013 	ITIMES(tdp);
1014 	VN_RELE(ITOV(sip));
1015 	mutex_exit(&udf_vfsp->udf_rename_lck);
1016 
1017 	return (error);
1018 }
1019 
1020 /* ARGSUSED */
1021 static int32_t
1022 udf_mkdir(
1023 	struct vnode *dvp,
1024 	char *dirname,
1025 	struct vattr *vap,
1026 	struct vnode **vpp,
1027 	struct cred *cr,
1028 	caller_context_t *ct,
1029 	int flags,
1030 	vsecattr_t *vsecp)
1031 {
1032 	int32_t error;
1033 	struct ud_inode *ip;
1034 	struct ud_inode *xip;
1035 
1036 	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1037 
1038 	ud_printf("udf_mkdir\n");
1039 
1040 	ip = VTOI(dvp);
1041 	rw_enter(&ip->i_rwlock, RW_WRITER);
1042 	error = ud_direnter(ip, dirname, DE_MKDIR,
1043 	    (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
1044 	rw_exit(&ip->i_rwlock);
1045 	ITIMES(ip);
1046 	if (error == 0) {
1047 		ip = xip;
1048 		*vpp = ITOV(ip);
1049 		ITIMES(ip);
1050 	} else if (error == EEXIST) {
1051 		ITIMES(xip);
1052 		VN_RELE(ITOV(xip));
1053 	}
1054 
1055 	return (error);
1056 }
1057 
1058 /* ARGSUSED */
1059 static int32_t
1060 udf_rmdir(
1061 	struct vnode *vp,
1062 	char *nm,
1063 	struct vnode *cdir,
1064 	struct cred *cr,
1065 	caller_context_t *ct,
1066 	int flags)
1067 {
1068 	int32_t error;
1069 	struct ud_inode *ip = VTOI(vp);
1070 
1071 	ud_printf("udf_rmdir\n");
1072 
1073 	rw_enter(&ip->i_rwlock, RW_WRITER);
1074 	error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR,
1075 	    cr, ct);
1076 	rw_exit(&ip->i_rwlock);
1077 	ITIMES(ip);
1078 
1079 	return (error);
1080 }
1081 
/*
 * udf_readdir: return the entries of directory vp to the caller as a
 * sequence of dirent64 records.
 *
 * Entries are staged in a kernel buffer whose size is the length of the
 * first iovec of uiop and are copied out with one uiomove() at the end.
 * A "." entry is synthesized when reading from offset 0; it is given
 * d_off 0x10, and a call that resumes at offset 0x10 is mapped back to
 * directory offset 0 so that the scan of the on-disk file identifiers
 * starts at the beginning of the directory.  The FID_PARENT identifier
 * is reported as "..", identifiers flagged FID_DELETED are skipped, and
 * every other name is decoded with ud_uncompress().
 *
 * Returns 0 on success, EINVAL when the staging buffer cannot hold an
 * entry (see the individual checks below), or the error produced by
 * ud_get_next_fid(), ud_uncompress() or uiomove().  On success *eofp,
 * when eofp is non-NULL, is set to whether the resulting uio_offset has
 * reached the directory size.
 */
/* ARGSUSED */
static int32_t
udf_readdir(
	struct vnode *vp,
	struct uio *uiop,
	struct cred *cr,
	int32_t *eofp,
	caller_context_t *ct,
	int flags)
{
	struct ud_inode *ip;
	struct dirent64 *nd;
	struct udf_vfs *udf_vfsp;
	int32_t error = 0, len, outcount = 0;
	uint32_t dirsiz, offset;
	uint32_t bufsize, ndlen, dummy;
	caddr_t outbuf;
	caddr_t outb, end_outb;
	struct iovec *iovp;

	uint8_t *dname;
	int32_t length;

	uint8_t *buf = NULL;

	struct fbuf *fbp = NULL;
	struct file_id *fid;
	uint8_t *name;


	ud_printf("udf_readdir\n");

	ip = VTOI(vp);
	udf_vfsp = ip->i_udf;

	/*
	 * Nothing to return when the caller is already at or past the
	 * end of the directory, or when the directory has no links left.
	 */
	dirsiz = ip->i_size;
	if ((uiop->uio_offset >= dirsiz) ||
	    (ip->i_nlink <= 0)) {
		if (eofp) {
			*eofp = 1;
		}
		return (0);
	}

	offset = uiop->uio_offset;
	iovp = uiop->uio_iov;
	bufsize = iovp->iov_len;

	/* Staging buffer for the dirent64 records; nd is the write cursor */
	outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP);
	end_outb = outb + bufsize;
	nd = (struct dirent64 *)outbuf;

	/* Scratch space for the decoded name and for ud_get_next_fid() */
	dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
	buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);

	if (offset == 0) {
		/* Synthesize "."; fail if even this entry does not fit */
		len = DIRENT64_RECLEN(1);
		if (((caddr_t)nd + len) >= end_outb) {
			error = EINVAL;
			goto end;
		}
		nd->d_ino = ip->i_icb_lbano;
		nd->d_reclen = (uint16_t)len;
		nd->d_off = 0x10;
		nd->d_name[0] = '.';
		bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1);
		nd = (struct dirent64 *)((char *)nd + nd->d_reclen);
		outcount++;
	} else if (offset == 0x10) {
		/* Resuming right after "."; scan from the first identifier */
		offset = 0;
	}

	while (offset < dirsiz) {
		error = ud_get_next_fid(ip, &fbp,
		    offset, &fid, &name, buf);
		if (error != 0) {
			break;
		}

		if ((fid->fid_flags & FID_DELETED) == 0) {
			if (fid->fid_flags & FID_PARENT) {

				/*
				 * Unlike the named-entry case below, lack
				 * of room here is reported as EINVAL even
				 * when earlier entries were already staged.
				 */
				len = DIRENT64_RECLEN(2);
				if (((caddr_t)nd + len) >= end_outb) {
					error = EINVAL;
					break;
				}

				/*
				 * NOTE(review): d_ino is this directory's
				 * own i_icb_lbano, the same value reported
				 * for "." above.
				 */
				nd->d_ino = ip->i_icb_lbano;
				nd->d_reclen = (uint16_t)len;
				nd->d_off = offset + FID_LEN(fid);
				nd->d_name[0] = '.';
				nd->d_name[1] = '.';
				bzero(&nd->d_name[2],
				    DIRENT64_NAMELEN(len) - 2);
				nd = (struct dirent64 *)
				    ((char *)nd + nd->d_reclen);
			} else {
				if ((error = ud_uncompress(fid->fid_idlen,
				    &length, name, dname)) != 0) {
					break;
				}
				/* Entries that decode to an empty name are skipped */
				if (length == 0) {
					offset += FID_LEN(fid);
					continue;
				}
				/*
				 * Out of room: stop here.  This is only an
				 * error when nothing has been staged yet.
				 */
				len = DIRENT64_RECLEN(length);
				if (((caddr_t)nd + len) >= end_outb) {
					if (!outcount) {
						error = EINVAL;
					}
					break;
				}
				(void) strncpy(nd->d_name,
				    (caddr_t)dname, length);
				bzero(&nd->d_name[length],
				    DIRENT64_NAMELEN(len) - length);
				nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
				    SWAP_16(fid->fid_icb.lad_ext_prn),
				    SWAP_32(fid->fid_icb.lad_ext_loc), 1,
				    &dummy);
				nd->d_reclen = (uint16_t)len;
				nd->d_off = offset + FID_LEN(fid);
				nd = (struct dirent64 *)
				    ((char *)nd + nd->d_reclen);
			}
			outcount++;
		}

		offset += FID_LEN(fid);
	}

end:
	if (fbp != NULL) {
		fbrelse(fbp, S_OTHER);
	}
	/* Number of bytes staged so far */
	ndlen = ((char *)nd - outbuf);
	/*
	 * In case of error do not call uiomove.
	 * Return the error to the caller.
	 */
	if ((error == 0) && (ndlen != 0)) {
		error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop);
		/* Resume offset is stored whatever uiomove() returned */
		uiop->uio_offset = offset;
	}
	kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize);
	kmem_free((caddr_t)dname, 1024);
	kmem_free(outbuf, (uint32_t)bufsize);
	if (eofp && error == 0) {
		*eofp = (uiop->uio_offset >= dirsiz);
	}
	return (error);
}
1235 
1236 /* ARGSUSED */
1237 static int32_t
1238 udf_symlink(
1239 	struct vnode *dvp,
1240 	char *linkname,
1241 	struct vattr *vap,
1242 	char *target,
1243 	struct cred *cr,
1244 	caller_context_t *ct,
1245 	int flags)
1246 {
1247 	int32_t error = 0, outlen;
1248 	uint32_t ioflag = 0;
1249 	struct ud_inode *ip, *dip = VTOI(dvp);
1250 
1251 	struct path_comp *pc;
1252 	int8_t *dname = NULL, *uname = NULL, *sp;
1253 
1254 	ud_printf("udf_symlink\n");
1255 
1256 	ip = (struct ud_inode *)0;
1257 	vap->va_type = VLNK;
1258 	vap->va_rdev = 0;
1259 
1260 	rw_enter(&dip->i_rwlock, RW_WRITER);
1261 	error = ud_direnter(dip, linkname, DE_CREATE,
1262 	    (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
1263 	rw_exit(&dip->i_rwlock);
1264 	if (error == 0) {
1265 		dname = kmem_zalloc(1024, KM_SLEEP);
1266 		uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1267 
1268 		pc = (struct path_comp *)uname;
1269 		/*
1270 		 * If the first character in target is "/"
1271 		 * then skip it and create entry for it
1272 		 */
1273 		if (*target == '/') {
1274 			pc->pc_type = 2;
1275 			pc->pc_len = 0;
1276 			pc = (struct path_comp *)(((char *)pc) + 4);
1277 			while (*target == '/') {
1278 				target++;
1279 			}
1280 		}
1281 
1282 		while (*target != NULL) {
1283 			sp = target;
1284 			while ((*target != '/') && (*target != '\0')) {
1285 				target ++;
1286 			}
1287 			/*
1288 			 * We got the next component of the
1289 			 * path name. Create path_comp of
1290 			 * appropriate type
1291 			 */
1292 			if (((target - sp) == 1) && (*sp == '.')) {
1293 				/*
1294 				 * Dot entry.
1295 				 */
1296 				pc->pc_type = 4;
1297 				pc = (struct path_comp *)(((char *)pc) + 4);
1298 			} else if (((target - sp) == 2) &&
1299 			    (*sp == '.') && ((*(sp + 1)) == '.')) {
1300 				/*
1301 				 * DotDot entry.
1302 				 */
1303 				pc->pc_type = 3;
1304 				pc = (struct path_comp *)(((char *)pc) + 4);
1305 			} else {
1306 				/*
1307 				 * convert the user given name
1308 				 * into appropriate form to be put
1309 				 * on the media
1310 				 */
1311 				outlen = 1024;	/* set to size of dname */
1312 				if (error = ud_compress(target - sp, &outlen,
1313 				    (uint8_t *)sp, (uint8_t *)dname)) {
1314 					break;
1315 				}
1316 				pc->pc_type = 5;
1317 				/* LINTED */
1318 				pc->pc_len = outlen;
1319 				dname[outlen] = '\0';
1320 				(void) strcpy((char *)pc->pc_id, dname);
1321 				pc = (struct path_comp *)
1322 				    (((char *)pc) + 4 + outlen);
1323 			}
1324 			while (*target == '/') {
1325 				target++;
1326 			}
1327 			if (*target == NULL) {
1328 				break;
1329 			}
1330 		}
1331 
1332 		rw_enter(&ip->i_contents, RW_WRITER);
1333 		if (error == 0) {
1334 			ioflag = FWRITE;
1335 			if (curthread->t_flag & T_DONTPEND) {
1336 				ioflag |= FDSYNC;
1337 			}
1338 			error = ud_rdwri(UIO_WRITE, ioflag, ip,
1339 			    uname, ((int8_t *)pc) - uname,
1340 			    (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
1341 		}
1342 		if (error) {
1343 			ud_idrop(ip);
1344 			rw_exit(&ip->i_contents);
1345 			rw_enter(&dip->i_rwlock, RW_WRITER);
1346 			(void) ud_dirremove(dip, linkname, (struct ud_inode *)0,
1347 			    (struct vnode *)0, DR_REMOVE, cr, ct);
1348 			rw_exit(&dip->i_rwlock);
1349 			goto update_inode;
1350 		}
1351 		rw_exit(&ip->i_contents);
1352 	}
1353 
1354 	if ((error == 0) || (error == EEXIST)) {
1355 		VN_RELE(ITOV(ip));
1356 	}
1357 
1358 update_inode:
1359 	ITIMES(VTOI(dvp));
1360 	if (uname != NULL) {
1361 		kmem_free(uname, PAGESIZE);
1362 	}
1363 	if (dname != NULL) {
1364 		kmem_free(dname, 1024);
1365 	}
1366 
1367 	return (error);
1368 }
1369 
1370 /* ARGSUSED */
1371 static int32_t
1372 udf_readlink(
1373 	struct vnode *vp,
1374 	struct uio *uiop,
1375 	struct cred *cr,
1376 	caller_context_t *ct)
1377 {
1378 	int32_t error = 0, off, id_len, size, len;
1379 	int8_t *dname = NULL, *uname = NULL;
1380 	struct ud_inode *ip;
1381 	struct fbuf *fbp = NULL;
1382 	struct path_comp *pc;
1383 
1384 	ud_printf("udf_readlink\n");
1385 
1386 	if (vp->v_type != VLNK) {
1387 		return (EINVAL);
1388 	}
1389 
1390 	ip = VTOI(vp);
1391 	size = ip->i_size;
1392 	if (size > PAGESIZE) {
1393 		return (EIO);
1394 	}
1395 
1396 	if (size == 0) {
1397 		return (0);
1398 	}
1399 
1400 	dname = kmem_zalloc(1024, KM_SLEEP);
1401 	uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1402 
1403 	rw_enter(&ip->i_contents, RW_READER);
1404 
1405 	if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) {
1406 		goto end;
1407 	}
1408 
1409 	off = 0;
1410 
1411 	while (off < size) {
1412 		pc = (struct path_comp *)(fbp->fb_addr + off);
1413 		switch (pc->pc_type) {
1414 			case 1 :
1415 				(void) strcpy(uname, ip->i_udf->udf_fsmnt);
1416 				(void) strcat(uname, "/");
1417 				break;
1418 			case 2 :
1419 				if (pc->pc_len != 0) {
1420 					goto end;
1421 				}
1422 				uname[0] = '/';
1423 				uname[1] = '\0';
1424 				break;
1425 			case 3 :
1426 				(void) strcat(uname, "../");
1427 				break;
1428 			case 4 :
1429 				(void) strcat(uname, "./");
1430 				break;
1431 			case 5 :
1432 				if ((error = ud_uncompress(pc->pc_len, &id_len,
1433 				    pc->pc_id, (uint8_t *)dname)) != 0) {
1434 					break;
1435 				}
1436 				dname[id_len] = '\0';
1437 				(void) strcat(uname, dname);
1438 				(void) strcat(uname, "/");
1439 				break;
1440 			default :
1441 				error = EINVAL;
1442 				goto end;
1443 		}
1444 		off += 4 + pc->pc_len;
1445 	}
1446 	len = strlen(uname) - 1;
1447 	if (uname[len] == '/') {
1448 		if (len == 0) {
1449 			/*
1450 			 * special case link to /
1451 			 */
1452 			len = 1;
1453 		} else {
1454 			uname[len] = '\0';
1455 		}
1456 	}
1457 
1458 	error = uiomove(uname, len, UIO_READ, uiop);
1459 
1460 	ITIMES(ip);
1461 
1462 end:
1463 	if (fbp != NULL) {
1464 		fbrelse(fbp, S_OTHER);
1465 	}
1466 	rw_exit(&ip->i_contents);
1467 	if (uname != NULL) {
1468 		kmem_free(uname, PAGESIZE);
1469 	}
1470 	if (dname != NULL) {
1471 		kmem_free(dname, 1024);
1472 	}
1473 	return (error);
1474 }
1475 
1476 /* ARGSUSED */
1477 static int32_t
1478 udf_fsync(
1479 	struct vnode *vp,
1480 	int32_t syncflag,
1481 	struct cred *cr,
1482 	caller_context_t *ct)
1483 {
1484 	int32_t error = 0;
1485 	struct ud_inode *ip = VTOI(vp);
1486 
1487 	ud_printf("udf_fsync\n");
1488 
1489 	rw_enter(&ip->i_contents, RW_WRITER);
1490 	if (!(IS_SWAPVP(vp))) {
1491 		error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */
1492 	}
1493 	if (error == 0) {
1494 		error = ud_sync_indir(ip);
1495 	}
1496 	ITIMES(ip);		/* XXX: is this necessary ??? */
1497 	rw_exit(&ip->i_contents);
1498 
1499 	return (error);
1500 }
1501 
1502 /* ARGSUSED */
1503 static void
1504 udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
1505 {
1506 	ud_printf("udf_iinactive\n");
1507 
1508 	ud_iinactive(VTOI(vp), cr);
1509 }
1510 
1511 /* ARGSUSED */
1512 static int32_t
1513 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1514 {
1515 	struct udf_fid *udfidp;
1516 	struct ud_inode *ip = VTOI(vp);
1517 
1518 	ud_printf("udf_fid\n");
1519 
1520 	if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) {
1521 		fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1522 		return (ENOSPC);
1523 	}
1524 
1525 	udfidp = (struct udf_fid *)fidp;
1526 	bzero((char *)udfidp, sizeof (struct udf_fid));
1527 	rw_enter(&ip->i_contents, RW_READER);
1528 	udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1529 	udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff;
1530 	udfidp->udfid_prn = ip->i_icb_prn;
1531 	udfidp->udfid_icb_lbn = ip->i_icb_block;
1532 	rw_exit(&ip->i_contents);
1533 
1534 	return (0);
1535 }
1536 
1537 /* ARGSUSED2 */
1538 static int
1539 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1540 {
1541 	struct ud_inode *ip = VTOI(vp);
1542 
1543 	ud_printf("udf_rwlock\n");
1544 
1545 	if (write_lock) {
1546 		rw_enter(&ip->i_rwlock, RW_WRITER);
1547 	} else {
1548 		rw_enter(&ip->i_rwlock, RW_READER);
1549 	}
1550 #ifdef	__lock_lint
1551 	rw_exit(&ip->i_rwlock);
1552 #endif
1553 	return (write_lock);
1554 }
1555 
1556 /* ARGSUSED */
1557 static void
1558 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1559 {
1560 	struct ud_inode *ip = VTOI(vp);
1561 
1562 	ud_printf("udf_rwunlock\n");
1563 
1564 #ifdef	__lock_lint
1565 	rw_enter(&ip->i_rwlock, RW_WRITER);
1566 #endif
1567 
1568 	rw_exit(&ip->i_rwlock);
1569 
1570 }
1571 
1572 /* ARGSUSED */
1573 static int32_t
1574 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1575 {
1576 	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
1577 }
1578 
1579 static int32_t
1580 udf_frlock(
1581 	struct vnode *vp,
1582 	int32_t cmd,
1583 	struct flock64 *bfp,
1584 	int32_t flag,
1585 	offset_t offset,
1586 	struct flk_callback *flk_cbp,
1587 	cred_t *cr,
1588 	caller_context_t *ct)
1589 {
1590 	struct ud_inode *ip = VTOI(vp);
1591 
1592 	ud_printf("udf_frlock\n");
1593 
1594 	/*
1595 	 * If file is being mapped, disallow frlock.
1596 	 * XXX I am not holding tlock while checking i_mapcnt because the
1597 	 * current locking strategy drops all locks before calling fs_frlock.
1598 	 * So, mapcnt could change before we enter fs_frlock making is
1599 	 * meaningless to have held tlock in the first place.
1600 	 */
1601 	if ((ip->i_mapcnt > 0) &&
1602 	    (MANDLOCK(vp, ip->i_char))) {
1603 		return (EAGAIN);
1604 	}
1605 
1606 	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1607 }
1608 
1609 /*ARGSUSED6*/
1610 static int32_t
1611 udf_space(
1612 	struct vnode *vp,
1613 	int32_t cmd,
1614 	struct flock64 *bfp,
1615 	int32_t flag,
1616 	offset_t offset,
1617 	cred_t *cr,
1618 	caller_context_t *ct)
1619 {
1620 	int32_t error = 0;
1621 
1622 	ud_printf("udf_space\n");
1623 
1624 	if (cmd != F_FREESP) {
1625 		error =  EINVAL;
1626 	} else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
1627 		error = ud_freesp(vp, bfp, flag, cr);
1628 	}
1629 
1630 	return (error);
1631 }
1632 
/*
 * udf_getpage: VOP_GETPAGE entry point; return (or, when plarr is NULL,
 * merely start reading ahead) the pages covering <off, off + len) of
 * vp.
 *
 * Outline:
 *  - *protp, when supplied, starts as PROT_ALL; PROT_WRITE is removed
 *    later for non-write faults on files that have holes.
 *  - i_contents is taken as reader unless the calling thread already
 *    owns it (dolock == 0).
 *  - A request extending beyond EOF (rounded up to a page) fails with
 *    EFAULT unless it comes through segkmap.
 *  - For S_WRITE/S_CREATE on a file with holes or beyond EOF, the lock
 *    is upgraded to writer and ud_bmap_write() is called block by block
 *    up to i_size.
 *  - Each page is then looked up in the page cache or obtained through
 *    ud_getpage_miss(); read-ahead is started for sequential access.
 *  - Additional already-cached pages are returned up to plsz when that
 *    is safe.
 *  - On error all pages locked so far are released and plarr[0] is set
 *    to NULL.
 *  - Finally the access/update time flags are set under i_tlock.
 *
 * Returns 0 on success, ENOSYS for VNOMAP vnodes, EFAULT as above, or
 * the error from ud_bmap_write() or ud_getpage_miss().
 */
/* ARGSUSED */
static int32_t
udf_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint32_t *protp,
	struct page **plarr,
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error, has_holes, beyond_eof, seqmode, dolock;
	int32_t pgsize = PAGESIZE;
	struct udf_vfs *udf_vfsp = ip->i_udf;
	page_t **pl;
	u_offset_t pgoff, eoff, uoff;
	krw_t rwtype;
	caddr_t pgaddr;

	ud_printf("udf_getpage\n");

	uoff = (u_offset_t)off; /* type conversion */
	if (protp) {
		*protp = PROT_ALL;
	}
	if (vp->v_flag & VNOMAP) {
		return (ENOSYS);
	}
	/* Sequential if this request starts where the previous one ended */
	seqmode = ip->i_nextr == uoff && rw != S_CREATE;

	rwtype = RW_READER;
	/* Do not re-acquire i_contents if this thread already owns it */
	dolock = (rw_owner(&ip->i_contents) != curthread);
retrylock:
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, rwtype);
#else
	if (dolock) {
		rw_enter(&ip->i_contents, rwtype);
	}
#endif

	/*
	 * We may be getting called as a side effect of a bmap using
	 * fbread() when the blocks might be being allocated and the
	 * size has not yet been up'ed.  In this case we want to be
	 * able to return zero pages if we get back UDF_HOLE from
	 * calling bmap for a non write case here.  We also might have
	 * to read some frags from the disk into a page if we are
	 * extending the number of frags for a given lbn in bmap().
	 */
	beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
	if (beyond_eof && seg != segkmap) {
#ifdef	__lock_lint
		rw_exit(&ip->i_contents);
#else
		if (dolock) {
			rw_exit(&ip->i_contents);
		}
#endif
		return (EFAULT);
	}

	/*
	 * Must hold i_contents lock throughout the call to pvn_getpages
	 * since locked pages are returned from each call to ud_getapage.
	 * Must *not* return locked pages and then try for contents lock
	 * due to lock ordering requirements (inode > page)
	 */

	has_holes = ud_bmap_has_holes(ip);

	if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
		int32_t	blk_size, count;
		u_offset_t offset;

		/*
		 * We must acquire the RW_WRITER lock in order to
		 * call bmap_write().
		 */
		if (dolock && rwtype == RW_READER) {
			rwtype = RW_WRITER;

			/*
			 * If the in-place upgrade fails, drop the lock and
			 * start over as writer; the checks above are
			 * repeated after the lock is re-acquired.
			 */
			if (!rw_tryupgrade(&ip->i_contents)) {

				rw_exit(&ip->i_contents);

				goto retrylock;
			}
		}

		/*
		 * May be allocating disk blocks for holes here as
		 * a result of mmap faults. write(2) does the bmap_write
		 * in rdip/wrip, not here. We are not dealing with frags
		 * in this case.
		 */
		offset = uoff;
		while ((offset < uoff + len) &&
		    (offset < ip->i_size)) {
			/*
			 * the variable "bnp" is to simplify the expression for
			 * the compiler; * just passing in &bn to bmap_write
			 * causes a compiler "loop"
			 */

			/* Last block of the file may be a partial one */
			blk_size = udf_vfsp->udf_lbsize;
			if ((offset + blk_size) > ip->i_size) {
				count = ip->i_size - offset;
			} else {
				count = blk_size;
			}
			error = ud_bmap_write(ip, offset, count, 0, cr);
			if (error) {
				/* No pages are held yet; skip page cleanup */
				goto update_inode;
			}
			offset += count; /* XXX - make this contig */
		}
	}

	/*
	 * Can be a reader from now on.
	 */
#ifdef	__lock_lint
	if (rwtype == RW_WRITER) {
		rw_downgrade(&ip->i_contents);
	}
#else
	if (dolock && rwtype == RW_WRITER) {
		rw_downgrade(&ip->i_contents);
	}
#endif

	/*
	 * We remove PROT_WRITE in cases when the file has UDF holes
	 * because we don't  want to call bmap_read() to check each
	 * page if it is backed with a disk block.
	 */
	if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
		*protp &= ~PROT_WRITE;
	}

	error = 0;

	/*
	 * The loop looks up pages in the range <off, off + len).
	 * For each page, we first check if we should initiate an asynchronous
	 * read ahead before we call page_lookup (we may sleep in page_lookup
	 * for a previously initiated disk read).
	 */
	eoff = (uoff + len);
	for (pgoff = uoff, pgaddr = addr, pl = plarr;
	    pgoff < eoff; /* empty */) {
		page_t	*pp;
		u_offset_t	nextrio;
		se_t	se;

		/* Pages being created are held exclusively, others shared */
		se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);

		/*
		 * Handle async getpage (faultahead)
		 */
		if (plarr == NULL) {
			ip->i_nextrio = pgoff;
			ud_getpage_ra(vp, pgoff, seg, pgaddr);
			pgoff += pgsize;
			pgaddr += pgsize;
			continue;
		}

		/*
		 * Check if we should initiate read ahead of next cluster.
		 * We call page_exists only when we need to confirm that
		 * we have the current page before we initiate the read ahead.
		 */
		nextrio = ip->i_nextrio;
		if (seqmode &&
		    pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
		    nextrio < ip->i_size && page_exists(vp, pgoff))
			ud_getpage_ra(vp, pgoff, seg, pgaddr);

		if ((pp = page_lookup(vp, pgoff, se)) != NULL) {

			/*
			 * We found the page in the page cache.
			 */
			*pl++ = pp;
			pgoff += pgsize;
			pgaddr += pgsize;
			len -= pgsize;
			plsz -= pgsize;
		} else  {

			/*
			 * We have to create the page, or read it from disk.
			 */
			if (error = ud_getpage_miss(vp, pgoff, len,
			    seg, pgaddr, pl, plsz, rw, seqmode)) {
				goto error_out;
			}

			/*
			 * Step over whatever ud_getpage_miss() stored.  If
			 * it stored nothing, pgoff is unchanged and the
			 * lookup above is retried for the same page.
			 */
			while (*pl != NULL) {
				pl++;
				pgoff += pgsize;
				pgaddr += pgsize;
				len -= pgsize;
				plsz -= pgsize;
			}
		}
	}

	/*
	 * Return pages up to plsz if they are in the page cache.
	 * We cannot return pages if there is a chance that they are
	 * backed with a UDF hole and rw is S_WRITE or S_CREATE.
	 */
	if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {

		ASSERT((protp == NULL) ||
		    !(has_holes && (*protp & PROT_WRITE)));

		eoff = pgoff + plsz;
		while (pgoff < eoff) {
			page_t		*pp;

			/* Opportunistic only: stop at the first miss */
			if ((pp = page_lookup_nowait(vp, pgoff,
			    SE_SHARED)) == NULL)
				break;

			*pl++ = pp;
			pgoff += pgsize;
			plsz -= pgsize;
		}
	}

	if (plarr)
		*pl = NULL;			/* Terminate page list */
	/* Remember where this request ended for the sequential check */
	ip->i_nextr = pgoff;

error_out:
	if (error && plarr) {
		/*
		 * Release any pages we have locked.
		 */
		while (pl > &plarr[0])
			page_unlock(*--pl);

		plarr[0] = NULL;
	}

update_inode:
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#else
	if (dolock) {
		rw_exit(&ip->i_contents);
	}
#endif

	/*
	 * If the inode is not already marked for IACC (in rwip() for read)
	 * and the inode is not marked for no access time update (in rwip()
	 * for write) then update the inode access time and mod time now.
	 */
	mutex_enter(&ip->i_tlock);
	if ((ip->i_flag & (IACC | INOACC)) == 0) {
		if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
			ip->i_flag |= IACC;
		}
		if (rw == S_WRITE) {
			ip->i_flag |= IUPD;
		}
		ITIMES_NOLOCK(ip);
	}
	mutex_exit(&ip->i_tlock);

	return (error);
}
1915 
1916 int32_t ud_delay = 1;
1917 
1918 /* ARGSUSED */
1919 static int32_t
1920 udf_putpage(
1921 	struct vnode *vp,
1922 	offset_t off,
1923 	size_t len,
1924 	int32_t flags,
1925 	struct cred *cr,
1926 	caller_context_t *ct)
1927 {
1928 	struct ud_inode *ip;
1929 	int32_t error = 0;
1930 
1931 	ud_printf("udf_putpage\n");
1932 
1933 	ip = VTOI(vp);
1934 #ifdef	__lock_lint
1935 	rw_enter(&ip->i_contents, RW_WRITER);
1936 #endif
1937 
1938 	if (vp->v_count == 0) {
1939 		cmn_err(CE_WARN, "ud_putpage : bad v_count");
1940 		error = EINVAL;
1941 		goto out;
1942 	}
1943 
1944 	if (vp->v_flag & VNOMAP) {
1945 		error = ENOSYS;
1946 		goto out;
1947 	}
1948 
1949 	if (flags & B_ASYNC) {
1950 		if (ud_delay && len &&
1951 		    (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
1952 			mutex_enter(&ip->i_tlock);
1953 
1954 			/*
1955 			 * If nobody stalled, start a new cluster.
1956 			 */
1957 			if (ip->i_delaylen == 0) {
1958 				ip->i_delayoff = off;
1959 				ip->i_delaylen = len;
1960 				mutex_exit(&ip->i_tlock);
1961 				goto out;
1962 			}
1963 
1964 			/*
1965 			 * If we have a full cluster or they are not contig,
1966 			 * then push last cluster and start over.
1967 			 */
1968 			if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
1969 			    ip->i_delayoff + ip->i_delaylen != off) {
1970 				u_offset_t doff;
1971 				size_t dlen;
1972 
1973 				doff = ip->i_delayoff;
1974 				dlen = ip->i_delaylen;
1975 				ip->i_delayoff = off;
1976 				ip->i_delaylen = len;
1977 				mutex_exit(&ip->i_tlock);
1978 				error = ud_putpages(vp, doff, dlen, flags, cr);
1979 				/* LMXXX - flags are new val, not old */
1980 				goto out;
1981 			}
1982 
1983 			/*
1984 			 * There is something there, it's not full, and
1985 			 * it is contig.
1986 			 */
1987 			ip->i_delaylen += len;
1988 			mutex_exit(&ip->i_tlock);
1989 			goto out;
1990 		}
1991 
1992 		/*
1993 		 * Must have weird flags or we are not clustering.
1994 		 */
1995 	}
1996 
1997 	error = ud_putpages(vp, off, len, flags, cr);
1998 
1999 out:
2000 #ifdef	__lock_lint
2001 	rw_exit(&ip->i_contents);
2002 #endif
2003 	return (error);
2004 }
2005 
2006 /* ARGSUSED */
2007 static int32_t
2008 udf_map(
2009 	struct vnode *vp,
2010 	offset_t off,
2011 	struct as *as,
2012 	caddr_t *addrp,
2013 	size_t len,
2014 	uint8_t prot,
2015 	uint8_t maxprot,
2016 	uint32_t flags,
2017 	struct cred *cr,
2018 	caller_context_t *ct)
2019 {
2020 	struct segvn_crargs vn_a;
2021 	int32_t error = 0;
2022 
2023 	ud_printf("udf_map\n");
2024 
2025 	if (vp->v_flag & VNOMAP) {
2026 		error = ENOSYS;
2027 		goto end;
2028 	}
2029 
2030 	if ((off < (offset_t)0) ||
2031 	    ((off + len) < (offset_t)0)) {
2032 		error = EINVAL;
2033 		goto end;
2034 	}
2035 
2036 	if (vp->v_type != VREG) {
2037 		error = ENODEV;
2038 		goto end;
2039 	}
2040 
2041 	/*
2042 	 * If file is being locked, disallow mapping.
2043 	 */
2044 	if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) {
2045 		error = EAGAIN;
2046 		goto end;
2047 	}
2048 
2049 	as_rangelock(as);
2050 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
2051 	if (error != 0) {
2052 		as_rangeunlock(as);
2053 		goto end;
2054 	}
2055 
2056 	vn_a.vp = vp;
2057 	vn_a.offset = off;
2058 	vn_a.type = flags & MAP_TYPE;
2059 	vn_a.prot = prot;
2060 	vn_a.maxprot = maxprot;
2061 	vn_a.cred = cr;
2062 	vn_a.amp = NULL;
2063 	vn_a.flags = flags & ~MAP_TYPE;
2064 	vn_a.szc = 0;
2065 	vn_a.lgrp_mem_policy_flags = 0;
2066 
2067 	error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a);
2068 	as_rangeunlock(as);
2069 
2070 end:
2071 	return (error);
2072 }
2073 
2074 /* ARGSUSED */
2075 static int32_t
2076 udf_addmap(struct vnode *vp,
2077 	offset_t off,
2078 	struct as *as,
2079 	caddr_t addr,
2080 	size_t len,
2081 	uint8_t prot,
2082 	uint8_t maxprot,
2083 	uint32_t flags,
2084 	struct cred *cr,
2085 	caller_context_t *ct)
2086 {
2087 	struct ud_inode *ip = VTOI(vp);
2088 
2089 	ud_printf("udf_addmap\n");
2090 
2091 	if (vp->v_flag & VNOMAP) {
2092 		return (ENOSYS);
2093 	}
2094 
2095 	mutex_enter(&ip->i_tlock);
2096 	ip->i_mapcnt += btopr(len);
2097 	mutex_exit(&ip->i_tlock);
2098 
2099 	return (0);
2100 }
2101 
2102 /* ARGSUSED */
2103 static int32_t
2104 udf_delmap(
2105 	struct vnode *vp, offset_t off,
2106 	struct as *as,
2107 	caddr_t addr,
2108 	size_t len,
2109 	uint32_t prot,
2110 	uint32_t maxprot,
2111 	uint32_t flags,
2112 	struct cred *cr,
2113 	caller_context_t *ct)
2114 {
2115 	struct ud_inode *ip = VTOI(vp);
2116 
2117 	ud_printf("udf_delmap\n");
2118 
2119 	if (vp->v_flag & VNOMAP) {
2120 		return (ENOSYS);
2121 	}
2122 
2123 	mutex_enter(&ip->i_tlock);
2124 	ip->i_mapcnt -= btopr(len); 	/* Count released mappings */
2125 	ASSERT(ip->i_mapcnt >= 0);
2126 	mutex_exit(&ip->i_tlock);
2127 
2128 	return (0);
2129 }
2130 
2131 /* ARGSUSED */
2132 static int32_t
2133 udf_l_pathconf(
2134 	struct vnode *vp,
2135 	int32_t cmd,
2136 	ulong_t *valp,
2137 	struct cred *cr,
2138 	caller_context_t *ct)
2139 {
2140 	int32_t error = 0;
2141 
2142 	ud_printf("udf_l_pathconf\n");
2143 
2144 	if (cmd == _PC_FILESIZEBITS) {
2145 		/*
2146 		 * udf supports 64 bits as file size
2147 		 * but there are several other restrictions
2148 		 * it only supports 32-bit block numbers and
2149 		 * daddr32_t is only and int32_t so taking these
2150 		 * into account we can stay just as where ufs is
2151 		 */
2152 		*valp = 41;
2153 	} else if (cmd == _PC_TIMESTAMP_RESOLUTION) {
2154 		/* nanosecond timestamp resolution */
2155 		*valp = 1L;
2156 	} else {
2157 		error = fs_pathconf(vp, cmd, valp, cr, ct);
2158 	}
2159 
2160 	return (error);
2161 }
2162 
2163 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0;
2164 #ifndef	__lint
2165 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads))
2166 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes))
2167 #endif
2168 /*
2169  * Assumption is that there will not be a pageio request
2170  * to a enbedded file
2171  */
2172 /* ARGSUSED */
2173 static int32_t
2174 udf_pageio(
2175 	struct vnode *vp,
2176 	struct page *pp,
2177 	u_offset_t io_off,
2178 	size_t io_len,
2179 	int32_t flags,
2180 	struct cred *cr,
2181 	caller_context_t *ct)
2182 {
2183 	daddr_t bn;
2184 	struct buf *bp;
2185 	struct ud_inode *ip = VTOI(vp);
2186 	int32_t dolock, error = 0, contig, multi_io;
2187 	size_t done_len = 0, cur_len = 0;
2188 	page_t *npp = NULL, *opp = NULL, *cpp = pp;
2189 
2190 	if (pp == NULL) {
2191 		return (EINVAL);
2192 	}
2193 
2194 	dolock = (rw_owner(&ip->i_contents) != curthread);
2195 
2196 	/*
2197 	 * We need a better check.  Ideally, we would use another
2198 	 * vnodeops so that hlocked and forcibly unmounted file
2199 	 * systems would return EIO where appropriate and w/o the
2200 	 * need for these checks.
2201 	 */
2202 	if (ip->i_udf == NULL) {
2203 		return (EIO);
2204 	}
2205 
2206 #ifdef	__lock_lint
2207 	rw_enter(&ip->i_contents, RW_READER);
2208 #else
2209 	if (dolock) {
2210 		rw_enter(&ip->i_contents, RW_READER);
2211 	}
2212 #endif
2213 
2214 	/*
2215 	 * Break the io request into chunks, one for each contiguous
2216 	 * stretch of disk blocks in the target file.
2217 	 */
2218 	while (done_len < io_len) {
2219 		ASSERT(cpp);
2220 		bp = NULL;
2221 		contig = 0;
2222 		if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
2223 		    &bn, &contig)) {
2224 			break;
2225 		}
2226 
2227 		if (bn == UDF_HOLE) {   /* No holey swapfiles */
2228 			cmn_err(CE_WARN, "SWAP file has HOLES");
2229 			error = EINVAL;
2230 			break;
2231 		}
2232 
2233 		cur_len = MIN(io_len - done_len, contig);
2234 
2235 		/*
2236 		 * Check if more than one I/O is
2237 		 * required to complete the given
2238 		 * I/O operation
2239 		 */
2240 		if (ip->i_udf->udf_lbsize < PAGESIZE) {
2241 			if (cur_len >= PAGESIZE) {
2242 				multi_io = 0;
2243 				cur_len &= PAGEMASK;
2244 			} else {
2245 				multi_io = 1;
2246 				cur_len = MIN(io_len - done_len, PAGESIZE);
2247 			}
2248 		}
2249 		page_list_break(&cpp, &npp, btop(cur_len));
2250 
2251 		bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags);
2252 		ASSERT(bp != NULL);
2253 
2254 		bp->b_edev = ip->i_dev;
2255 		bp->b_dev = cmpdev(ip->i_dev);
2256 		bp->b_blkno = bn;
2257 		bp->b_un.b_addr = (caddr_t)0;
2258 		bp->b_file = vp;
2259 		bp->b_offset = (offset_t)(io_off + done_len);
2260 
2261 /*
2262  *		ub.ub_pageios.value.ul++;
2263  */
2264 		if (multi_io == 0) {
2265 			(void) bdev_strategy(bp);
2266 		} else {
2267 			error = ud_multi_strat(ip, cpp, bp,
2268 			    (u_offset_t)(io_off + done_len));
2269 			if (error != 0) {
2270 				pageio_done(bp);
2271 				break;
2272 			}
2273 		}
2274 		if (flags & B_READ) {
2275 			ud_pageio_reads++;
2276 		} else {
2277 			ud_pageio_writes++;
2278 		}
2279 
2280 		/*
2281 		 * If the request is not B_ASYNC, wait for i/o to complete
2282 		 * and re-assemble the page list to return to the caller.
2283 		 * If it is B_ASYNC we leave the page list in pieces and
2284 		 * cleanup() will dispose of them.
2285 		 */
2286 		if ((flags & B_ASYNC) == 0) {
2287 			error = biowait(bp);
2288 			pageio_done(bp);
2289 			if (error) {
2290 				break;
2291 			}
2292 			page_list_concat(&opp, &cpp);
2293 		}
2294 		cpp = npp;
2295 		npp = NULL;
2296 		done_len += cur_len;
2297 	}
2298 
2299 	ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len));
2300 	if (error) {
2301 		if (flags & B_ASYNC) {
2302 			/* Cleanup unprocessed parts of list */
2303 			page_list_concat(&cpp, &npp);
2304 			if (flags & B_READ) {
2305 				pvn_read_done(cpp, B_ERROR);
2306 			} else {
2307 				pvn_write_done(cpp, B_ERROR);
2308 			}
2309 		} else {
2310 			/* Re-assemble list and let caller clean up */
2311 			page_list_concat(&opp, &cpp);
2312 			page_list_concat(&opp, &npp);
2313 		}
2314 	}
2315 
2316 #ifdef	__lock_lint
2317 	rw_exit(&ip->i_contents);
2318 #else
2319 	if (dolock) {
2320 		rw_exit(&ip->i_contents);
2321 	}
2322 #endif
2323 	return (error);
2324 }
2325 
2326 
2327 
2328 
2329 /* -------------------- local functions --------------------------- */
2330 
2331 
2332 
2333 int32_t
2334 ud_rdwri(enum uio_rw rw, int32_t ioflag,
2335 	struct ud_inode *ip, caddr_t base, int32_t len,
2336 	offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr)
2337 {
2338 	int32_t error;
2339 	struct uio auio;
2340 	struct iovec aiov;
2341 
2342 	ud_printf("ud_rdwri\n");
2343 
2344 	bzero((caddr_t)&auio, sizeof (uio_t));
2345 	bzero((caddr_t)&aiov, sizeof (iovec_t));
2346 
2347 	aiov.iov_base = base;
2348 	aiov.iov_len = len;
2349 	auio.uio_iov = &aiov;
2350 	auio.uio_iovcnt = 1;
2351 	auio.uio_loffset = offset;
2352 	auio.uio_segflg = (int16_t)seg;
2353 	auio.uio_resid = len;
2354 
2355 	if (rw == UIO_WRITE) {
2356 		auio.uio_fmode = FWRITE;
2357 		auio.uio_extflg = UIO_COPY_DEFAULT;
2358 		auio.uio_llimit = curproc->p_fsz_ctl;
2359 		error = ud_wrip(ip, &auio, ioflag, cr);
2360 	} else {
2361 		auio.uio_fmode = FREAD;
2362 		auio.uio_extflg = UIO_COPY_CACHED;
2363 		auio.uio_llimit = MAXOFFSET_T;
2364 		error = ud_rdip(ip, &auio, ioflag, cr);
2365 	}
2366 
2367 	if (aresid) {
2368 		*aresid = auio.uio_resid;
2369 	} else if (auio.uio_resid) {
2370 		error = EIO;
2371 	}
2372 	return (error);
2373 }
2374 
/*
 * Free behind hacks.  The pager is busted.
 * XXX - need to pass the information down to writedone() in a flag like B_SEQ
 * or B_FREE_IF_TIGHT_ON_MEMORY.
 *
 * NOTE(review): neither variable is referenced in the part of the file
 * reviewed here; presumably they are tunables consulted by the
 * read/write path (ud_smallfile is 32 KB) -- confirm against their users.
 */
int32_t ud_freebehind = 1;
int32_t ud_smallfile = 32 * 1024;
2382 
2383 /* ARGSUSED */
2384 int32_t
2385 ud_getpage_miss(struct vnode *vp, u_offset_t off,
2386 	size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
2387 	size_t plsz, enum seg_rw rw, int32_t seq)
2388 {
2389 	struct ud_inode *ip = VTOI(vp);
2390 	int32_t err = 0;
2391 	size_t io_len;
2392 	u_offset_t io_off;
2393 	u_offset_t pgoff;
2394 	page_t *pp;
2395 
2396 	pl[0] = NULL;
2397 
2398 	/*
2399 	 * Figure out whether the page can be created, or must be
2400 	 * read from the disk
2401 	 */
2402 	if (rw == S_CREATE) {
2403 		if ((pp = page_create_va(vp, off,
2404 		    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
2405 			cmn_err(CE_WARN, "ud_getpage_miss: page_create");
2406 			return (EINVAL);
2407 		}
2408 		io_len = PAGESIZE;
2409 	} else {
2410 		pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
2411 		    &io_len, off, PAGESIZE, 0);
2412 
2413 		/*
2414 		 * Some other thread has entered the page.
2415 		 * ud_getpage will retry page_lookup.
2416 		 */
2417 		if (pp == NULL) {
2418 			return (0);
2419 		}
2420 
2421 		/*
2422 		 * Fill the page with as much data as we can from the file.
2423 		 */
2424 		err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
2425 		if (err) {
2426 			pvn_read_done(pp, B_ERROR);
2427 			return (err);
2428 		}
2429 
2430 		/*
2431 		 * XXX ??? ufs has io_len instead of pgoff below
2432 		 */
2433 		ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2434 
2435 		/*
2436 		 * If the file access is sequential, initiate read ahead
2437 		 * of the next cluster.
2438 		 */
2439 		if (seq && ip->i_nextrio < ip->i_size) {
2440 			ud_getpage_ra(vp, off, seg, addr);
2441 		}
2442 	}
2443 
2444 outmiss:
2445 	pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw);
2446 	return (err);
2447 }
2448 
2449 /* ARGSUSED */
2450 void
2451 ud_getpage_ra(struct vnode *vp,
2452 	u_offset_t off, struct seg *seg, caddr_t addr)
2453 {
2454 	page_t *pp;
2455 	size_t io_len;
2456 	struct ud_inode *ip = VTOI(vp);
2457 	u_offset_t io_off = ip->i_nextrio, pgoff;
2458 	caddr_t addr2 = addr + (io_off - off);
2459 	daddr_t bn;
2460 	int32_t contig = 0;
2461 
2462 	/*
2463 	 * Is this test needed?
2464 	 */
2465 
2466 	if (addr2 >= seg->s_base + seg->s_size) {
2467 		return;
2468 	}
2469 
2470 	contig = 0;
2471 	if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) {
2472 		return;
2473 	}
2474 
2475 	pp = pvn_read_kluster(vp, io_off, seg, addr2,
2476 	    &io_off, &io_len, io_off, PAGESIZE, 1);
2477 
2478 	/*
2479 	 * Some other thread has entered the page.
2480 	 * So no read head done here (ie we will have to and wait
2481 	 * for the read when needed).
2482 	 */
2483 
2484 	if (pp == NULL) {
2485 		return;
2486 	}
2487 
2488 	(void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff);
2489 	ip->i_nextrio =  io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2490 }
2491 
2492 int
2493 ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
2494 	uint32_t bflgs, u_offset_t *pg_off)
2495 {
2496 	daddr_t bn;
2497 	struct buf *bp;
2498 	caddr_t kaddr, caddr;
2499 	int32_t error = 0, contig = 0, multi_io = 0;
2500 	int32_t lbsize = ip->i_udf->udf_lbsize;
2501 	int32_t lbmask = ip->i_udf->udf_lbmask;
2502 	uint64_t isize;
2503 
2504 	isize = (ip->i_size + lbmask) & (~lbmask);
2505 	if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2506 
2507 		/*
2508 		 * Embedded file read file_entry
2509 		 * from buffer cache and copy the required
2510 		 * portions
2511 		 */
2512 		bp = ud_bread(ip->i_dev,
2513 		    ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
2514 		if ((bp->b_error == 0) &&
2515 		    (bp->b_resid == 0)) {
2516 
2517 			caddr = bp->b_un.b_addr + ip->i_data_off;
2518 
2519 			/*
2520 			 * mapin to kvm
2521 			 */
2522 			kaddr = (caddr_t)ppmapin(pp,
2523 			    PROT_READ | PROT_WRITE, (caddr_t)-1);
2524 			(void) kcopy(caddr, kaddr, ip->i_size);
2525 
2526 			/*
2527 			 * mapout of kvm
2528 			 */
2529 			ppmapout(kaddr);
2530 		}
2531 		brelse(bp);
2532 		contig = ip->i_size;
2533 	} else {
2534 
2535 		/*
2536 		 * Get the continuous size and block number
2537 		 * at offset "off"
2538 		 */
2539 		if (error = ud_bmap_read(ip, off, &bn, &contig))
2540 			goto out;
2541 		contig = MIN(contig, PAGESIZE);
2542 		contig = (contig + lbmask) & (~lbmask);
2543 
2544 		/*
2545 		 * Zero part of the page which we are not
2546 		 * going to read from the disk.
2547 		 */
2548 
2549 		if (bn == UDF_HOLE) {
2550 
2551 			/*
2552 			 * This is a HOLE. Just zero out
2553 			 * the page
2554 			 */
2555 			if (((off + contig) == isize) ||
2556 			    (contig == PAGESIZE)) {
2557 				pagezero(pp->p_prev, 0, PAGESIZE);
2558 				goto out;
2559 			}
2560 		}
2561 
2562 		if (contig < PAGESIZE) {
2563 			uint64_t count;
2564 
2565 			count = isize - off;
2566 			if (contig != count) {
2567 				multi_io = 1;
2568 				contig = (int32_t)(MIN(count, PAGESIZE));
2569 			} else {
2570 				pagezero(pp->p_prev, contig, PAGESIZE - contig);
2571 			}
2572 		}
2573 
2574 		/*
2575 		 * Get a bp and initialize it
2576 		 */
2577 		bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
2578 		ASSERT(bp != NULL);
2579 
2580 		bp->b_edev = ip->i_dev;
2581 		bp->b_dev = cmpdev(ip->i_dev);
2582 		bp->b_blkno = bn;
2583 		bp->b_un.b_addr = 0;
2584 		bp->b_file = ip->i_vnode;
2585 
2586 		/*
2587 		 * Start I/O
2588 		 */
2589 		if (multi_io == 0) {
2590 
2591 			/*
2592 			 * Single I/O is sufficient for this page
2593 			 */
2594 			(void) bdev_strategy(bp);
2595 		} else {
2596 
2597 			/*
2598 			 * We need to do the I/O in
2599 			 * piece's
2600 			 */
2601 			error = ud_multi_strat(ip, pp, bp, off);
2602 			if (error != 0) {
2603 				goto out;
2604 			}
2605 		}
2606 		if ((bflgs & B_ASYNC) == 0) {
2607 
2608 			/*
2609 			 * Wait for i/o to complete.
2610 			 */
2611 
2612 			error = biowait(bp);
2613 			pageio_done(bp);
2614 			if (error) {
2615 				goto out;
2616 			}
2617 		}
2618 	}
2619 	if ((off + contig) >= ip->i_size) {
2620 		contig = ip->i_size - off;
2621 	}
2622 
2623 out:
2624 	*pg_off = contig;
2625 	return (error);
2626 }
2627 
2628 int32_t
2629 ud_putpages(struct vnode *vp, offset_t off,
2630 	size_t len, int32_t flags, struct cred *cr)
2631 {
2632 	struct ud_inode *ip;
2633 	page_t *pp;
2634 	u_offset_t io_off;
2635 	size_t io_len;
2636 	u_offset_t eoff;
2637 	int32_t err = 0;
2638 	int32_t dolock;
2639 
2640 	ud_printf("ud_putpages\n");
2641 
2642 	if (vp->v_count == 0) {
2643 		cmn_err(CE_WARN, "ud_putpages: bad v_count");
2644 		return (EINVAL);
2645 	}
2646 
2647 	ip = VTOI(vp);
2648 
2649 	/*
2650 	 * Acquire the readers/write inode lock before locking
2651 	 * any pages in this inode.
2652 	 * The inode lock is held during i/o.
2653 	 */
2654 	if (len == 0) {
2655 		mutex_enter(&ip->i_tlock);
2656 		ip->i_delayoff = ip->i_delaylen = 0;
2657 		mutex_exit(&ip->i_tlock);
2658 	}
2659 #ifdef	__lock_lint
2660 	rw_enter(&ip->i_contents, RW_READER);
2661 #else
2662 	dolock = (rw_owner(&ip->i_contents) != curthread);
2663 	if (dolock) {
2664 		rw_enter(&ip->i_contents, RW_READER);
2665 	}
2666 #endif
2667 
2668 	if (!vn_has_cached_data(vp)) {
2669 #ifdef	__lock_lint
2670 		rw_exit(&ip->i_contents);
2671 #else
2672 		if (dolock) {
2673 			rw_exit(&ip->i_contents);
2674 		}
2675 #endif
2676 		return (0);
2677 	}
2678 
2679 	if (len == 0) {
2680 		/*
2681 		 * Search the entire vp list for pages >= off.
2682 		 */
2683 		err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
2684 		    flags, cr);
2685 	} else {
2686 		/*
2687 		 * Loop over all offsets in the range looking for
2688 		 * pages to deal with.
2689 		 */
2690 		if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
2691 			eoff = MIN(off + len, eoff);
2692 		} else {
2693 			eoff = off + len;
2694 		}
2695 
2696 		for (io_off = off; io_off < eoff; io_off += io_len) {
2697 			/*
2698 			 * If we are not invalidating, synchronously
2699 			 * freeing or writing pages, use the routine
2700 			 * page_lookup_nowait() to prevent reclaiming
2701 			 * them from the free list.
2702 			 */
2703 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
2704 				pp = page_lookup(vp, io_off,
2705 				    (flags & (B_INVAL | B_FREE)) ?
2706 				    SE_EXCL : SE_SHARED);
2707 			} else {
2708 				pp = page_lookup_nowait(vp, io_off,
2709 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
2710 			}
2711 
2712 			if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
2713 				io_len = PAGESIZE;
2714 			} else {
2715 
2716 				err = ud_putapage(vp, pp,
2717 				    &io_off, &io_len, flags, cr);
2718 				if (err != 0) {
2719 					break;
2720 				}
2721 				/*
2722 				 * "io_off" and "io_len" are returned as
2723 				 * the range of pages we actually wrote.
2724 				 * This allows us to skip ahead more quickly
2725 				 * since several pages may've been dealt
2726 				 * with by this iteration of the loop.
2727 				 */
2728 			}
2729 		}
2730 	}
2731 	if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
2732 		/*
2733 		 * We have just sync'ed back all the pages on
2734 		 * the inode, turn off the IMODTIME flag.
2735 		 */
2736 		mutex_enter(&ip->i_tlock);
2737 		ip->i_flag &= ~IMODTIME;
2738 		mutex_exit(&ip->i_tlock);
2739 	}
2740 #ifdef	__lock_lint
2741 	rw_exit(&ip->i_contents);
2742 #else
2743 	if (dolock) {
2744 		rw_exit(&ip->i_contents);
2745 	}
2746 #endif
2747 	return (err);
2748 }
2749 
2750 /* ARGSUSED */
2751 int32_t
2752 ud_putapage(struct vnode *vp,
2753 	page_t *pp, u_offset_t *offp,
2754 	size_t *lenp, int32_t flags, struct cred *cr)
2755 {
2756 	daddr_t bn;
2757 	size_t io_len;
2758 	struct ud_inode *ip;
2759 	int32_t error = 0, contig, multi_io = 0;
2760 	struct udf_vfs *udf_vfsp;
2761 	u_offset_t off, io_off;
2762 	caddr_t kaddr, caddr;
2763 	struct buf *bp = NULL;
2764 	int32_t lbmask;
2765 	uint64_t isize;
2766 	int32_t crc_len;
2767 	struct file_entry *fe;
2768 
2769 	ud_printf("ud_putapage\n");
2770 
2771 	ip = VTOI(vp);
2772 	ASSERT(ip);
2773 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
2774 	lbmask = ip->i_udf->udf_lbmask;
2775 	isize = (ip->i_size + lbmask) & (~lbmask);
2776 
2777 	udf_vfsp = ip->i_udf;
2778 	ASSERT(udf_vfsp->udf_flags & UDF_FL_RW);
2779 
2780 	/*
2781 	 * If the modified time on the inode has not already been
2782 	 * set elsewhere (e.g. for write/setattr) we set the time now.
2783 	 * This gives us approximate modified times for mmap'ed files
2784 	 * which are modified via stores in the user address space.
2785 	 */
2786 	if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) {
2787 		mutex_enter(&ip->i_tlock);
2788 		ip->i_flag |= IUPD;
2789 		ITIMES_NOLOCK(ip);
2790 		mutex_exit(&ip->i_tlock);
2791 	}
2792 
2793 
2794 	/*
2795 	 * Align the request to a block boundry (for old file systems),
2796 	 * and go ask bmap() how contiguous things are for this file.
2797 	 */
2798 	off = pp->p_offset & ~(offset_t)lbmask;
2799 				/* block align it */
2800 
2801 
2802 	if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2803 		ASSERT(ip->i_size <= ip->i_max_emb);
2804 
2805 		pp = pvn_write_kluster(vp, pp, &io_off,
2806 		    &io_len, off, PAGESIZE, flags);
2807 		if (io_len == 0) {
2808 			io_len = PAGESIZE;
2809 		}
2810 
2811 		bp = ud_bread(ip->i_dev,
2812 		    ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
2813 		    udf_vfsp->udf_lbsize);
2814 		fe = (struct file_entry *)bp->b_un.b_addr;
2815 		if ((bp->b_flags & B_ERROR) ||
2816 		    (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
2817 		    ip->i_icb_block,
2818 		    1, udf_vfsp->udf_lbsize) != 0)) {
2819 			if (pp != NULL)
2820 				pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2821 			if (bp->b_flags & B_ERROR) {
2822 				error = EIO;
2823 			} else {
2824 				error = EINVAL;
2825 			}
2826 			brelse(bp);
2827 			return (error);
2828 		}
2829 		if ((bp->b_error == 0) &&
2830 		    (bp->b_resid == 0)) {
2831 
2832 			caddr = bp->b_un.b_addr + ip->i_data_off;
2833 			kaddr = (caddr_t)ppmapin(pp,
2834 			    PROT_READ | PROT_WRITE, (caddr_t)-1);
2835 			(void) kcopy(kaddr, caddr, ip->i_size);
2836 			ppmapout(kaddr);
2837 		}
2838 		crc_len = ((uint32_t)&((struct file_entry *)0)->fe_spec) +
2839 		    SWAP_32(fe->fe_len_ear);
2840 		crc_len += ip->i_size;
2841 		ud_make_tag(ip->i_udf, &fe->fe_tag,
2842 		    UD_FILE_ENTRY, ip->i_icb_block, crc_len);
2843 
2844 		bwrite(bp);
2845 
2846 		if (flags & B_ASYNC) {
2847 			pvn_write_done(pp, flags);
2848 		}
2849 		contig = ip->i_size;
2850 	} else {
2851 
2852 		if (error = ud_bmap_read(ip, off, &bn, &contig)) {
2853 			goto out;
2854 		}
2855 		contig = MIN(contig, PAGESIZE);
2856 		contig = (contig + lbmask) & (~lbmask);
2857 
2858 		if (contig < PAGESIZE) {
2859 			uint64_t count;
2860 
2861 			count = isize - off;
2862 			if (contig != count) {
2863 				multi_io = 1;
2864 				contig = (int32_t)(MIN(count, PAGESIZE));
2865 			}
2866 		}
2867 
2868 		if ((off + contig) > isize) {
2869 			contig = isize - off;
2870 		}
2871 
2872 		if (contig > PAGESIZE) {
2873 			if (contig & PAGEOFFSET) {
2874 				contig &= PAGEMASK;
2875 			}
2876 		}
2877 
2878 		pp = pvn_write_kluster(vp, pp, &io_off,
2879 		    &io_len, off, contig, flags);
2880 		if (io_len == 0) {
2881 			io_len = PAGESIZE;
2882 		}
2883 
2884 		bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags);
2885 		ASSERT(bp != NULL);
2886 
2887 		bp->b_edev = ip->i_dev;
2888 		bp->b_dev = cmpdev(ip->i_dev);
2889 		bp->b_blkno = bn;
2890 		bp->b_un.b_addr = 0;
2891 		bp->b_file = vp;
2892 		bp->b_offset = (offset_t)off;
2893 
2894 
2895 		/*
2896 		 * write throttle
2897 		 */
2898 		ASSERT(bp->b_iodone == NULL);
2899 		bp->b_iodone = ud_iodone;
2900 		mutex_enter(&ip->i_tlock);
2901 		ip->i_writes += bp->b_bcount;
2902 		mutex_exit(&ip->i_tlock);
2903 
2904 		if (multi_io == 0) {
2905 
2906 			(void) bdev_strategy(bp);
2907 		} else {
2908 			error = ud_multi_strat(ip, pp, bp, off);
2909 			if (error != 0) {
2910 				goto out;
2911 			}
2912 		}
2913 
2914 		if ((flags & B_ASYNC) == 0) {
2915 			/*
2916 			 * Wait for i/o to complete.
2917 			 */
2918 			error = biowait(bp);
2919 			pageio_done(bp);
2920 		}
2921 	}
2922 
2923 	if ((flags & B_ASYNC) == 0) {
2924 		pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
2925 	}
2926 
2927 	pp = NULL;
2928 
2929 out:
2930 	if (error != 0 && pp != NULL) {
2931 		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2932 	}
2933 
2934 	if (offp) {
2935 		*offp = io_off;
2936 	}
2937 	if (lenp) {
2938 		*lenp = io_len;
2939 	}
2940 
2941 	return (error);
2942 }
2943 
2944 
2945 int32_t
2946 ud_iodone(struct buf *bp)
2947 {
2948 	struct ud_inode *ip;
2949 
2950 	ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ));
2951 
2952 	bp->b_iodone = NULL;
2953 
2954 	ip = VTOI(bp->b_pages->p_vnode);
2955 
2956 	mutex_enter(&ip->i_tlock);
2957 	if (ip->i_writes >= ud_LW) {
2958 		if ((ip->i_writes -= bp->b_bcount) <= ud_LW) {
2959 			if (ud_WRITES) {
2960 				cv_broadcast(&ip->i_wrcv); /* wake all up */
2961 			}
2962 		}
2963 	} else {
2964 		ip->i_writes -= bp->b_bcount;
2965 	}
2966 	mutex_exit(&ip->i_tlock);
2967 	iodone(bp);
2968 	return (0);
2969 }
2970 
2971 /* ARGSUSED3 */
2972 int32_t
2973 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
2974 {
2975 	struct vnode *vp;
2976 	struct udf_vfs *udf_vfsp;
2977 	krw_t rwtype;
2978 	caddr_t base;
2979 	uint32_t flags;
2980 	int32_t error, n, on, mapon, dofree;
2981 	u_offset_t off;
2982 	long oresid = uio->uio_resid;
2983 
2984 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
2985 	if ((ip->i_type != VREG) &&
2986 	    (ip->i_type != VDIR) &&
2987 	    (ip->i_type != VLNK)) {
2988 		return (EIO);
2989 	}
2990 
2991 	if (uio->uio_loffset > MAXOFFSET_T) {
2992 		return (0);
2993 	}
2994 
2995 	if ((uio->uio_loffset < (offset_t)0) ||
2996 	    ((uio->uio_loffset + uio->uio_resid) < 0)) {
2997 		return (EINVAL);
2998 	}
2999 	if (uio->uio_resid == 0) {
3000 		return (0);
3001 	}
3002 
3003 	vp = ITOV(ip);
3004 	udf_vfsp = ip->i_udf;
3005 	mutex_enter(&ip->i_tlock);
3006 	ip->i_flag |= IACC;
3007 	mutex_exit(&ip->i_tlock);
3008 
3009 	rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER);
3010 
3011 	do {
3012 		offset_t diff;
3013 		u_offset_t uoff = uio->uio_loffset;
3014 		off = uoff & (offset_t)MAXBMASK;
3015 		mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3016 		on = (int)blkoff(udf_vfsp, uoff);
3017 		n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3018 
3019 		diff = ip->i_size - uoff;
3020 
3021 		if (diff <= (offset_t)0) {
3022 			error = 0;
3023 			goto out;
3024 		}
3025 		if (diff < (offset_t)n) {
3026 			n = (int)diff;
3027 		}
3028 		dofree = ud_freebehind &&
3029 		    ip->i_nextr == (off & PAGEMASK) &&
3030 		    off > ud_smallfile;
3031 
3032 #ifndef	__lock_lint
3033 		if (rwtype == RW_READER) {
3034 			rw_exit(&ip->i_contents);
3035 		}
3036 #endif
3037 
3038 		base = segmap_getmapflt(segkmap, vp, (off + mapon),
3039 		    (uint32_t)n, 1, S_READ);
3040 		error = uiomove(base + mapon, (long)n, UIO_READ, uio);
3041 
3042 		flags = 0;
3043 		if (!error) {
3044 			/*
3045 			 * If read a whole block, or read to eof,
3046 			 * won't need this buffer again soon.
3047 			 */
3048 			if (n + on == MAXBSIZE && ud_freebehind && dofree &&
3049 			    freemem < lotsfree + pages_before_pager) {
3050 				flags = SM_FREE | SM_DONTNEED |SM_ASYNC;
3051 			}
3052 			/*
3053 			 * In POSIX SYNC (FSYNC and FDSYNC) read mode,
3054 			 * we want to make sure that the page which has
3055 			 * been read, is written on disk if it is dirty.
3056 			 * And corresponding indirect blocks should also
3057 			 * be flushed out.
3058 			 */
3059 			if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) {
3060 				flags &= ~SM_ASYNC;
3061 				flags |= SM_WRITE;
3062 			}
3063 			error = segmap_release(segkmap, base, flags);
3064 		} else    {
3065 			(void) segmap_release(segkmap, base, flags);
3066 		}
3067 
3068 #ifndef __lock_lint
3069 		if (rwtype == RW_READER) {
3070 			rw_enter(&ip->i_contents, rwtype);
3071 		}
3072 #endif
3073 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
3074 out:
3075 	/*
3076 	 * Inode is updated according to this table if FRSYNC is set.
3077 	 *
3078 	 *	FSYNC	FDSYNC(posix.4)
3079 	 *	--------------------------
3080 	 *	always	IATTCHG|IBDWRITE
3081 	 */
3082 	if (ioflag & FRSYNC) {
3083 		if ((ioflag & FSYNC) ||
3084 		    ((ioflag & FDSYNC) &&
3085 		    (ip->i_flag & (IATTCHG|IBDWRITE)))) {
3086 		rw_exit(&ip->i_contents);
3087 		rw_enter(&ip->i_contents, RW_WRITER);
3088 		ud_iupdat(ip, 1);
3089 		}
3090 	}
3091 	/*
3092 	 * If we've already done a partial read, terminate
3093 	 * the read but return no error.
3094 	 */
3095 	if (oresid != uio->uio_resid) {
3096 		error = 0;
3097 	}
3098 	ITIMES(ip);
3099 
3100 	return (error);
3101 }
3102 
3103 int32_t
3104 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
3105 {
3106 	caddr_t base;
3107 	struct vnode *vp;
3108 	struct udf_vfs *udf_vfsp;
3109 	uint32_t flags;
3110 	int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0;
3111 	int32_t pagecreate, newpage;
3112 	uint64_t old_i_size;
3113 	u_offset_t off;
3114 	long start_resid = uio->uio_resid, premove_resid;
3115 	rlim64_t limit = uio->uio_limit;
3116 
3117 
3118 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
3119 	if ((ip->i_type != VREG) &&
3120 	    (ip->i_type != VDIR) &&
3121 	    (ip->i_type != VLNK)) {
3122 		return (EIO);
3123 	}
3124 
3125 	if (uio->uio_loffset >= MAXOFFSET_T) {
3126 		return (EFBIG);
3127 	}
3128 	/*
3129 	 * see udf_l_pathconf
3130 	 */
3131 	if (limit > (((uint64_t)1 << 40) - 1)) {
3132 		limit = ((uint64_t)1 << 40) - 1;
3133 	}
3134 	if (uio->uio_loffset >= limit) {
3135 		proc_t *p = ttoproc(curthread);
3136 
3137 		mutex_enter(&p->p_lock);
3138 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
3139 		    p, RCA_UNSAFE_SIGINFO);
3140 		mutex_exit(&p->p_lock);
3141 		return (EFBIG);
3142 	}
3143 	if ((uio->uio_loffset < (offset_t)0) ||
3144 	    ((uio->uio_loffset + uio->uio_resid) < 0)) {
3145 		return (EINVAL);
3146 	}
3147 	if (uio->uio_resid == 0) {
3148 		return (0);
3149 	}
3150 
3151 	mutex_enter(&ip->i_tlock);
3152 	ip->i_flag |= INOACC;
3153 
3154 	if (ioflag & (FSYNC | FDSYNC)) {
3155 		ip->i_flag |= ISYNC;
3156 		iupdat_flag = 1;
3157 	}
3158 	mutex_exit(&ip->i_tlock);
3159 
3160 	udf_vfsp = ip->i_udf;
3161 	vp = ITOV(ip);
3162 
3163 	do {
3164 		u_offset_t uoff = uio->uio_loffset;
3165 		off = uoff & (offset_t)MAXBMASK;
3166 		mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3167 		on = (int)blkoff(udf_vfsp, uoff);
3168 		n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3169 
3170 		if (ip->i_type == VREG && uoff + n >= limit) {
3171 			if (uoff >= limit) {
3172 				error = EFBIG;
3173 				goto out;
3174 			}
3175 			n = (int)(limit - (rlim64_t)uoff);
3176 		}
3177 		if (uoff + n > ip->i_size) {
3178 			/*
3179 			 * We are extending the length of the file.
3180 			 * bmap is used so that we are sure that
3181 			 * if we need to allocate new blocks, that it
3182 			 * is done here before we up the file size.
3183 			 */
3184 			error = ud_bmap_write(ip, uoff,
3185 			    (int)(on + n), mapon == 0, cr);
3186 			if (error) {
3187 				break;
3188 			}
3189 			i_size_changed = 1;
3190 			old_i_size = ip->i_size;
3191 			ip->i_size = uoff + n;
3192 			/*
3193 			 * If we are writing from the beginning of
3194 			 * the mapping, we can just create the
3195 			 * pages without having to read them.
3196 			 */
3197 			pagecreate = (mapon == 0);
3198 		} else if (n == MAXBSIZE) {
3199 			/*
3200 			 * Going to do a whole mappings worth,
3201 			 * so we can just create the pages w/o
3202 			 * having to read them in.  But before
3203 			 * we do that, we need to make sure any
3204 			 * needed blocks are allocated first.
3205 			 */
3206 			error = ud_bmap_write(ip, uoff,
3207 			    (int)(on + n), 1, cr);
3208 			if (error) {
3209 				break;
3210 			}
3211 			pagecreate = 1;
3212 		} else {
3213 			pagecreate = 0;
3214 		}
3215 
3216 		rw_exit(&ip->i_contents);
3217 
3218 		/*
3219 		 * Touch the page and fault it in if it is not in
3220 		 * core before segmap_getmapflt can lock it. This
3221 		 * is to avoid the deadlock if the buffer is mapped
3222 		 * to the same file through mmap which we want to
3223 		 * write to.
3224 		 */
3225 		uio_prefaultpages((long)n, uio);
3226 
3227 		base = segmap_getmapflt(segkmap, vp, (off + mapon),
3228 		    (uint32_t)n, !pagecreate, S_WRITE);
3229 
3230 		/*
3231 		 * segmap_pagecreate() returns 1 if it calls
3232 		 * page_create_va() to allocate any pages.
3233 		 */
3234 		newpage = 0;
3235 		if (pagecreate) {
3236 			newpage = segmap_pagecreate(segkmap, base,
3237 			    (size_t)n, 0);
3238 		}
3239 
3240 		premove_resid = uio->uio_resid;
3241 		error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
3242 
3243 		if (pagecreate &&
3244 		    uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
3245 			/*
3246 			 * We created pages w/o initializing them completely,
3247 			 * thus we need to zero the part that wasn't set up.
3248 			 * This happens on most EOF write cases and if
3249 			 * we had some sort of error during the uiomove.
3250 			 */
3251 			int nzero, nmoved;
3252 
3253 			nmoved = (int)(uio->uio_loffset - (off + mapon));
3254 			ASSERT(nmoved >= 0 && nmoved <= n);
3255 			nzero = roundup(on + n, PAGESIZE) - nmoved;
3256 			ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE);
3257 			(void) kzero(base + mapon + nmoved, (uint32_t)nzero);
3258 		}
3259 
3260 		/*
3261 		 * Unlock the pages allocated by page_create_va()
3262 		 * in segmap_pagecreate()
3263 		 */
3264 		if (newpage) {
3265 			segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
3266 		}
3267 
3268 		if (error) {
3269 			/*
3270 			 * If we failed on a write, we may have already
3271 			 * allocated file blocks as well as pages.  It's
3272 			 * hard to undo the block allocation, but we must
3273 			 * be sure to invalidate any pages that may have
3274 			 * been allocated.
3275 			 */
3276 			(void) segmap_release(segkmap, base, SM_INVAL);
3277 		} else {
3278 			flags = 0;
3279 			/*
3280 			 * Force write back for synchronous write cases.
3281 			 */
3282 			if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) {
3283 				/*
3284 				 * If the sticky bit is set but the
3285 				 * execute bit is not set, we do a
3286 				 * synchronous write back and free
3287 				 * the page when done.  We set up swap
3288 				 * files to be handled this way to
3289 				 * prevent servers from keeping around
3290 				 * the client's swap pages too long.
3291 				 * XXX - there ought to be a better way.
3292 				 */
3293 				if (IS_SWAPVP(vp)) {
3294 					flags = SM_WRITE | SM_FREE |
3295 					    SM_DONTNEED;
3296 					iupdat_flag = 0;
3297 				} else {
3298 					flags = SM_WRITE;
3299 				}
3300 			} else if (((mapon + n) == MAXBSIZE) ||
3301 			    IS_SWAPVP(vp)) {
3302 				/*
3303 				 * Have written a whole block.
3304 				 * Start an asynchronous write and
3305 				 * mark the buffer to indicate that
3306 				 * it won't be needed again soon.
3307 				 */
3308 				flags = SM_WRITE |SM_ASYNC | SM_DONTNEED;
3309 			}
3310 			error = segmap_release(segkmap, base, flags);
3311 
3312 			/*
3313 			 * If the operation failed and is synchronous,
3314 			 * then we need to unwind what uiomove() last
3315 			 * did so we can potentially return an error to
3316 			 * the caller.  If this write operation was
3317 			 * done in two pieces and the first succeeded,
3318 			 * then we won't return an error for the second
3319 			 * piece that failed.  However, we only want to
3320 			 * return a resid value that reflects what was
3321 			 * really done.
3322 			 *
3323 			 * Failures for non-synchronous operations can
3324 			 * be ignored since the page subsystem will
3325 			 * retry the operation until it succeeds or the
3326 			 * file system is unmounted.
3327 			 */
3328 			if (error) {
3329 				if ((ioflag & (FSYNC | FDSYNC)) ||
3330 				    ip->i_type == VDIR) {
3331 					uio->uio_resid = premove_resid;
3332 				} else {
3333 					error = 0;
3334 				}
3335 			}
3336 		}
3337 
3338 		/*
3339 		 * Re-acquire contents lock.
3340 		 */
3341 		rw_enter(&ip->i_contents, RW_WRITER);
3342 		/*
3343 		 * If the uiomove() failed or if a synchronous
3344 		 * page push failed, fix up i_size.
3345 		 */
3346 		if (error) {
3347 			if (i_size_changed) {
3348 				/*
3349 				 * The uiomove failed, and we
3350 				 * allocated blocks,so get rid
3351 				 * of them.
3352 				 */
3353 				(void) ud_itrunc(ip, old_i_size, 0, cr);
3354 			}
3355 		} else {
3356 			/*
3357 			 * XXX - Can this be out of the loop?
3358 			 */
3359 			ip->i_flag |= IUPD | ICHG;
3360 			if (i_size_changed) {
3361 				ip->i_flag |= IATTCHG;
3362 			}
3363 			if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
3364 			    (IEXEC >> 10))) != 0 &&
3365 			    (ip->i_char & (ISUID | ISGID)) != 0 &&
3366 			    secpolicy_vnode_setid_retain(cr,
3367 			    (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
3368 				/*
3369 				 * Clear Set-UID & Set-GID bits on
3370 				 * successful write if not privileged
3371 				 * and at least one of the execute bits
3372 				 * is set.  If we always clear Set-GID,
3373 				 * mandatory file and record locking is
3374 				 * unuseable.
3375 				 */
3376 				ip->i_char &= ~(ISUID | ISGID);
3377 			}
3378 		}
3379 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
3380 
3381 out:
3382 	/*
3383 	 * Inode is updated according to this table -
3384 	 *
3385 	 *	FSYNC	FDSYNC(posix.4)
3386 	 *	--------------------------
3387 	 *	always@	IATTCHG|IBDWRITE
3388 	 *
3389 	 * @ -  If we are doing synchronous write the only time we should
3390 	 *	not be sync'ing the ip here is if we have the stickyhack
3391 	 *	activated, the file is marked with the sticky bit and
3392 	 *	no exec bit, the file length has not been changed and
3393 	 *	no new blocks have been allocated during this write.
3394 	 */
3395 	if ((ip->i_flag & ISYNC) != 0) {
3396 		/*
3397 		 * we have eliminated nosync
3398 		 */
3399 		if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
3400 		    ((ioflag & FSYNC) && iupdat_flag)) {
3401 			ud_iupdat(ip, 1);
3402 		}
3403 	}
3404 
3405 	/*
3406 	 * If we've already done a partial-write, terminate
3407 	 * the write but return no error.
3408 	 */
3409 	if (start_resid != uio->uio_resid) {
3410 		error = 0;
3411 	}
3412 	ip->i_flag &= ~(INOACC | ISYNC);
3413 	ITIMES_NOLOCK(ip);
3414 
3415 	return (error);
3416 }
3417 
3418 int32_t
3419 ud_multi_strat(struct ud_inode *ip,
3420 	page_t *pp, struct buf *bp, u_offset_t start)
3421 {
3422 	daddr_t bn;
3423 	int32_t error = 0, io_count, contig, alloc_sz, i;
3424 	uint32_t io_off;
3425 	mio_master_t *mm = NULL;
3426 	mio_slave_t *ms = NULL;
3427 	struct buf *rbp;
3428 
3429 	ASSERT(!(start & PAGEOFFSET));
3430 
3431 	/*
3432 	 * Figure out how many buffers to allocate
3433 	 */
3434 	io_count = 0;
3435 	for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3436 		contig = 0;
3437 		if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
3438 		    &bn, &contig)) {
3439 			goto end;
3440 		}
3441 		if (contig == 0) {
3442 			goto end;
3443 		}
3444 		contig = MIN(contig, PAGESIZE - io_off);
3445 		if (bn != UDF_HOLE) {
3446 			io_count ++;
3447 		} else {
3448 			/*
3449 			 * HOLE
3450 			 */
3451 			if (bp->b_flags & B_READ) {
3452 
3453 				/*
3454 				 * This is a hole and is read
3455 				 * it should be filled with 0's
3456 				 */
3457 				pagezero(pp, io_off, contig);
3458 			}
3459 		}
3460 	}
3461 
3462 
3463 	if (io_count != 0) {
3464 
3465 		/*
3466 		 * Allocate memory for all the
3467 		 * required number of buffers
3468 		 */
3469 		alloc_sz = sizeof (mio_master_t) +
3470 		    (sizeof (mio_slave_t) * io_count);
3471 		mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
3472 		if (mm == NULL) {
3473 			error = ENOMEM;
3474 			goto end;
3475 		}
3476 
3477 		/*
3478 		 * initialize master
3479 		 */
3480 		mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL);
3481 		mm->mm_size = alloc_sz;
3482 		mm->mm_bp = bp;
3483 		mm->mm_resid = 0;
3484 		mm->mm_error = 0;
3485 		mm->mm_index = master_index++;
3486 
3487 		ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3488 
3489 		/*
3490 		 * Initialize buffers
3491 		 */
3492 		io_count = 0;
3493 		for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3494 			contig = 0;
3495 			if (error = ud_bmap_read(ip,
3496 			    (u_offset_t)(start + io_off),
3497 			    &bn, &contig)) {
3498 				goto end;
3499 			}
3500 			ASSERT(contig);
3501 			if ((io_off + contig) > bp->b_bcount) {
3502 				contig = bp->b_bcount - io_off;
3503 			}
3504 			if (bn != UDF_HOLE) {
3505 				/*
3506 				 * Clone the buffer
3507 				 * and prepare to start I/O
3508 				 */
3509 				ms->ms_ptr = mm;
3510 				bioinit(&ms->ms_buf);
3511 				rbp = bioclone(bp, io_off, (size_t)contig,
3512 				    bp->b_edev, bn, ud_slave_done,
3513 				    &ms->ms_buf, KM_NOSLEEP);
3514 				ASSERT(rbp == &ms->ms_buf);
3515 				mm->mm_resid += contig;
3516 				io_count++;
3517 				ms ++;
3518 			}
3519 		}
3520 
3521 		/*
3522 		 * Start I/O's
3523 		 */
3524 		ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3525 		for (i = 0; i < io_count; i++) {
3526 			(void) bdev_strategy(&ms->ms_buf);
3527 			ms ++;
3528 		}
3529 	}
3530 
3531 end:
3532 	if (error != 0) {
3533 		bp->b_flags |= B_ERROR;
3534 		bp->b_error = error;
3535 		if (mm != NULL) {
3536 			mutex_destroy(&mm->mm_mutex);
3537 			kmem_free(mm, mm->mm_size);
3538 		}
3539 	}
3540 	return (error);
3541 }
3542 
3543 int32_t
3544 ud_slave_done(struct buf *bp)
3545 {
3546 	mio_master_t *mm;
3547 	int32_t resid;
3548 
3549 	ASSERT(SEMA_HELD(&bp->b_sem));
3550 	ASSERT((bp->b_flags & B_DONE) == 0);
3551 
3552 	mm = ((mio_slave_t *)bp)->ms_ptr;
3553 
3554 	/*
3555 	 * Propagate error and byte count info from slave struct to
3556 	 * the master struct
3557 	 */
3558 	mutex_enter(&mm->mm_mutex);
3559 	if (bp->b_flags & B_ERROR) {
3560 
3561 		/*
3562 		 * If multiple slave buffers get
3563 		 * error we forget the old errors
3564 		 * this is ok because we any way
3565 		 * cannot return multiple errors
3566 		 */
3567 		mm->mm_error = bp->b_error;
3568 	}
3569 	mm->mm_resid -= bp->b_bcount;
3570 	resid = mm->mm_resid;
3571 	mutex_exit(&mm->mm_mutex);
3572 
3573 	/*
3574 	 * free up the resources allocated to cloned buffers.
3575 	 */
3576 	bp_mapout(bp);
3577 	biofini(bp);
3578 
3579 	if (resid == 0) {
3580 
3581 		/*
3582 		 * This is the last I/O operation
3583 		 * clean up and return the original buffer
3584 		 */
3585 		if (mm->mm_error) {
3586 			mm->mm_bp->b_flags |= B_ERROR;
3587 			mm->mm_bp->b_error = mm->mm_error;
3588 		}
3589 		biodone(mm->mm_bp);
3590 		mutex_destroy(&mm->mm_mutex);
3591 		kmem_free(mm, mm->mm_size);
3592 	}
3593 	return (0);
3594 }
3595