xref: /titanic_50/usr/src/uts/common/fs/hsfs/hsfs_node.c (revision 69ed0c8ece2346b34605e2c9567c9f7b0dad5dc8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Directory operations for High Sierra filesystem
30  */
31 
32 #include <sys/types.h>
33 #include <sys/t_lock.h>
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/cred.h>
37 #include <sys/user.h>
38 #include <sys/vfs.h>
39 #include <sys/stat.h>
40 #include <sys/vnode.h>
41 #include <sys/mode.h>
42 #include <sys/dnlc.h>
43 #include <sys/cmn_err.h>
44 #include <sys/fbuf.h>
45 #include <sys/kmem.h>
46 #include <sys/policy.h>
47 #include <sys/sunddi.h>
48 #include <vm/hat.h>
49 #include <vm/as.h>
50 #include <vm/pvn.h>
51 #include <vm/seg.h>
52 #include <vm/seg_map.h>
53 #include <vm/seg_kmem.h>
54 #include <vm/page.h>
55 
56 #include <sys/fs/hsfs_spec.h>
57 #include <sys/fs/hsfs_isospec.h>
58 #include <sys/fs/hsfs_node.h>
59 #include <sys/fs/hsfs_impl.h>
60 #include <sys/fs/hsfs_susp.h>
61 #include <sys/fs/hsfs_rrip.h>
62 
63 #include <sys/sysinfo.h>
64 #include <sys/sysmacros.h>
65 #include <sys/errno.h>
66 #include <sys/debug.h>
67 #include <fs/fs_subr.h>
68 
/*
 * This macro expects a name that ends in '.' and returns TRUE if the
 * name is not "." or "..", i.e. if the trailing dot may be stripped.
 *
 * Both arguments are parenthesized in the expansion so that expressions
 * (for example "ptr + 1" or "len - 1") may be passed safely.  Each
 * argument can be evaluated more than once, so it must not have side
 * effects.
 */
#define	CAN_TRUNCATE_DOT(name, namelen)	\
		((namelen) > 1 && ((namelen) > 2 || (name)[0] != '.'))
75 
76 enum dirblock_result { FOUND_ENTRY, WENT_PAST, HIT_END };
77 
78 /*
79  * These values determine whether we will try to read a file or dir;
80  * they may be patched via /etc/system to allow users to read
81  * record-oriented files.
82  */
83 int ide_prohibited = IDE_PROHIBITED;
84 int hde_prohibited = HDE_PROHIBITED;
85 
86 /*
87  * This variable determines if the HSFS code will use the
88  * directory name lookup cache. The default is for the cache to be used.
89  */
90 static int hsfs_use_dnlc = 1;
91 
92 /*
93  * This variable determines whether strict ISO-9660 directory ordering
94  * is to be assumed.  If false (which it is by default), then when
95  * searching a directory of an ISO-9660 disk, we do not expect the
96  * entries to be sorted (as the spec requires), and so cannot terminate
97  * the search early.  Unfortunately, some vendors are producing
98  * non-compliant disks.  This variable exists to revert to the old
99  * behavior in case someone relies on this. This option is expected to be
100  * removed at some point in the future.
101  *
102  * Use "set hsfs:strict_iso9660_ordering = 1" in /etc/system to override.
103  */
104 static int strict_iso9660_ordering = 0;
105 
106 /*
107  * This tunable allows us to ignore inode numbers from rrip-1.12.
108  * In this case, we fall back to our default inode algorithm.
109  */
110 int use_rrip_inodes = 1;
111 
112 static void hs_hsnode_cache_reclaim(void *unused);
113 static void hs_addfreeb(struct hsfs *fsp, struct hsnode *hp);
114 static enum dirblock_result process_dirblock(struct fbuf *fbp, uint_t *offset,
115 	uint_t last_offset, char *nm, int nmlen, struct hsfs *fsp,
116 	struct hsnode *dhp, struct vnode *dvp, struct vnode **vpp,
117 	int *error);
118 static int strip_trailing(struct hsfs *fsp, char *nm, int len);
119 static int hs_namelen(struct hsfs *fsp, char *nm, int len);
120 static int uppercase_cp(char *from, char *to, int size);
121 static void hs_log_bogus_joliet_warning(void);
122 static int hs_iso_copy(char *from, char *to, int size);
123 static int32_t hs_ucs2_2_utf8(uint16_t c_16, uint8_t *s_8);
124 static int hs_utf8_trunc(uint8_t *str, int len);
125 
126 /*
127  * hs_access
128  * Return 0 if the desired access may be granted.
129  * Otherwise return error code.
130  */
131 int
132 hs_access(struct vnode *vp, mode_t m, struct cred *cred)
133 {
134 	struct hsnode *hp;
135 	int	shift = 0;
136 
137 	/*
138 	 * Write access cannot be granted for a read-only medium
139 	 */
140 	if ((m & VWRITE) && !IS_DEVVP(vp))
141 		return (EROFS);
142 
143 	hp = VTOH(vp);
144 
145 	/*
146 	 * XXX - For now, use volume protections.
147 	 *  Also, always grant EXEC access for directories
148 	 *  if READ access is granted.
149 	 */
150 	if ((vp->v_type == VDIR) && (m & VEXEC)) {
151 		m &= ~VEXEC;
152 		m |= VREAD;
153 	}
154 
155 	if (crgetuid(cred) != hp->hs_dirent.uid) {
156 		shift += 3;
157 		if (!groupmember((uid_t)hp->hs_dirent.gid, cred))
158 			shift += 3;
159 	}
160 	m &= ~(hp->hs_dirent.mode << shift);
161 	if (m != 0)
162 		return (secpolicy_vnode_access(cred, vp, hp->hs_dirent.uid, m));
163 	return (0);
164 }
165 
/*
 * HS_HASH maps a node id onto an index into the hsnode hash table.
 * When HS_HASHSIZE is a power of two a mask is used, otherwise the
 * remainder of a division.  HS_HPASH does the same for an hsnode.
 */
#if (HS_HASHSIZE & (HS_HASHSIZE - 1)) != 0
#define	HS_HASH(l)	((uint_t)(l) % HS_HASHSIZE)
#else
#define	HS_HASH(l)	((uint_t)(l) & (HS_HASHSIZE - 1))
#endif
#define	HS_HPASH(hp)	HS_HASH((hp)->hs_nodeid)
172 
173 /*
174  * The tunable nhsnode is now a threshold for a dynamically allocated
175  * pool of hsnodes, not the size of a statically allocated table.
176  * When the number of hsnodes for a particular file system exceeds
177  * nhsnode, the allocate and free logic will try to reduce the number
178  * of allocated nodes by returning unreferenced nodes to the kmem_cache
179  * instead of putting them on the file system's private free list.
180  */
181 int nhsnode = HS_HSNODESPACE / sizeof (struct hsnode);
182 
183 struct kmem_cache *hsnode_cache;  /* free hsnode cache */
184 
185 /*
186  * Initialize the cache of free hsnodes.
187  */
188 void
189 hs_init_hsnode_cache(void)
190 {
191 	/*
192 	 * A kmem_cache is used for the hsnodes
193 	 * No constructor because hsnodes are initialised by bzeroing.
194 	 */
195 	hsnode_cache = kmem_cache_create("hsfs_hsnode_cache",
196 	    sizeof (struct hsnode), 0, NULL,
197 	    NULL, hs_hsnode_cache_reclaim, NULL, NULL, 0);
198 }
199 
200 /*
201  * Destroy the cache of free hsnodes.
202  */
203 void
204 hs_fini_hsnode_cache(void)
205 {
206 	kmem_cache_destroy(hsnode_cache);
207 }
208 
209 /*
210  * System is short on memory, free up as much as possible
211  */
212 /*ARGSUSED*/
213 static void
214 hs_hsnode_cache_reclaim(void *unused)
215 {
216 	struct hsfs *fsp;
217 	struct hsnode *hp;
218 
219 	/*
220 	 * For each vfs in the hs_mounttab list
221 	 */
222 	mutex_enter(&hs_mounttab_lock);
223 	for (fsp = hs_mounttab; fsp != NULL; fsp = fsp->hsfs_next) {
224 		/*
225 		 * Purge the dnlc of all hsfs entries
226 		 */
227 		(void) dnlc_purge_vfsp(fsp->hsfs_vfs, 0);
228 
229 		/*
230 		 * For each entry in the free chain
231 		 */
232 		rw_enter(&fsp->hsfs_hash_lock, RW_WRITER);
233 		mutex_enter(&fsp->hsfs_free_lock);
234 		for (hp = fsp->hsfs_free_f; hp != NULL; hp = fsp->hsfs_free_f) {
235 			/*
236 			 * Remove from chain
237 			 */
238 			fsp->hsfs_free_f = hp->hs_freef;
239 			if (fsp->hsfs_free_f != NULL) {
240 				fsp->hsfs_free_f->hs_freeb = NULL;
241 			} else {
242 				fsp->hsfs_free_b = NULL;
243 			}
244 			/*
245 			 * Free the node. Force it to be fully freed
246 			 * by setting the 3rd arg (nopage) to 1.
247 			 */
248 			hs_freenode(HTOV(hp), fsp, 1);
249 		}
250 		mutex_exit(&fsp->hsfs_free_lock);
251 		rw_exit(&fsp->hsfs_hash_lock);
252 	}
253 	mutex_exit(&hs_mounttab_lock);
254 }
255 
256 /*
257  * Add an hsnode to the end of the free list.
258  */
259 static void
260 hs_addfreeb(struct hsfs *fsp, struct hsnode *hp)
261 {
262 	struct hsnode *ep;
263 
264 	vn_invalid(HTOV(hp));
265 	mutex_enter(&fsp->hsfs_free_lock);
266 	ep = fsp->hsfs_free_b;
267 	fsp->hsfs_free_b = hp;		/* hp is the last entry in free list */
268 	hp->hs_freef = NULL;
269 	hp->hs_freeb = ep;		/* point at previous last entry */
270 	if (ep == NULL)
271 		fsp->hsfs_free_f = hp;	/* hp is only entry in free list */
272 	else
273 		ep->hs_freef = hp;	/* point previous last entry at hp */
274 
275 	mutex_exit(&fsp->hsfs_free_lock);
276 }
277 
278 /*
279  * Get an hsnode from the front of the free list.
280  * Must be called with write hsfs_hash_lock held.
281  */
282 static struct hsnode *
283 hs_getfree(struct hsfs *fsp)
284 {
285 	struct hsnode *hp, **tp;
286 
287 	ASSERT(RW_WRITE_HELD(&fsp->hsfs_hash_lock));
288 
289 	/*
290 	 * If the number of currently-allocated hsnodes is less than
291 	 * the hsnode count threshold (nhsnode), or if there are no
292 	 * nodes on the file system's local free list (which acts as a
293 	 * cache), call kmem_cache_alloc to get a new hsnode from
294 	 * kernel memory.
295 	 */
296 	mutex_enter(&fsp->hsfs_free_lock);
297 	if ((fsp->hsfs_nohsnode < nhsnode) || (fsp->hsfs_free_f == NULL)) {
298 		mutex_exit(&fsp->hsfs_free_lock);
299 		hp = kmem_cache_alloc(hsnode_cache, KM_SLEEP);
300 		fsp->hsfs_nohsnode++;
301 		bzero((caddr_t)hp, sizeof (*hp));
302 		hp->hs_vnode = vn_alloc(KM_SLEEP);
303 		return (hp);
304 	}
305 	hp = fsp->hsfs_free_f;
306 	/* hp cannot be NULL, since we already checked this above */
307 	fsp->hsfs_free_f = hp->hs_freef;
308 	if (fsp->hsfs_free_f != NULL)
309 		fsp->hsfs_free_f->hs_freeb = NULL;
310 	else
311 		fsp->hsfs_free_b = NULL;
312 	mutex_exit(&fsp->hsfs_free_lock);
313 
314 	for (tp = &fsp->hsfs_hash[HS_HPASH(hp)]; *tp != NULL;
315 	    tp = &(*tp)->hs_hash) {
316 		if (*tp == hp) {
317 			struct vnode *vp;
318 
319 			vp = HTOV(hp);
320 
321 			/*
322 			 * file is no longer referenced, destroy all old pages
323 			 */
324 			if (vn_has_cached_data(vp))
325 				/*
326 				 * pvn_vplist_dirty will abort all old pages
327 				 */
328 				(void) pvn_vplist_dirty(vp, (u_offset_t)0,
329 				    hsfs_putapage, B_INVAL,
330 				    (struct cred *)NULL);
331 			*tp = hp->hs_hash;
332 			break;
333 		}
334 	}
335 	if (hp->hs_dirent.sym_link != (char *)NULL) {
336 		kmem_free(hp->hs_dirent.sym_link,
337 		    (size_t)(hp->hs_dirent.ext_size + 1));
338 	}
339 
340 	mutex_destroy(&hp->hs_contents_lock);
341 	{
342 		vnode_t	*vp;
343 
344 		vp = hp->hs_vnode;
345 		bzero((caddr_t)hp, sizeof (*hp));
346 		hp->hs_vnode = vp;
347 		vn_reinit(vp);
348 	}
349 	return (hp);
350 }
351 
352 /*
353  * Remove an hsnode from the free list.
354  */
355 static void
356 hs_remfree(struct hsfs *fsp, struct hsnode *hp)
357 {
358 	mutex_enter(&fsp->hsfs_free_lock);
359 	if (hp->hs_freef != NULL)
360 		hp->hs_freef->hs_freeb = hp->hs_freeb;
361 	else
362 		fsp->hsfs_free_b = hp->hs_freeb;
363 	if (hp->hs_freeb != NULL)
364 		hp->hs_freeb->hs_freef = hp->hs_freef;
365 	else
366 		fsp->hsfs_free_f = hp->hs_freef;
367 	mutex_exit(&fsp->hsfs_free_lock);
368 }
369 
370 /*
371  * Look for hsnode in hash list.
372  * If the inode number is != HS_DUMMY_INO (16), then only the inode
373  * number is used for the check.
374  * If the inode number is == HS_DUMMY_INO, we additionally always
375  * check the directory offset for the file to avoid caching the
376  * meta data for all zero sized to the first zero sized file that
377  * was touched.
378  *
379  * If found, reactivate it if inactive.
380  *
381  * Must be entered with hsfs_hash_lock held.
382  */
383 struct vnode *
384 hs_findhash(ino64_t nodeid, uint_t lbn, uint_t off, struct vfs *vfsp)
385 {
386 	struct hsnode *tp;
387 	struct hsfs *fsp;
388 
389 	fsp = VFS_TO_HSFS(vfsp);
390 
391 	ASSERT(RW_LOCK_HELD(&fsp->hsfs_hash_lock));
392 
393 	for (tp = fsp->hsfs_hash[HS_HASH(nodeid)]; tp != NULL;
394 	    tp = tp->hs_hash) {
395 		if (tp->hs_nodeid == nodeid) {
396 			struct vnode *vp;
397 
398 			if (nodeid == HS_DUMMY_INO) {
399 				/*
400 				 * If this is the dummy inode number, look for
401 				 * matching dir_lbn and dir_off.
402 				 */
403 				for (; tp != NULL; tp = tp->hs_hash) {
404 					if (tp->hs_nodeid == nodeid &&
405 					    tp->hs_dir_lbn == lbn &&
406 					    tp->hs_dir_off == off)
407 						break;
408 				}
409 				if (tp == NULL)
410 					return (NULL);
411 			}
412 
413 			mutex_enter(&tp->hs_contents_lock);
414 			vp = HTOV(tp);
415 			VN_HOLD(vp);
416 			if ((tp->hs_flags & HREF) == 0) {
417 				tp->hs_flags |= HREF;
418 				/*
419 				 * reactivating a free hsnode:
420 				 * remove from free list
421 				 */
422 				hs_remfree(fsp, tp);
423 			}
424 			mutex_exit(&tp->hs_contents_lock);
425 			return (vp);
426 		}
427 	}
428 	return (NULL);
429 }
430 
431 static void
432 hs_addhash(struct hsfs *fsp, struct hsnode *hp)
433 {
434 	ulong_t hashno;
435 
436 	ASSERT(RW_WRITE_HELD(&fsp->hsfs_hash_lock));
437 
438 	hashno = HS_HPASH(hp);
439 	hp->hs_hash = fsp->hsfs_hash[hashno];
440 	fsp->hsfs_hash[hashno] = hp;
441 }
442 
443 /*
444  * Destroy all old pages and free the hsnodes
445  * Return 1 if busy (a hsnode is still referenced).
446  */
447 int
448 hs_synchash(struct vfs *vfsp)
449 {
450 	struct hsfs *fsp;
451 	int i;
452 	struct hsnode *hp, *nhp;
453 	int busy = 0;
454 	struct vnode *vp, *rvp;
455 
456 	fsp = VFS_TO_HSFS(vfsp);
457 	rvp = fsp->hsfs_rootvp;
458 	/* make sure no one can come in */
459 	rw_enter(&fsp->hsfs_hash_lock, RW_WRITER);
460 	for (i = 0; i < HS_HASHSIZE; i++) {
461 		for (hp = fsp->hsfs_hash[i]; hp != NULL; hp = hp->hs_hash) {
462 			vp = HTOV(hp);
463 			if ((hp->hs_flags & HREF) && (vp != rvp ||
464 			    (vp == rvp && vp->v_count > 1))) {
465 				busy = 1;
466 				continue;
467 			}
468 			if (vn_has_cached_data(vp))
469 				(void) pvn_vplist_dirty(vp, (u_offset_t)0,
470 				    hsfs_putapage, B_INVAL,
471 				    (struct cred *)NULL);
472 		}
473 	}
474 	if (busy) {
475 		rw_exit(&fsp->hsfs_hash_lock);
476 		return (1);
477 	}
478 
479 	/* now free the hsnodes */
480 	for (i = 0; i < HS_HASHSIZE; i++) {
481 		for (hp = fsp->hsfs_hash[i]; hp != NULL; hp = nhp) {
482 			nhp = hp->hs_hash;
483 			/*
484 			 * We know there are no pages associated with
485 			 * all the hsnodes (they've all been released
486 			 * above). So remove from free list and
487 			 * free the entry with nopage set.
488 			 */
489 			vp = HTOV(hp);
490 			if (vp != rvp) {
491 				hs_remfree(fsp, hp);
492 				hs_freenode(vp, fsp, 1);
493 			}
494 		}
495 	}
496 
497 	ASSERT(fsp->hsfs_nohsnode == 1);
498 	rw_exit(&fsp->hsfs_hash_lock);
499 	/* release the root hsnode, this should free the final hsnode */
500 	VN_RELE(rvp);
501 
502 	return (0);
503 }
504 
505 /*
506  * hs_makenode
507  *
508  * Construct an hsnode.
509  * Caller specifies the directory entry, the block number and offset
510  * of the directory entry, and the vfs pointer.
511  * note: off is the sector offset, not lbn offset
512  * if NULL is returned implies file system hsnode table full
513  */
514 struct vnode *
515 hs_makenode(
516 	struct hs_direntry *dp,
517 	uint_t lbn,
518 	uint_t off,
519 	struct vfs *vfsp)
520 {
521 	struct hsnode *hp;
522 	struct vnode *vp;
523 	struct hs_volume *hvp;
524 	struct vnode *newvp;
525 	struct hsfs *fsp;
526 	ino64_t nodeid;
527 
528 	fsp = VFS_TO_HSFS(vfsp);
529 
530 	/*
531 	 * Construct the data that allows us to re-read the meta data without
532 	 * knowing the name of the file: in the case of a directory
533 	 * entry, this should point to the canonical dirent, the "."
534 	 * directory entry for the directory.  This dirent is pointed
535 	 * to by all directory entries for that dir (including the ".")
536 	 * entry itself.
537 	 * In the case of a file, simply point to the dirent for that
538 	 * file (there are hard links in Rock Ridge, so we need to use
539 	 * different data to contruct the node id).
540 	 */
541 	if (dp->type == VDIR) {
542 		lbn = dp->ext_lbn;
543 		off = 0;
544 	}
545 
546 	/*
547 	 * Normalize lbn and off before creating a nodeid
548 	 * and before storing them in a hs_node structure
549 	 */
550 	hvp = &fsp->hsfs_vol;
551 	lbn += off >> hvp->lbn_shift;
552 	off &= hvp->lbn_maxoffset;
553 	/*
554 	 * If the media carries rrip-v1.12 or newer, and we trust the inodes
555 	 * from the rrip data (use_rrip_inodes != 0), use that data. If the
556 	 * media has been created by a recent mkisofs version, we may trust
557 	 * all numbers in the starting extent number; otherwise, we cannot
558 	 * do this for zero sized files and symlinks, because if we did we'd
559 	 * end up mapping all of them to the same node.
560 	 * We use HS_DUMMY_INO in this case and make sure that we will not
561 	 * map all files to the same meta data.
562 	 */
563 	if (dp->inode != 0 && use_rrip_inodes) {
564 		nodeid = dp->inode;
565 	} else if ((dp->ext_size == 0 || dp->sym_link != (char *)NULL) &&
566 	    (fsp->hsfs_flags & HSFSMNT_INODE) == 0) {
567 		nodeid = HS_DUMMY_INO;
568 	} else {
569 		nodeid = dp->ext_lbn;
570 	}
571 
572 	/* look for hsnode in cache first */
573 
574 	rw_enter(&fsp->hsfs_hash_lock, RW_READER);
575 
576 	if ((vp = hs_findhash(nodeid, lbn, off, vfsp)) == NULL) {
577 
578 		/*
579 		 * Not in cache.  However, someone else may have come
580 		 * to the same conclusion and just put one in.	Upgrade
581 		 * our lock to a write lock and look again.
582 		 */
583 		rw_exit(&fsp->hsfs_hash_lock);
584 		rw_enter(&fsp->hsfs_hash_lock, RW_WRITER);
585 
586 		if ((vp = hs_findhash(nodeid, lbn, off, vfsp)) == NULL) {
587 			/*
588 			 * Now we are really sure that the hsnode is not
589 			 * in the cache.  Get one off freelist or else
590 			 * allocate one. Either way get a bzeroed hsnode.
591 			 */
592 			hp = hs_getfree(fsp);
593 
594 			bcopy((caddr_t)dp, (caddr_t)&hp->hs_dirent,
595 			    sizeof (*dp));
596 			/*
597 			 * We've just copied this pointer into hs_dirent,
598 			 * and don't want 2 references to same symlink.
599 			 */
600 			dp->sym_link = (char *)NULL;
601 
602 			/*
603 			 * No need to hold any lock because hsnode is not
604 			 * yet in the hash chain.
605 			 */
606 			mutex_init(&hp->hs_contents_lock, NULL, MUTEX_DEFAULT,
607 			    NULL);
608 			hp->hs_dir_lbn = lbn;
609 			hp->hs_dir_off = off;
610 			hp->hs_nodeid = nodeid;
611 			hp->hs_seq = 0;
612 			hp->hs_prev_offset = 0;
613 			hp->hs_num_contig = 0;
614 			hp->hs_ra_bytes = 0;
615 			hp->hs_flags = HREF;
616 			if (off > HS_SECTOR_SIZE)
617 				cmn_err(CE_WARN, "hs_makenode: bad offset");
618 
619 			vp = HTOV(hp);
620 			vp->v_vfsp = vfsp;
621 			vp->v_type = dp->type;
622 			vp->v_rdev = dp->r_dev;
623 			vn_setops(vp, hsfs_vnodeops);
624 			vp->v_data = (caddr_t)hp;
625 			vn_exists(vp);
626 			/*
627 			 * if it's a device, call specvp
628 			 */
629 			if (IS_DEVVP(vp)) {
630 				rw_exit(&fsp->hsfs_hash_lock);
631 				newvp = specvp(vp, vp->v_rdev, vp->v_type,
632 				    CRED());
633 				if (newvp == NULL)
634 					cmn_err(CE_NOTE,
635 					    "hs_makenode: specvp failed");
636 				VN_RELE(vp);
637 				return (newvp);
638 			}
639 
640 			hs_addhash(fsp, hp);
641 
642 		}
643 	}
644 
645 	if (dp->sym_link != (char *)NULL) {
646 		kmem_free(dp->sym_link, (size_t)(dp->ext_size + 1));
647 		dp->sym_link = (char *)NULL;
648 	}
649 
650 	rw_exit(&fsp->hsfs_hash_lock);
651 	return (vp);
652 }
653 
654 /*
655  * hs_freenode
656  *
657  * Deactivate an hsnode.
658  * Leave it on the hash list but put it on the free list.
659  * If the vnode does not have any pages, release the hsnode to the
660  * kmem_cache using kmem_cache_free, else put in back of the free list.
661  *
662  * This function can be called with the hsfs_free_lock held, but only
663  * when the code is guaranteed to go through the path where the
664  * node is freed entirely, and not the path where the node could go back
665  * on the free list (and where the free lock would need to be acquired).
666  */
667 void
668 hs_freenode(vnode_t *vp, struct hsfs *fsp, int nopage)
669 {
670 	struct hsnode **tp;
671 	struct hsnode *hp = VTOH(vp);
672 
673 	ASSERT(RW_LOCK_HELD(&fsp->hsfs_hash_lock));
674 
675 	if (nopage || (fsp->hsfs_nohsnode >= nhsnode)) {
676 		/* remove this node from the hash list, if it's there */
677 		for (tp = &fsp->hsfs_hash[HS_HPASH(hp)]; *tp != NULL;
678 		    tp = &(*tp)->hs_hash) {
679 
680 			if (*tp == hp) {
681 				*tp = hp->hs_hash;
682 				break;
683 			}
684 		}
685 
686 		if (hp->hs_dirent.sym_link != (char *)NULL) {
687 			kmem_free(hp->hs_dirent.sym_link,
688 			    (size_t)(hp->hs_dirent.ext_size + 1));
689 			hp->hs_dirent.sym_link = NULL;
690 		}
691 		if (vn_has_cached_data(vp)) {
692 			/* clean all old pages */
693 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
694 			    hsfs_putapage, B_INVAL, (struct cred *)NULL);
695 			/* XXX - can we remove pages by fiat like this??? */
696 			vp->v_pages = NULL;
697 		}
698 		mutex_destroy(&hp->hs_contents_lock);
699 		vn_invalid(vp);
700 		vn_free(vp);
701 		kmem_cache_free(hsnode_cache, hp);
702 		fsp->hsfs_nohsnode--;
703 		return;
704 	}
705 	hs_addfreeb(fsp, hp); /* add to back of free list */
706 }
707 
708 /*
709  * hs_remakenode
710  *
711  * Reconstruct a vnode given the location of its directory entry.
712  * Caller specifies the the block number and offset
713  * of the directory entry, and the vfs pointer.
714  * Returns an error code or 0.
715  */
716 int
717 hs_remakenode(uint_t lbn, uint_t off, struct vfs *vfsp,
718     struct vnode **vpp)
719 {
720 	struct buf *secbp;
721 	struct hsfs *fsp;
722 	uint_t secno;
723 	uchar_t *dirp;
724 	struct hs_direntry hd;
725 	int error;
726 
727 	/* Convert to sector and offset */
728 	fsp = VFS_TO_HSFS(vfsp);
729 	if (off > HS_SECTOR_SIZE) {
730 		cmn_err(CE_WARN, "hs_remakenode: bad offset");
731 		error = EINVAL;
732 		goto end;
733 	}
734 	secno = LBN_TO_SEC(lbn, vfsp);
735 	secbp = bread(fsp->hsfs_devvp->v_rdev, secno * 4, HS_SECTOR_SIZE);
736 
737 	error = geterror(secbp);
738 	if (error != 0) {
739 		cmn_err(CE_NOTE, "hs_remakenode: bread: error=(%d)", error);
740 		goto end;
741 	}
742 
743 	dirp = (uchar_t *)secbp->b_un.b_addr;
744 	error = hs_parsedir(fsp, &dirp[off], &hd, (char *)NULL, (int *)NULL,
745 	    HS_SECTOR_SIZE - off);
746 	if (!error) {
747 		*vpp = hs_makenode(&hd, lbn, off, vfsp);
748 		if (*vpp == NULL)
749 			error = ENFILE;
750 	}
751 
752 end:
753 	brelse(secbp);
754 	return (error);
755 }
756 
757 
758 /*
759  * hs_dirlook
760  *
761  * Look for a given name in a given directory.
762  * If found, construct an hsnode for it.
763  */
764 int
765 hs_dirlook(
766 	struct vnode	*dvp,
767 	char		*name,
768 	int		namlen,		/* length of 'name' */
769 	struct vnode	**vpp,
770 	struct cred	*cred)
771 {
772 	struct hsnode *dhp;
773 	struct hsfs	*fsp;
774 	int		error = 0;
775 	uint_t		offset;		/* real offset in directory */
776 	uint_t		last_offset;	/* last index in directory */
777 	char		*cmpname;	/* case-folded name */
778 	int		cmpname_size;	/* how much memory we allocate for it */
779 	int		cmpnamelen;
780 	int		adhoc_search;	/* did we start at begin of dir? */
781 	int		end;
782 	uint_t		hsoffset;
783 	struct fbuf	*fbp;
784 	int		bytes_wanted;
785 	int		dirsiz;
786 	int		is_rrip;
787 
788 	if (dvp->v_type != VDIR)
789 		return (ENOTDIR);
790 
791 	if (error = hs_access(dvp, (mode_t)VEXEC, cred))
792 		return (error);
793 
794 	if (hsfs_use_dnlc && (*vpp = dnlc_lookup(dvp, name)))
795 		return (0);
796 
797 	dhp = VTOH(dvp);
798 	fsp = VFS_TO_HSFS(dvp->v_vfsp);
799 	is_rrip = IS_RRIP_IMPLEMENTED(fsp);
800 
801 	/*
802 	 * name == "^A" is illegal for ISO-9660 and Joliet as '..' is '\1' on
803 	 * disk. It is no problem for Rock Ridge as RR uses '.' and '..'.
804 	 * XXX It could be OK for Joliet also (because namelen == 1 is
805 	 * XXX impossible for UCS-2) but then we need a better compare algorith.
806 	 */
807 	if (!is_rrip && *name == '\1' && namlen == 1)
808 		return (EINVAL);
809 
810 	cmpname_size = (int)(fsp->hsfs_namemax + 1);
811 	cmpname = kmem_alloc((size_t)cmpname_size, KM_SLEEP);
812 
813 	if (namlen >= cmpname_size)
814 		namlen = cmpname_size - 1;
815 	/*
816 	 * For the purposes of comparing the name against dir entries,
817 	 * fold it to upper case.
818 	 */
819 	if (is_rrip) {
820 		(void) strlcpy(cmpname, name, cmpname_size);
821 		cmpnamelen = namlen;
822 	} else {
823 		/*
824 		 * If we don't consider a trailing dot as part of the filename,
825 		 * remove it from the specified name
826 		 */
827 		if ((fsp->hsfs_flags & HSFSMNT_NOTRAILDOT) &&
828 		    name[namlen-1] == '.' &&
829 		    CAN_TRUNCATE_DOT(name, namlen))
830 			name[--namlen] = '\0';
831 		if (fsp->hsfs_vol_type == HS_VOL_TYPE_ISO_V2 ||
832 		    fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET) {
833 			cmpnamelen = hs_iso_copy(name, cmpname, namlen);
834 		} else {
835 			cmpnamelen = hs_uppercase_copy(name, cmpname, namlen);
836 		}
837 	}
838 
839 	/* make sure dirent is filled up with all info */
840 	if (dhp->hs_dirent.ext_size == 0)
841 		hs_filldirent(dvp, &dhp->hs_dirent);
842 
843 	/*
844 	 * No lock is needed - hs_offset is used as starting
845 	 * point for searching the directory.
846 	 */
847 	offset = dhp->hs_offset;
848 	hsoffset = offset;
849 	adhoc_search = (offset != 0);
850 
851 	end = dhp->hs_dirent.ext_size;
852 	dirsiz = end;
853 
854 tryagain:
855 
856 	while (offset < end) {
857 		bytes_wanted = MIN(MAXBSIZE, dirsiz - (offset & MAXBMASK));
858 
859 		error = fbread(dvp, (offset_t)(offset & MAXBMASK),
860 		    (unsigned int)bytes_wanted, S_READ, &fbp);
861 		if (error)
862 			goto done;
863 
864 		last_offset = (offset & MAXBMASK) + fbp->fb_count;
865 
866 		switch (process_dirblock(fbp, &offset, last_offset,
867 		    cmpname, cmpnamelen, fsp, dhp, dvp, vpp, &error)) {
868 		case FOUND_ENTRY:
869 			/* found an entry, either correct or not */
870 			goto done;
871 
872 		case WENT_PAST:
873 			/*
874 			 * If we get here we know we didn't find it on the
875 			 * first pass. If adhoc_search, then we started a
876 			 * bit into the dir, and need to wrap around and
877 			 * search the first entries.  If not, then we started
878 			 * at the beginning and didn't find it.
879 			 */
880 			if (adhoc_search) {
881 				offset = 0;
882 				end = hsoffset;
883 				adhoc_search = 0;
884 				goto tryagain;
885 			}
886 			error = ENOENT;
887 			goto done;
888 
889 		case HIT_END:
890 			goto tryagain;
891 		}
892 	}
893 	/*
894 	 * End of all dir blocks, didn't find entry.
895 	 */
896 	if (adhoc_search) {
897 		offset = 0;
898 		end = hsoffset;
899 		adhoc_search = 0;
900 		goto tryagain;
901 	}
902 	error = ENOENT;
903 done:
904 	/*
905 	 * If we found the entry, add it to the DNLC
906 	 * If the entry is a device file (assuming we support Rock Ridge),
907 	 * we enter the device vnode to the cache since that is what
908 	 * is in *vpp.
909 	 * That is ok since the CD-ROM is read-only, so (dvp,name) will
910 	 * always point to the same device.
911 	 */
912 	if (hsfs_use_dnlc && !error)
913 		dnlc_enter(dvp, name, *vpp);
914 
915 	kmem_free(cmpname, (size_t)cmpname_size);
916 
917 	return (error);
918 }
919 
920 /*
921  * hs_parsedir
922  *
923  * Parse a Directory Record into an hs_direntry structure.
924  * High Sierra and ISO directory are almost the same
925  * except the flag and date
926  */
927 int
928 hs_parsedir(
929 	struct hsfs		*fsp,
930 	uchar_t			*dirp,
931 	struct hs_direntry	*hdp,
932 	char			*dnp,
933 	int			*dnlen,
934 	int			last_offset)	/* last offset in dirp */
935 {
936 	char	*on_disk_name;
937 	int	on_disk_namelen;
938 	int	on_disk_dirlen;
939 	uchar_t	flags;
940 	int	namelen;
941 	int	error;
942 	int	name_change_flag = 0;	/* set if name was gotten in SUA */
943 
944 	hdp->ext_lbn = HDE_EXT_LBN(dirp);
945 	hdp->ext_size = HDE_EXT_SIZE(dirp);
946 	hdp->xar_len = HDE_XAR_LEN(dirp);
947 	hdp->intlf_sz = HDE_INTRLV_SIZE(dirp);
948 	hdp->intlf_sk = HDE_INTRLV_SKIP(dirp);
949 	hdp->sym_link = (char *)NULL;
950 
951 	if (fsp->hsfs_vol_type == HS_VOL_TYPE_HS) {
952 		flags = HDE_FLAGS(dirp);
953 		hs_parse_dirdate(HDE_cdate(dirp), &hdp->cdate);
954 		hs_parse_dirdate(HDE_cdate(dirp), &hdp->adate);
955 		hs_parse_dirdate(HDE_cdate(dirp), &hdp->mdate);
956 		if ((flags & hde_prohibited) == 0) {
957 			/*
958 			 * Skip files with the associated bit set.
959 			 */
960 			if (flags & HDE_ASSOCIATED)
961 				return (EAGAIN);
962 			hdp->type = VREG;
963 			hdp->mode = HFREG;
964 			hdp->nlink = 1;
965 		} else if ((flags & hde_prohibited) == HDE_DIRECTORY) {
966 			hdp->type = VDIR;
967 			hdp->mode = HFDIR;
968 			hdp->nlink = 2;
969 		} else {
970 			hs_log_bogus_disk_warning(fsp,
971 			    HSFS_ERR_UNSUP_TYPE, flags);
972 			return (EINVAL);
973 		}
974 		hdp->uid = fsp -> hsfs_vol.vol_uid;
975 		hdp->gid = fsp -> hsfs_vol.vol_gid;
976 		hdp->mode = hdp-> mode | (fsp -> hsfs_vol.vol_prot & 0777);
977 	} else if ((fsp->hsfs_vol_type == HS_VOL_TYPE_ISO) ||
978 	    (fsp->hsfs_vol_type == HS_VOL_TYPE_ISO_V2) ||
979 	    (fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET)) {
980 
981 		flags = IDE_FLAGS(dirp);
982 		hs_parse_dirdate(IDE_cdate(dirp), &hdp->cdate);
983 		hs_parse_dirdate(IDE_cdate(dirp), &hdp->adate);
984 		hs_parse_dirdate(IDE_cdate(dirp), &hdp->mdate);
985 
986 		if ((flags & ide_prohibited) == 0) {
987 			/*
988 			 * Skip files with the associated bit set.
989 			 */
990 			if (flags & IDE_ASSOCIATED)
991 				return (EAGAIN);
992 			hdp->type = VREG;
993 			hdp->mode = HFREG;
994 			hdp->nlink = 1;
995 		} else if ((flags & ide_prohibited) == IDE_DIRECTORY) {
996 			hdp->type = VDIR;
997 			hdp->mode = HFDIR;
998 			hdp->nlink = 2;
999 		} else {
1000 			hs_log_bogus_disk_warning(fsp,
1001 			    HSFS_ERR_UNSUP_TYPE, flags);
1002 			return (EINVAL);
1003 		}
1004 		hdp->uid = fsp -> hsfs_vol.vol_uid;
1005 		hdp->gid = fsp -> hsfs_vol.vol_gid;
1006 		hdp->mode = hdp-> mode | (fsp -> hsfs_vol.vol_prot & 0777);
1007 		hdp->inode = 0;		/* initialize with 0, then check rrip */
1008 
1009 		/*
1010 		 * Having this all filled in, let's see if we have any
1011 		 * SUA susp to look at.
1012 		 */
1013 		if (IS_SUSP_IMPLEMENTED(fsp)) {
1014 			error = parse_sua((uchar_t *)dnp, dnlen,
1015 			    &name_change_flag, dirp, last_offset,
1016 			    hdp, fsp,
1017 			    (uchar_t *)NULL, NULL);
1018 			if (error) {
1019 				if (hdp->sym_link) {
1020 					kmem_free(hdp->sym_link,
1021 					    (size_t)(hdp->ext_size + 1));
1022 					hdp->sym_link = (char *)NULL;
1023 				}
1024 				return (error);
1025 			}
1026 		}
1027 	}
1028 	hdp->xar_prot = (HDE_PROTECTION & flags) != 0;
1029 
1030 #if dontskip
1031 	if (hdp->xar_len > 0) {
1032 		cmn_err(CE_NOTE, "hsfs: extended attributes not supported");
1033 		return (EINVAL);
1034 	}
1035 #endif
1036 
1037 	/* check interleaf size and skip factor */
1038 	/* must both be zero or non-zero */
1039 	if (hdp->intlf_sz + hdp->intlf_sk) {
1040 		if ((hdp->intlf_sz == 0) || (hdp->intlf_sk == 0)) {
1041 			cmn_err(CE_NOTE,
1042 			    "hsfs: interleaf size or skip factor error");
1043 			return (EINVAL);
1044 		}
1045 		if (hdp->ext_size == 0) {
1046 			cmn_err(CE_NOTE,
1047 			    "hsfs: interleaving specified on zero length file");
1048 			return (EINVAL);
1049 		}
1050 	}
1051 
1052 	if (HDE_VOL_SET(dirp) != 1) {
1053 		if (fsp->hsfs_vol.vol_set_size != 1 &&
1054 		    fsp->hsfs_vol.vol_set_size != HDE_VOL_SET(dirp)) {
1055 			cmn_err(CE_NOTE, "hsfs: multivolume file?");
1056 			return (EINVAL);
1057 		}
1058 	}
1059 
1060 	/*
1061 	 * If the name changed, then the NM field for RRIP was hit and
1062 	 * we should not copy the name again, just return.
1063 	 */
1064 	if (NAME_HAS_CHANGED(name_change_flag))
1065 		return (0);
1066 
1067 	/*
1068 	 * Fall back to the ISO name. Note that as in process_dirblock,
1069 	 * the on-disk filename length must be validated against ISO
1070 	 * limits - which, in case of RR present but no RR name found,
1071 	 * are NOT identical to fsp->hsfs_namemax on this filesystem.
1072 	 */
1073 	on_disk_name = (char *)HDE_name(dirp);
1074 	on_disk_namelen = (int)HDE_NAME_LEN(dirp);
1075 	on_disk_dirlen = (int)HDE_DIR_LEN(dirp);
1076 
1077 	if (on_disk_dirlen < HDE_ROOT_DIR_REC_SIZE ||
1078 	    ((on_disk_dirlen > last_offset) ||
1079 	    ((HDE_FDESIZE + on_disk_namelen) > on_disk_dirlen))) {
1080 		hs_log_bogus_disk_warning(fsp,
1081 		    HSFS_ERR_BAD_DIR_ENTRY, 0);
1082 		return (EINVAL);
1083 	}
1084 
1085 	if (on_disk_namelen > fsp->hsfs_namelen &&
1086 	    hs_namelen(fsp, on_disk_name, on_disk_namelen) >
1087 	    fsp->hsfs_namelen) {
1088 		hs_log_bogus_disk_warning(fsp,
1089 		    fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET ?
1090 		    HSFS_ERR_BAD_JOLIET_FILE_LEN :
1091 		    HSFS_ERR_BAD_FILE_LEN, 0);
1092 	}
1093 	if (on_disk_namelen > ISO_NAMELEN_V2_MAX)
1094 		on_disk_namelen = fsp->hsfs_namemax;	/* Paranoia */
1095 
1096 	if (dnp != NULL) {
1097 		if (fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET) {
1098 			namelen = hs_jnamecopy(on_disk_name, dnp,
1099 			    on_disk_namelen, fsp->hsfs_namemax,
1100 			    fsp->hsfs_flags);
1101 			/*
1102 			 * A negative return value means that the file name
1103 			 * has been truncated to fsp->hsfs_namemax.
1104 			 */
1105 			if (namelen < 0) {
1106 				namelen = -namelen;
1107 				hs_log_bogus_disk_warning(fsp,
1108 				    HSFS_ERR_TRUNC_JOLIET_FILE_LEN, 0);
1109 			}
1110 		} else {
1111 			/*
1112 			 * HS_VOL_TYPE_ISO && HS_VOL_TYPE_ISO_V2
1113 			 */
1114 			namelen = hs_namecopy(on_disk_name, dnp,
1115 			    on_disk_namelen, fsp->hsfs_flags);
1116 		}
1117 		if (namelen == 0)
1118 			return (EINVAL);
1119 		if ((fsp->hsfs_flags & HSFSMNT_NOTRAILDOT) &&
1120 		    dnp[ namelen-1 ] == '.' && CAN_TRUNCATE_DOT(dnp, namelen))
1121 			dnp[ --namelen ] = '\0';
1122 	} else
1123 		namelen = on_disk_namelen;
1124 	if (dnlen != NULL)
1125 		*dnlen = namelen;
1126 
1127 	return (0);
1128 }
1129 
1130 /*
1131  * hs_namecopy
1132  *
1133  * Parse a file/directory name into UNIX form.
1134  * Delete trailing blanks, upper-to-lower case, add NULL terminator.
1135  * Returns the (possibly new) length.
1136  *
1137  * Called from hsfs_readdir() via hs_parsedir()
1138  */
1139 int
1140 hs_namecopy(char *from, char *to, int size, ulong_t flags)
1141 {
1142 	uint_t i;
1143 	uchar_t c;
1144 	int lastspace;
1145 	int maplc;
1146 	int trailspace;
1147 	int version;
1148 
1149 	/* special handling for '.' and '..' */
1150 	if (size == 1) {
1151 		if (*from == '\0') {
1152 			*to++ = '.';
1153 			*to = '\0';
1154 			return (1);
1155 		} else if (*from == '\1') {
1156 			*to++ = '.';
1157 			*to++ = '.';
1158 			*to = '\0';
1159 			return (2);
1160 		}
1161 	}
1162 
1163 	maplc = (flags & HSFSMNT_NOMAPLCASE) == 0;
1164 	trailspace = (flags & HSFSMNT_NOTRAILSPACE) == 0;
1165 	version = (flags & HSFSMNT_NOVERSION) == 0;
1166 	for (i = 0, lastspace = -1; i < size; i++) {
1167 		c = from[i];
1168 		if (c == ';' && version)
1169 			break;
1170 		if (c <= ' ' && !trailspace) {
1171 			if (lastspace == -1)
1172 				lastspace = i;
1173 		} else
1174 			lastspace = -1;
1175 		if (maplc && (c >= 'A') && (c <= 'Z'))
1176 			c += 'a' - 'A';
1177 		to[i] = c;
1178 	}
1179 	if (lastspace != -1)
1180 		i = lastspace;
1181 	to[i] = '\0';
1182 	return (i);
1183 }
1184 
1185 /*
1186  * hs_jnamecopy
1187  *
1188  * This is the Joliet variant of hs_namecopy()
1189  *
1190  * Parse a UCS-2 Joliet file/directory name into UNIX form.
1191  * Add NULL terminator.
1192  * Returns the new length.
1193  *
1194  * Called from hsfs_readdir() via hs_parsedir()
1195  */
1196 int
1197 hs_jnamecopy(char *from, char *to, int size, int maxsize, ulong_t flags)
1198 {
1199 	uint_t i;
1200 	uint_t len;
1201 	uint16_t c;
1202 	int	amt;
1203 	int	version;
1204 
1205 	/* special handling for '.' and '..' */
1206 	if (size == 1) {
1207 		if (*from == '\0') {
1208 			*to++ = '.';
1209 			*to = '\0';
1210 			return (1);
1211 		} else if (*from == '\1') {
1212 			*to++ = '.';
1213 			*to++ = '.';
1214 			*to = '\0';
1215 			return (2);
1216 		}
1217 	}
1218 
1219 	version = (flags & HSFSMNT_NOVERSION) == 0;
1220 	for (i = 0, len = 0; i < size; i++) {
1221 		c = (from[i++] & 0xFF) << 8;
1222 		c |= from[i] & 0xFF;
1223 		if (c == ';' && version)
1224 			break;
1225 
1226 		if (len > (maxsize-3)) {
1227 			if (c < 0x80)
1228 				amt = 1;
1229 			else if (c < 0x800)
1230 				amt = 2;
1231 			else
1232 				amt = 3;
1233 			if ((len+amt) > maxsize) {
1234 				to[len] = '\0';
1235 				return (-len);
1236 			}
1237 		}
1238 		amt = hs_ucs2_2_utf8(c, (uint8_t *)&to[len]);
1239 		if (amt == 0) {
1240 			hs_log_bogus_joliet_warning(); /* should never happen */
1241 			return (0);
1242 		}
1243 		len += amt;
1244 	}
1245 	to[len] = '\0';
1246 	return (len);
1247 }
1248 
/*
 * Copy 'size' bytes from 'from' to 'to', mapping 'a'-'z' to upper case.
 * The output is not NUL-terminated.  Nothing is copied if size <= 0.
 *
 * Returns 1 if at least one lower case character was found, else 0.
 *
 * Called from process_dirblock()
 * via hsfs_lookup() -> hs_dirlook() -> process_dirblock()
 * to create an intermediate name from on disk file names for
 * comparing names.
 */
static int
uppercase_cp(char *from, char *to, int size)
{
	int i;
	int had_lc = 0;
	unsigned char c;

	/*
	 * Signed index: an unsigned one would turn a negative 'size'
	 * into a huge loop bound.
	 */
	for (i = 0; i < size; i++) {
		c = (unsigned char)from[i];
		if ((c >= 'a') && (c <= 'z')) {
			c -= ('a' - 'A');
			had_lc = 1;
		}
		to[i] = (char)c;
	}
	return (had_lc);
}
1275 
/*
 * This is the Joliet variant of uppercase_cp()
 *
 * Convert a big-endian UCS-2 file name of 'size' bytes into UTF-8.
 * Each UCS-2 character produces up to three output bytes, so 'to' has
 * to provide room for (size / 2) * 3 bytes.  The output is not
 * NUL-terminated.
 *
 * A one-byte name (the '\0' / '\1' directory entries) is copied as is.
 * A dangling odd byte at the end of a longer name cannot form a
 * character and is ignored.
 *
 * Returns the number of bytes written to 'to', or 0 if hs_ucs2_2_utf8()
 * reports a conversion failure.
 *
 * Called from process_dirblock()
 * via hsfs_lookup() -> hs_dirlook() -> process_dirblock()
 * to create an intermediate name from on disk file names for
 * comparing names.
 */
int
hs_joliet_cp(char *from, char *to, int size)
{
	int		i;
	uint16_t	c;
	int		len = 0;
	int		amt;

	/* special handling for '\0' and '\1' */
	if (size == 1) {
		*to = *from;
		return (1);
	}

	/*
	 * Only convert complete byte pairs; with an odd 'size' the old
	 * "i < size" bound fetched from[size], one byte past the name.
	 */
	for (i = 0; i + 1 < size; i += 2) {
		c = (from[i] & 0xFF) << 8;
		c |= from[i + 1] & 0xFF;

		amt = hs_ucs2_2_utf8(c, (uint8_t *)&to[len]);
		if (amt == 0) {
			hs_log_bogus_joliet_warning(); /* should never happen */
			return (0);
		}

		len += amt;
	}
	return (len);
}
1315 
1316 static void
1317 hs_log_bogus_joliet_warning(void)
1318 {
1319 	static int	warned = 0;
1320 
1321 	if (warned)
1322 		return;
1323 	warned = 1;
1324 	cmn_err(CE_CONT, "hsfs: Warning: "
1325 	    "file name contains bad UCS-2 chacarter\n");
1326 }
1327 
1328 
/*
 * hs_uppercase_copy
 *
 * Convert a UNIX-style name into its HSFS equivalent
 * replacing '.' and '..' with '\0' and '\1'.
 * Map to upper case.
 *
 *	from	name to convert, 'size' bytes long
 *	to	output buffer of at least 'size' bytes; the result is
 *		not NUL-terminated
 *	size	length of 'from' in bytes
 *
 * Returns the (possibly new) length: 1 for "." and "..", otherwise
 * 'size'.  Nothing is written and 0 is returned if size <= 0.
 *
 * Called from hs_dirlook() and rrip_namecopy()
 * to create an intermediate name from the callers name from hsfs_lookup()
 * XXX Is the call from rrip_namecopy() OK?
 */
int
hs_uppercase_copy(char *from, char *to, int size)
{
	int i;
	unsigned char c;

	/*
	 * Reject non-positive lengths up front.  With an unsigned loop
	 * index a negative 'size' used to be converted into a huge bound.
	 */
	if (size <= 0)
		return (0);

	/* special handling for '.' and '..' */

	if (size == 1 && *from == '.') {
		*to = '\0';
		return (1);
	} else if (size == 2 && *from == '.' && *(from+1) == '.') {
		*to = '\1';
		return (1);
	}

	for (i = 0; i < size; i++) {
		c = (unsigned char)from[i];
		if ((c >= 'a') && (c <= 'z'))
			c = c - 'a' + 'A';
		to[i] = (char)c;
	}
	return (size);
}
1365 
/*
 * hs_iso_copy
 *
 * This is the Joliet/ISO-9660:1999 variant of hs_uppercase_copy()
 *
 * Convert a UTF-8 UNIX-style name into its UTF-8 Joliet/ISO equivalent
 * replacing '.' and '..' with '\0' and '\1'.  All other names are
 * copied unchanged; no case mapping is done.
 *
 *	from	name to convert, 'size' bytes long
 *	to	output buffer of at least 'size' bytes; the result is
 *		not NUL-terminated
 *	size	length of 'from' in bytes
 *
 * Returns the (possibly new) length: 1 for "." and "..", otherwise
 * 'size'.  Nothing is written and 0 is returned if size <= 0.
 *
 * Called from hs_dirlook()
 * to create an intermediate name from the callers name from hsfs_lookup()
 */
static int
hs_iso_copy(char *from, char *to, int size)
{
	int i;

	/*
	 * Reject non-positive lengths up front.  With an unsigned loop
	 * index a negative 'size' used to be converted into a huge bound.
	 */
	if (size <= 0)
		return (0);

	/* special handling for '.' and '..' */

	if (size == 1 && *from == '.') {
		*to = '\0';
		return (1);
	} else if (size == 2 && *from == '.' && *(from+1) == '.') {
		*to = '\1';
		return (1);
	}

	for (i = 0; i < size; i++)
		to[i] = from[i];
	return (size);
}
1400 
1401 void
1402 hs_filldirent(struct vnode *vp, struct hs_direntry *hdp)
1403 {
1404 	struct buf *secbp;
1405 	uint_t	secno;
1406 	offset_t secoff;
1407 	struct hsfs *fsp;
1408 	uchar_t *secp;
1409 	int	error;
1410 
1411 	if (vp->v_type != VDIR) {
1412 		cmn_err(CE_WARN, "hsfs_filldirent: vp (0x%p) not a directory",
1413 		    (void *)vp);
1414 		return;
1415 	}
1416 
1417 	fsp = VFS_TO_HSFS(vp ->v_vfsp);
1418 	secno = LBN_TO_SEC(hdp->ext_lbn+hdp->xar_len, vp->v_vfsp);
1419 	secoff = LBN_TO_BYTE(hdp->ext_lbn+hdp->xar_len, vp->v_vfsp) &
1420 	    MAXHSOFFSET;
1421 	secbp = bread(fsp->hsfs_devvp->v_rdev, secno * 4, HS_SECTOR_SIZE);
1422 	error = geterror(secbp);
1423 	if (error != 0) {
1424 		cmn_err(CE_NOTE, "hs_filldirent: bread: error=(%d)", error);
1425 		goto end;
1426 	}
1427 
1428 	secp = (uchar_t *)secbp->b_un.b_addr;
1429 
1430 	/* quick check */
1431 	if (hdp->ext_lbn != HDE_EXT_LBN(&secp[secoff])) {
1432 		cmn_err(CE_NOTE, "hsfs_filldirent: dirent not match");
1433 		/* keep on going */
1434 	}
1435 	(void) hs_parsedir(fsp, &secp[secoff], hdp, (char *)NULL,
1436 	    (int *)NULL, HS_SECTOR_SIZE - secoff);
1437 
1438 end:
1439 	brelse(secbp);
1440 }
1441 
1442 /*
1443  * Look through a directory block for a matching entry.
1444  * Note: this routine does an fbrelse() on the buffer passed in.
1445  */
1446 static enum dirblock_result
1447 process_dirblock(
1448 	struct fbuf	*fbp,		/* buffer containing dirblk */
1449 	uint_t		*offset,	/* lower index */
1450 	uint_t		last_offset,	/* upper index */
1451 	char		*nm,		/* upcase nm to compare against */
1452 	int		nmlen,		/* length of name */
1453 	struct hsfs	*fsp,
1454 	struct hsnode	*dhp,
1455 	struct vnode	*dvp,
1456 	struct vnode	**vpp,
1457 	int		*error)		/* return value: errno */
1458 {
1459 	uchar_t		*blkp = (uchar_t *)fbp->fb_addr; /* dir block */
1460 	char		*dname;		/* name in directory entry */
1461 	int		dnamelen;	/* length of name */
1462 	struct hs_direntry hd;
1463 	int		hdlen;
1464 	uchar_t		*dirp;		/* the directory entry */
1465 	int		res;
1466 	int		parsedir_res;
1467 	int		is_rrip;
1468 	size_t		rrip_name_size;
1469 	int		rr_namelen = 0;
1470 	char		*rrip_name_str = NULL;
1471 	char		*rrip_tmp_name = NULL;
1472 	enum dirblock_result err = 0;
1473 	int 		did_fbrelse = 0;
1474 	char		uppercase_name[JOLIET_NAMELEN_MAX*3 + 1]; /* 331 */
1475 
1476 #define	PD_return(retval)	\
1477 	{ err = retval; goto do_ret; }		/* return after cleanup */
1478 #define	rel_offset(offset)	\
1479 	((offset) & MAXBOFFSET)			/* index into cur blk */
1480 #define	RESTORE_NM(tmp, orig)	\
1481 	if (is_rrip && *(tmp) != '\0') \
1482 		(void) strcpy((orig), (tmp))
1483 
1484 	is_rrip = IS_RRIP_IMPLEMENTED(fsp);
1485 	if (is_rrip) {
1486 		rrip_name_size = RRIP_FILE_NAMELEN + 1;
1487 		rrip_name_str = kmem_alloc(rrip_name_size, KM_SLEEP);
1488 		rrip_tmp_name = kmem_alloc(rrip_name_size, KM_SLEEP);
1489 		rrip_name_str[0] = '\0';
1490 		rrip_tmp_name[0] = '\0';
1491 	}
1492 
1493 	while (*offset < last_offset) {
1494 
1495 		/*
1496 		 * Directory Entries cannot span sectors.
1497 		 *
1498 		 * Unused bytes at the end of each sector are zeroed
1499 		 * according to ISO9660, but we cannot rely on this
1500 		 * since both media failures and maliciously corrupted
1501 		 * media may return arbitrary values.
1502 		 * We therefore have to check for consistency:
1503 		 * The size of a directory entry must be at least
1504 		 * 34 bytes (the size of the directory entry metadata),
1505 		 * or zero (indicating the end-of-sector condition).
1506 		 * For a non-zero directory entry size of less than
1507 		 * 34 Bytes, log a warning.
1508 		 * In any case, skip the rest of this sector and
1509 		 * continue with the next.
1510 		 */
1511 		hdlen = (int)((uchar_t)
1512 		    HDE_DIR_LEN(&blkp[rel_offset(*offset)]));
1513 
1514 		if (hdlen < HDE_ROOT_DIR_REC_SIZE ||
1515 		    *offset + hdlen > last_offset) {
1516 			/*
1517 			 * Advance to the next sector boundary
1518 			 */
1519 			*offset = roundup(*offset + 1, HS_SECTOR_SIZE);
1520 			if (hdlen)
1521 				hs_log_bogus_disk_warning(fsp,
1522 				    HSFS_ERR_TRAILING_JUNK, 0);
1523 			continue;
1524 		}
1525 
1526 		bzero(&hd, sizeof (hd));
1527 
1528 		/*
1529 		 * Check the filename length in the ISO record for
1530 		 * plausibility and reset it to a safe value, in case
1531 		 * the name length byte is out of range. Since the ISO
1532 		 * name will be used as fallback if the rockridge name
1533 		 * is invalid/nonexistant, we must make sure not to
1534 		 * blow the bounds and initialize dnamelen to a sensible
1535 		 * value within the limits of ISO9660.
1536 		 * In addition to that, the ISO filename is part of the
1537 		 * directory entry. If the filename length is too large
1538 		 * to fit, the record is invalid and we'll advance to
1539 		 * the next.
1540 		 */
1541 		dirp = &blkp[rel_offset(*offset)];
1542 		dname = (char *)HDE_name(dirp);
1543 		dnamelen = (int)((uchar_t)HDE_NAME_LEN(dirp));
1544 		/*
1545 		 * If the directory entry extends beyond the end of the
1546 		 * block, it must be invalid. Skip it.
1547 		 */
1548 		if (dnamelen > hdlen - HDE_FDESIZE) {
1549 			hs_log_bogus_disk_warning(fsp,
1550 			    HSFS_ERR_BAD_DIR_ENTRY, 0);
1551 			goto skip_rec;
1552 		} else if (dnamelen > fsp->hsfs_namelen &&
1553 		    hs_namelen(fsp, dname, dnamelen) > fsp->hsfs_namelen) {
1554 			hs_log_bogus_disk_warning(fsp,
1555 			    fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET ?
1556 			    HSFS_ERR_BAD_JOLIET_FILE_LEN :
1557 			    HSFS_ERR_BAD_FILE_LEN, 0);
1558 		}
1559 		if (dnamelen > ISO_NAMELEN_V2_MAX)
1560 			dnamelen = fsp->hsfs_namemax;	/* Paranoia */
1561 
1562 		/*
1563 		 * If the rock ridge is implemented, then we copy the name
1564 		 * from the SUA area to rrip_name_str. If no Alternate
1565 		 * name is found, then use the uppercase NM in the
1566 		 * rrip_name_str char array.
1567 		 */
1568 		if (is_rrip) {
1569 
1570 			rrip_name_str[0] = '\0';
1571 			rr_namelen = rrip_namecopy(nm, &rrip_name_str[0],
1572 			    &rrip_tmp_name[0], dirp, last_offset - *offset,
1573 			    fsp, &hd);
1574 			if (hd.sym_link) {
1575 				kmem_free(hd.sym_link,
1576 				    (size_t)(hd.ext_size+1));
1577 				hd.sym_link = (char *)NULL;
1578 			}
1579 
1580 			if (rr_namelen != -1) {
1581 				dname = (char *)&rrip_name_str[0];
1582 				dnamelen = rr_namelen;
1583 			}
1584 		}
1585 
1586 		if (!is_rrip || rr_namelen == -1) {
1587 			/* use iso name instead */
1588 
1589 			int i = -1;
1590 			/*
1591 			 * make sure that we get rid of ';' in the dname of
1592 			 * an iso direntry, as we should have no knowledge
1593 			 * of file versions.
1594 			 *
1595 			 * XXX This is done the wrong way: it does not take
1596 			 * XXX care of the fact that the version string is
1597 			 * XXX a decimal number in the range 1 to 32767.
1598 			 */
1599 			if ((fsp->hsfs_flags & HSFSMNT_NOVERSION) == 0) {
1600 				if (fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET) {
1601 					for (i = dnamelen - 1; i > 0; i -= 2) {
1602 						if (dname[i] == ';' &&
1603 						    dname[i-1] == '\0') {
1604 							--i;
1605 							break;
1606 						}
1607 					}
1608 				} else {
1609 					for (i = dnamelen - 1; i > 0; i--) {
1610 						if (dname[i] == ';')
1611 							break;
1612 					}
1613 				}
1614 			}
1615 			if (i > 0) {
1616 				dnamelen = i;
1617 			} else if (fsp->hsfs_vol_type != HS_VOL_TYPE_ISO_V2 &&
1618 			    fsp->hsfs_vol_type != HS_VOL_TYPE_JOLIET) {
1619 				dnamelen = strip_trailing(fsp, dname, dnamelen);
1620 			}
1621 
1622 			ASSERT(dnamelen < sizeof (uppercase_name));
1623 
1624 			if (fsp->hsfs_vol_type == HS_VOL_TYPE_ISO_V2) {
1625 				(void) strncpy(uppercase_name, dname, dnamelen);
1626 			} else if (fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET) {
1627 				dnamelen = hs_joliet_cp(dname, uppercase_name,
1628 				    dnamelen);
1629 			} else if (uppercase_cp(dname, uppercase_name,
1630 			    dnamelen)) {
1631 				hs_log_bogus_disk_warning(fsp,
1632 				    HSFS_ERR_LOWER_CASE_NM, 0);
1633 			}
1634 			dname = uppercase_name;
1635 			if (!is_rrip &&
1636 			    (fsp->hsfs_flags & HSFSMNT_NOTRAILDOT) &&
1637 			    dname[dnamelen - 1] == '.' &&
1638 			    CAN_TRUNCATE_DOT(dname, dnamelen))
1639 				dname[--dnamelen] = '\0';
1640 		}
1641 
1642 		/*
1643 		 * Quickly screen for a non-matching entry, but not for RRIP.
1644 		 * This test doesn't work for lowercase vs. uppercase names.
1645 		 */
1646 
1647 		/* if we saw a lower case name we can't do this test either */
1648 		if (strict_iso9660_ordering && !is_rrip &&
1649 		    !HSFS_HAVE_LOWER_CASE(fsp) && *nm < *dname) {
1650 			RESTORE_NM(rrip_tmp_name, nm);
1651 			PD_return(WENT_PAST)
1652 		}
1653 
1654 		if (*nm != *dname || nmlen != dnamelen)
1655 			goto skip_rec;
1656 
1657 		if ((res = bcmp(dname, nm, nmlen)) == 0) {
1658 			/* name matches */
1659 			parsedir_res = hs_parsedir(fsp, dirp, &hd,
1660 			    (char *)NULL, (int *)NULL,
1661 			    last_offset - *offset);
1662 			if (!parsedir_res) {
1663 				uint_t lbn;	/* logical block number */
1664 
1665 				lbn = dhp->hs_dirent.ext_lbn +
1666 				    dhp->hs_dirent.xar_len;
1667 				/*
1668 				 * Need to do an fbrelse() on the buffer,
1669 				 * as hs_makenode() may try to acquire
1670 				 * hs_hashlock, which may not be required
1671 				 * while a page is locked.
1672 				 */
1673 				fbrelse(fbp, S_READ);
1674 				did_fbrelse = 1;
1675 				*vpp = hs_makenode(&hd, lbn, *offset,
1676 				    dvp->v_vfsp);
1677 				if (*vpp == NULL) {
1678 					*error = ENFILE;
1679 					RESTORE_NM(rrip_tmp_name, nm);
1680 					PD_return(FOUND_ENTRY)
1681 				}
1682 
1683 				dhp->hs_offset = *offset;
1684 				RESTORE_NM(rrip_tmp_name, nm);
1685 				PD_return(FOUND_ENTRY)
1686 			} else if (parsedir_res != EAGAIN) {
1687 				/* improper dir entry */
1688 				*error = parsedir_res;
1689 				RESTORE_NM(rrip_tmp_name, nm);
1690 				PD_return(FOUND_ENTRY)
1691 			}
1692 		} else if (strict_iso9660_ordering && !is_rrip &&
1693 		    !HSFS_HAVE_LOWER_CASE(fsp) && res < 0) {
1694 			/* name < dir entry */
1695 			RESTORE_NM(rrip_tmp_name, nm);
1696 			PD_return(WENT_PAST)
1697 		}
1698 		/*
1699 		 * name > dir entry,
1700 		 * look at next one.
1701 		 */
1702 skip_rec:
1703 		*offset += hdlen;
1704 		RESTORE_NM(rrip_tmp_name, nm);
1705 	}
1706 	PD_return(HIT_END)
1707 
1708 do_ret:
1709 	if (rrip_name_str)
1710 		kmem_free(rrip_name_str, rrip_name_size);
1711 	if (rrip_tmp_name)
1712 		kmem_free(rrip_tmp_name, rrip_name_size);
1713 	if (!did_fbrelse)
1714 		fbrelse(fbp, S_READ);
1715 	return (err);
1716 #undef PD_return
1717 #undef RESTORE_NM
1718 }
1719 
1720 /*
1721  * Strip trailing nulls or spaces from the name;
1722  * return adjusted length.  If we find such junk,
1723  * log a non-conformant disk message.
1724  */
1725 static int
1726 strip_trailing(struct hsfs *fsp, char *nm, int len)
1727 {
1728 	char *c;
1729 	int trailing_junk = 0;
1730 
1731 	for (c = nm + len - 1; c > nm; c--) {
1732 		if (*c == ' ' || *c == '\0')
1733 			trailing_junk = 1;
1734 		else
1735 			break;
1736 	}
1737 
1738 	if (trailing_junk)
1739 		hs_log_bogus_disk_warning(fsp, HSFS_ERR_TRAILING_JUNK, 0);
1740 
1741 	return ((int)(c - nm + 1));
1742 }
1743 
1744 static int
1745 hs_namelen(struct hsfs *fsp, char *nm, int len)
1746 {
1747 	char	*p = nm + len;
1748 
1749 	if (fsp->hsfs_vol_type == HS_VOL_TYPE_ISO_V2) {
1750 		return (len);
1751 	} else if (fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET) {
1752 		uint16_t c;
1753 
1754 		while (--p > &nm[1]) {
1755 			c = *p;
1756 			c |= *--p * 256;
1757 			if (c == ';')
1758 				return (p - nm);
1759 			if (c < '0' || c > '9') {
1760 				p++;
1761 				return (p - nm);
1762 			}
1763 		}
1764 	} else {
1765 		char	c;
1766 
1767 		while (--p > nm) {
1768 			c = *p;
1769 			if (c == ';')
1770 				return (p - nm);
1771 			if (c < '0' || c > '9') {
1772 				p++;
1773 				return (p - nm);
1774 			}
1775 		}
1776 	}
1777 	return (len);
1778 }
1779 
/*
 * Take a UCS-2 character and convert
 * it into a utf8 character.
 *
 *	c_16	the UCS-2 character
 *	s_8	output buffer; one to three bytes are written, no NUL
 *
 * Returns the number of bytes written: 1 for values below 0x80,
 * 2 for values below 0x800 and 3 for everything else.  As every
 * 16-bit value can be encoded this way the function never returns 0;
 * callers that test for 0 will simply never see a failure.
 * No special treatment is given to the surrogate range
 * 0xD800-0xDFFF; such values are encoded like any other character.
 *
 * See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 *
 * The code has been taken from udfs/udf_subr.c
 */
static const uint8_t hs_first_byte_mark[7] =
			{ 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
static int32_t
hs_ucs2_2_utf8(uint16_t c_16, uint8_t *s_8)
{
	uint32_t c_32 = c_16;
	int32_t nc;
	int32_t i;

	if (c_32 < 0x80)
		nc = 1;
	else if (c_32 < 0x800)
		nc = 2;
	else
		nc = 3;

	/*
	 * Fill in the continuation bytes back to front; each one carries
	 * the next six low-order bits in the form 10xxxxxx.
	 */
	for (i = nc - 1; i > 0; i--) {
		s_8[i] = (uint8_t)(0x80 | (c_32 & 0x3F));
		c_32 >>= 6;
	}
	/* the lead byte holds the remaining bits plus the length marker */
	s_8[0] = (uint8_t)(c_32 | hs_first_byte_mark[nc]);
	return (nc);
}
1850