xref: /titanic_44/usr/src/uts/common/fs/hsfs/hsfs_node.c (revision 34acef6775bd2319a3708b750f10ccc4f1292562)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Directory operations for High Sierra filesystem
30  */
31 
32 #include <sys/types.h>
33 #include <sys/t_lock.h>
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/cred.h>
37 #include <sys/user.h>
38 #include <sys/vfs.h>
39 #include <sys/stat.h>
40 #include <sys/vnode.h>
41 #include <sys/mode.h>
42 #include <sys/dnlc.h>
43 #include <sys/cmn_err.h>
44 #include <sys/fbuf.h>
45 #include <sys/kmem.h>
46 #include <sys/policy.h>
47 #include <sys/sunddi.h>
48 #include <vm/hat.h>
49 #include <vm/as.h>
50 #include <vm/pvn.h>
51 #include <vm/seg.h>
52 #include <vm/seg_map.h>
53 #include <vm/seg_kmem.h>
54 #include <vm/page.h>
55 
56 #include <sys/fs/hsfs_spec.h>
57 #include <sys/fs/hsfs_isospec.h>
58 #include <sys/fs/hsfs_node.h>
59 #include <sys/fs/hsfs_impl.h>
60 #include <sys/fs/hsfs_susp.h>
61 #include <sys/fs/hsfs_rrip.h>
62 
63 #include <sys/sysinfo.h>
64 #include <sys/sysmacros.h>
65 #include <sys/errno.h>
66 #include <sys/debug.h>
67 #include <fs/fs_subr.h>
68 
/*
 * CAN_TRUNCATE_DOT(name, namelen)
 *
 * Expects a name whose last character is '.' and evaluates to TRUE when
 * that name is neither "." nor "..", i.e. when the trailing dot may be
 * dropped without turning the name into something else.
 *
 * Both arguments are parenthesized in the expansion so that expressions
 * such as (buf + 1) can be passed safely.  Each argument may be evaluated
 * more than once, so do not pass expressions with side effects.
 */
#define	CAN_TRUNCATE_DOT(name, namelen)	\
		((namelen) > 1 && ((namelen) > 2 || (name)[0] != '.'))
75 
/*
 * Result of scanning one directory block with process_dirblock(), as
 * acted upon by hs_dirlook():
 *   FOUND_ENTRY - the search is over; hs_dirlook() returns whatever was
 *		   stored through the 'error' pointer.
 *   WENT_PAST	 - the name is not in the range being searched; hs_dirlook()
 *		   either wraps around to the start of the directory or
 *		   fails with ENOENT.
 *   HIT_END	 - hs_dirlook() re-enters its read loop at the updated offset.
 */
enum dirblock_result { FOUND_ENTRY, WENT_PAST, HIT_END };

/*
 * These values determine whether we will try to read a file or dir;
 * they may be patched via /etc/system to allow users to read
 * record-oriented files.  hs_parsedir() masks the on-disk directory
 * flags with them to classify an entry as a regular file or a directory.
 */
int ide_prohibited = IDE_PROHIBITED;
int hde_prohibited = HDE_PROHIBITED;

/*
 * This variable determines if the HSFS code will use the
 * directory name lookup cache. The default is for the cache to be used.
 */
static int hsfs_use_dnlc = 1;

/*
 * This variable determines whether strict ISO-9660 directory ordering
 * is to be assumed.  If false (which it is by default), then when
 * searching a directory of an ISO-9660 disk, we do not expect the
 * entries to be sorted (as the spec requires), and so cannot terminate
 * the search early.  Unfortunately, some vendors are producing
 * non-compliant disks.  This variable exists to revert to the old
 * behavior in case someone relies on this. This option is expected to be
 * removed at some point in the future.
 *
 * Use "set hsfs:strict_iso9660_ordering = 1" in /etc/system to override.
 */
static int strict_iso9660_ordering = 0;

/*
 * This tunable allows us to ignore inode numbers from rrip-1.12.
 * In this case, we fall back to our default inode algorithm
 * (see the node id selection in hs_makenode()).
 */
int use_rrip_inodes = 1;

/* Forward declarations of file-local helpers. */
static void hs_hsnode_cache_reclaim(void *unused);
static void hs_addfreeb(struct hsfs *fsp, struct hsnode *hp);
static enum dirblock_result process_dirblock(struct fbuf *fbp, uint_t *offset,
	uint_t last_offset, char *nm, int nmlen, struct hsfs *fsp,
	struct hsnode *dhp, struct vnode *dvp, struct vnode **vpp,
	int *error);
static int strip_trailing(struct hsfs *fsp, char *nm, int len);
static int hs_namelen(struct hsfs *fsp, char *nm, int len);
static int uppercase_cp(char *from, char *to, int size);
static void hs_log_bogus_joliet_warning(void);
static int hs_iso_copy(char *from, char *to, int size);
static int32_t hs_ucs2_2_utf8(uint16_t c_16, uint8_t *s_8);
static int hs_utf8_trunc(uint8_t *str, int len);
125 
126 /*
127  * hs_access
128  * Return 0 if the desired access may be granted.
129  * Otherwise return error code.
130  */
131 int
132 hs_access(struct vnode *vp, mode_t m, struct cred *cred)
133 {
134 	struct hsnode *hp;
135 	int	shift = 0;
136 
137 	/*
138 	 * Write access cannot be granted for a read-only medium
139 	 */
140 	if ((m & VWRITE) && !IS_DEVVP(vp))
141 		return (EROFS);
142 
143 	hp = VTOH(vp);
144 
145 	/*
146 	 * XXX - For now, use volume protections.
147 	 *  Also, always grant EXEC access for directories
148 	 *  if READ access is granted.
149 	 */
150 	if ((vp->v_type == VDIR) && (m & VEXEC)) {
151 		m &= ~VEXEC;
152 		m |= VREAD;
153 	}
154 
155 	if (crgetuid(cred) != hp->hs_dirent.uid) {
156 		shift += 3;
157 		if (!groupmember((uid_t)hp->hs_dirent.gid, cred))
158 			shift += 3;
159 	}
160 	m &= ~(hp->hs_dirent.mode << shift);
161 	if (m != 0)
162 		return (secpolicy_vnode_access(cred, vp, hp->hs_dirent.uid, m));
163 	return (0);
164 }
165 
/*
 * Map a node id onto a bucket index of the per-filesystem hash table
 * (hsfs_hash[]).  A modulo is needed when HS_HASHSIZE is not a power of
 * two; otherwise a simple mask is used.  HS_HPASH() applies the same
 * mapping to the hs_nodeid of an hsnode.
 */
#if ((HS_HASHSIZE & (HS_HASHSIZE - 1)) != 0)
#define	HS_HASH(l)	((uint_t)(l) % HS_HASHSIZE)
#else
#define	HS_HASH(l)	((uint_t)(l) & (HS_HASHSIZE - 1))
#endif
#define	HS_HPASH(hp)	HS_HASH((hp)->hs_nodeid)
172 
/*
 * The tunable nhsnode is now a threshold for a dynamically allocated
 * pool of hsnodes, not the size of a statically allocated table.
 * When the number of hsnodes for a particular file system exceeds
 * nhsnode, the allocate and free logic will try to reduce the number
 * of allocated nodes by returning unreferenced nodes to the kmem_cache
 * instead of putting them on the file system's private free list.
 * It is compared against the per-filesystem count hsfs_nohsnode in
 * hs_getfree() and hs_freenode().
 */
int nhsnode = HS_HSNODESPACE / sizeof (struct hsnode);

/*
 * kmem cache from which all hsnodes are allocated; created by
 * hs_init_hsnode_cache() and destroyed by hs_fini_hsnode_cache().
 */
struct kmem_cache *hsnode_cache;  /* free hsnode cache */
184 
185 /*
186  * Initialize the cache of free hsnodes.
187  */
188 void
189 hs_init_hsnode_cache(void)
190 {
191 	/*
192 	 * A kmem_cache is used for the hsnodes
193 	 * No constructor because hsnodes are initialised by bzeroing.
194 	 */
195 	hsnode_cache = kmem_cache_create("hsfs_hsnode_cache",
196 	    sizeof (struct hsnode), 0, NULL,
197 	    NULL, hs_hsnode_cache_reclaim, NULL, NULL, 0);
198 }
199 
/*
 * hs_fini_hsnode_cache
 *
 * Destroy the cache of free hsnodes that hs_init_hsnode_cache() created.
 */
void
hs_fini_hsnode_cache(void)
{
	kmem_cache_destroy(hsnode_cache);
}
208 
209 /*
210  * System is short on memory, free up as much as possible
211  */
212 /*ARGSUSED*/
213 static void
214 hs_hsnode_cache_reclaim(void *unused)
215 {
216 	struct hsfs *fsp;
217 	struct hsnode *hp;
218 
219 	/*
220 	 * For each vfs in the hs_mounttab list
221 	 */
222 	mutex_enter(&hs_mounttab_lock);
223 	for (fsp = hs_mounttab; fsp != NULL; fsp = fsp->hsfs_next) {
224 		/*
225 		 * Purge the dnlc of all hsfs entries
226 		 */
227 		(void) dnlc_purge_vfsp(fsp->hsfs_vfs, 0);
228 
229 		/*
230 		 * For each entry in the free chain
231 		 */
232 		rw_enter(&fsp->hsfs_hash_lock, RW_WRITER);
233 		mutex_enter(&fsp->hsfs_free_lock);
234 		for (hp = fsp->hsfs_free_f; hp != NULL; hp = fsp->hsfs_free_f) {
235 			/*
236 			 * Remove from chain
237 			 */
238 			fsp->hsfs_free_f = hp->hs_freef;
239 			if (fsp->hsfs_free_f != NULL) {
240 				fsp->hsfs_free_f->hs_freeb = NULL;
241 			} else {
242 				fsp->hsfs_free_b = NULL;
243 			}
244 			/*
245 			 * Free the node. Force it to be fully freed
246 			 * by setting the 3rd arg (nopage) to 1.
247 			 */
248 			hs_freenode(HTOV(hp), fsp, 1);
249 		}
250 		mutex_exit(&fsp->hsfs_free_lock);
251 		rw_exit(&fsp->hsfs_hash_lock);
252 	}
253 	mutex_exit(&hs_mounttab_lock);
254 }
255 
256 /*
257  * Add an hsnode to the end of the free list.
258  */
259 static void
260 hs_addfreeb(struct hsfs *fsp, struct hsnode *hp)
261 {
262 	struct hsnode *ep;
263 
264 	vn_invalid(HTOV(hp));
265 	mutex_enter(&fsp->hsfs_free_lock);
266 	ep = fsp->hsfs_free_b;
267 	fsp->hsfs_free_b = hp;		/* hp is the last entry in free list */
268 	hp->hs_freef = NULL;
269 	hp->hs_freeb = ep;		/* point at previous last entry */
270 	if (ep == NULL)
271 		fsp->hsfs_free_f = hp;	/* hp is only entry in free list */
272 	else
273 		ep->hs_freef = hp;	/* point previous last entry at hp */
274 
275 	mutex_exit(&fsp->hsfs_free_lock);
276 }
277 
278 /*
279  * Get an hsnode from the front of the free list.
280  * Must be called with write hsfs_hash_lock held.
281  */
282 static struct hsnode *
283 hs_getfree(struct hsfs *fsp)
284 {
285 	struct hsnode *hp, **tp;
286 
287 	ASSERT(RW_WRITE_HELD(&fsp->hsfs_hash_lock));
288 
289 	/*
290 	 * If the number of currently-allocated hsnodes is less than
291 	 * the hsnode count threshold (nhsnode), or if there are no
292 	 * nodes on the file system's local free list (which acts as a
293 	 * cache), call kmem_cache_alloc to get a new hsnode from
294 	 * kernel memory.
295 	 */
296 	mutex_enter(&fsp->hsfs_free_lock);
297 	if ((fsp->hsfs_nohsnode < nhsnode) || (fsp->hsfs_free_f == NULL)) {
298 		mutex_exit(&fsp->hsfs_free_lock);
299 		hp = kmem_cache_alloc(hsnode_cache, KM_SLEEP);
300 		fsp->hsfs_nohsnode++;
301 		bzero((caddr_t)hp, sizeof (*hp));
302 		hp->hs_vnode = vn_alloc(KM_SLEEP);
303 		return (hp);
304 	}
305 	hp = fsp->hsfs_free_f;
306 	/* hp cannot be NULL, since we already checked this above */
307 	fsp->hsfs_free_f = hp->hs_freef;
308 	if (fsp->hsfs_free_f != NULL)
309 		fsp->hsfs_free_f->hs_freeb = NULL;
310 	else
311 		fsp->hsfs_free_b = NULL;
312 	mutex_exit(&fsp->hsfs_free_lock);
313 
314 	for (tp = &fsp->hsfs_hash[HS_HPASH(hp)]; *tp != NULL;
315 		tp = &(*tp)->hs_hash) {
316 		if (*tp == hp) {
317 			struct vnode *vp;
318 
319 			vp = HTOV(hp);
320 
321 			/*
322 			 * file is no longer referenced, destroy all old pages
323 			 */
324 			if (vn_has_cached_data(vp))
325 				/*
326 				 * pvn_vplist_dirty will abort all old pages
327 				 */
328 				(void) pvn_vplist_dirty(vp, (u_offset_t)0,
329 				hsfs_putapage, B_INVAL, (struct cred *)NULL);
330 			*tp = hp->hs_hash;
331 			break;
332 		}
333 	}
334 	if (hp->hs_dirent.sym_link != (char *)NULL) {
335 		kmem_free(hp->hs_dirent.sym_link,
336 			(size_t)(hp->hs_dirent.ext_size + 1));
337 	}
338 
339 	mutex_destroy(&hp->hs_contents_lock);
340 	{
341 		vnode_t	*vp;
342 
343 		vp = hp->hs_vnode;
344 		bzero((caddr_t)hp, sizeof (*hp));
345 		hp->hs_vnode = vp;
346 		vn_reinit(vp);
347 	}
348 	return (hp);
349 }
350 
351 /*
352  * Remove an hsnode from the free list.
353  */
354 static void
355 hs_remfree(struct hsfs *fsp, struct hsnode *hp)
356 {
357 	mutex_enter(&fsp->hsfs_free_lock);
358 	if (hp->hs_freef != NULL)
359 		hp->hs_freef->hs_freeb = hp->hs_freeb;
360 	else
361 		fsp->hsfs_free_b = hp->hs_freeb;
362 	if (hp->hs_freeb != NULL)
363 		hp->hs_freeb->hs_freef = hp->hs_freef;
364 	else
365 		fsp->hsfs_free_f = hp->hs_freef;
366 	mutex_exit(&fsp->hsfs_free_lock);
367 }
368 
369 /*
370  * Look for hsnode in hash list.
371  * If the inode number is != HS_DUMMY_INO (16), then
372  * only the inode number is used for the check. If the
373  * inode number is == HS_DUMMY_INO, we always in
374  * addition check equality of fsid and nodeid.
375  * If found, reactivate it if inactive.
376  * Must be entered with hsfs_hash_lock held.
377  */
378 struct vnode *
379 hs_findhash(ino64_t nodeid, uint_t lbn, uint_t off, struct vfs *vfsp)
380 {
381 	struct hsnode *tp;
382 	struct hsfs *fsp;
383 
384 	fsp = VFS_TO_HSFS(vfsp);
385 
386 	ASSERT(RW_LOCK_HELD(&fsp->hsfs_hash_lock));
387 
388 	for (tp = fsp->hsfs_hash[HS_HASH(nodeid)]; tp != NULL;
389 	    tp = tp->hs_hash) {
390 		if (tp->hs_nodeid == nodeid) {
391 			struct vnode *vp;
392 
393 			if (nodeid == HS_DUMMY_INO) {
394 				/*
395 				 * If this is the dummy inode number, look for
396 				 * matching dir_lbn and dir_off.
397 				 */
398 				for (; tp != NULL; tp = tp->hs_hash) {
399 					if (tp->hs_nodeid == nodeid &&
400 					    tp->hs_dir_lbn == lbn &&
401 					    tp->hs_dir_off == off)
402 						break;
403 				}
404 				if (tp == NULL)
405 					return (NULL);
406 			}
407 
408 			mutex_enter(&tp->hs_contents_lock);
409 			vp = HTOV(tp);
410 			VN_HOLD(vp);
411 			if ((tp->hs_flags & HREF) == 0) {
412 				tp->hs_flags |= HREF;
413 				/*
414 				 * reactivating a free hsnode:
415 				 * remove from free list
416 				 */
417 				hs_remfree(fsp, tp);
418 			}
419 			mutex_exit(&tp->hs_contents_lock);
420 			return (vp);
421 		}
422 	}
423 	return (NULL);
424 }
425 
426 static void
427 hs_addhash(struct hsfs *fsp, struct hsnode *hp)
428 {
429 	ulong_t hashno;
430 
431 	ASSERT(RW_WRITE_HELD(&fsp->hsfs_hash_lock));
432 
433 	hashno = HS_HPASH(hp);
434 	hp->hs_hash = fsp->hsfs_hash[hashno];
435 	fsp->hsfs_hash[hashno] = hp;
436 }
437 
438 /*
439  * Destroy all old pages and free the hsnodes
440  * Return 1 if busy (a hsnode is still referenced).
441  */
442 int
443 hs_synchash(struct vfs *vfsp)
444 {
445 	struct hsfs *fsp;
446 	int i;
447 	struct hsnode *hp, *nhp;
448 	int busy = 0;
449 	struct vnode *vp, *rvp;
450 
451 	fsp = VFS_TO_HSFS(vfsp);
452 	rvp = fsp->hsfs_rootvp;
453 	/* make sure no one can come in */
454 	rw_enter(&fsp->hsfs_hash_lock, RW_WRITER);
455 	for (i = 0; i < HS_HASHSIZE; i++) {
456 		for (hp = fsp->hsfs_hash[i]; hp != NULL; hp = hp->hs_hash) {
457 			vp = HTOV(hp);
458 			if ((hp->hs_flags & HREF) && (vp != rvp ||
459 				(vp == rvp && vp->v_count > 1))) {
460 				busy = 1;
461 				continue;
462 			}
463 			if (vn_has_cached_data(vp))
464 				(void) pvn_vplist_dirty(vp, (u_offset_t)0,
465 				hsfs_putapage, B_INVAL, (struct cred *)NULL);
466 		}
467 	}
468 	if (busy) {
469 		rw_exit(&fsp->hsfs_hash_lock);
470 		return (1);
471 	}
472 
473 	/* now free the hsnodes */
474 	for (i = 0; i < HS_HASHSIZE; i++) {
475 		for (hp = fsp->hsfs_hash[i]; hp != NULL; hp = nhp) {
476 			nhp = hp->hs_hash;
477 			/*
478 			 * We know there are no pages associated with
479 			 * all the hsnodes (they've all been released
480 			 * above). So remove from free list and
481 			 * free the entry with nopage set.
482 			 */
483 			vp = HTOV(hp);
484 			if (vp != rvp) {
485 				hs_remfree(fsp, hp);
486 				hs_freenode(vp, fsp, 1);
487 			}
488 		}
489 	}
490 
491 	ASSERT(fsp->hsfs_nohsnode == 1);
492 	rw_exit(&fsp->hsfs_hash_lock);
493 	/* release the root hsnode, this should free the final hsnode */
494 	VN_RELE(rvp);
495 
496 	return (0);
497 }
498 
499 /*
500  * hs_makenode
501  *
502  * Construct an hsnode.
503  * Caller specifies the directory entry, the block number and offset
504  * of the directory entry, and the vfs pointer.
505  * note: off is the sector offset, not lbn offset
506  * if NULL is returned implies file system hsnode table full
507  */
508 struct vnode *
509 hs_makenode(
510 	struct hs_direntry *dp,
511 	uint_t lbn,
512 	uint_t off,
513 	struct vfs *vfsp)
514 {
515 	struct hsnode *hp;
516 	struct vnode *vp;
517 	struct hs_volume *hvp;
518 	struct vnode *newvp;
519 	struct hsfs *fsp;
520 	ino64_t nodeid;
521 
522 	fsp = VFS_TO_HSFS(vfsp);
523 
524 	/*
525 	 * Construct the data that allows us to re-read the meta data without
526 	 * knowing the name of the file: in the case of a directory
527 	 * entry, this should point to the canonical dirent, the "."
528 	 * directory entry for the directory.  This dirent is pointed
529 	 * to by all directory entries for that dir (including the ".")
530 	 * entry itself.
531 	 * In the case of a file, simply point to the dirent for that
532 	 * file (there are hard links in Rock Ridge, so we need to use
533 	 * different data to contruct the node id).
534 	 */
535 	if (dp->type == VDIR) {
536 		lbn = dp->ext_lbn;
537 		off = 0;
538 	}
539 
540 	/*
541 	 * Normalize lbn and off before creating a nodeid
542 	 * and before storing them in a hs_node structure
543 	 */
544 	hvp = &fsp->hsfs_vol;
545 	lbn += off >> hvp->lbn_shift;
546 	off &= hvp->lbn_maxoffset;
547 	/*
548 	 * If the media carries rrip-v1.12 or newer, and we trust the inodes
549 	 * from the rrip data (use_rrip_inodes != 0), use that data. If the
550 	 * media has been created by a recent mkisofs version, we may trust
551 	 * all numbers in the starting extent number; otherwise, we cannot
552 	 * do this for zero sized files. We use HS_DUMMY_INO in this case and
553 	 * make sure that we will not map all files to the same meta data.
554 	 */
555 	if (dp->inode != 0 && use_rrip_inodes) {
556 		nodeid = dp->inode;
557 	} else {
558 		nodeid = dp->ext_lbn;
559 		if (dp->ext_size == 0 && (fsp->hsfs_flags & HSFSMNT_INODE) == 0)
560 			nodeid = HS_DUMMY_INO;
561 	}
562 
563 	/* look for hsnode in cache first */
564 
565 	rw_enter(&fsp->hsfs_hash_lock, RW_READER);
566 
567 	if ((vp = hs_findhash(nodeid, lbn, off, vfsp)) == NULL) {
568 
569 		/*
570 		 * Not in cache.  However, someone else may have come
571 		 * to the same conclusion and just put one in.	Upgrade
572 		 * our lock to a write lock and look again.
573 		 */
574 		rw_exit(&fsp->hsfs_hash_lock);
575 		rw_enter(&fsp->hsfs_hash_lock, RW_WRITER);
576 
577 		if ((vp = hs_findhash(nodeid, lbn, off, vfsp)) == NULL) {
578 			/*
579 			 * Now we are really sure that the hsnode is not
580 			 * in the cache.  Get one off freelist or else
581 			 * allocate one. Either way get a bzeroed hsnode.
582 			 */
583 			hp = hs_getfree(fsp);
584 
585 			bcopy((caddr_t)dp, (caddr_t)&hp->hs_dirent,
586 				sizeof (*dp));
587 			/*
588 			 * We've just copied this pointer into hs_dirent,
589 			 * and don't want 2 references to same symlink.
590 			 */
591 			dp->sym_link = (char *)NULL;
592 
593 			/*
594 			 * No need to hold any lock because hsnode is not
595 			 * yet in the hash chain.
596 			 */
597 			mutex_init(&hp->hs_contents_lock, NULL, MUTEX_DEFAULT,
598 			    NULL);
599 			hp->hs_dir_lbn = lbn;
600 			hp->hs_dir_off = off;
601 			hp->hs_nodeid = nodeid;
602 			hp->hs_seq = 0;
603 			hp->hs_flags = HREF;
604 			if (off > HS_SECTOR_SIZE)
605 				cmn_err(CE_WARN, "hs_makenode: bad offset");
606 
607 			vp = HTOV(hp);
608 			vp->v_vfsp = vfsp;
609 			vp->v_type = dp->type;
610 			vp->v_rdev = dp->r_dev;
611 			vn_setops(vp, hsfs_vnodeops);
612 			vp->v_data = (caddr_t)hp;
613 			vn_exists(vp);
614 			/*
615 			 * if it's a device, call specvp
616 			 */
617 			if (IS_DEVVP(vp)) {
618 				rw_exit(&fsp->hsfs_hash_lock);
619 				newvp = specvp(vp, vp->v_rdev, vp->v_type,
620 						CRED());
621 				if (newvp == NULL)
622 				    cmn_err(CE_NOTE,
623 					"hs_makenode: specvp failed");
624 				VN_RELE(vp);
625 				return (newvp);
626 			}
627 
628 			hs_addhash(fsp, hp);
629 
630 		}
631 	}
632 
633 	if (dp->sym_link != (char *)NULL) {
634 		kmem_free(dp->sym_link, (size_t)(dp->ext_size + 1));
635 		dp->sym_link = (char *)NULL;
636 	}
637 
638 	rw_exit(&fsp->hsfs_hash_lock);
639 	return (vp);
640 }
641 
642 /*
643  * hs_freenode
644  *
645  * Deactivate an hsnode.
646  * Leave it on the hash list but put it on the free list.
647  * If the vnode does not have any pages, release the hsnode to the
648  * kmem_cache using kmem_cache_free, else put in back of the free list.
649  *
650  * This function can be called with the hsfs_free_lock held, but only
651  * when the code is guaranteed to go through the path where the
652  * node is freed entirely, and not the path where the node could go back
653  * on the free list (and where the free lock would need to be acquired).
654  */
655 void
656 hs_freenode(vnode_t *vp, struct hsfs *fsp, int nopage)
657 {
658 	struct hsnode **tp;
659 	struct hsnode *hp = VTOH(vp);
660 
661 	ASSERT(RW_LOCK_HELD(&fsp->hsfs_hash_lock));
662 
663 	if (nopage || (fsp->hsfs_nohsnode >= nhsnode)) {
664 		/* remove this node from the hash list, if it's there */
665 		for (tp = &fsp->hsfs_hash[HS_HPASH(hp)]; *tp != NULL;
666 			tp = &(*tp)->hs_hash) {
667 
668 			if (*tp == hp) {
669 				*tp = hp->hs_hash;
670 				break;
671 			}
672 		}
673 
674 		if (hp->hs_dirent.sym_link != (char *)NULL) {
675 			kmem_free(hp->hs_dirent.sym_link,
676 				(size_t)(hp->hs_dirent.ext_size + 1));
677 			hp->hs_dirent.sym_link = NULL;
678 		}
679 		if (vn_has_cached_data(vp)) {
680 			/* clean all old pages */
681 			(void) pvn_vplist_dirty(vp, (u_offset_t)0,
682 			    hsfs_putapage, B_INVAL, (struct cred *)NULL);
683 			/* XXX - can we remove pages by fiat like this??? */
684 			vp->v_pages = NULL;
685 		}
686 		mutex_destroy(&hp->hs_contents_lock);
687 		vn_invalid(vp);
688 		vn_free(vp);
689 		kmem_cache_free(hsnode_cache, hp);
690 		fsp->hsfs_nohsnode--;
691 		return;
692 	}
693 	hs_addfreeb(fsp, hp); /* add to back of free list */
694 }
695 
696 /*
697  * hs_remakenode
698  *
699  * Reconstruct a vnode given the location of its directory entry.
700  * Caller specifies the the block number and offset
701  * of the directory entry, and the vfs pointer.
702  * Returns an error code or 0.
703  */
704 int
705 hs_remakenode(uint_t lbn, uint_t off, struct vfs *vfsp,
706     struct vnode **vpp)
707 {
708 	struct buf *secbp;
709 	struct hsfs *fsp;
710 	uint_t secno;
711 	uchar_t *dirp;
712 	struct hs_direntry hd;
713 	int error;
714 
715 	/* Convert to sector and offset */
716 	fsp = VFS_TO_HSFS(vfsp);
717 	if (off > HS_SECTOR_SIZE) {
718 		cmn_err(CE_WARN, "hs_remakenode: bad offset");
719 		error = EINVAL;
720 		goto end;
721 	}
722 	secno = LBN_TO_SEC(lbn, vfsp);
723 	secbp = bread(fsp->hsfs_devvp->v_rdev, secno * 4, HS_SECTOR_SIZE);
724 
725 	error = geterror(secbp);
726 	if (error != 0) {
727 		cmn_err(CE_NOTE, "hs_remakenode: bread: error=(%d)", error);
728 		goto end;
729 	}
730 
731 	dirp = (uchar_t *)secbp->b_un.b_addr;
732 	error = hs_parsedir(fsp, &dirp[off], &hd, (char *)NULL, (int *)NULL,
733 						HS_SECTOR_SIZE - off);
734 	if (!error) {
735 		*vpp = hs_makenode(&hd, lbn, off, vfsp);
736 		if (*vpp == NULL)
737 			error = ENFILE;
738 	}
739 
740 end:
741 	brelse(secbp);
742 	return (error);
743 }
744 
745 
746 /*
747  * hs_dirlook
748  *
749  * Look for a given name in a given directory.
750  * If found, construct an hsnode for it.
751  */
752 int
753 hs_dirlook(
754 	struct vnode	*dvp,
755 	char		*name,
756 	int		namlen,		/* length of 'name' */
757 	struct vnode	**vpp,
758 	struct cred	*cred)
759 {
760 	struct hsnode *dhp;
761 	struct hsfs	*fsp;
762 	int		error = 0;
763 	uint_t		offset;		/* real offset in directory */
764 	uint_t		last_offset;	/* last index in directory */
765 	char		*cmpname;	/* case-folded name */
766 	int		cmpname_size;	/* how much memory we allocate for it */
767 	int		cmpnamelen;
768 	int		adhoc_search;	/* did we start at begin of dir? */
769 	int		end;
770 	uint_t		hsoffset;
771 	struct fbuf	*fbp;
772 	int		bytes_wanted;
773 	int		dirsiz;
774 	int		is_rrip;
775 
776 	if (dvp->v_type != VDIR)
777 		return (ENOTDIR);
778 
779 	if (error = hs_access(dvp, (mode_t)VEXEC, cred))
780 		return (error);
781 
782 	if (hsfs_use_dnlc && (*vpp = dnlc_lookup(dvp, name)))
783 		return (0);
784 
785 	dhp = VTOH(dvp);
786 	fsp = VFS_TO_HSFS(dvp->v_vfsp);
787 	is_rrip = IS_RRIP_IMPLEMENTED(fsp);
788 
789 	/*
790 	 * name == "^A" is illegal for ISO-9660 and Joliet as '..' is '\1' on
791 	 * disk. It is no problem for Rock Ridge as RR uses '.' and '..'.
792 	 * XXX It could be OK for Joliet also (because namelen == 1 is
793 	 * XXX impossible for UCS-2) but then we need a better compare algorith.
794 	 */
795 	if (!is_rrip && *name == '\1' && namlen == 1)
796 		return (EINVAL);
797 
798 	cmpname_size = (int)(fsp->hsfs_namemax + 1);
799 	cmpname = kmem_alloc((size_t)cmpname_size, KM_SLEEP);
800 
801 	if (namlen >= cmpname_size)
802 		namlen = cmpname_size - 1;
803 	/*
804 	 * For the purposes of comparing the name against dir entries,
805 	 * fold it to upper case.
806 	 */
807 	if (is_rrip) {
808 		(void) strlcpy(cmpname, name, cmpname_size);
809 		cmpnamelen = namlen;
810 	} else {
811 		/*
812 		 * If we don't consider a trailing dot as part of the filename,
813 		 * remove it from the specified name
814 		 */
815 		if ((fsp->hsfs_flags & HSFSMNT_NOTRAILDOT) &&
816 			name[namlen-1] == '.' &&
817 				CAN_TRUNCATE_DOT(name, namlen))
818 			name[--namlen] = '\0';
819 		if (fsp->hsfs_vol_type == HS_VOL_TYPE_ISO_V2 ||
820 		    fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET) {
821 			cmpnamelen = hs_iso_copy(name, cmpname, namlen);
822 		} else {
823 			cmpnamelen = hs_uppercase_copy(name, cmpname, namlen);
824 		}
825 	}
826 
827 	/* make sure dirent is filled up with all info */
828 	if (dhp->hs_dirent.ext_size == 0)
829 		hs_filldirent(dvp, &dhp->hs_dirent);
830 
831 	/*
832 	 * No lock is needed - hs_offset is used as starting
833 	 * point for searching the directory.
834 	 */
835 	offset = dhp->hs_offset;
836 	hsoffset = offset;
837 	adhoc_search = (offset != 0);
838 
839 	end = dhp->hs_dirent.ext_size;
840 	dirsiz = end;
841 
842 tryagain:
843 
844 	while (offset < end) {
845 		bytes_wanted = MIN(MAXBSIZE, dirsiz - (offset & MAXBMASK));
846 
847 		error = fbread(dvp, (offset_t)(offset & MAXBMASK),
848 			(unsigned int)bytes_wanted, S_READ, &fbp);
849 		if (error)
850 			goto done;
851 
852 		last_offset = (offset & MAXBMASK) + fbp->fb_count;
853 
854 		switch (process_dirblock(fbp, &offset, last_offset,
855 		    cmpname, cmpnamelen, fsp, dhp, dvp, vpp, &error)) {
856 		case FOUND_ENTRY:
857 			/* found an entry, either correct or not */
858 			goto done;
859 
860 		case WENT_PAST:
861 			/*
862 			 * If we get here we know we didn't find it on the
863 			 * first pass. If adhoc_search, then we started a
864 			 * bit into the dir, and need to wrap around and
865 			 * search the first entries.  If not, then we started
866 			 * at the beginning and didn't find it.
867 			 */
868 			if (adhoc_search) {
869 				offset = 0;
870 				end = hsoffset;
871 				adhoc_search = 0;
872 				goto tryagain;
873 			}
874 			error = ENOENT;
875 			goto done;
876 
877 		case HIT_END:
878 			goto tryagain;
879 		}
880 	}
881 	/*
882 	 * End of all dir blocks, didn't find entry.
883 	 */
884 	if (adhoc_search) {
885 		offset = 0;
886 		end = hsoffset;
887 		adhoc_search = 0;
888 		goto tryagain;
889 	}
890 	error = ENOENT;
891 done:
892 	/*
893 	 * If we found the entry, add it to the DNLC
894 	 * If the entry is a device file (assuming we support Rock Ridge),
895 	 * we enter the device vnode to the cache since that is what
896 	 * is in *vpp.
897 	 * That is ok since the CD-ROM is read-only, so (dvp,name) will
898 	 * always point to the same device.
899 	 */
900 	if (hsfs_use_dnlc && !error)
901 		dnlc_enter(dvp, name, *vpp);
902 
903 	kmem_free(cmpname, (size_t)cmpname_size);
904 
905 	return (error);
906 }
907 
908 /*
909  * hs_parsedir
910  *
911  * Parse a Directory Record into an hs_direntry structure.
912  * High Sierra and ISO directory are almost the same
913  * except the flag and date
914  */
915 int
916 hs_parsedir(
917 	struct hsfs		*fsp,
918 	uchar_t			*dirp,
919 	struct hs_direntry	*hdp,
920 	char			*dnp,
921 	int			*dnlen,
922 	int			last_offset)	/* last offset in dirp */
923 {
924 	char	*on_disk_name;
925 	int	on_disk_namelen;
926 	int	on_disk_dirlen;
927 	uchar_t	flags;
928 	int	namelen;
929 	int	error;
930 	int	name_change_flag = 0;	/* set if name was gotten in SUA */
931 
932 	hdp->ext_lbn = HDE_EXT_LBN(dirp);
933 	hdp->ext_size = HDE_EXT_SIZE(dirp);
934 	hdp->xar_len = HDE_XAR_LEN(dirp);
935 	hdp->intlf_sz = HDE_INTRLV_SIZE(dirp);
936 	hdp->intlf_sk = HDE_INTRLV_SKIP(dirp);
937 	hdp->sym_link = (char *)NULL;
938 
939 	if (fsp->hsfs_vol_type == HS_VOL_TYPE_HS) {
940 		flags = HDE_FLAGS(dirp);
941 		hs_parse_dirdate(HDE_cdate(dirp), &hdp->cdate);
942 		hs_parse_dirdate(HDE_cdate(dirp), &hdp->adate);
943 		hs_parse_dirdate(HDE_cdate(dirp), &hdp->mdate);
944 		if ((flags & hde_prohibited) == 0) {
945 			/*
946 			 * Skip files with the associated bit set.
947 			 */
948 			if (flags & HDE_ASSOCIATED)
949 				return (EAGAIN);
950 			hdp->type = VREG;
951 			hdp->mode = HFREG;
952 			hdp->nlink = 1;
953 		} else if ((flags & hde_prohibited) == HDE_DIRECTORY) {
954 			hdp->type = VDIR;
955 			hdp->mode = HFDIR;
956 			hdp->nlink = 2;
957 		} else {
958 			hs_log_bogus_disk_warning(fsp,
959 			    HSFS_ERR_UNSUP_TYPE, flags);
960 			return (EINVAL);
961 		}
962 		hdp->uid = fsp -> hsfs_vol.vol_uid;
963 		hdp->gid = fsp -> hsfs_vol.vol_gid;
964 		hdp->mode = hdp-> mode | (fsp -> hsfs_vol.vol_prot & 0777);
965 	} else if ((fsp->hsfs_vol_type == HS_VOL_TYPE_ISO) ||
966 		    (fsp->hsfs_vol_type == HS_VOL_TYPE_ISO_V2) ||
967 		    (fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET)) {
968 
969 		flags = IDE_FLAGS(dirp);
970 		hs_parse_dirdate(IDE_cdate(dirp), &hdp->cdate);
971 		hs_parse_dirdate(IDE_cdate(dirp), &hdp->adate);
972 		hs_parse_dirdate(IDE_cdate(dirp), &hdp->mdate);
973 
974 		if ((flags & ide_prohibited) == 0) {
975 			/*
976 			 * Skip files with the associated bit set.
977 			 */
978 			if (flags & IDE_ASSOCIATED)
979 				return (EAGAIN);
980 			hdp->type = VREG;
981 			hdp->mode = HFREG;
982 			hdp->nlink = 1;
983 		} else if ((flags & ide_prohibited) == IDE_DIRECTORY) {
984 			hdp->type = VDIR;
985 			hdp->mode = HFDIR;
986 			hdp->nlink = 2;
987 		} else {
988 			hs_log_bogus_disk_warning(fsp,
989 			    HSFS_ERR_UNSUP_TYPE, flags);
990 			return (EINVAL);
991 		}
992 		hdp->uid = fsp -> hsfs_vol.vol_uid;
993 		hdp->gid = fsp -> hsfs_vol.vol_gid;
994 		hdp->mode = hdp-> mode | (fsp -> hsfs_vol.vol_prot & 0777);
995 		hdp->inode = 0;		/* initialize with 0, then check rrip */
996 
997 		/*
998 		 * Having this all filled in, let's see if we have any
999 		 * SUA susp to look at.
1000 		 */
1001 		if (IS_SUSP_IMPLEMENTED(fsp)) {
1002 			error = parse_sua((uchar_t *)dnp, dnlen,
1003 					&name_change_flag, dirp, last_offset,
1004 					hdp, fsp,
1005 					(uchar_t *)NULL, NULL);
1006 			if (error) {
1007 				if (hdp->sym_link) {
1008 					kmem_free(hdp->sym_link,
1009 						(size_t)(hdp->ext_size + 1));
1010 					hdp->sym_link = (char *)NULL;
1011 				}
1012 				return (error);
1013 			}
1014 		}
1015 	}
1016 	hdp->xar_prot = (HDE_PROTECTION & flags) != 0;
1017 
1018 #if dontskip
1019 	if (hdp->xar_len > 0) {
1020 		cmn_err(CE_NOTE, "hsfs: extended attributes not supported");
1021 		return (EINVAL);
1022 	}
1023 #endif
1024 
1025 	/* check interleaf size and skip factor */
1026 	/* must both be zero or non-zero */
1027 	if (hdp->intlf_sz + hdp->intlf_sk) {
1028 		if ((hdp->intlf_sz == 0) || (hdp->intlf_sk == 0)) {
1029 			cmn_err(CE_NOTE,
1030 				"hsfs: interleaf size or skip factor error");
1031 			return (EINVAL);
1032 		}
1033 		if (hdp->ext_size == 0) {
1034 			cmn_err(CE_NOTE,
1035 			    "hsfs: interleaving specified on zero length file");
1036 			return (EINVAL);
1037 		}
1038 	}
1039 
1040 	if (HDE_VOL_SET(dirp) != 1) {
1041 		if (fsp->hsfs_vol.vol_set_size != 1 &&
1042 		    fsp->hsfs_vol.vol_set_size != HDE_VOL_SET(dirp)) {
1043 			cmn_err(CE_NOTE, "hsfs: multivolume file?");
1044 			return (EINVAL);
1045 		}
1046 	}
1047 
1048 	/*
1049 	 * If the name changed, then the NM field for RRIP was hit and
1050 	 * we should not copy the name again, just return.
1051 	 */
1052 	if (NAME_HAS_CHANGED(name_change_flag))
1053 		return (0);
1054 
1055 	/*
1056 	 * Fall back to the ISO name. Note that as in process_dirblock,
1057 	 * the on-disk filename length must be validated against ISO
1058 	 * limits - which, in case of RR present but no RR name found,
1059 	 * are NOT identical to fsp->hsfs_namemax on this filesystem.
1060 	 */
1061 	on_disk_name = (char *)HDE_name(dirp);
1062 	on_disk_namelen = (int)HDE_NAME_LEN(dirp);
1063 	on_disk_dirlen = (int)HDE_DIR_LEN(dirp);
1064 
1065 	if (on_disk_dirlen < HDE_ROOT_DIR_REC_SIZE ||
1066 	    ((on_disk_dirlen > last_offset) ||
1067 	    ((HDE_FDESIZE + on_disk_namelen) > on_disk_dirlen))) {
1068 			hs_log_bogus_disk_warning(fsp,
1069 			    HSFS_ERR_BAD_DIR_ENTRY, 0);
1070 		return (EINVAL);
1071 	}
1072 
1073 	if (on_disk_namelen > fsp->hsfs_namelen &&
1074 	    hs_namelen(fsp, on_disk_name, on_disk_namelen) >
1075 							fsp->hsfs_namelen) {
1076 		hs_log_bogus_disk_warning(fsp,
1077 				fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET ?
1078 				HSFS_ERR_BAD_JOLIET_FILE_LEN :
1079 				HSFS_ERR_BAD_FILE_LEN,
1080 				0);
1081 	}
1082 	if (on_disk_namelen > ISO_NAMELEN_V2_MAX)
1083 		on_disk_namelen = fsp->hsfs_namemax;	/* Paranoia */
1084 
1085 	if (dnp != NULL) {
1086 		if (fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET) {
1087 			namelen = hs_jnamecopy(on_disk_name, dnp,
1088 							on_disk_namelen,
1089 							fsp->hsfs_namemax,
1090 							fsp->hsfs_flags);
1091 			/*
1092 			 * A negative return value means that the file name
1093 			 * has been truncated to fsp->hsfs_namemax.
1094 			 */
1095 			if (namelen < 0) {
1096 				namelen = -namelen;
1097 				hs_log_bogus_disk_warning(fsp,
1098 					HSFS_ERR_TRUNC_JOLIET_FILE_LEN,
1099 					0);
1100 			}
1101 		} else {
1102 			/*
1103 			 * HS_VOL_TYPE_ISO && HS_VOL_TYPE_ISO_V2
1104 			 */
1105 			namelen = hs_namecopy(on_disk_name, dnp,
1106 							on_disk_namelen,
1107 							fsp->hsfs_flags);
1108 		}
1109 		if (namelen == 0)
1110 			return (EINVAL);
1111 		if ((fsp->hsfs_flags & HSFSMNT_NOTRAILDOT) &&
1112 		    dnp[ namelen-1 ] == '.' && CAN_TRUNCATE_DOT(dnp, namelen))
1113 			dnp[ --namelen ] = '\0';
1114 	} else
1115 		namelen = on_disk_namelen;
1116 	if (dnlen != NULL)
1117 		*dnlen = namelen;
1118 
1119 	return (0);
1120 }
1121 
1122 /*
1123  * hs_namecopy
1124  *
1125  * Parse a file/directory name into UNIX form.
1126  * Delete trailing blanks, upper-to-lower case, add NULL terminator.
1127  * Returns the (possibly new) length.
1128  *
1129  * Called from hsfs_readdir() via hs_parsedir()
1130  */
1131 int
1132 hs_namecopy(char *from, char *to, int size, ulong_t flags)
1133 {
1134 	uint_t i;
1135 	uchar_t c;
1136 	int lastspace;
1137 	int maplc;
1138 	int trailspace;
1139 	int version;
1140 
1141 	/* special handling for '.' and '..' */
1142 	if (size == 1) {
1143 		if (*from == '\0') {
1144 			*to++ = '.';
1145 			*to = '\0';
1146 			return (1);
1147 		} else if (*from == '\1') {
1148 			*to++ = '.';
1149 			*to++ = '.';
1150 			*to = '\0';
1151 			return (2);
1152 		}
1153 	}
1154 
1155 	maplc = (flags & HSFSMNT_NOMAPLCASE) == 0;
1156 	trailspace = (flags & HSFSMNT_NOTRAILSPACE) == 0;
1157 	version = (flags & HSFSMNT_NOVERSION) == 0;
1158 	for (i = 0, lastspace = -1; i < size; i++) {
1159 		c = from[i];
1160 		if (c == ';' && version)
1161 			break;
1162 		if (c <= ' ' && !trailspace) {
1163 			if (lastspace == -1)
1164 				lastspace = i;
1165 		} else
1166 			lastspace = -1;
1167 		if (maplc && (c >= 'A') && (c <= 'Z'))
1168 			c += 'a' - 'A';
1169 		to[i] = c;
1170 	}
1171 	if (lastspace != -1)
1172 		i = lastspace;
1173 	to[i] = '\0';
1174 	return (i);
1175 }
1176 
1177 /*
1178  * hs_jnamecopy
1179  *
1180  * This is the Joliet variant of hs_namecopy()
1181  *
1182  * Parse a UCS-2 Joliet file/directory name into UNIX form.
1183  * Add NULL terminator.
1184  * Returns the new length.
1185  *
1186  * Called from hsfs_readdir() via hs_parsedir()
1187  */
1188 int
1189 hs_jnamecopy(char *from, char *to, int size, int maxsize, ulong_t flags)
1190 {
1191 	uint_t i;
1192 	uint_t len;
1193 	uint16_t c;
1194 	int	amt;
1195 	int	version;
1196 
1197 	/* special handling for '.' and '..' */
1198 	if (size == 1) {
1199 		if (*from == '\0') {
1200 			*to++ = '.';
1201 			*to = '\0';
1202 			return (1);
1203 		} else if (*from == '\1') {
1204 			*to++ = '.';
1205 			*to++ = '.';
1206 			*to = '\0';
1207 			return (2);
1208 		}
1209 	}
1210 
1211 	version = (flags & HSFSMNT_NOVERSION) == 0;
1212 	for (i = 0, len = 0; i < size; i++) {
1213 		c = (from[i++] & 0xFF) << 8;
1214 		c |= from[i] & 0xFF;
1215 		if (c == ';' && version)
1216 			break;
1217 
1218 		if (len > (maxsize-3)) {
1219 			if (c < 0x80)
1220 				amt = 1;
1221 			else if (c < 0x800)
1222 				amt = 2;
1223 			else
1224 				amt = 3;
1225 			if ((len+amt) > maxsize) {
1226 				to[len] = '\0';
1227 				return (-len);
1228 			}
1229 		}
1230 		amt = hs_ucs2_2_utf8(c, (uint8_t *)&to[len]);
1231 		if (amt == 0) {
1232 			hs_log_bogus_joliet_warning(); /* should never happen */
1233 			return (0);
1234 		}
1235 		len += amt;
1236 	}
1237 	to[len] = '\0';
1238 	return (len);
1239 }
1240 
/*
 * Copy 'size' bytes of a filename from 'from' to 'to', folding
 * 'a'-'z' to upper case on the way.  No terminator is added.
 * Returns 1 if at least one lower case character was seen, else 0.
 *
 * Called from process_dirblock()
 * via hsfs_lookup() -> hs_dirlook() -> process_dirblock()
 * to create an intermediate name from on disk file names for
 * comparing names.
 */
static int
uppercase_cp(char *from, char *to, int size)
{
	const unsigned int count = (unsigned int)size;
	unsigned int idx;
	unsigned char ch;
	int saw_lower = 0;

	for (idx = 0; idx < count; idx++) {
		ch = (unsigned char)from[idx];
		if (ch >= 'a' && ch <= 'z') {
			ch = (unsigned char)(ch - ('a' - 'A'));
			saw_lower = 1;
		}
		to[idx] = (char)ch;
	}
	return (saw_lower);
}
1267 
/*
 * This is the Joliet variant of uppercase_cp()
 *
 * map a UCS-2 filename (big-endian, two bytes per character) to UTF-8;
 * return new length, or 0 if a character could not be converted.
 * No terminator is added to 'to'.
 *
 * A one-byte name ('\0' or '\1', standing for '.' and '..') is copied
 * through unchanged.  Otherwise only complete two-byte characters are
 * converted: a stray final byte of an odd-sized (corrupt) name is
 * ignored instead of being paired with the byte that follows the name.
 *
 * Called from process_dirblock()
 * via hsfs_lookup() -> hs_dirlook() -> process_dirblock()
 * to create an intermediate name from on disk file names for
 * comparing names.
 */
int
hs_joliet_cp(char *from, char *to, int size)
{
	int		i;
	uint16_t	c;
	int		len = 0;
	int		amt;

	/* special handling for '\0' and '\1' */
	if (size == 1) {
		*to = *from;
		return (1);
	}

	/* signed index: a negative size must not turn into a huge bound */
	for (i = 0; i + 1 < size; i += 2) {
		c = (from[i] & 0xFF) << 8;
		c |= from[i + 1] & 0xFF;

		amt = hs_ucs2_2_utf8(c, (uint8_t *)to);
		if (amt == 0) {
			hs_log_bogus_joliet_warning(); /* should never happen */
			return (0);
		}

		to  += amt;
		len += amt;
	}
	return (len);
}
1307 
1308 static void
1309 hs_log_bogus_joliet_warning(void)
1310 {
1311 	static int	warned = 0;
1312 
1313 	if (warned)
1314 		return;
1315 	warned = 1;
1316 	cmn_err(CE_CONT, "hsfs: Warning: "
1317 		"file name contains bad UCS-2 chacarter\n");
1318 }
1319 
1320 
/*
 * hs_uppercase_copy
 *
 * Turn a UNIX-style name into the form used for HSFS comparisons:
 * "." becomes the single byte '\0', ".." becomes the single byte '\1',
 * and any other name is copied with 'a'-'z' mapped to upper case.
 * The result is not NUL terminated.
 * Returns the (possibly new) length.
 *
 * Called from hs_dirlook() and rrip_namecopy()
 * to create an intermediate name from the callers name from hsfs_lookup()
 * XXX Is the call from rrip_namecopy() OK?
 */
int
hs_uppercase_copy(char *from, char *to, int size)
{
	unsigned int left;
	unsigned char ch;

	/* '.' and '..' have reserved one-byte encodings */
	if (size == 1 && from[0] == '.') {
		to[0] = '\0';
		return (1);
	}
	if (size == 2 && from[0] == '.' && from[1] == '.') {
		to[0] = '\1';
		return (1);
	}

	for (left = (unsigned int)size; left != 0; left--) {
		ch = (unsigned char)*from++;
		if (ch >= 'a' && ch <= 'z')
			ch = (unsigned char)(ch - 'a' + 'A');
		*to++ = (char)ch;
	}
	return (size);
}
1357 
/*
 * hs_iso_copy
 *
 * This is the Joliet/ISO-9660:1999 variant of hs_uppercase_copy()
 *
 * Turn a UTF-8 UNIX-style name into its UTF-8 Joliet/ISO equivalent:
 * "." becomes the single byte '\0', ".." becomes the single byte '\1',
 * and any other name is copied verbatim (no case mapping).
 * The result is not NUL terminated.
 * Returns the (possibly new) length.
 *
 * Called from hs_dirlook()
 * to create an intermediate name from the callers name from hsfs_lookup()
 */
static int
hs_iso_copy(char *from, char *to, int size)
{
	unsigned int left;

	/* '.' and '..' have reserved one-byte encodings */
	if (size == 1 && from[0] == '.') {
		to[0] = '\0';
		return (1);
	}
	if (size == 2 && from[0] == '.' && from[1] == '.') {
		to[0] = '\1';
		return (1);
	}

	for (left = (unsigned int)size; left != 0; left--)
		*to++ = *from++;

	return (size);
}
1392 
1393 void
1394 hs_filldirent(struct vnode *vp, struct hs_direntry *hdp)
1395 {
1396 	struct buf *secbp;
1397 	uint_t	secno;
1398 	offset_t secoff;
1399 	struct hsfs *fsp;
1400 	uchar_t *secp;
1401 	int	error;
1402 
1403 	if (vp->v_type != VDIR) {
1404 		cmn_err(CE_WARN, "hsfs_filldirent: vp (0x%p) not a directory",
1405 			(void *)vp);
1406 		return;
1407 	}
1408 
1409 	fsp = VFS_TO_HSFS(vp ->v_vfsp);
1410 	secno = LBN_TO_SEC(hdp->ext_lbn+hdp->xar_len, vp->v_vfsp);
1411 	secoff = LBN_TO_BYTE(hdp->ext_lbn+hdp->xar_len, vp->v_vfsp) &
1412 			MAXHSOFFSET;
1413 	secbp = bread(fsp->hsfs_devvp->v_rdev, secno * 4, HS_SECTOR_SIZE);
1414 	error = geterror(secbp);
1415 	if (error != 0) {
1416 		cmn_err(CE_NOTE, "hs_filldirent: bread: error=(%d)", error);
1417 		goto end;
1418 	}
1419 
1420 	secp = (uchar_t *)secbp->b_un.b_addr;
1421 
1422 	/* quick check */
1423 	if (hdp->ext_lbn != HDE_EXT_LBN(&secp[secoff])) {
1424 		cmn_err(CE_NOTE, "hsfs_filldirent: dirent not match");
1425 		/* keep on going */
1426 	}
1427 	(void) hs_parsedir(fsp, &secp[secoff], hdp, (char *)NULL,
1428 				(int *)NULL, HS_SECTOR_SIZE - secoff);
1429 
1430 end:
1431 	brelse(secbp);
1432 }
1433 
1434 /*
1435  * Look through a directory block for a matching entry.
1436  * Note: this routine does an fbrelse() on the buffer passed in.
1437  */
1438 static enum dirblock_result
1439 process_dirblock(
1440 	struct fbuf	*fbp,		/* buffer containing dirblk */
1441 	uint_t		*offset,	/* lower index */
1442 	uint_t		last_offset,	/* upper index */
1443 	char		*nm,		/* upcase nm to compare against */
1444 	int		nmlen,		/* length of name */
1445 	struct hsfs	*fsp,
1446 	struct hsnode	*dhp,
1447 	struct vnode	*dvp,
1448 	struct vnode	**vpp,
1449 	int		*error)		/* return value: errno */
1450 {
1451 	uchar_t		*blkp = (uchar_t *)fbp->fb_addr; /* dir block */
1452 	char		*dname;		/* name in directory entry */
1453 	int		dnamelen;	/* length of name */
1454 	struct hs_direntry hd;
1455 	int		hdlen;
1456 	uchar_t		*dirp;		/* the directory entry */
1457 	int		res;
1458 	int		parsedir_res;
1459 	int		is_rrip;
1460 	size_t		rrip_name_size;
1461 	int		rr_namelen = 0;
1462 	char		*rrip_name_str = NULL;
1463 	char		*rrip_tmp_name = NULL;
1464 	enum dirblock_result err = 0;
1465 	int 		did_fbrelse = 0;
1466 	char		uppercase_name[JOLIET_NAMELEN_MAX*3 + 1]; /* 331 */
1467 
1468 #define	PD_return(retval)	\
1469 	{ err = retval; goto do_ret; }		/* return after cleanup */
1470 #define	rel_offset(offset)	\
1471 	((offset) & MAXBOFFSET)			/* index into cur blk */
1472 #define	RESTORE_NM(tmp, orig)	\
1473 	if (is_rrip && *(tmp) != '\0') \
1474 		(void) strcpy((orig), (tmp))
1475 
1476 	is_rrip = IS_RRIP_IMPLEMENTED(fsp);
1477 	if (is_rrip) {
1478 		rrip_name_size = RRIP_FILE_NAMELEN + 1;
1479 		rrip_name_str = kmem_alloc(rrip_name_size, KM_SLEEP);
1480 		rrip_tmp_name = kmem_alloc(rrip_name_size, KM_SLEEP);
1481 		rrip_name_str[0] = '\0';
1482 		rrip_tmp_name[0] = '\0';
1483 	}
1484 
1485 	while (*offset < last_offset) {
1486 
1487 		/*
1488 		 * Directory Entries cannot span sectors.
1489 		 *
1490 		 * Unused bytes at the end of each sector are zeroed
1491 		 * according to ISO9660, but we cannot rely on this
1492 		 * since both media failures and maliciously corrupted
1493 		 * media may return arbitrary values.
1494 		 * We therefore have to check for consistency:
1495 		 * The size of a directory entry must be at least
1496 		 * 34 bytes (the size of the directory entry metadata),
1497 		 * or zero (indicating the end-of-sector condition).
1498 		 * For a non-zero directory entry size of less than
1499 		 * 34 Bytes, log a warning.
1500 		 * In any case, skip the rest of this sector and
1501 		 * continue with the next.
1502 		 */
1503 		hdlen = (int)((uchar_t)
1504 		    HDE_DIR_LEN(&blkp[rel_offset(*offset)]));
1505 
1506 		if (hdlen < HDE_ROOT_DIR_REC_SIZE ||
1507 		    *offset + hdlen > last_offset) {
1508 			/*
1509 			 * Advance to the next sector boundary
1510 			 */
1511 			*offset = roundup(*offset + 1, HS_SECTOR_SIZE);
1512 			if (hdlen)
1513 				hs_log_bogus_disk_warning(fsp,
1514 				    HSFS_ERR_TRAILING_JUNK, 0);
1515 			continue;
1516 		}
1517 
1518 		bzero(&hd, sizeof (hd));
1519 
1520 		/*
1521 		 * Check the filename length in the ISO record for
1522 		 * plausibility and reset it to a safe value, in case
1523 		 * the name length byte is out of range. Since the ISO
1524 		 * name will be used as fallback if the rockridge name
1525 		 * is invalid/nonexistant, we must make sure not to
1526 		 * blow the bounds and initialize dnamelen to a sensible
1527 		 * value within the limits of ISO9660.
1528 		 * In addition to that, the ISO filename is part of the
1529 		 * directory entry. If the filename length is too large
1530 		 * to fit, the record is invalid and we'll advance to
1531 		 * the next.
1532 		 */
1533 		dirp = &blkp[rel_offset(*offset)];
1534 		dname = (char *)HDE_name(dirp);
1535 		dnamelen = (int)((uchar_t)HDE_NAME_LEN(dirp));
1536 		/*
1537 		 * If the directory entry extends beyond the end of the
1538 		 * block, it must be invalid. Skip it.
1539 		 */
1540 		if (dnamelen > hdlen - HDE_FDESIZE) {
1541 			hs_log_bogus_disk_warning(fsp,
1542 			    HSFS_ERR_BAD_DIR_ENTRY, 0);
1543 			goto skip_rec;
1544 		} else if (dnamelen > fsp->hsfs_namelen &&
1545 			hs_namelen(fsp, dname, dnamelen) > fsp->hsfs_namelen) {
1546 			hs_log_bogus_disk_warning(fsp,
1547 				fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET ?
1548 				HSFS_ERR_BAD_JOLIET_FILE_LEN :
1549 				HSFS_ERR_BAD_FILE_LEN,
1550 				0);
1551 		}
1552 		if (dnamelen > ISO_NAMELEN_V2_MAX)
1553 			dnamelen = fsp->hsfs_namemax;	/* Paranoia */
1554 
1555 		/*
1556 		 * If the rock ridge is implemented, then we copy the name
1557 		 * from the SUA area to rrip_name_str. If no Alternate
1558 		 * name is found, then use the uppercase NM in the
1559 		 * rrip_name_str char array.
1560 		 */
1561 		if (is_rrip) {
1562 
1563 			rrip_name_str[0] = '\0';
1564 			rr_namelen = rrip_namecopy(nm, &rrip_name_str[0],
1565 			    &rrip_tmp_name[0], dirp, last_offset - *offset,
1566 			    fsp, &hd);
1567 			if (hd.sym_link) {
1568 				kmem_free(hd.sym_link,
1569 				    (size_t)(hd.ext_size+1));
1570 				hd.sym_link = (char *)NULL;
1571 			}
1572 
1573 			if (rr_namelen != -1) {
1574 				dname = (char *)&rrip_name_str[0];
1575 				dnamelen = rr_namelen;
1576 			}
1577 		}
1578 
1579 		if (!is_rrip || rr_namelen == -1) {
1580 			/* use iso name instead */
1581 
1582 			int i = -1;
1583 			/*
1584 			 * make sure that we get rid of ';' in the dname of
1585 			 * an iso direntry, as we should have no knowledge
1586 			 * of file versions.
1587 			 *
1588 			 * XXX This is done the wrong way: it does not take
1589 			 * XXX care of the fact that the version string is
1590 			 * XXX a decimal number in the range 1 to 32767.
1591 			 */
1592 			if ((fsp->hsfs_flags & HSFSMNT_NOVERSION) == 0) {
1593 				if (fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET) {
1594 					for (i = dnamelen - 1; i > 0; i -= 2) {
1595 						if (dname[i] == ';' &&
1596 						    dname[i-1] == '\0') {
1597 							--i;
1598 							break;
1599 						}
1600 					}
1601 				} else {
1602 					for (i = dnamelen - 1; i > 0; i--) {
1603 						if (dname[i] == ';')
1604 							break;
1605 					}
1606 				}
1607 			}
1608 			if (i > 0) {
1609 				dnamelen = i;
1610 			} else if (fsp->hsfs_vol_type != HS_VOL_TYPE_ISO_V2 &&
1611 				    fsp->hsfs_vol_type != HS_VOL_TYPE_JOLIET) {
1612 				dnamelen = strip_trailing(fsp, dname, dnamelen);
1613 			}
1614 
1615 			ASSERT(dnamelen < sizeof (uppercase_name));
1616 
1617 			if (fsp->hsfs_vol_type == HS_VOL_TYPE_ISO_V2) {
1618 				(void) strncpy(uppercase_name, dname, dnamelen);
1619 			} else if (fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET) {
1620 				dnamelen = hs_joliet_cp(dname, uppercase_name,
1621 								dnamelen);
1622 			} else if (uppercase_cp(dname, uppercase_name,
1623 								dnamelen)) {
1624 				hs_log_bogus_disk_warning(fsp,
1625 				    HSFS_ERR_LOWER_CASE_NM, 0);
1626 			}
1627 			dname = uppercase_name;
1628 			if (!is_rrip &&
1629 			    (fsp->hsfs_flags & HSFSMNT_NOTRAILDOT) &&
1630 			    dname[dnamelen - 1] == '.' &&
1631 			    CAN_TRUNCATE_DOT(dname, dnamelen))
1632 				dname[--dnamelen] = '\0';
1633 		}
1634 
1635 		/*
1636 		 * Quickly screen for a non-matching entry, but not for RRIP.
1637 		 * This test doesn't work for lowercase vs. uppercase names.
1638 		 */
1639 
1640 		/* if we saw a lower case name we can't do this test either */
1641 		if (strict_iso9660_ordering && !is_rrip &&
1642 		    !HSFS_HAVE_LOWER_CASE(fsp) && *nm < *dname) {
1643 			RESTORE_NM(rrip_tmp_name, nm);
1644 			PD_return(WENT_PAST)
1645 		}
1646 
1647 		if (*nm != *dname || nmlen != dnamelen)
1648 			goto skip_rec;
1649 
1650 		if ((res = bcmp(dname, nm, nmlen)) == 0) {
1651 			/* name matches */
1652 			parsedir_res = hs_parsedir(fsp, dirp, &hd,
1653 			    (char *)NULL, (int *)NULL,
1654 					last_offset - *offset);
1655 			if (!parsedir_res) {
1656 				uint_t lbn;	/* logical block number */
1657 
1658 				lbn = dhp->hs_dirent.ext_lbn +
1659 				    dhp->hs_dirent.xar_len;
1660 				/*
1661 				 * Need to do an fbrelse() on the buffer,
1662 				 * as hs_makenode() may try to acquire
1663 				 * hs_hashlock, which may not be required
1664 				 * while a page is locked.
1665 				 */
1666 				fbrelse(fbp, S_READ);
1667 				did_fbrelse = 1;
1668 				*vpp = hs_makenode(&hd, lbn, *offset,
1669 				    dvp->v_vfsp);
1670 				if (*vpp == NULL) {
1671 					*error = ENFILE;
1672 					RESTORE_NM(rrip_tmp_name, nm);
1673 					PD_return(FOUND_ENTRY)
1674 				}
1675 
1676 				dhp->hs_offset = *offset;
1677 				RESTORE_NM(rrip_tmp_name, nm);
1678 				PD_return(FOUND_ENTRY)
1679 			} else if (parsedir_res != EAGAIN) {
1680 				/* improper dir entry */
1681 				*error = parsedir_res;
1682 				RESTORE_NM(rrip_tmp_name, nm);
1683 				PD_return(FOUND_ENTRY)
1684 			}
1685 		} else if (strict_iso9660_ordering && !is_rrip &&
1686 			!HSFS_HAVE_LOWER_CASE(fsp) && res < 0) {
1687 			/* name < dir entry */
1688 			RESTORE_NM(rrip_tmp_name, nm);
1689 			PD_return(WENT_PAST)
1690 		}
1691 		/*
1692 		 * name > dir entry,
1693 		 * look at next one.
1694 		 */
1695 skip_rec:
1696 		*offset += hdlen;
1697 		RESTORE_NM(rrip_tmp_name, nm);
1698 	}
1699 	PD_return(HIT_END)
1700 
1701 do_ret:
1702 	if (rrip_name_str)
1703 		kmem_free(rrip_name_str, rrip_name_size);
1704 	if (rrip_tmp_name)
1705 		kmem_free(rrip_tmp_name, rrip_name_size);
1706 	if (!did_fbrelse)
1707 		fbrelse(fbp, S_READ);
1708 	return (err);
1709 #undef PD_return
1710 #undef RESTORE_NM
1711 }
1712 
1713 /*
1714  * Strip trailing nulls or spaces from the name;
1715  * return adjusted length.  If we find such junk,
1716  * log a non-conformant disk message.
1717  */
1718 static int
1719 strip_trailing(struct hsfs *fsp, char *nm, int len)
1720 {
1721 	char *c;
1722 	int trailing_junk = 0;
1723 
1724 	for (c = nm + len - 1; c > nm; c--) {
1725 		if (*c == ' ' || *c == '\0')
1726 			trailing_junk = 1;
1727 		else
1728 			break;
1729 	}
1730 
1731 	if (trailing_junk)
1732 		hs_log_bogus_disk_warning(fsp, HSFS_ERR_TRAILING_JUNK, 0);
1733 
1734 	return ((int)(c - nm + 1));
1735 }
1736 
1737 static int
1738 hs_namelen(struct hsfs *fsp, char *nm, int len)
1739 {
1740 	char	*p = nm + len;
1741 
1742 	if (fsp->hsfs_vol_type == HS_VOL_TYPE_ISO_V2) {
1743 		return (len);
1744 	} else if (fsp->hsfs_vol_type == HS_VOL_TYPE_JOLIET) {
1745 		uint16_t c;
1746 
1747 		while (--p > &nm[1]) {
1748 			c = *p;
1749 			c |= *--p * 256;
1750 			if (c == ';')
1751 				return (p - nm);
1752 			if (c < '0' || c > '9') {
1753 				p++;
1754 				return (p - nm);
1755 			}
1756 		}
1757 	} else {
1758 		char	c;
1759 
1760 		while (--p > nm) {
1761 			c = *p;
1762 			if (c == ';')
1763 				return (p - nm);
1764 			if (c < '0' || c > '9') {
1765 				p++;
1766 				return (p - nm);
1767 			}
1768 		}
1769 	}
1770 	return (len);
1771 }
1772 
/*
 * Take a UCS-2 character and convert
 * it into a utf8 character.
 *
 * The encoded bytes are stored at s_8, which must have room for up to
 * three bytes; no terminator is written.  Returns the number of bytes
 * stored.  A return of 0 would stand for a failed conversion (callers
 * check for it), but every 16-bit value encodes in one to three bytes,
 * so that does not occur.
 *
 * See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 *
 * The code has been taken from udfs/udf_subr.c
 */
static uint8_t hs_first_byte_mark[7] =
			{ 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
static int32_t
hs_ucs2_2_utf8(uint16_t c_16, uint8_t *s_8)
{
	uint32_t wide = c_16;	/* bits not yet emitted */
	int32_t nbytes;
	int32_t pos;

	/* a 16-bit character never needs more than three bytes */
	if (wide < 0x80)
		nbytes = 1;
	else if (wide < 0x800)
		nbytes = 2;
	else
		nbytes = 3;

	/*
	 * Fill in the continuation bytes back to front, six payload bits
	 * each, tagged with the 10xxxxxx marker.
	 */
	for (pos = nbytes - 1; pos > 0; pos--) {
		s_8[pos] = (uint8_t)((wide | 0x80) & 0xBF);
		wide >>= 6;
	}

	/* the lead byte carries the length marker and the remaining bits */
	s_8[0] = (uint8_t)(wide | hs_first_byte_mark[nbytes]);
	return (nbytes);
}
1843