xref: /titanic_51/usr/src/uts/common/fs/ufs/ufs_acl.c (revision 4e9cfc9a015e8ca7d41f7d018c74dc8a692305b3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/errno.h>
31 #include <sys/kmem.h>
32 #include <sys/t_lock.h>
33 #include <sys/ksynch.h>
34 #include <sys/buf.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/mode.h>
38 #include <sys/systm.h>
39 #include <vm/seg.h>
40 #include <sys/file.h>
41 #include <sys/acl.h>
42 #include <sys/fs/ufs_inode.h>
43 #include <sys/fs/ufs_acl.h>
44 #include <sys/fs/ufs_quota.h>
45 #include <sys/sysmacros.h>
46 #include <sys/debug.h>
47 #include <sys/policy.h>
48 
49 /* Cache routines */
50 static int si_signature(si_t *);
51 static int si_cachei_get(struct inode *, si_t **);
52 static int si_cachea_get(struct inode *, si_t *, si_t **);
53 static int si_cmp(si_t *, si_t *);
54 static void si_cache_put(si_t *);
55 void si_cache_del(si_t *, int);
56 void si_cache_init(void);
57 
58 static void ufs_si_free_mem(si_t *);
59 static int ufs_si_store(struct inode *, si_t *, int, cred_t *);
60 static si_t *ufs_acl_cp(si_t *);
61 static int ufs_sectobuf(si_t *, caddr_t *, size_t *);
62 static int acl_count(ufs_ic_acl_t *);
63 static int acl_validate(aclent_t *, int, int);
64 static int vsecattr2aclentry(vsecattr_t *, si_t **);
65 static int aclentry2vsecattr(si_t *, vsecattr_t *);
66 
67 krwlock_t si_cache_lock;		/* Protects si_cache */
68 int	si_cachecnt = 64;		/* # buckets in si_cache[a|i] */
69 si_t	**si_cachea;			/* The 'by acl' cache chains */
70 si_t	**si_cachei;			/* The 'by inode' cache chains */
71 long	si_cachehit = 0;
72 long	si_cachemiss = 0;
73 
74 #define	SI_HASH(S)	((int)(S) & (si_cachecnt - 1))
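/*
 * Editorial note: SI_HASH() masks the low-order bits of its argument,
 * so si_cachecnt must remain a power of two for the buckets to be
 * used evenly.  For example, with si_cachecnt == 64 a signature of
 * 0x1234 lands in bucket (0x1234 & 63) == 52.
 */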
75 
76 /*
77  * Store the new acl list sp on inode ip.  Attempts to make the
78  * update atomic.  Search the acl cache for an identical list and,
79  * if found, attach the cached acl to ip.  If the acl is new (not
80  * in the cache), add it to the cache, then attach it to ip.  Last,
81  * remove and decrement the reference count of any prior acl list
82  * attached to the ip.
83  *
84  * Parameters:
85  * ip - Ptr to inode to receive the acl list
86  * sp - Ptr to in-core acl structure to attach to the inode.
87  * puship - 0: do not push the object inode (ip); 1: push the ip
88  * cr - Ptr to credentials
89  *
90  * Returns:	0 - Success
91  * 		N - From errno.h
92  */
93 static int
94 ufs_si_store(struct inode *ip, si_t *sp, int puship, cred_t *cr)
95 {
96 	struct vfs	*vfsp;
97 	struct inode	*sip;
98 	si_t		*oldsp;
99 	si_t		*csp;
100 	caddr_t		acldata;
101 	ino_t		oldshadow;
102 	size_t		acldatalen;
103 	off_t		offset;
104 	int		shadow;
105 	int		err;
106 	int		refcnt;
107 	int		usecnt;
108 	int		signature;
109 	int		resid;
110 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
111 	struct fs	*fs		= ufsvfsp->vfs_fs;
112 
113 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
114 	ASSERT(ip->i_ufs_acl != sp);
115 
116 	if (!CHECK_ACL_ALLOWED(ip->i_mode & IFMT))
117 		return (ENOSYS);
118 
119 	/*
120 	 * if there are only the three owner/group/other entries then do
121 	 * not create a shadow inode.  If a shadow is already associated
122 	 * with the file, remove it.
123 	 *
124 	 */
125 	if (!sp->ausers &&
126 	    !sp->agroups &&
127 	    !sp->downer &&
128 	    !sp->dgroup &&
129 	    !sp->dother &&
130 	    sp->dclass.acl_ismask == 0 &&
131 	    !sp->dusers &&
132 	    !sp->dgroups) {
133 		if (ip->i_ufs_acl)
134 			err = ufs_si_free(ip->i_ufs_acl, ITOV(ip)->v_vfsp, cr);
135 		ip->i_ufs_acl = NULL;
136 		ip->i_shadow = 0;
137 		ip->i_flag |= IMOD | IACC;
138 		ip->i_mode = (ip->i_smode & ~0777) |
139 		    ((sp->aowner->acl_ic_perm & 07) << 6) |
140 		    (MASK2MODE(sp)) |
141 		    (sp->aother->acl_ic_perm & 07);
142 		TRANS_INODE(ip->i_ufsvfs, ip);
143 		ufs_iupdat(ip, 1);
144 		ufs_si_free_mem(sp);
145 		return (0);
146 	}
147 
148 loop:
149 
150 	/*
151 	 * Check cache. If in cache, use existing shadow inode.
152 	 * Increment the shadow link count, then attach to the
153 	 * cached ufs_acl_entry struct, and increment its reference
154 	 * count.  Then discard the passed-in ufs_acl_entry and
155 	 * return.
156 	 */
157 	if (si_cachea_get(ip, sp, &csp) == 0) {
158 		ASSERT(RW_WRITE_HELD(&csp->s_lock));
159 		if (ip->i_ufs_acl == csp) {
160 			rw_exit(&csp->s_lock);
161 			(void) ufs_si_free_mem(sp);
162 			return (0);
163 		}
164 		vfsp = ITOV(ip)->v_vfsp;
165 		ASSERT(csp->s_shadow <= INT_MAX);
166 		shadow = (int)csp->s_shadow;
167 		/*
168 		 * We can't call ufs_iget while holding the csp locked,
169 		 * because we might deadlock.  So we drop the
170 		 * lock on csp, then go search the si_cache again
171 		 * to see if the csp is still there.
172 		 */
173 		rw_exit(&csp->s_lock);
174 		if ((err = ufs_iget(vfsp, shadow, &sip, cr)) != 0) {
175 			(void) ufs_si_free_mem(sp);
176 			return (EIO);
177 		}
178 		rw_enter(&sip->i_contents, RW_WRITER);
179 		if ((sip->i_mode & IFMT) != IFSHAD || sip->i_nlink <= 0) {
180 			rw_exit(&sip->i_contents);
181 			VN_RELE(ITOV(sip));
182 			goto loop;
183 		}
184 		/* Get the csp again */
185 		if (si_cachea_get(ip, sp, &csp) != 0) {
186 			rw_exit(&sip->i_contents);
187 			VN_RELE(ITOV(sip));
188 			goto loop;
189 		}
190 		ASSERT(RW_WRITE_HELD(&csp->s_lock));
191 		/* See if we got the right shadow */
192 		if (csp->s_shadow != shadow) {
193 			rw_exit(&csp->s_lock);
194 			rw_exit(&sip->i_contents);
195 			VN_RELE(ITOV(sip));
196 			goto loop;
197 		}
198 		ASSERT(RW_WRITE_HELD(&sip->i_contents));
199 		ASSERT(sip->i_dquot == 0);
200 		/* Increment link count */
201 		ASSERT(sip->i_nlink > 0);
202 		sip->i_nlink++;
203 		TRANS_INODE(ufsvfsp, sip);
204 		csp->s_use = sip->i_nlink;
205 		csp->s_ref++;
206 		ASSERT(csp->s_ref >= 0 && csp->s_ref <= csp->s_use);
207 		sip->i_flag |= ICHG | IMOD;
208 		sip->i_seq++;
209 		ITIMES_NOLOCK(sip);
210 		/*
211 		 * Always release s_lock before both releasing i_contents
212 		 * and calling VN_RELE.
213 		 */
214 		rw_exit(&csp->s_lock);
215 		rw_exit(&sip->i_contents);
216 		VN_RELE(ITOV(sip));
217 		(void) ufs_si_free_mem(sp);
218 		sp = csp;
219 		si_cachehit++;
220 		goto switchshadows;
221 	}
222 
223 	/* Alloc a shadow inode and fill it in */
224 	err = ufs_ialloc(ip, ip->i_number, (mode_t)IFSHAD, &sip, cr);
225 	if (err) {
226 		(void) ufs_si_free_mem(sp);
227 		return (err);
228 	}
229 	rw_enter(&sip->i_contents, RW_WRITER);
230 	sip->i_flag |= IACC | IUPD | ICHG;
231 	sip->i_seq++;
232 	sip->i_mode = (o_mode_t)IFSHAD;
233 	ITOV(sip)->v_type = VREG;
234 	ufs_reset_vnode(ITOV(sip));
235 	sip->i_nlink = 1;
236 	sip->i_uid = crgetuid(cr);
237 	sip->i_suid = (ulong_t)sip->i_uid > (ulong_t)USHRT_MAX ?
238 	    UID_LONG : sip->i_uid;
239 	sip->i_gid = crgetgid(cr);
240 	sip->i_sgid = (ulong_t)sip->i_gid > (ulong_t)USHRT_MAX ?
241 	    GID_LONG : sip->i_gid;
242 	sip->i_shadow = 0;
243 	TRANS_INODE(ufsvfsp, sip);
244 	sip->i_ufs_acl = NULL;
245 	ASSERT(sip->i_size == 0);
246 
247 	sp->s_shadow = sip->i_number;
248 
249 	if ((err = ufs_sectobuf(sp, &acldata, &acldatalen)) != 0)
250 		goto errout;
251 	offset = 0;
252 
253 	/*
254 	 * We don't actually care about the residual count upon failure,
255 	 * but giving ufs_rdwri() the pointer means it won't translate
256 	 * all failures to EIO.  Our caller needs to know when ENOSPC
257 	 * gets hit.
258 	 */
259 	resid = 0;
260 	if (((err = ufs_rdwri(UIO_WRITE, FWRITE|FSYNC, sip, acldata,
261 	    acldatalen, (offset_t)0, UIO_SYSSPACE, &resid, cr)) != 0) ||
262 	    (resid != 0)) {
263 		kmem_free(acldata, acldatalen);
264 		if ((resid != 0) && (err == 0))
265 			err = ENOSPC;
266 		goto errout;
267 	}
268 
269 	offset += acldatalen;
270 	if ((acldatalen + fs->fs_bsize) > ufsvfsp->vfs_maxacl)
271 		ufsvfsp->vfs_maxacl = acldatalen + fs->fs_bsize;
272 
273 	kmem_free(acldata, acldatalen);
274 	/* Sync & free the shadow inode */
275 	ufs_iupdat(sip, 1);
276 	rw_exit(&sip->i_contents);
277 	VN_RELE(ITOV(sip));
278 
279 	/* We're committed to using this sp */
280 	sp->s_use = 1;
281 	sp->s_ref = 1;
282 
283 	/* Now put the new acl stuff in the cache */
284 	/* XXX Might make a duplicate */
285 	si_cache_put(sp);
286 	si_cachemiss++;
287 
288 switchshadows:
289 	/* Now switch the parent inode to use the new shadow inode */
290 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
291 	rw_enter(&sp->s_lock, RW_READER);
292 	oldsp = ip->i_ufs_acl;
293 	oldshadow = ip->i_shadow;
294 	ip->i_ufs_acl = sp;
295 	ASSERT(sp->s_shadow <= INT_MAX);
296 	ip->i_shadow = (int32_t)sp->s_shadow;
297 	ASSERT(oldsp != sp);
298 	ASSERT(oldshadow != ip->i_number);
299 	ASSERT(ip->i_number != ip->i_shadow);
300 	/*
301 	 * Change the mode bits to follow the acl list
302 	 *
303 	 * NOTE:	a directory is not required to have a "regular" acl
304 	 *		(see bug ids 1238908, 1257173, 1263171 and 1263188)
305 	 *
306 	 *		but if a "regular" acl is present, it must contain
307 	 *		an "owner", "group", and "other" acl
308 	 *
309 	 *		If an ACL mask exists, the effective group rights are
310 	 *		set to the mask.  Otherwise, the effective group rights
311 	 * 		are set to the object group bits.
312 	 */
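	/*
	 * Worked example (editorial, not from the original source): if
	 * the ACL's group_obj entry grants rwx (7) but the mask is r-x
	 * (5), the group bits of i_mode below end up as 050, while the
	 * ACL itself still records the full rwx for the group.
	 */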
313 	if (sp->aowner) {				/* Owner */
314 		ip->i_mode &= ~0700;			/* clear Owner */
315 		ip->i_mode |= (sp->aowner->acl_ic_perm & 07) << 6;
316 		ip->i_uid = sp->aowner->acl_ic_who;
317 	}
318 
319 	if (sp->agroup) {				/* Group */
320 		ip->i_mode &= ~0070;			/* clear Group */
321 		ip->i_mode |= MASK2MODE(sp);		/* apply mask */
322 		ip->i_gid = sp->agroup->acl_ic_who;
323 	}
324 
325 	if (sp->aother) {				/* Other */
326 		ip->i_mode &= ~0007;			/* clear Other */
327 		ip->i_mode |= (sp->aother->acl_ic_perm & 07);
328 	}
329 
330 	if (sp->aclass.acl_ismask)
331 		ip->i_mode = (ip->i_mode & ~070) |
332 		    (((sp->aclass.acl_maskbits & 07) << 3) &
333 		    ip->i_mode);
334 
335 	TRANS_INODE(ufsvfsp, ip);
336 	rw_exit(&sp->s_lock);
337 	ip->i_flag |= ICHG;
338 	ip->i_seq++;
339 	/*
340 	 * when creating a file there is no need to push the inode, it
341 	 * is pushed later
342 	 */
343 	if (puship == 1)
344 		ufs_iupdat(ip, 1);
345 
346 	/*
347 	 * Decrement link count on the old shadow inode,
348 	 * and decrement reference count on the old aclp,
349 	 */
350 	if (oldshadow) {
351 		/* Get the shadow inode */
352 		ASSERT(RW_WRITE_HELD(&ip->i_contents));
353 		vfsp = ITOV(ip)->v_vfsp;
354 		if ((err = ufs_iget_alloced(vfsp, oldshadow, &sip, cr)) != 0) {
355 			return (EIO);
356 		}
357 		/* Decrement link count */
358 		rw_enter(&sip->i_contents, RW_WRITER);
359 		if (oldsp)
360 			rw_enter(&oldsp->s_lock, RW_WRITER);
361 		ASSERT(sip->i_dquot == 0);
362 		ASSERT(sip->i_nlink > 0);
363 		usecnt = --sip->i_nlink;
364 		ufs_setreclaim(sip);
365 		TRANS_INODE(ufsvfsp, sip);
366 		sip->i_flag |= ICHG | IMOD;
367 		sip->i_seq++;
368 		ITIMES_NOLOCK(sip);
369 		if (oldsp) {
370 			oldsp->s_use = usecnt;
371 			refcnt = --oldsp->s_ref;
372 			signature = oldsp->s_signature;
373 			/*
374 			 * Always release s_lock before both releasing
375 			 * i_contents and calling VN_RELE.
376 			 */
377 			rw_exit(&oldsp->s_lock);
378 		}
379 		rw_exit(&sip->i_contents);
380 		VN_RELE(ITOV(sip));
381 		if (oldsp && (refcnt == 0))
382 			si_cache_del(oldsp, signature);
383 	}
384 	return (0);
385 
386 errout:
387 	/* Throw the newly alloc'd inode away */
388 	sip->i_nlink = 0;
389 	ufs_setreclaim(sip);
390 	TRANS_INODE(ufsvfsp, sip);
391 	ITIMES_NOLOCK(sip);
392 	rw_exit(&sip->i_contents);
393 	VN_RELE(ITOV(sip));
394 	ASSERT(!sp->s_use && !sp->s_ref && !(sp->s_flags & SI_CACHED));
395 	(void) ufs_si_free_mem(sp);
396 	return (err);
397 }
398 
399 /*
400  * Load the acls for inode ip either from disk (adding to the cache),
401  * or search the cache and attach the cached acl list to the ip.
402  * In either case, maintain the proper reference count on the cached entry.
403  *
404  * Parameters:
405  * ip - Ptr to the inode which needs the acl list loaded
406  * cr - Ptr to credentials
407  *
408  * Returns:	0 - Success
409  * 		N - From errno.h
410  */
411 int
412 ufs_si_load(struct inode *ip, cred_t *cr)
413 /*
414  *	ip	parent inode in
415  *	cr	credentials in
416  */
417 {
418 	struct vfs	*vfsp;
419 	struct inode	*sip;
420 	ufs_fsd_t	*fsdp;
421 	si_t		*sp;
422 	vsecattr_t	vsecattr = {
423 				(uint_t)0,
424 				(int)0,
425 				(void *)NULL,
426 				(int)0,
427 				(void *)NULL};
428 	aclent_t	*aclp;
429 	ufs_acl_t	*ufsaclp;
430 	caddr_t		acldata = NULL;
431 	ino_t		maxino;
432 	int		err;
433 	size_t		acldatalen;
434 	int		numacls;
435 	int		shadow;
436 	int		usecnt;
437 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
438 	struct fs	*fs		= ufsvfsp->vfs_fs;
439 
440 	ASSERT(ip != NULL);
441 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
442 	ASSERT(ip->i_shadow && ip->i_ufs_acl == NULL);
443 	ASSERT((ip->i_mode & IFMT) != IFSHAD);
444 
445 	if (!CHECK_ACL_ALLOWED(ip->i_mode & IFMT))
446 		return (ENOSYS);
447 
448 	if (ip->i_shadow == ip->i_number)
449 		return (EIO);
450 
451 	maxino = (ino_t)(ITOF(ip)->fs_ncg * ITOF(ip)->fs_ipg);
452 	if (ip->i_shadow < UFSROOTINO || ip->i_shadow > maxino)
453 		return (EIO);
454 
455 	/*
456 	 * XXX Check cache.  If in cache, link to it and increment
457 	 * the reference count, then return.
458 	 */
459 	if (si_cachei_get(ip, &sp) == 0) {
460 		ASSERT(RW_WRITE_HELD(&sp->s_lock));
461 		ip->i_ufs_acl = sp;
462 		sp->s_ref++;
463 		ASSERT(sp->s_ref >= 0 && sp->s_ref <= sp->s_use);
464 		rw_exit(&sp->s_lock);
465 		si_cachehit++;
466 		return (0);
467 	}
468 
469 	/* Get the shadow inode */
470 	vfsp = ITOV(ip)->v_vfsp;
471 	shadow = ip->i_shadow;
472 	if ((err = ufs_iget_alloced(vfsp, shadow, &sip, cr)) != 0) {
473 		return (err);
474 	}
475 	rw_enter(&sip->i_contents, RW_WRITER);
476 
477 	if ((sip->i_mode & IFMT) != IFSHAD) {
478 		rw_exit(&sip->i_contents);
479 		err = EINVAL;
480 		goto alldone;
481 	}
482 
483 	ASSERT(sip->i_dquot == 0);
484 	usecnt = sip->i_nlink;
485 	if ((!ULOCKFS_IS_NOIACC(&ufsvfsp->vfs_ulockfs)) &&
486 	    (!(sip)->i_ufsvfs->vfs_noatime)) {
487 		sip->i_flag |= IACC;
488 	}
489 	rw_downgrade(&sip->i_contents);
490 
491 	ASSERT(sip->i_size <= MAXOFF_T);
492 	/* Read the acl's and other stuff from disk */
493 	acldata	 = kmem_zalloc((size_t)sip->i_size, KM_SLEEP);
494 	acldatalen = sip->i_size;
495 
496 	err = ufs_rdwri(UIO_READ, FREAD, sip, acldata, acldatalen, (offset_t)0,
497 	    UIO_SYSSPACE, (int *)0, cr);
498 
499 	rw_exit(&sip->i_contents);
500 
501 	if (err)
502 		goto alldone;
503 
504 	/*
505 	 * Convert from disk format
506 	 * Result is a vsecattr struct which we then convert to the
507 	 * si struct.
508 	 */
509 	bzero((caddr_t)&vsecattr, sizeof (vsecattr_t));
510 	for (fsdp = (ufs_fsd_t *)acldata;
511 	    fsdp < (ufs_fsd_t *)(acldata + acldatalen);
512 	    fsdp = (ufs_fsd_t *)((caddr_t)fsdp +
513 	    FSD_RECSZ(fsdp, fsdp->fsd_size))) {
514 		if (fsdp->fsd_size <= 0)
515 			break;
516 		switch (fsdp->fsd_type) {
517 		case FSD_ACL:
518 			numacls = vsecattr.vsa_aclcnt =
519 			    (int)((fsdp->fsd_size - 2 * sizeof (int)) /
520 			    sizeof (ufs_acl_t));
521 			aclp = vsecattr.vsa_aclentp =
522 			    kmem_zalloc(numacls * sizeof (aclent_t), KM_SLEEP);
523 			for (ufsaclp = (ufs_acl_t *)fsdp->fsd_data;
524 			    numacls; ufsaclp++) {
525 				aclp->a_type = ufsaclp->acl_tag;
526 				aclp->a_id = ufsaclp->acl_who;
527 				aclp->a_perm = ufsaclp->acl_perm;
528 				aclp++;
529 				numacls--;
530 			}
531 			break;
532 		case FSD_DFACL:
533 			numacls = vsecattr.vsa_dfaclcnt =
534 			    (int)((fsdp->fsd_size - 2 * sizeof (int)) /
535 			    sizeof (ufs_acl_t));
536 			aclp = vsecattr.vsa_dfaclentp =
537 			    kmem_zalloc(numacls * sizeof (aclent_t), KM_SLEEP);
538 			for (ufsaclp = (ufs_acl_t *)fsdp->fsd_data;
539 			    numacls; ufsaclp++) {
540 				aclp->a_type = ufsaclp->acl_tag;
541 				aclp->a_id = ufsaclp->acl_who;
542 				aclp->a_perm = ufsaclp->acl_perm;
543 				aclp++;
544 				numacls--;
545 			}
546 			break;
547 		}
548 	}
549 	/* Sort the lists */
550 	if (vsecattr.vsa_aclentp) {
551 		ksort((caddr_t)vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt,
552 		    sizeof (aclent_t), cmp2acls);
553 		if ((err = acl_validate(vsecattr.vsa_aclentp,
554 		    vsecattr.vsa_aclcnt, ACL_CHECK)) != 0) {
555 			goto alldone;
556 		}
557 	}
558 	if (vsecattr.vsa_dfaclentp) {
559 		ksort((caddr_t)vsecattr.vsa_dfaclentp, vsecattr.vsa_dfaclcnt,
560 		    sizeof (aclent_t), cmp2acls);
561 		if ((err = acl_validate(vsecattr.vsa_dfaclentp,
562 		    vsecattr.vsa_dfaclcnt, DEF_ACL_CHECK)) != 0) {
563 			goto alldone;
564 		}
565 	}
566 
567 	/* ignore shadow inodes without ACLs */
568 	if (!vsecattr.vsa_aclentp && !vsecattr.vsa_dfaclentp) {
569 		err = 0;
570 		goto alldone;
571 	}
572 
573 	/* Convert from vsecattr struct to ufs_acl_entry struct */
574 	if ((err = vsecattr2aclentry(&vsecattr, &sp)) != 0) {
575 		goto alldone;
576 	}
577 
578 	/* These aren't filled in by vsecattr2aclentry */
579 	sp->s_shadow = ip->i_shadow;
580 	sp->s_dev = ip->i_dev;
581 	sp->s_use = usecnt;
582 	sp->s_ref = 1;
583 	ASSERT(sp->s_ref >= 0 && sp->s_ref <= sp->s_use);
584 
585 	/* XXX Might make a duplicate */
586 	si_cache_put(sp);
587 
588 	/* Signal anyone waiting on this shadow to be loaded */
589 	ip->i_ufs_acl = sp;
590 	err = 0;
591 	si_cachemiss++;
592 	if ((acldatalen + fs->fs_bsize) > ufsvfsp->vfs_maxacl)
593 		ufsvfsp->vfs_maxacl = acldatalen + fs->fs_bsize;
594 alldone:
595 	/*
596 	 * Common exit point. Mark shadow inode as ISTALE
597 	 * if we detect an internal inconsistency, to
598 	 * prevent stray inodes appearing in the cache.
599 	 */
600 	if (err) {
601 		rw_enter(&sip->i_contents, RW_READER);
602 		mutex_enter(&sip->i_tlock);
603 		sip->i_flag |= ISTALE;
604 		mutex_exit(&sip->i_tlock);
605 		rw_exit(&sip->i_contents);
606 	}
607 	VN_RELE(ITOV(sip));
608 
609 	/*
610 	 * Cleanup of data structures allocated
611 	 * on the fly.
612 	 */
613 	if (acldata)
614 		kmem_free(acldata, acldatalen);
615 
616 	if (vsecattr.vsa_aclentp)
617 		kmem_free(vsecattr.vsa_aclentp,
618 		    vsecattr.vsa_aclcnt * sizeof (aclent_t));
619 	if (vsecattr.vsa_dfaclentp)
620 		kmem_free(vsecattr.vsa_dfaclentp,
621 		    vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
622 	return (err);
623 }
624 
625 /*
626  * Check the inode's ACL's to see if this mode of access is
627  * allowed; return 0 if allowed, EACCES if not.
628  *
629  * We follow the procedure defined in Sec. 3.3.5, ACL Access
630  * Check Algorithm, of the POSIX 1003.6 Draft Standard.
631  */
632 int
633 ufs_acl_access(struct inode *ip, int mode, cred_t *cr)
634 /*
635  *	ip 	parent inode
636  *	mode 	mode of access read, write, execute/examine
637  *	cr	credentials
638  */
639 {
640 	ufs_ic_acl_t *acl;
641 	int ismask, mask = 0;
642 	int gperm = 0;
643 	int ngroup = 0;
644 	si_t	*sp = NULL;
645 	uid_t uid = crgetuid(cr);
646 	uid_t owner;
647 
648 	ASSERT(ip->i_ufs_acl != NULL);
649 
650 	sp = ip->i_ufs_acl;
651 
652 	ismask = sp->aclass.acl_ismask ?
653 	    sp->aclass.acl_ismask : 0;
654 
655 	if (ismask)
656 		mask = sp->aclass.acl_maskbits;
657 	else
658 		mask = -1;
659 
660 	/*
661 	 * (1) If user owns the file, obey user mode bits
662 	 */
663 	owner = sp->aowner->acl_ic_who;
664 	if (uid == owner) {
665 		return (MODE_CHECK(owner, mode,
666 		    (sp->aowner->acl_ic_perm << 6), cr, ip));
667 	}
668 
669 	/*
670 	 * (2) Obey any matching ACL_USER entry
671 	 */
672 	if (sp->ausers)
673 		for (acl = sp->ausers; acl != NULL; acl = acl->acl_ic_next) {
674 			if (acl->acl_ic_who == uid) {
675 				return (MODE_CHECK(owner, mode,
676 				    (mask & acl->acl_ic_perm) << 6, cr, ip));
677 			}
678 		}
679 
680 	/*
681 	 * (3) If user belongs to file's group, obey group mode bits
682 	 * if no ACL mask is defined; if there is an ACL mask, we look
683 	 * at both the group mode bits and any ACL_GROUP entries.
684 	 */
685 	if (groupmember((uid_t)sp->agroup->acl_ic_who, cr)) {
686 		ngroup++;
687 		gperm = (sp->agroup->acl_ic_perm);
688 		if (!ismask)
689 			return (MODE_CHECK(owner, mode, (gperm << 6), cr, ip));
690 	}
691 
692 	/*
693 	 * (4) Accumulate the permissions in matching ACL_GROUP entries
694 	 */
695 	if (sp->agroups)
696 		for (acl = sp->agroups; acl != NULL;
697 		    acl = acl->acl_ic_next) {
698 			if (groupmember(acl->acl_ic_who, cr)) {
699 				ngroup++;
700 				gperm |= acl->acl_ic_perm;
701 			}
702 		}
703 
704 	if (ngroup != 0)
705 		return (MODE_CHECK(owner, mode, ((gperm & mask) << 6), cr, ip));
706 
707 	/*
708 	 * (5) Finally, use the "other" mode bits
709 	 */
710 	return (MODE_CHECK(owner, mode, sp->aother->acl_ic_perm << 6, cr, ip));
711 }
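
/*
 * Worked example of the algorithm above (editorial sketch, not from
 * the original source): suppose the caller matches an ACL_USER entry
 * whose perms are rwx (7) while the file's mask is r-- (4).  Step (2)
 * evaluates MODE_CHECK(owner, mode, (4 & 7) << 6, cr, ip), so only
 * read access can be granted (privilege overrides in MODE_CHECK
 * aside), regardless of the entry's own write and execute bits.
 */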
712 
713 /*ARGSUSED2*/
714 int
715 ufs_acl_get(struct inode *ip, vsecattr_t *vsap, int flag, cred_t *cr)
716 {
717 	aclent_t	*aclentp;
718 
719 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
720 
721 	/* XXX Range check, sanity check, shadow check */
722 	/* If an ACL is present, get the data from the shadow inode info */
723 	if (ip->i_ufs_acl)
724 		return (aclentry2vsecattr(ip->i_ufs_acl, vsap));
725 
726 	/*
727 	 * If no ACLs are present, fabricate one from the mode bits.
728 	 * This code is almost identical to fs_fab_acl(), but we
729 	 * already have the mode bits handy, so we'll avoid going
730 	 * through VOP_GETATTR() again.
731 	 */
732 
733 	vsap->vsa_aclcnt    = 0;
734 	vsap->vsa_aclentp   = NULL;
735 	vsap->vsa_dfaclcnt  = 0;	/* Default ACLs are not fabricated */
736 	vsap->vsa_dfaclentp = NULL;
737 
738 	if (vsap->vsa_mask & (VSA_ACLCNT | VSA_ACL))
739 		vsap->vsa_aclcnt    = 4;  /* USER, GROUP, OTHER, and CLASS */
740 
741 	if (vsap->vsa_mask & VSA_ACL) {
742 		vsap->vsa_aclentp = kmem_zalloc(4 * sizeof (aclent_t),
743 		    KM_SLEEP);
744 		if (vsap->vsa_aclentp == NULL)
745 			return (ENOMEM);
746 		aclentp = vsap->vsa_aclentp;
747 
748 		/* Owner */
749 		aclentp->a_type = USER_OBJ;
750 		aclentp->a_perm = ((ushort_t)(ip->i_mode & 0700)) >> 6;
751 		aclentp->a_id = ip->i_uid;	/* Really undefined */
752 		aclentp++;
753 
754 		/* Group */
755 		aclentp->a_type = GROUP_OBJ;
756 		aclentp->a_perm = ((ushort_t)(ip->i_mode & 0070)) >> 3;
757 		aclentp->a_id = ip->i_gid; 	/* Really undefined */
758 		aclentp++;
759 
760 		/* Other */
761 		aclentp->a_type = OTHER_OBJ;
762 		aclentp->a_perm = ip->i_mode & 0007;
763 		aclentp->a_id = 0;		/* Really undefined */
764 		aclentp++;
765 
766 		/* Class */
767 		aclentp->a_type = CLASS_OBJ;
768 		aclentp->a_perm = ((ushort_t)(ip->i_mode & 0070)) >> 3;
769 		aclentp->a_id = 0;		/* Really undefined */
770 		ksort((caddr_t)vsap->vsa_aclentp, vsap->vsa_aclcnt,
771 		    sizeof (aclent_t), cmp2acls);
772 	}
773 
774 	return (0);
775 }
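
/*
 * Example of the fabricated list (editorial, not from the original
 * source): for a file with i_mode 0644 the code above produces, once
 * sorted, USER_OBJ/6, GROUP_OBJ/4, CLASS_OBJ/4 and OTHER_OBJ/4,
 * mirroring what fs_fab_acl() would synthesize.
 */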
776 
777 /*ARGSUSED2*/
778 int
779 ufs_acl_set(struct inode *ip, vsecattr_t *vsap, int flag, cred_t *cr)
780 {
781 	si_t	*sp;
782 	int	err;
783 
784 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
785 
786 	if (!CHECK_ACL_ALLOWED(ip->i_mode & IFMT))
787 		return (ENOSYS);
788 
789 	/*
790 	 * only the owner of the file or privileged users can change the ACLs
791 	 */
792 	if (secpolicy_vnode_setdac(cr, ip->i_uid) != 0)
793 		return (EPERM);
794 
795 	/* Convert from vsecattr struct to ufs_acl_entry struct */
796 	if ((err = vsecattr2aclentry(vsap, &sp)) != 0)
797 		return (err);
798 	sp->s_dev = ip->i_dev;
799 
800 	/*
801 	 * Make the user & group objs in the acl list follow what's
802 	 * in the inode.
803 	 */
804 #ifdef DEBUG
805 	if (vsap->vsa_mask == VSA_ACL) {
806 		ASSERT(sp->aowner);
807 		ASSERT(sp->agroup);
808 		ASSERT(sp->aother);
809 	}
810 #endif	/* DEBUG */
811 
812 	if (sp->aowner)
813 		sp->aowner->acl_ic_who = ip->i_uid;
814 	if (sp->agroup)
815 		sp->agroup->acl_ic_who = ip->i_gid;
816 
817 	/*
818 	 * Write and cache the new acl list
819 	 */
820 	err = ufs_si_store(ip, sp, 1, cr);
821 
822 	return (err);
823 }
824 
825 /*
826  * XXX Scan sorted array of acl's, checking for:
827  * 1) Any duplicate/conflicting entries (same type and id)
828  * 2) More than 1 of USER_OBJ, GROUP_OBJ, OTHER_OBJ, CLASS_OBJ
829  * 3) More than 1 of DEF_USER_OBJ, DEF_GROUP_OBJ, DEF_OTHER_OBJ, DEF_CLASS_OBJ
830  *
831  * Parameters:
832  * aclentp - ptr to sorted list of acl entries.
833  * nentries - # acl entries on the list
834  * flag - Either ACL_CHECK or DEF_ACL_CHECK, indicating whether the
835  * list contains regular acls or default acls; the two cannot be combined.
836  *
837  * Returns:	0 - Success
838  * EINVAL - Invalid list (dups or multiple entries of type USER_OBJ, etc)
839  */
840 static int
841 acl_validate(aclent_t *aclentp, int nentries, int flag)
842 {
843 	int	i;
844 	int	nuser_objs = 0;
845 	int	ngroup_objs = 0;
846 	int	nother_objs = 0;
847 	int	nclass_objs = 0;
848 	int	ndef_user_objs = 0;
849 	int	ndef_group_objs = 0;
850 	int	ndef_other_objs = 0;
851 	int	ndef_class_objs = 0;
852 	int	nusers = 0;
853 	int	ngroups = 0;
854 	int	ndef_users = 0;
855 	int	ndef_groups = 0;
856 	int	numdefs = 0;
857 
858 	/* Null list or list of one */
859 	if (aclentp == NULL)
860 		return (0);
861 
862 	if (nentries <= 0)
863 		return (EINVAL);
864 
865 	for (i = 1; i < nentries; i++) {
866 		if (((aclentp[i - 1].a_type == aclentp[i].a_type) &&
867 		    (aclentp[i - 1].a_id   == aclentp[i].a_id)) ||
868 		    (aclentp[i - 1].a_perm > 07)) {
869 			return (EINVAL);
870 		}
871 	}
872 
873 	if (flag == 0 || (flag != ACL_CHECK && flag != DEF_ACL_CHECK))
874 		return (EINVAL);
875 
876 	/* Count types */
877 	for (i = 0; i < nentries; i++) {
878 		switch (aclentp[i].a_type) {
879 		case USER_OBJ:		/* Owner */
880 			nuser_objs++;
881 			break;
882 		case GROUP_OBJ:		/* Group */
883 			ngroup_objs++;
884 			break;
885 		case OTHER_OBJ:		/* Other */
886 			nother_objs++;
887 			break;
888 		case CLASS_OBJ:		/* Mask */
889 			nclass_objs++;
890 			break;
891 		case DEF_USER_OBJ:	/* Default Owner */
892 			ndef_user_objs++;
893 			break;
894 		case DEF_GROUP_OBJ:	/* Default Group */
895 			ndef_group_objs++;
896 			break;
897 		case DEF_OTHER_OBJ:	/* Default Other */
898 			ndef_other_objs++;
899 			break;
900 		case DEF_CLASS_OBJ:	/* Default Mask */
901 			ndef_class_objs++;
902 			break;
903 		case USER:		/* Users */
904 			nusers++;
905 			break;
906 		case GROUP:		/* Groups */
907 			ngroups++;
908 			break;
909 		case DEF_USER:		/* Default Users */
910 			ndef_users++;
911 			break;
912 		case DEF_GROUP:		/* Default Groups */
913 			ndef_groups++;
914 			break;
915 		default:		/* Unknown type */
916 			return (EINVAL);
917 		}
918 	}
919 
920 	/*
921 	 * For normal acl's, we require there be one (and only one)
922 	 * USER_OBJ, GROUP_OBJ and OTHER_OBJ.  There is either zero
923 	 * or one CLASS_OBJ.
924 	 */
925 	if (flag & ACL_CHECK) {
926 		if (nuser_objs != 1 || ngroup_objs != 1 ||
927 		    nother_objs != 1 || nclass_objs > 1) {
928 			return (EINVAL);
929 		}
930 		/*
931 		 * If there are ANY group acls, there MUST be a
932 		 * class_obj(mask) acl (1003.6/D12 p. 29 lines 75-80).
933 		 */
934 		if (ngroups && !nclass_objs) {
935 			return (EINVAL);
936 		}
937 		if (nuser_objs + ngroup_objs + nother_objs + nclass_objs +
938 		    ngroups + nusers > MAX_ACL_ENTRIES)
939 			return (EINVAL);
940 	}
941 
942 	/*
943 	 * For default acl's, we require that there be either one (and only one)
944 	 * DEF_USER_OBJ, DEF_GROUP_OBJ and DEF_OTHER_OBJ
945 	 * or  there be none of them.
946 	 */
947 	if (flag & DEF_ACL_CHECK) {
948 		if (ndef_other_objs > 1 || ndef_user_objs > 1 ||
949 		    ndef_group_objs > 1 || ndef_class_objs > 1) {
950 			return (EINVAL);
951 		}
952 
953 		numdefs = ndef_other_objs + ndef_user_objs + ndef_group_objs;
954 
955 		if (numdefs != 0 && numdefs != 3) {
956 			return (EINVAL);
957 		}
958 		/*
959 		 * If there are ANY def_group acls, there MUST be a
960 		 * def_class_obj(mask) acl (1003.6/D12 P. 29 lines 75-80).
961 		 * XXX(jimh) This is inferred.
962 		 */
963 		if (ndef_groups && !ndef_class_objs) {
964 			return (EINVAL);
965 		}
966 		if ((ndef_users || ndef_groups) &&
967 		    ((numdefs != 3) && !ndef_class_objs)) {
968 			return (EINVAL);
969 		}
970 		if (ndef_user_objs + ndef_group_objs + ndef_other_objs +
971 		    ndef_class_objs + ndef_users + ndef_groups >
972 		    MAX_ACL_ENTRIES)
973 			return (EINVAL);
974 	}
975 	return (0);
976 }
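
/*
 * Example of a list that passes the checks above (editorial sketch,
 * not from the original source), shown in the sorted order that
 * acl_validate() expects:
 *
 *	USER_OBJ, USER, GROUP_OBJ, GROUP, CLASS_OBJ, OTHER_OBJ
 *
 * The CLASS_OBJ entry is mandatory here because a GROUP entry is
 * present.
 */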
977 
978 static int
979 formacl(ufs_ic_acl_t **aclpp, aclent_t *aclentp)
980 {
981 	ufs_ic_acl_t *uaclp;
982 
983 	uaclp = kmem_alloc(sizeof (ufs_ic_acl_t), KM_SLEEP);
984 	uaclp->acl_ic_perm = aclentp->a_perm;
985 	uaclp->acl_ic_who = aclentp->a_id;
986 	uaclp->acl_ic_next = *aclpp;
987 	*aclpp = uaclp;
988 	return (0);
989 }
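
/*
 * Editorial note: formacl() pushes each new entry on the head of the
 * target list, so vsecattr2aclentry() below walks the ksort()ed
 * arrays from their last element to their first; the linked lists it
 * builds therefore come out in ascending order.
 */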
990 
991 /*
992  * XXX - Make more efficient
993  * Convert from the vsecattr struct, used by the VOP interface, to
994  * the ufs_acl_entry struct used for in-core storage of acl's.
995  *
996  * Parameters:
997  * vsap - Ptr to array of security attributes.
998  * spp - Ptr to ptr to si struct for the results
999  *
1000  * Returns:	0 - Success
1001  * 		N - From errno.h
1002  */
1003 static int
1004 vsecattr2aclentry(vsecattr_t *vsap, si_t **spp)
1005 {
1006 	aclent_t	*aclentp, *aclp;
1007 	si_t		*sp;
1008 	int		err;
1009 	int		i;
1010 
1011 	/* Sort & validate the lists on the vsap */
1012 	ksort((caddr_t)vsap->vsa_aclentp, vsap->vsa_aclcnt,
1013 	    sizeof (aclent_t), cmp2acls);
1014 	ksort((caddr_t)vsap->vsa_dfaclentp, vsap->vsa_dfaclcnt,
1015 	    sizeof (aclent_t), cmp2acls);
1016 	if ((err = acl_validate(vsap->vsa_aclentp,
1017 	    vsap->vsa_aclcnt, ACL_CHECK)) != 0)
1018 		return (err);
1019 	if ((err = acl_validate(vsap->vsa_dfaclentp,
1020 	    vsap->vsa_dfaclcnt, DEF_ACL_CHECK)) != 0)
1021 		return (err);
1022 
1023 	/* Create new si struct and hang acl's off it */
1024 	sp = kmem_zalloc(sizeof (si_t), KM_SLEEP);
1025 	rw_init(&sp->s_lock, NULL, RW_DEFAULT, NULL);
1026 
1027 	/* Process acl list */
1028 	aclp = (aclent_t *)vsap->vsa_aclentp;
1029 	aclentp = aclp + vsap->vsa_aclcnt - 1;
1030 	for (i = 0; i < vsap->vsa_aclcnt; i++) {
1031 		switch (aclentp->a_type) {
1032 		case USER_OBJ:		/* Owner */
1033 			if (err = formacl(&sp->aowner, aclentp))
1034 				goto error;
1035 			break;
1036 		case GROUP_OBJ:		/* Group */
1037 			if (err = formacl(&sp->agroup, aclentp))
1038 				goto error;
1039 			break;
1040 		case OTHER_OBJ:		/* Other */
1041 			if (err = formacl(&sp->aother, aclentp))
1042 				goto error;
1043 			break;
1044 		case USER:
1045 			if (err = formacl(&sp->ausers, aclentp))
1046 				goto error;
1047 			break;
1048 		case CLASS_OBJ:		/* Mask */
1049 			sp->aclass.acl_ismask = 1;
1050 			sp->aclass.acl_maskbits = aclentp->a_perm;
1051 			break;
1052 		case GROUP:
1053 			if (err = formacl(&sp->agroups, aclentp))
1054 				goto error;
1055 			break;
1056 		default:
1057 			break;
1058 		}
1059 		aclentp--;
1060 	}
1061 
1062 	/* Process default acl list */
1063 	aclp = (aclent_t *)vsap->vsa_dfaclentp;
1064 	aclentp = aclp + vsap->vsa_dfaclcnt - 1;
1065 	for (i = 0; i < vsap->vsa_dfaclcnt; i++) {
1066 		switch (aclentp->a_type) {
1067 		case DEF_USER_OBJ:	/* Default Owner */
1068 			if (err = formacl(&sp->downer, aclentp))
1069 				goto error;
1070 			break;
1071 		case DEF_GROUP_OBJ:	/* Default Group */
1072 			if (err = formacl(&sp->dgroup, aclentp))
1073 				goto error;
1074 			break;
1075 		case DEF_OTHER_OBJ:	/* Default Other */
1076 			if (err = formacl(&sp->dother, aclentp))
1077 				goto error;
1078 			break;
1079 		case DEF_USER:
1080 			if (err = formacl(&sp->dusers, aclentp))
1081 				goto error;
1082 			break;
1083 		case DEF_CLASS_OBJ:	/* Default Mask */
1084 			sp->dclass.acl_ismask = 1;
1085 			sp->dclass.acl_maskbits = aclentp->a_perm;
1086 			break;
1087 		case DEF_GROUP:
1088 			if (err = formacl(&sp->dgroups, aclentp))
1089 				goto error;
1090 			break;
1091 		default:
1092 			break;
1093 		}
1094 		aclentp--;
1095 	}
1096 	*spp = sp;
1097 	return (0);
1098 
1099 error:
1100 	ufs_si_free_mem(sp);
1101 	return (err);
1102 }
1103 
1104 void
1105 formvsec(int obj_type, ufs_ic_acl_t *aclp, aclent_t **aclentpp)
1106 {
1107 	for (; aclp; aclp = aclp->acl_ic_next) {
1108 		(*aclentpp)->a_type = obj_type;
1109 		(*aclentpp)->a_perm = aclp->acl_ic_perm;
1110 		(*aclentpp)->a_id = aclp->acl_ic_who;
1111 		(*aclentpp)++;
1112 	}
1113 }
1114 
1115 /*
1116  * XXX - Make more efficient
1117  * Convert from the ufs_acl_entry struct used for in-core storage of acl's
1118  * to the vsecattr struct,  used by the VOP interface.
1119  *
1120  * Parameters:
1121  * sp - Ptr to si struct with the acls
1122  * vsap - Ptr to a vsecattr struct which will take the results.
1123  *
1124  * Returns:	0 - Success
1125  *		N - From errno table
1126  */
1127 static int
1128 aclentry2vsecattr(si_t *sp, vsecattr_t *vsap)
1129 {
1130 	aclent_t	*aclentp;
1131 	int		numacls = 0;
1132 	int		err;
1133 
1134 	vsap->vsa_aclentp = vsap->vsa_dfaclentp = NULL;
1135 
1136 	numacls = acl_count(sp->aowner) +
1137 	    acl_count(sp->agroup) +
1138 	    acl_count(sp->aother) +
1139 	    acl_count(sp->ausers) +
1140 	    acl_count(sp->agroups);
1141 	if (sp->aclass.acl_ismask)
1142 		numacls++;
1143 
1144 	if (numacls == 0)
1145 		goto do_defaults;
1146 
1147 	if (vsap->vsa_mask & (VSA_ACLCNT | VSA_ACL))
1148 		vsap->vsa_aclcnt = numacls;
1149 
1150 	if (vsap->vsa_mask & VSA_ACL) {
1151 		vsap->vsa_aclentp = kmem_zalloc(numacls * sizeof (aclent_t),
1152 		    KM_SLEEP);
1153 		aclentp = vsap->vsa_aclentp;
1154 
1155 		formvsec(USER_OBJ, sp->aowner, &aclentp);
1156 		formvsec(USER, sp->ausers, &aclentp);
1157 		formvsec(GROUP_OBJ, sp->agroup, &aclentp);
1158 		formvsec(GROUP, sp->agroups, &aclentp);
1159 		formvsec(OTHER_OBJ, sp->aother, &aclentp);
1160 
1161 		if (sp->aclass.acl_ismask) {
1162 			aclentp->a_type = CLASS_OBJ;		/* Mask */
1163 			aclentp->a_perm = sp->aclass.acl_maskbits;
1164 			aclentp->a_id = 0;
1165 			aclentp++;
1166 		}
1167 
1168 		/* Sort the acl list */
1169 		ksort((caddr_t)vsap->vsa_aclentp, vsap->vsa_aclcnt,
1170 		    sizeof (aclent_t), cmp2acls);
1171 		/* Check the acl list */
1172 		if ((err = acl_validate(vsap->vsa_aclentp,
1173 		    vsap->vsa_aclcnt, ACL_CHECK)) != 0) {
1174 			kmem_free(vsap->vsa_aclentp,
1175 			    numacls * sizeof (aclent_t));
1176 			vsap->vsa_aclentp = NULL;
1177 			return (err);
1178 		}
1179 
1180 	}
1181 do_defaults:
1182 	/* Process Defaults */
1183 
1184 	numacls = acl_count(sp->downer) +
1185 	    acl_count(sp->dgroup) +
1186 	    acl_count(sp->dother) +
1187 	    acl_count(sp->dusers) +
1188 	    acl_count(sp->dgroups);
1189 	if (sp->dclass.acl_ismask)
1190 		numacls++;
1191 
1192 	if (numacls == 0)
1193 		goto do_others;
1194 
1195 	if (vsap->vsa_mask & (VSA_DFACLCNT | VSA_DFACL))
1196 		vsap->vsa_dfaclcnt = numacls;
1197 
1198 	if (vsap->vsa_mask & VSA_DFACL) {
1199 		vsap->vsa_dfaclentp =
1200 		    kmem_zalloc(numacls * sizeof (aclent_t), KM_SLEEP);
1201 		aclentp = vsap->vsa_dfaclentp;
1202 		formvsec(DEF_USER_OBJ, sp->downer, &aclentp);
1203 		formvsec(DEF_USER, sp->dusers, &aclentp);
1204 		formvsec(DEF_GROUP_OBJ, sp->dgroup, &aclentp);
1205 		formvsec(DEF_GROUP, sp->dgroups, &aclentp);
1206 		formvsec(DEF_OTHER_OBJ, sp->dother, &aclentp);
1207 
1208 		if (sp->dclass.acl_ismask) {
1209 			aclentp->a_type = DEF_CLASS_OBJ;	/* Mask */
1210 			aclentp->a_perm = sp->dclass.acl_maskbits;
1211 			aclentp->a_id = 0;
1212 			aclentp++;
1213 		}
1214 
1215 		/* Sort the default acl list */
1216 		ksort((caddr_t)vsap->vsa_dfaclentp, vsap->vsa_dfaclcnt,
1217 		    sizeof (aclent_t), cmp2acls);
1218 		if ((err = acl_validate(vsap->vsa_dfaclentp,
1219 		    vsap->vsa_dfaclcnt, DEF_ACL_CHECK)) != 0) {
1220 			if (vsap->vsa_aclentp != NULL)
1221 				kmem_free(vsap->vsa_aclentp,
1222 				    vsap->vsa_aclcnt * sizeof (aclent_t));
1223 			kmem_free(vsap->vsa_dfaclentp,
1224 			    vsap->vsa_dfaclcnt * sizeof (aclent_t));
1225 			vsap->vsa_aclentp = vsap->vsa_dfaclentp = NULL;
1226 			return (err);
1227 		}
1228 	}
1229 
1230 do_others:
1231 	return (0);
1232 }
1233 
1234 static void
1235 acl_free(ufs_ic_acl_t *aclp)
1236 {
1237 	while (aclp != NULL) {
1238 		ufs_ic_acl_t *nextaclp = aclp->acl_ic_next;
1239 		kmem_free(aclp, sizeof (ufs_ic_acl_t));
1240 		aclp = nextaclp;
1241 	}
1242 }
1243 
1244 /*
1245  * ufs_si_free_mem will discard the sp, and the acl hanging off of the
1246  * sp.  It is required that the sp not be locked, and not be in the
1247  * cache.
1248  *
1249  * input: pointer to sp to discard.
1250  *
1251  * return - nothing.
1252  *
1253  */
1254 static void
1255 ufs_si_free_mem(si_t *sp)
1256 {
1257 	ASSERT(!(sp->s_flags & SI_CACHED));
1258 	ASSERT(!RW_LOCK_HELD(&sp->s_lock));
1259 	/*
1260 	 *	the sp has already been dropped from the cache;
1261 	 *	free the acl entries
1262 	 */
1263 	acl_free(sp->aowner);
1264 	acl_free(sp->agroup);
1265 	acl_free(sp->aother);
1266 	acl_free(sp->ausers);
1267 	acl_free(sp->agroups);
1268 
1269 	acl_free(sp->downer);
1270 	acl_free(sp->dgroup);
1271 	acl_free(sp->dother);
1272 	acl_free(sp->dusers);
1273 	acl_free(sp->dgroups);
1274 
1275 	rw_destroy(&sp->s_lock);
1276 	kmem_free(sp, sizeof (si_t));
1277 }
1278 
1279 void
1280 acl_cpy(ufs_ic_acl_t *saclp, ufs_ic_acl_t *daclp)
1281 {
1282 	ufs_ic_acl_t  *aclp, *prev_aclp = NULL, *aclp1;
1283 
1284 	if (saclp == NULL) {
1285 		/* nothing to copy; the destination list is left empty */
1286 		return;
1287 	}
1288 	prev_aclp = daclp;
1289 
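	/*
	 * Editorial note: the cast below relies on acl_ic_next being
	 * the first member of ufs_ic_acl_t, so the address of a next
	 * pointer can stand in for a list head while linking.
	 */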
1290 	for (aclp = saclp; aclp != NULL; aclp = aclp->acl_ic_next) {
1291 		aclp1 = kmem_alloc(sizeof (ufs_ic_acl_t), KM_SLEEP);
1292 		aclp1->acl_ic_next = NULL;
1293 		aclp1->acl_ic_who = aclp->acl_ic_who;
1294 		aclp1->acl_ic_perm = aclp->acl_ic_perm;
1295 		prev_aclp->acl_ic_next = aclp1;
1296 		prev_aclp = (ufs_ic_acl_t *)&aclp1->acl_ic_next;
1297 	}
1298 }
1299 
1300 /*
1301  *	ufs_si_inherit takes the acl structure of the parent directory
1302  *	(tdp) and the inode of the object that is inheriting an acl,
1303  *	and links the inherited acl to that inode.  It also writes the
1304  *	acl to disk if it is a new, unique shadow inode.
1305  *
1306  *	ip - pointer to inode of object inheriting the acl (contents lock)
1307  *	tdp - parent inode (rw_lock and contents lock)
1308  *	mode - creation modes
1309  *	cr - credentials pointer
1310  */
1311 int
1312 ufs_si_inherit(struct inode *ip, struct inode *tdp, o_mode_t mode, cred_t *cr)
1313 {
1314 	si_t *tsp, *sp = tdp->i_ufs_acl;
1315 	int error;
1316 	o_mode_t old_modes, old_uid, old_gid;
1317 	int mask;
1318 
1319 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
1320 	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
1321 	ASSERT(RW_WRITE_HELD(&tdp->i_contents));
1322 
1323 	/*
1324 	 * If links, symbolic links, or other objects that cannot carry
1325 	 * an acl are copied or moved to a directory with a default acl,
1326 	 * do not allow inheritance; just return.
1327 	 */
1328 	if (!CHECK_ACL_ALLOWED(ip->i_mode & IFMT))
1329 		return (0);
1330 
1331 	/* lock the parent security information */
1332 	rw_enter(&sp->s_lock, RW_READER);
1333 
1334 	ASSERT(((tdp->i_mode & IFMT) == IFDIR) ||
1335 	    ((tdp->i_mode & IFMT) == IFATTRDIR));
1336 
1337 	mask = ((sp->downer != NULL) ? 1 : 0) |
1338 	    ((sp->dgroup != NULL) ? 2 : 0) |
1339 	    ((sp->dother != NULL) ? 4 : 0);
1340 
1341 	if (mask == 0) {
1342 		rw_exit(&sp->s_lock);
1343 		return (0);
1344 	}
1345 
1346 	if (mask != 7) {
1347 		rw_exit(&sp->s_lock);
1348 		return (EINVAL);
1349 	}
1350 
1351 	tsp = kmem_zalloc(sizeof (si_t), KM_SLEEP);
1352 	rw_init(&tsp->s_lock, NULL, RW_DEFAULT, NULL);
1353 
1354 	/* copy the default acls */
1355 
1356 	ASSERT(RW_READ_HELD(&sp->s_lock));
1357 	acl_cpy(sp->downer, (ufs_ic_acl_t *)&tsp->aowner);
1358 	acl_cpy(sp->dgroup, (ufs_ic_acl_t *)&tsp->agroup);
1359 	acl_cpy(sp->dother, (ufs_ic_acl_t *)&tsp->aother);
1360 	acl_cpy(sp->dusers, (ufs_ic_acl_t *)&tsp->ausers);
1361 	acl_cpy(sp->dgroups, (ufs_ic_acl_t *)&tsp->agroups);
1362 	tsp->aclass.acl_ismask = sp->dclass.acl_ismask;
1363 	tsp->aclass.acl_maskbits = sp->dclass.acl_maskbits;
1364 
1365 	/*
1366 	 * set the owner, group, and other values from the master
1367 	 * inode.
1368 	 */
1369 
1370 	MODE2ACL(tsp->aowner, (mode >> 6), ip->i_uid);
1371 	MODE2ACL(tsp->agroup, (mode >> 3), ip->i_gid);
1372 	MODE2ACL(tsp->aother, (mode), 0);
1373 
1374 	if (tsp->aclass.acl_ismask) {
1375 		tsp->aclass.acl_maskbits &= mode >> 3;
1376 	}
1377 
1378 
1379 	/* copy default acl if necessary */
1380 
1381 	if (((ip->i_mode & IFMT) == IFDIR) ||
1382 	    ((ip->i_mode & IFMT) == IFATTRDIR)) {
1383 		acl_cpy(sp->downer, (ufs_ic_acl_t *)&tsp->downer);
1384 		acl_cpy(sp->dgroup, (ufs_ic_acl_t *)&tsp->dgroup);
1385 		acl_cpy(sp->dother, (ufs_ic_acl_t *)&tsp->dother);
1386 		acl_cpy(sp->dusers, (ufs_ic_acl_t *)&tsp->dusers);
1387 		acl_cpy(sp->dgroups, (ufs_ic_acl_t *)&tsp->dgroups);
1388 		tsp->dclass.acl_ismask = sp->dclass.acl_ismask;
1389 		tsp->dclass.acl_maskbits = sp->dclass.acl_maskbits;
1390 	}
1391 	/*
1392 	 * save the new 9 mode bits in the inode (ip->ic_smode) for
1393 	 * ufs_getattr.  Be sure the mode can be recovered if the store
1394 	 * fails.
1395 	 */
1396 	old_modes = ip->i_mode;
1397 	old_uid = ip->i_uid;
1398 	old_gid = ip->i_gid;
1399 	/*
1400 	 * store the acl, and get back a new security anchor if
1401 	 * it is a duplicate.
1402 	 */
1403 	rw_exit(&sp->s_lock);
1404 	rw_enter(&ip->i_rwlock, RW_WRITER);
1405 
1406 	/*
1407 	 * Suppress out of inodes messages if instructed in the
1408 	 * tdp inode.
1409 	 */
1410 	ip->i_flag |= tdp->i_flag & IQUIET;
1411 
1412 	if ((error = ufs_si_store(ip, tsp, 0, cr)) != 0) {
1413 		ip->i_mode = old_modes;
1414 		ip->i_uid = old_uid;
1415 		ip->i_gid = old_gid;
1416 	}
1417 	ip->i_flag &= ~IQUIET;
1418 	rw_exit(&ip->i_rwlock);
1419 	return (error);
1420 }
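
/*
 * Worked inheritance example (editorial, not from the original
 * source): a file created with mode 0640 in a directory whose default
 * ACL carries a mask of rwx (7) inherits the default user and group
 * lists unchanged, gets owner/group/other entries of 6, 4 and 0
 * straight from the creation mode, and has the inherited mask reduced
 * to 7 & 4 == 4 by the code above.
 */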
1421 
1422 si_t *
1423 ufs_acl_cp(si_t *sp)
1424 {
1425 
1426 	si_t *dsp;
1427 
1428 	ASSERT(RW_READ_HELD(&sp->s_lock));
1429 	ASSERT(sp->s_ref && sp->s_use);
1430 
1431 	dsp = kmem_zalloc(sizeof (si_t), KM_SLEEP);
1432 	rw_init(&dsp->s_lock, NULL, RW_DEFAULT, NULL);
1433 
1434 	acl_cpy(sp->aowner, (ufs_ic_acl_t *)&dsp->aowner);
1435 	acl_cpy(sp->agroup, (ufs_ic_acl_t *)&dsp->agroup);
1436 	acl_cpy(sp->aother, (ufs_ic_acl_t *)&dsp->aother);
1437 	acl_cpy(sp->ausers, (ufs_ic_acl_t *)&dsp->ausers);
1438 	acl_cpy(sp->agroups, (ufs_ic_acl_t *)&dsp->agroups);
1439 
1440 	dsp->aclass.acl_ismask = sp->aclass.acl_ismask;
1441 	dsp->aclass.acl_maskbits = sp->aclass.acl_maskbits;
1442 
1443 	acl_cpy(sp->downer, (ufs_ic_acl_t *)&dsp->downer);
1444 	acl_cpy(sp->dgroup, (ufs_ic_acl_t *)&dsp->dgroup);
1445 	acl_cpy(sp->dother, (ufs_ic_acl_t *)&dsp->dother);
1446 	acl_cpy(sp->dusers, (ufs_ic_acl_t *)&dsp->dusers);
1447 	acl_cpy(sp->dgroups, (ufs_ic_acl_t *)&dsp->dgroups);
1448 
1449 	dsp->dclass.acl_ismask = sp->dclass.acl_ismask;
1450 	dsp->dclass.acl_maskbits = sp->dclass.acl_maskbits;
1451 
1452 	return (dsp);
1453 
1454 }
1455 
1456 int
1457 ufs_acl_setattr(struct inode *ip, struct vattr *vap, cred_t *cr)
1458 {
1459 
1460 	si_t *sp;
1461 	int mask = vap->va_mask;
1462 	int error = 0;
1463 
1464 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
1465 
1466 	if (!(mask & (AT_MODE|AT_UID|AT_GID)))
1467 		return (0);
1468 
1469 	/*
1470 	 * if no regular acl's, nothing to do, so let's get out
1471 	 */
1472 	if (!(ip->i_ufs_acl) || !(ip->i_ufs_acl->aowner))
1473 		return (0);
1474 
1475 	rw_enter(&ip->i_ufs_acl->s_lock, RW_READER);
1476 	sp = ufs_acl_cp(ip->i_ufs_acl);
1477 	ASSERT(sp != ip->i_ufs_acl);
1478 
1479 	/*
1480 	 * set the mask to the group permissions if a mask entry
1481 	 * exists.  Otherwise, set the group obj bits to the group
1482 	 * permissions.  Since non-trivial ACLs always have a mask,
1483 	 * and the mask is the final arbiter of group permissions,
1484 	 * setting the mask has the effect of changing the effective
1485 	 * group permissions, even if the group_obj permissions in
1486 	 * the ACL aren't changed.  Posix P1003.1e states that when
1487 	 * an ACL mask exists, chmod(2) must set the acl mask (NOT the
1488 	 * group_obj permissions) to the requested group permissions.
1489 	 */
1490 	if (mask & AT_MODE) {
1491 		sp->aowner->acl_ic_perm = (o_mode_t)(ip->i_mode & 0700) >> 6;
1492 		if (sp->aclass.acl_ismask)
1493 			sp->aclass.acl_maskbits =
1494 			    (o_mode_t)(ip->i_mode & 070) >> 3;
1495 		else
1496 			sp->agroup->acl_ic_perm =
1497 			    (o_mode_t)(ip->i_mode & 070) >> 3;
1498 		sp->aother->acl_ic_perm = (o_mode_t)(ip->i_mode & 07);
1499 	}
1500 
1501 	if (mask & AT_UID) {
1502 		/* Caller has verified our privileges */
1503 		sp->aowner->acl_ic_who = ip->i_uid;
1504 	}
1505 
1506 	if (mask & AT_GID) {
1507 		sp->agroup->acl_ic_who = ip->i_gid;
1508 	}
1509 
1510 	rw_exit(&ip->i_ufs_acl->s_lock);
1511 	error = ufs_si_store(ip, sp, 0, cr);
1512 	return (error);
1513 }
1514 
1515 static int
1516 acl_count(ufs_ic_acl_t *p)
1517 {
1518 	ufs_ic_acl_t	*acl;
1519 	int		count;
1520 
1521 	for (count = 0, acl = p; acl; acl = acl->acl_ic_next, count++)
1522 		;
1523 	return (count);
1524 }
1525 
1526 /*
1527  *	Takes as input a security structure and generates a buffer
1528  *	with fsd's in a form which can be written to the shadow inode.
1529  */
1530 static int
1531 ufs_sectobuf(si_t *sp, caddr_t *buf, size_t *len)
1532 {
1533 	size_t		acl_size;
1534 	size_t		def_acl_size;
1535 	caddr_t		buffer;
1536 	struct ufs_fsd	*fsdp;
1537 	ufs_acl_t	*bufaclp;
1538 
1539 	/*
1540 	 * Calc size of buffer to hold all the acls
1541 	 */
1542 	acl_size = acl_count(sp->aowner) +		/* owner */
1543 	    acl_count(sp->agroup) +			/* owner group */
1544 	    acl_count(sp->aother) +			/* owner other */
1545 	    acl_count(sp->ausers) +			/* acl list */
1546 	    acl_count(sp->agroups);			/* group acls */
1547 	if (sp->aclass.acl_ismask)
1548 		acl_size++;
1549 
1550 	/* Convert to bytes */
1551 	acl_size *= sizeof (ufs_acl_t);
1552 
1553 	/* Add fsd header */
1554 	if (acl_size)
1555 		acl_size += 2 * sizeof (int);
1556 
1557 	/*
1558 	 * Calc size of buffer to hold all the default acls
1559 	 */
1560 	def_acl_size =
1561 	    acl_count(sp->downer) +	/* def owner */
1562 	    acl_count(sp->dgroup) +	/* def owner group */
1563 	    acl_count(sp->dother) +	/* def owner other */
1564 	    acl_count(sp->dusers) +	/* def users  */
1565 	    acl_count(sp->dgroups);	/* def group acls */
1566 	if (sp->dclass.acl_ismask)
1567 		def_acl_size++;
1568 
1569 	/*
1570 	 * Convert to bytes
1571 	 */
1572 	def_acl_size *= sizeof (ufs_acl_t);
1573 
1574 	/*
1575 	 * Add fsd header
1576 	 */
1577 	if (def_acl_size)
1578 		def_acl_size += 2 * sizeof (int);
1579 
1580 	if (acl_size + def_acl_size == 0)
1581 		return (0);
1582 
1583 	buffer = kmem_zalloc((acl_size + def_acl_size), KM_SLEEP);
1584 	bufaclp = (ufs_acl_t *)buffer;
1585 
1586 	if (acl_size == 0)
1587 		goto wrtdefs;
1588 
1589 	/* create fsd and copy acls */
1590 	fsdp = (struct ufs_fsd *)bufaclp;
1591 	fsdp->fsd_type = FSD_ACL;
1592 	bufaclp = (ufs_acl_t *)&fsdp->fsd_data[0];
1593 
1594 	ACL_MOVE(sp->aowner, USER_OBJ, bufaclp);
1595 	ACL_MOVE(sp->agroup, GROUP_OBJ, bufaclp);
1596 	ACL_MOVE(sp->aother, OTHER_OBJ, bufaclp);
1597 	ACL_MOVE(sp->ausers, USER, bufaclp);
1598 	ACL_MOVE(sp->agroups, GROUP, bufaclp);
1599 
1600 	if (sp->aclass.acl_ismask) {
1601 		bufaclp->acl_tag = CLASS_OBJ;
1602 		bufaclp->acl_who = (uid_t)sp->aclass.acl_ismask;
1603 		bufaclp->acl_perm = (o_mode_t)sp->aclass.acl_maskbits;
1604 		bufaclp++;
1605 	}
1606 	ASSERT(acl_size <= INT_MAX);
1607 	fsdp->fsd_size = (int)acl_size;
1608 
1609 wrtdefs:
1610 	if (def_acl_size == 0)
1611 		goto alldone;
1612 
1613 	/* if defaults exist then create fsd and copy default acls */
1614 	fsdp = (struct ufs_fsd *)bufaclp;
1615 	fsdp->fsd_type = FSD_DFACL;
1616 	bufaclp = (ufs_acl_t *)&fsdp->fsd_data[0];
1617 
1618 	ACL_MOVE(sp->downer, DEF_USER_OBJ, bufaclp);
1619 	ACL_MOVE(sp->dgroup, DEF_GROUP_OBJ, bufaclp);
1620 	ACL_MOVE(sp->dother, DEF_OTHER_OBJ, bufaclp);
1621 	ACL_MOVE(sp->dusers, DEF_USER, bufaclp);
1622 	ACL_MOVE(sp->dgroups, DEF_GROUP, bufaclp);
1623 	if (sp->dclass.acl_ismask) {
1624 		bufaclp->acl_tag = DEF_CLASS_OBJ;
1625 		bufaclp->acl_who = (uid_t)sp->dclass.acl_ismask;
1626 		bufaclp->acl_perm = (o_mode_t)sp->dclass.acl_maskbits;
1627 		bufaclp++;
1628 	}
1629 	ASSERT(def_acl_size <= INT_MAX);
1630 	fsdp->fsd_size = (int)def_acl_size;
1631 
1632 alldone:
1633 	*buf = buffer;
1634 	*len = acl_size + def_acl_size;
1635 
1636 	return (0);
1637 }
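
/*
 * Layout of the buffer built above (editorial sketch derived from the
 * code, not from a separate format spec):
 *
 *	int		fsd_type;	FSD_ACL
 *	int		fsd_size;	header plus n * sizeof (ufs_acl_t)
 *	ufs_acl_t	fsd_data[n];	the regular entries
 *	int		fsd_type;	FSD_DFACL, present only if
 *	int		fsd_size;	defaults exist
 *	ufs_acl_t	fsd_data[m];	the default entries
 */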
1638 
1639 /*
1640  *  free a shadow inode on disk and in memory
1641  */
1642 int
1643 ufs_si_free(si_t *sp, struct vfs *vfsp, cred_t *cr)
1644 {
1645 	struct inode 	*sip;
1646 	int 		shadow;
1647 	int 		err = 0;
1648 	int		refcnt;
1649 	int		signature;
1650 
1651 	ASSERT(vfsp);
1652 	ASSERT(sp);
1653 
1654 	rw_enter(&sp->s_lock, RW_READER);
1655 	ASSERT(sp->s_shadow <= INT_MAX);
1656 	shadow = (int)sp->s_shadow;
1657 	ASSERT(sp->s_ref);
1658 	rw_exit(&sp->s_lock);
1659 
1660 	/*
1661 	 * Decrement link count on the shadow inode,
1662 	 * and decrement reference count on the sip.
1663 	 */
1664 	if ((err = ufs_iget_alloced(vfsp, shadow, &sip, cr)) == 0) {
1665 		rw_enter(&sip->i_contents, RW_WRITER);
1666 		rw_enter(&sp->s_lock, RW_WRITER);
1667 		ASSERT(sp->s_shadow == shadow);
1668 		ASSERT(sip->i_dquot == 0);
1669 		/* Decrement link count */
1670 		ASSERT(sip->i_nlink > 0);
1671 		/*
1672 		 * bug #1264710 assertion failure below
1673 		 */
1674 		sp->s_use = --sip->i_nlink;
1675 		ufs_setreclaim(sip);
1676 		TRANS_INODE(sip->i_ufsvfs, sip);
1677 		sip->i_flag |= ICHG | IMOD;
1678 		sip->i_seq++;
1679 		ITIMES_NOLOCK(sip);
1680 		/* Dec ref counts on si referenced by this ip */
1681 		refcnt = --sp->s_ref;
1682 		signature = sp->s_signature;
1683 		ASSERT(sp->s_ref >= 0 && sp->s_ref <= sp->s_use);
1684 		/*
1685 		 * Release s_lock before calling VN_RELE
1686 		 * (which may want to acquire i_contents).
1687 		 */
1688 		rw_exit(&sp->s_lock);
1689 		rw_exit(&sip->i_contents);
1690 		VN_RELE(ITOV(sip));
1691 	} else {
1692 		rw_enter(&sp->s_lock, RW_WRITER);
1693 		/* Dec ref counts on si referenced by this ip */
1694 		refcnt = --sp->s_ref;
1695 		signature = sp->s_signature;
1696 		ASSERT(sp->s_ref >= 0 && sp->s_ref <= sp->s_use);
1697 		rw_exit(&sp->s_lock);
1698 	}
1699 
1700 	if (refcnt == 0)
1701 		si_cache_del(sp, signature);
1702 	return (err);
1703 }
1704 
1705 /*
1706  * Search the si cache for an si structure by inode #.
1707  * Returns a locked si structure.
1708  *
1709  * Parameters:
1710  * ip - Ptr to an inode on this fs
1711  * spp - Ptr to ptr to si struct for the results, if found.
1712  *
1713  * Returns:	0 - Success (results in spp)
1714  *		1 - Failure (spp undefined)
1715  */
1716 static int
1717 si_cachei_get(struct inode *ip, si_t **spp)
1718 {
1719 	si_t	*sp;
1720 
1721 	rw_enter(&si_cache_lock, RW_READER);
1722 loop:
1723 	for (sp = si_cachei[SI_HASH(ip->i_shadow)]; sp; sp = sp->s_forw)
1724 		if (sp->s_shadow == ip->i_shadow && sp->s_dev == ip->i_dev)
1725 			break;
1726 
1727 	if (sp == NULL) {
1728 		/* Not in cache */
1729 		rw_exit(&si_cache_lock);
1730 		return (1);
1731 	}
1732 	/* Found it */
1733 	rw_enter(&sp->s_lock, RW_WRITER);
1734 alldone:
1735 	rw_exit(&si_cache_lock);
1736 	*spp = sp;
1737 	return (0);
1738 }
1739 
1740 /*
1741  * Search the si cache by si structure (i.e. a duplicate of the one
1742  * passed in).  In order for a match the signatures must be the same,
1743  * the devices must be the same, the acls must match, and the link
1744  * count of the cached shadow must be no greater than MAXLINK - 1,
1745  * which leaves room for the caller to increment the count one
1746  * more time.
1747  * Returns a locked si structure.
1748  *
1749  * Parameters:
1750  * ip - Ptr to an inode on this fs
1751  * spi - Ptr to si the struct we're searching the cache for.
1752  * spp - Ptr to ptr to si struct for the results, if found.
1753  *
1754  * Returns:	0 - Success (results in spp)
1755  *		1 - Failure (spp undefined)
1756  */
1757 static int
1758 si_cachea_get(struct inode *ip, si_t *spi, si_t **spp)
1759 {
1760 	si_t	*sp;
1761 
1762 	spi->s_dev = ip->i_dev;
1763 	spi->s_signature = si_signature(spi);
1764 	rw_enter(&si_cache_lock, RW_READER);
1765 loop:
1766 	for (sp = si_cachea[SI_HASH(spi->s_signature)]; sp; sp = sp->s_next) {
1767 		if (sp->s_signature == spi->s_signature &&
1768 		    sp->s_dev == spi->s_dev &&
1769 		    sp->s_use > 0 &&			/* deleting */
1770 		    sp->s_use <= (MAXLINK - 1) &&	/* Too many links */
1771 		    !si_cmp(sp, spi))
1772 			break;
1773 	}
1774 
1775 	if (sp == NULL) {
1776 		/* Cache miss */
1777 		rw_exit(&si_cache_lock);
1778 		return (1);
1779 	}
1780 	/* Found it */
1781 	rw_enter(&sp->s_lock, RW_WRITER);
1782 alldone:
1783 	spi->s_shadow = sp->s_shadow; /* XXX For debugging */
1784 	rw_exit(&si_cache_lock);
1785 	*spp = sp;
1786 	return (0);
1787 }
1788 
1789 /*
1790  * Place an si structure in the si cache.  May cause duplicates.
1791  *
1792  * Parameters:
1793  * sp - Ptr to the si struct to add to the cache.
1794  *
1795  * Returns: Nothing (void)
1796  */
1797 static void
1798 si_cache_put(si_t *sp)
1799 {
1800 	si_t	**tspp;
1801 
1802 	ASSERT(sp->s_fore == NULL);
1803 	rw_enter(&si_cache_lock, RW_WRITER);
1804 	if (!sp->s_signature)
1805 		sp->s_signature = si_signature(sp);
1806 	sp->s_flags |= SI_CACHED;
1807 	sp->s_fore = NULL;
1808 
1809 	/* The 'by acl' chains */
1810 	tspp = &si_cachea[SI_HASH(sp->s_signature)];
1811 	sp->s_next = *tspp;
1812 	*tspp = sp;
1813 
1814 	/* The 'by inode' chains */
1815 	tspp = &si_cachei[SI_HASH(sp->s_shadow)];
1816 	sp->s_forw = *tspp;
1817 	*tspp = sp;
1818 
1819 	rw_exit(&si_cache_lock);
1820 }
1821 
1822 /*
1823  * The sp passed in is a candidate for deletion from the cache.  We acquire
1824  * the cache lock first, so no cache searches can be done.  Then we search
1825  * for the acl in the cache, and if we find it we can lock it and check that
1826  * nobody else attached to it while we were acquiring the locks.  If the acl
1827  * is in the cache and still has a zero reference count, then we remove it
1828  * from the cache and deallocate it.  If the reference count is non-zero or
1829  * it is not found in the cache, then someone else attached to it or has
1830  * already freed it, so we just return.
1831  *
1832  * Parameters:
1833  * sp - Ptr to the sp struct which is the candidate for deletion.
1834  * signature - the signature for the acl for lookup in the hash table
1835  *
1836  * Returns: Nothing (void)
1837  */
1838 void
1839 si_cache_del(si_t *sp, int signature)
1840 {
1841 	si_t	**tspp;
1842 	int	hash;
1843 	int	foundacl = 0;
1844 
1845 	/*
1846 	 * Unlink & free the sp from the other queues, then destroy it.
1847 	 * Search the 'by acl' chain first, then the 'by inode' chain
1848 	 * after the acl is locked.
1849 	 */
1850 	rw_enter(&si_cache_lock, RW_WRITER);
1851 	hash = SI_HASH(signature);
1852 	for (tspp = &si_cachea[hash]; *tspp; tspp = &(*tspp)->s_next) {
1853 		if (*tspp == sp) {
1854 			/*
1855 			 * Wait to grab the acl lock until after the acl has
1856 			 * been found in the cache.  Otherwise it might try to
1857 			 * grab a lock that has already been destroyed, or
1858 			 * delete an acl that has already been freed.
1859 			 */
1860 			rw_enter(&sp->s_lock, RW_WRITER);
1861 			/* See if someone else attached to it */
1862 			if (sp->s_ref) {
1863 				rw_exit(&sp->s_lock);
1864 				rw_exit(&si_cache_lock);
1865 				return;
1866 			}
1867 			ASSERT(sp->s_fore == NULL);
1868 			ASSERT(sp->s_flags & SI_CACHED);
1869 			foundacl = 1;
1870 			*tspp = sp->s_next;
1871 			break;
1872 		}
1873 	}
1874 
1875 	/*
1876 	 * If the acl was not in the cache, we assume another thread has
1877 	 * deleted it already. This could happen if another thread attaches to
1878 	 * the acl and then releases it after this thread has already found the
1879 	 * reference count to be zero but has not yet taken the cache lock.
1880 	 * Both threads end up seeing a reference count of zero, and call into
1881 	 * si_cache_del.  See bug 4244827 for details on the race condition.
1882 	 */
1883 	if (foundacl == 0) {
1884 		rw_exit(&si_cache_lock);
1885 		return;
1886 	}
1887 
1888 	/* Now check the 'by inode' chain */
1889 	hash = SI_HASH(sp->s_shadow);
1890 	for (tspp = &si_cachei[hash]; *tspp; tspp = &(*tspp)->s_forw) {
1891 		if (*tspp == sp) {
1892 			*tspp = sp->s_forw;
1893 			break;
1894 		}
1895 	}
1896 
1897 	/*
1898 	 * At this point, we can unlock everything because this si
1899 	 * is no longer in the cache, thus cannot be attached to.
1900 	 */
1901 	rw_exit(&sp->s_lock);
1902 	rw_exit(&si_cache_lock);
1903 	sp->s_flags &= ~SI_CACHED;
1904 	(void) ufs_si_free_mem(sp);
1905 }
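
/*
 * Timeline of the race handled above (editorial sketch of bug
 * 4244827): thread A drops s_ref to zero and releases s_lock; thread
 * B attaches, detaches, sees a zero count again and calls
 * si_cache_del(), freeing the acl; thread A then calls si_cache_del()
 * with its stale pointer.  A's hash-chain walk fails to find the acl,
 * so A returns without touching freed memory.
 */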
1906 
1907 /*
1908  * Alloc the hash buckets for the si cache & initialize
1909  * the cache lock.
1910  */
1911 void
1912 si_cache_init(void)
1913 {
1914 	rw_init(&si_cache_lock, NULL, RW_DEFAULT, NULL);
1915 
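	/*
	 * Editorial sketch, not in the original source: SI_HASH()
	 * relies on si_cachecnt being a power of two, so a defensive
	 * check such as this would catch a bad tuning of that variable
	 * (ISP2() comes from <sys/sysmacros.h>, already included).
	 */
	ASSERT(ISP2(si_cachecnt));
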
1916 	/* The 'by acl' headers */
1917 	si_cachea = kmem_zalloc(si_cachecnt * sizeof (si_t *), KM_SLEEP);
1918 	/* The 'by inode' headers */
1919 	si_cachei = kmem_zalloc(si_cachecnt * sizeof (si_t *), KM_SLEEP);
1920 }
1921 
1922 /*
1923  *  aclcksum takes an acl and generates a checksum.  It takes as input
1924  *  the acl to start at.
1925  *
1926  *  s_aclp - pointer to starting acl
1927  *
1928  *  returns checksum
1929  */
1930 static int
1931 aclcksum(ufs_ic_acl_t *s_aclp)
1932 {
1933 	ufs_ic_acl_t *aclp;
1934 	int signature = 0;
1935 	for (aclp = s_aclp; aclp; aclp = aclp->acl_ic_next) {
1936 		signature += aclp->acl_ic_perm;
1937 		signature += aclp->acl_ic_who;
1938 	}
1939 	return (signature);
1940 }
1941 
1942 /*
1943  * Generate a unique signature for an si structure.  Used by the
1944  * search routine si_cachea_get() to quickly identify candidates
1945  * prior to calling si_cmp().
1946  * Parameters:
1947  * sp - Ptr to the si struct to generate the signature for.
1948  *
1949  * Returns:  A signature for the si struct (really a checksum)
1950  */
1951 static int
1952 si_signature(si_t *sp)
1953 {
1954 	int signature = sp->s_dev;
1955 
1956 	signature += aclcksum(sp->aowner) + aclcksum(sp->agroup) +
1957 	    aclcksum(sp->aother) + aclcksum(sp->ausers) +
1958 	    aclcksum(sp->agroups) + aclcksum(sp->downer) +
1959 	    aclcksum(sp->dgroup) + aclcksum(sp->dother) +
1960 	    aclcksum(sp->dusers) + aclcksum(sp->dgroups);
1961 	if (sp->aclass.acl_ismask)
1962 		signature += sp->aclass.acl_maskbits;
1963 	if (sp->dclass.acl_ismask)
1964 		signature += sp->dclass.acl_maskbits;
1965 
1966 	return (signature);
1967 }
1968 
1969 /*
1970  * aclcmp compares two acls to see if they are identical.
1971  *
1972  * aclin1p is the first acl list
1973  * aclin2p is the second acl list
1974  *
1975  * returns 0 if equal and 1 if not equal
1976  */
1977 static int
1978 aclcmp(ufs_ic_acl_t *aclin1p, ufs_ic_acl_t *aclin2p)
1979 {
1980 	ufs_ic_acl_t *aclp1;
1981 	ufs_ic_acl_t *aclp2;
1982 
1983 	/*
1984 	 * if the starting pointers are equal then the lists are
1985 	 * identical, so just return
1986 	 */
1987 	if (aclin1p == aclin2p)
1988 		return (0);
1989 	/*
1990 	 * check element by element
1991 	 */
1992 	for (aclp1 = aclin1p, aclp2 = aclin2p; aclp1 && aclp2;
1993 	    aclp1 = aclp1->acl_ic_next, aclp2 = aclp2->acl_ic_next) {
1994 		if (aclp1->acl_ic_perm != aclp2->acl_ic_perm ||
1995 		    aclp1->acl_ic_who != aclp2->acl_ic_who)
1996 			return (1);
1997 	}
1998 	/*
1999 	 * both must be NULL (at the end of the acl)
2000 	 */
2001 	if (aclp1 || aclp2)
2002 		return (1);
2003 
2004 	return (0);
2005 }
2006 
2007 /*
2008  * Do extensive, field-by-field compare of two si structures.  Returns
2009  * 0 if they are exactly identical, 1 otherwise.
2010  *
2011  * Parameters:
2012  * sp1 - Ptr to 1st si struct
2013  * sp2 - Ptr to 2nd si struct
2014  *
2015  * Returns:
2016  *		0 - Identical
2017  *		1 - Not identical
2018  */
2019 static int
2020 si_cmp(si_t *sp1, si_t *sp2)
2021 {
2022 	if (sp1->s_dev != sp2->s_dev)
2023 		return (1);
2024 	if (aclcmp(sp1->aowner, sp2->aowner) ||
2025 	    aclcmp(sp1->agroup, sp2->agroup) ||
2026 	    aclcmp(sp1->aother, sp2->aother) ||
2027 	    aclcmp(sp1->ausers, sp2->ausers) ||
2028 	    aclcmp(sp1->agroups, sp2->agroups) ||
2029 	    aclcmp(sp1->downer, sp2->downer) ||
2030 	    aclcmp(sp1->dgroup, sp2->dgroup) ||
2031 	    aclcmp(sp1->dother, sp2->dother) ||
2032 	    aclcmp(sp1->dusers, sp2->dusers) ||
2033 	    aclcmp(sp1->dgroups, sp2->dgroups))
2034 		return (1);
2035 	if (sp1->aclass.acl_ismask != sp2->aclass.acl_ismask)
2036 		return (1);
2037 	if (sp1->dclass.acl_ismask != sp2->dclass.acl_ismask)
2038 		return (1);
2039 	if (sp1->aclass.acl_ismask &&
2040 	    sp1->aclass.acl_maskbits != sp2->aclass.acl_maskbits)
2041 		return (1);
2042 	if (sp1->dclass.acl_ismask &&
2043 	    sp1->dclass.acl_maskbits != sp2->dclass.acl_maskbits)
2044 		return (1);
2045 
2046 	return (0);
2047 }
2048 
2049 /*
2050  * Remove all acls associated with a device.  All acls must have
2051  * a reference count of zero.
2052  *
2053  * inputs:
2054  *	device - device to remove from the cache
2055  *
2056  * outputs:
2057  *	none
2058  */
2059 void
2060 ufs_si_cache_flush(dev_t dev)
2061 {
2062 	si_t *tsp, **tspp;
2063 	int i;
2064 
2065 	rw_enter(&si_cache_lock, RW_WRITER);
2066 	for (i = 0; i < si_cachecnt; i++) {
2067 		tspp = &si_cachea[i];
2068 		while (*tspp) {
2069 			if ((*tspp)->s_dev == dev) {
2070 				*tspp = (*tspp)->s_next;
2071 			} else {
2072 				tspp = &(*tspp)->s_next;
2073 			}
2074 		}
2075 	}
2076 	for (i = 0; i < si_cachecnt; i++) {
2077 		tspp = &si_cachei[i];
2078 		while (*tspp) {
2079 			if ((*tspp)->s_dev == dev) {
2080 				tsp = *tspp;
2081 				*tspp = (*tspp)->s_forw;
2082 				tsp->s_flags &= ~SI_CACHED;
2083 				ufs_si_free_mem(tsp);
2084 			} else {
2085 				tspp = &(*tspp)->s_forw;
2086 			}
2087 		}
2088 	}
2089 	rw_exit(&si_cache_lock);
2090 }
2091 
2092 /*
2093  * ufs_si_del is used to unhook an sp from an inode in memory
2094  *
2095  * ip is the inode to remove the sp from.
2096  */
2097 void
2098 ufs_si_del(struct inode *ip)
2099 {
2100 	si_t    *sp = ip->i_ufs_acl;
2101 	int	refcnt;
2102 	int	signature;
2103 
2104 	if (sp) {
2105 		rw_enter(&sp->s_lock, RW_WRITER);
2106 		refcnt = --sp->s_ref;
2107 		signature = sp->s_signature;
2108 		ASSERT(sp->s_ref >= 0 && sp->s_ref <= sp->s_use);
2109 		rw_exit(&sp->s_lock);
2110 		if (refcnt == 0)
2111 			si_cache_del(sp, signature);
2112 		ip->i_ufs_acl = NULL;
2113 	}
2114 }
2115