xref: /illumos-gate/usr/src/uts/common/fs/ufs/ufs_acl.c (revision ed093b41a93e8563e6e1e5dae0768dda2a7bcc27)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <sys/errno.h>
29 #include <sys/kmem.h>
30 #include <sys/t_lock.h>
31 #include <sys/ksynch.h>
32 #include <sys/buf.h>
33 #include <sys/vfs.h>
34 #include <sys/vnode.h>
35 #include <sys/mode.h>
36 #include <sys/systm.h>
37 #include <vm/seg.h>
38 #include <sys/file.h>
39 #include <sys/acl.h>
40 #include <sys/fs/ufs_inode.h>
41 #include <sys/fs/ufs_acl.h>
42 #include <sys/fs/ufs_quota.h>
43 #include <sys/sysmacros.h>
44 #include <sys/debug.h>
45 #include <sys/policy.h>
46 
/* Cache routines */
static int si_signature(si_t *);
static int si_cachei_get(struct inode *, si_t **);
static int si_cachea_get(struct inode *, si_t *, si_t **);
static int si_cmp(si_t *, si_t *);
static void si_cache_put(si_t *);
void si_cache_del(si_t *, int);
void si_cache_init(void);

/* In-core ACL (si_t) storage, validation and conversion helpers */
static void ufs_si_free_mem(si_t *);
static int ufs_si_store(struct inode *, si_t *, int, cred_t *);
static si_t *ufs_acl_cp(si_t *);
static int ufs_sectobuf(si_t *, caddr_t *, size_t *);
static int acl_count(ufs_ic_acl_t *);
static int acl_validate(aclent_t *, int, int);
static int vsecattr2aclentry(vsecattr_t *, si_t **);
static int aclentry2vsecattr(si_t *, vsecattr_t *);

krwlock_t si_cache_lock;		/* Protects si_cache */
int	si_cachecnt = 64;		/* # buckets in si_cache[a|i] */
si_t	**si_cachea;			/* The 'by acl' cache chains */
si_t	**si_cachei;			/* The 'by inode' cache chains */
long	si_cachehit = 0;		/* # times an si_t was found cached */
long	si_cachemiss = 0;		/* # times a new si_t was cached */

/*
 * Reduce S to a cache bucket index by masking with (si_cachecnt - 1).
 * The mask only yields every bucket if si_cachecnt is a power of two
 * (it is initialized to 64 above).
 */
#define	SI_HASH(S)	((int)(S) & (si_cachecnt - 1))
73 
74 /*
75  * Store the new acls in aclp.  Attempts to make things atomic.
76  * Search the acl cache for an identical sp and, if found, attach
77  * the cache'd acl to ip. If the acl is new (not in the cache),
78  * add it to the cache, then attach it to ip.  Last, remove and
79  * decrement the reference count of any prior acl list attached
80  * to the ip.
81  *
82  * Parameters:
83  * ip - Ptr to inode to receive the acl list
84  * sp - Ptr to in-core acl structure to attach to the inode.
85  * puship - 0 do not push the object inode(ip) 1 push the ip
86  * cr - Ptr to credentials
87  *
88  * Returns:	0 - Success
89  *		N - From errno.h
90  */
91 static int
92 ufs_si_store(struct inode *ip, si_t *sp, int puship, cred_t *cr)
93 {
94 	struct vfs	*vfsp;
95 	struct inode	*sip;
96 	si_t		*oldsp;
97 	si_t		*csp;
98 	caddr_t		acldata;
99 	ino_t		oldshadow;
100 	size_t		acldatalen;
101 	off_t		offset;
102 	int		shadow;
103 	int		err;
104 	int		refcnt;
105 	int		usecnt;
106 	int		signature;
107 	int		resid;
108 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
109 	struct fs	*fs		= ufsvfsp->vfs_fs;
110 
111 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
112 	ASSERT(ip->i_ufs_acl != sp);
113 
114 	if (!CHECK_ACL_ALLOWED(ip->i_mode & IFMT))
115 		return (ENOSYS);
116 
117 	/*
118 	 * if there are only the three owner/group/other then do not
119 	 * create a shadow inode.  If there is already a shadow with
120 	 * the file, remove it.
121 	 *
122 	 */
123 	if (!sp->ausers &&
124 	    !sp->agroups &&
125 	    !sp->downer &&
126 	    !sp->dgroup &&
127 	    !sp->dother &&
128 	    sp->dclass.acl_ismask == 0 &&
129 	    !sp->dusers &&
130 	    !sp->dgroups) {
131 		if (ip->i_ufs_acl)
132 			err = ufs_si_free(ip->i_ufs_acl, ITOV(ip)->v_vfsp, cr);
133 		ip->i_ufs_acl = NULL;
134 		ip->i_shadow = 0;
135 		ip->i_flag |= IMOD | IACC;
136 		ip->i_mode = (ip->i_smode & ~0777) |
137 		    ((sp->aowner->acl_ic_perm & 07) << 6) |
138 		    (MASK2MODE(sp)) |
139 		    (sp->aother->acl_ic_perm & 07);
140 		TRANS_INODE(ip->i_ufsvfs, ip);
141 		ufs_iupdat(ip, 1);
142 		ufs_si_free_mem(sp);
143 		return (0);
144 	}
145 
146 loop:
147 
148 	/*
149 	 * Check cache. If in cache, use existing shadow inode.
150 	 * Increment the shadow link count, then attach to the
151 	 * cached ufs_acl_entry struct, and increment it's reference
152 	 * count.  Then discard the passed-in ufs_acl_entry and
153 	 * return.
154 	 */
155 	if (si_cachea_get(ip, sp, &csp) == 0) {
156 		ASSERT(RW_WRITE_HELD(&csp->s_lock));
157 		if (ip->i_ufs_acl == csp) {
158 			rw_exit(&csp->s_lock);
159 			(void) ufs_si_free_mem(sp);
160 			return (0);
161 		}
162 		vfsp = ITOV(ip)->v_vfsp;
163 		ASSERT(csp->s_shadow <= INT_MAX);
164 		shadow = (int)csp->s_shadow;
165 		/*
166 		 * We can't call ufs_iget while holding the csp locked,
167 		 * because we might deadlock.  So we drop the
168 		 * lock on csp, then go search the si_cache again
169 		 * to see if the csp is still there.
170 		 */
171 		rw_exit(&csp->s_lock);
172 		if ((err = ufs_iget(vfsp, shadow, &sip, cr)) != 0) {
173 			(void) ufs_si_free_mem(sp);
174 			return (EIO);
175 		}
176 		rw_enter(&sip->i_contents, RW_WRITER);
177 		if ((sip->i_mode & IFMT) != IFSHAD || sip->i_nlink <= 0) {
178 			rw_exit(&sip->i_contents);
179 			VN_RELE(ITOV(sip));
180 			goto loop;
181 		}
182 		/* Get the csp again */
183 		if (si_cachea_get(ip, sp, &csp) != 0) {
184 			rw_exit(&sip->i_contents);
185 			VN_RELE(ITOV(sip));
186 			goto loop;
187 		}
188 		ASSERT(RW_WRITE_HELD(&csp->s_lock));
189 		/* See if we got the right shadow */
190 		if (csp->s_shadow != shadow) {
191 			rw_exit(&csp->s_lock);
192 			rw_exit(&sip->i_contents);
193 			VN_RELE(ITOV(sip));
194 			goto loop;
195 		}
196 		ASSERT(RW_WRITE_HELD(&sip->i_contents));
197 		ASSERT(sip->i_dquot == 0);
198 		/* Increment link count */
199 		ASSERT(sip->i_nlink > 0);
200 		sip->i_nlink++;
201 		TRANS_INODE(ufsvfsp, sip);
202 		csp->s_use = sip->i_nlink;
203 		csp->s_ref++;
204 		ASSERT(sp->s_ref >= 0 && sp->s_ref <= sp->s_use);
205 		sip->i_flag |= ICHG | IMOD;
206 		sip->i_seq++;
207 		ITIMES_NOLOCK(sip);
208 		/*
209 		 * Always release s_lock before both releasing i_contents
210 		 * and calling VN_RELE.
211 		 */
212 		rw_exit(&csp->s_lock);
213 		rw_exit(&sip->i_contents);
214 		VN_RELE(ITOV(sip));
215 		(void) ufs_si_free_mem(sp);
216 		sp = csp;
217 		si_cachehit++;
218 		goto switchshadows;
219 	}
220 
221 	/* Alloc a shadow inode and fill it in */
222 	err = ufs_ialloc(ip, ip->i_number, (mode_t)IFSHAD, &sip, cr);
223 	if (err) {
224 		(void) ufs_si_free_mem(sp);
225 		return (err);
226 	}
227 	rw_enter(&sip->i_contents, RW_WRITER);
228 	sip->i_flag |= IACC | IUPD | ICHG;
229 	sip->i_seq++;
230 	sip->i_mode = (o_mode_t)IFSHAD;
231 	ITOV(sip)->v_type = VREG;
232 	ufs_reset_vnode(ITOV(sip));
233 	sip->i_nlink = 1;
234 	sip->i_uid = crgetuid(cr);
235 	sip->i_suid = (ulong_t)sip->i_uid > (ulong_t)USHRT_MAX ?
236 	    UID_LONG : sip->i_uid;
237 	sip->i_gid = crgetgid(cr);
238 	sip->i_sgid = (ulong_t)sip->i_gid > (ulong_t)USHRT_MAX ?
239 	    GID_LONG : sip->i_gid;
240 	sip->i_shadow = 0;
241 	TRANS_INODE(ufsvfsp, sip);
242 	sip->i_ufs_acl = NULL;
243 	ASSERT(sip->i_size == 0);
244 
245 	sp->s_shadow = sip->i_number;
246 
247 	if ((err = ufs_sectobuf(sp, &acldata, &acldatalen)) != 0)
248 		goto errout;
249 	offset = 0;
250 
251 	/*
252 	 * We don't actually care about the residual count upon failure,
253 	 * but giving ufs_rdwri() the pointer means it won't translate
254 	 * all failures to EIO.  Our caller needs to know when ENOSPC
255 	 * gets hit.
256 	 */
257 	resid = 0;
258 	if (((err = ufs_rdwri(UIO_WRITE, FWRITE|FSYNC, sip, acldata,
259 	    acldatalen, (offset_t)0, UIO_SYSSPACE, &resid, cr)) != 0) ||
260 	    (resid != 0)) {
261 		kmem_free(acldata, acldatalen);
262 		if ((resid != 0) && (err == 0))
263 			err = ENOSPC;
264 		goto errout;
265 	}
266 
267 	offset += acldatalen;
268 	if ((acldatalen + fs->fs_bsize) > ufsvfsp->vfs_maxacl)
269 		ufsvfsp->vfs_maxacl = acldatalen + fs->fs_bsize;
270 
271 	kmem_free(acldata, acldatalen);
272 	/* Sync & free the shadow inode */
273 	ufs_iupdat(sip, 1);
274 	rw_exit(&sip->i_contents);
275 	VN_RELE(ITOV(sip));
276 
277 	/* We're committed to using this sp */
278 	sp->s_use = 1;
279 	sp->s_ref = 1;
280 
281 	/* Now put the new acl stuff in the cache */
282 	/* XXX Might make a duplicate */
283 	si_cache_put(sp);
284 	si_cachemiss++;
285 
286 switchshadows:
287 	/* Now switch the parent inode to use the new shadow inode */
288 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
289 	rw_enter(&sp->s_lock, RW_READER);
290 	oldsp = ip->i_ufs_acl;
291 	oldshadow = ip->i_shadow;
292 	ip->i_ufs_acl = sp;
293 	ASSERT(sp->s_shadow <= INT_MAX);
294 	ip->i_shadow = (int32_t)sp->s_shadow;
295 	ASSERT(oldsp != sp);
296 	ASSERT(oldshadow != ip->i_number);
297 	ASSERT(ip->i_number != ip->i_shadow);
298 	/*
299 	 * Change the mode bits to follow the acl list
300 	 *
301 	 * NOTE:	a directory is not required to have a "regular" acl
302 	 *		bug id's 1238908,  1257173, 1263171 and 1263188
303 	 *
304 	 *		but if a "regular" acl is present, it must contain
305 	 *		an "owner", "group", and "other" acl
306 	 *
307 	 *		If an ACL mask exists, the effective group rights are
308 	 *		set to the mask.  Otherwise, the effective group rights
309 	 *		are set to the object group bits.
310 	 */
311 	if (sp->aowner) {				/* Owner */
312 		ip->i_mode &= ~0700;			/* clear Owner */
313 		ip->i_mode |= (sp->aowner->acl_ic_perm & 07) << 6;
314 		ip->i_uid = sp->aowner->acl_ic_who;
315 	}
316 
317 	if (sp->agroup) {				/* Group */
318 		ip->i_mode &= ~0070;			/* clear Group */
319 		ip->i_mode |= MASK2MODE(sp);		/* apply mask */
320 		ip->i_gid = sp->agroup->acl_ic_who;
321 	}
322 
323 	if (sp->aother) {				/* Other */
324 		ip->i_mode &= ~0007;			/* clear Other */
325 		ip->i_mode |= (sp->aother->acl_ic_perm & 07);
326 	}
327 
328 	if (sp->aclass.acl_ismask)
329 		ip->i_mode = (ip->i_mode & ~070) |
330 		    (((sp->aclass.acl_maskbits & 07) << 3) &
331 		    ip->i_mode);
332 
333 	TRANS_INODE(ufsvfsp, ip);
334 	rw_exit(&sp->s_lock);
335 	ip->i_flag |= ICHG;
336 	ip->i_seq++;
337 	/*
338 	 * when creating a file there is no need to push the inode, it
339 	 * is pushed later
340 	 */
341 	if (puship == 1)
342 		ufs_iupdat(ip, 1);
343 
344 	/*
345 	 * Decrement link count on the old shadow inode,
346 	 * and decrement reference count on the old aclp,
347 	 */
348 	if (oldshadow) {
349 		/* Get the shadow inode */
350 		ASSERT(RW_WRITE_HELD(&ip->i_contents));
351 		vfsp = ITOV(ip)->v_vfsp;
352 		if ((err = ufs_iget_alloced(vfsp, oldshadow, &sip, cr)) != 0) {
353 			return (EIO);
354 		}
355 		/* Decrement link count */
356 		rw_enter(&sip->i_contents, RW_WRITER);
357 		if (oldsp)
358 			rw_enter(&oldsp->s_lock, RW_WRITER);
359 		ASSERT(sip->i_dquot == 0);
360 		ASSERT(sip->i_nlink > 0);
361 		usecnt = --sip->i_nlink;
362 		ufs_setreclaim(sip);
363 		TRANS_INODE(ufsvfsp, sip);
364 		sip->i_flag |= ICHG | IMOD;
365 		sip->i_seq++;
366 		ITIMES_NOLOCK(sip);
367 		if (oldsp) {
368 			oldsp->s_use = usecnt;
369 			refcnt = --oldsp->s_ref;
370 			signature = oldsp->s_signature;
371 			/*
372 			 * Always release s_lock before both releasing
373 			 * i_contents and calling VN_RELE.
374 			 */
375 			rw_exit(&oldsp->s_lock);
376 		}
377 		rw_exit(&sip->i_contents);
378 		VN_RELE(ITOV(sip));
379 		if (oldsp && (refcnt == 0))
380 			si_cache_del(oldsp, signature);
381 	}
382 	return (0);
383 
384 errout:
385 	/* Throw the newly alloc'd inode away */
386 	sip->i_nlink = 0;
387 	ufs_setreclaim(sip);
388 	TRANS_INODE(ufsvfsp, sip);
389 	ITIMES_NOLOCK(sip);
390 	rw_exit(&sip->i_contents);
391 	VN_RELE(ITOV(sip));
392 	ASSERT(!sp->s_use && !sp->s_ref && !(sp->s_flags & SI_CACHED));
393 	(void) ufs_si_free_mem(sp);
394 	return (err);
395 }
396 
397 /*
398  * Load the acls for inode ip either from disk (adding to the cache),
399  * or search the cache and attach the cache'd acl list to the ip.
400  * In either case, maintain the proper reference count on the cached entry.
401  *
402  * Parameters:
403  * ip - Ptr to the inode which needs the acl list loaded
404  * cr - Ptr to credentials
405  *
406  * Returns:	0 - Success
407  *		N - From errno.h
408  */
409 /*
410  *	ip	parent inode in
411  *	cr	credentials in
412  */
413 int
414 ufs_si_load(struct inode *ip, cred_t *cr)
415 {
416 	struct vfs	*vfsp;
417 	struct inode	*sip;
418 	ufs_fsd_t	*fsdp;
419 	si_t		*sp;
420 	vsecattr_t	vsecattr = { 0, 0, NULL, 0, NULL};
421 	aclent_t	*aclp;
422 	ufs_acl_t	*ufsaclp;
423 	caddr_t		acldata = NULL;
424 	ino_t		maxino;
425 	int		err;
426 	size_t		acldatalen;
427 	int		numacls;
428 	int		shadow;
429 	int		usecnt;
430 	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
431 	struct fs	*fs		= ufsvfsp->vfs_fs;
432 
433 	ASSERT(ip != NULL);
434 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
435 	ASSERT(ip->i_shadow && ip->i_ufs_acl == NULL);
436 	ASSERT((ip->i_mode & IFMT) != IFSHAD);
437 
438 	if (!CHECK_ACL_ALLOWED(ip->i_mode & IFMT))
439 		return (ENOSYS);
440 
441 	if (ip->i_shadow == ip->i_number)
442 		return (EIO);
443 
444 	maxino = (ino_t)(ITOF(ip)->fs_ncg * ITOF(ip)->fs_ipg);
445 	if (ip->i_shadow < UFSROOTINO || ip->i_shadow > maxino)
446 		return (EIO);
447 
448 	/*
449 	 * XXX Check cache.  If in cache, link to it and increment
450 	 * the reference count, then return.
451 	 */
452 	if (si_cachei_get(ip, &sp) == 0) {
453 		ASSERT(RW_WRITE_HELD(&sp->s_lock));
454 		ip->i_ufs_acl = sp;
455 		sp->s_ref++;
456 		ASSERT(sp->s_ref >= 0 && sp->s_ref <= sp->s_use);
457 		rw_exit(&sp->s_lock);
458 		si_cachehit++;
459 		return (0);
460 	}
461 
462 	/* Get the shadow inode */
463 	vfsp = ITOV(ip)->v_vfsp;
464 	shadow = ip->i_shadow;
465 	if ((err = ufs_iget_alloced(vfsp, shadow, &sip, cr)) != 0) {
466 		return (err);
467 	}
468 	rw_enter(&sip->i_contents, RW_WRITER);
469 
470 	if ((sip->i_mode & IFMT) != IFSHAD) {
471 		rw_exit(&sip->i_contents);
472 		err = EINVAL;
473 		goto alldone;
474 	}
475 
476 	ASSERT(sip->i_dquot == 0);
477 	usecnt = sip->i_nlink;
478 	if ((!ULOCKFS_IS_NOIACC(&ufsvfsp->vfs_ulockfs)) &&
479 	    (!(sip)->i_ufsvfs->vfs_noatime)) {
480 		sip->i_flag |= IACC;
481 	}
482 	rw_downgrade(&sip->i_contents);
483 
484 	ASSERT(sip->i_size <= MAXOFF_T);
485 	/* Read the acl's and other stuff from disk */
486 	acldata	 = kmem_zalloc((size_t)sip->i_size, KM_SLEEP);
487 	acldatalen = sip->i_size;
488 
489 	err = ufs_rdwri(UIO_READ, FREAD, sip, acldata, acldatalen, (offset_t)0,
490 	    UIO_SYSSPACE, (int *)0, cr);
491 
492 	rw_exit(&sip->i_contents);
493 
494 	if (err)
495 		goto alldone;
496 
497 	/*
498 	 * Convert from disk format
499 	 * Result is a vsecattr struct which we then convert to the
500 	 * si struct.
501 	 */
502 	bzero((caddr_t)&vsecattr, sizeof (vsecattr_t));
503 	for (fsdp = (ufs_fsd_t *)acldata;
504 	    fsdp < (ufs_fsd_t *)(acldata + acldatalen);
505 	    fsdp = (ufs_fsd_t *)((caddr_t)fsdp +
506 	    FSD_RECSZ(fsdp, fsdp->fsd_size))) {
507 		if (fsdp->fsd_size <= 0)
508 			break;
509 		switch (fsdp->fsd_type) {
510 		case FSD_ACL:
511 			numacls = vsecattr.vsa_aclcnt =
512 			    (int)((fsdp->fsd_size -
513 			    2 * sizeof (int)) / sizeof (ufs_acl_t));
514 			aclp = vsecattr.vsa_aclentp =
515 			    kmem_zalloc(numacls * sizeof (aclent_t), KM_SLEEP);
516 			for (ufsaclp = (ufs_acl_t *)fsdp->fsd_data;
517 			    numacls; ufsaclp++) {
518 				aclp->a_type = ufsaclp->acl_tag;
519 				aclp->a_id = ufsaclp->acl_who;
520 				aclp->a_perm = ufsaclp->acl_perm;
521 				aclp++;
522 				numacls--;
523 			}
524 			break;
525 		case FSD_DFACL:
526 			numacls = vsecattr.vsa_dfaclcnt =
527 			    (int)((fsdp->fsd_size -
528 			    2 * sizeof (int)) / sizeof (ufs_acl_t));
529 			aclp = vsecattr.vsa_dfaclentp =
530 			    kmem_zalloc(numacls * sizeof (aclent_t), KM_SLEEP);
531 			for (ufsaclp = (ufs_acl_t *)fsdp->fsd_data;
532 			    numacls; ufsaclp++) {
533 				aclp->a_type = ufsaclp->acl_tag;
534 				aclp->a_id = ufsaclp->acl_who;
535 				aclp->a_perm = ufsaclp->acl_perm;
536 				aclp++;
537 				numacls--;
538 			}
539 			break;
540 		}
541 	}
542 	/* Sort the lists */
543 	if (vsecattr.vsa_aclentp) {
544 		ksort((caddr_t)vsecattr.vsa_aclentp, vsecattr.vsa_aclcnt,
545 		    sizeof (aclent_t), cmp2acls);
546 		if ((err = acl_validate(vsecattr.vsa_aclentp,
547 		    vsecattr.vsa_aclcnt, ACL_CHECK)) != 0) {
548 			goto alldone;
549 		}
550 	}
551 	if (vsecattr.vsa_dfaclentp) {
552 		ksort((caddr_t)vsecattr.vsa_dfaclentp, vsecattr.vsa_dfaclcnt,
553 		    sizeof (aclent_t), cmp2acls);
554 		if ((err = acl_validate(vsecattr.vsa_dfaclentp,
555 		    vsecattr.vsa_dfaclcnt, DEF_ACL_CHECK)) != 0) {
556 			goto alldone;
557 		}
558 	}
559 
560 	/* ignore shadow inodes without ACLs */
561 	if (!vsecattr.vsa_aclentp && !vsecattr.vsa_dfaclentp) {
562 		err = 0;
563 		goto alldone;
564 	}
565 
566 	/* Convert from vsecattr struct to ufs_acl_entry struct */
567 	if ((err = vsecattr2aclentry(&vsecattr, &sp)) != 0) {
568 		goto alldone;
569 	}
570 
571 	/* There aren't filled in by vsecattr2aclentry */
572 	sp->s_shadow = ip->i_shadow;
573 	sp->s_dev = ip->i_dev;
574 	sp->s_use = usecnt;
575 	sp->s_ref = 1;
576 	ASSERT(sp->s_ref >= 0 && sp->s_ref <= sp->s_use);
577 
578 	/* XXX Might make a duplicate */
579 	si_cache_put(sp);
580 
581 	/* Signal anyone waiting on this shadow to be loaded */
582 	ip->i_ufs_acl = sp;
583 	err = 0;
584 	si_cachemiss++;
585 	if ((acldatalen + fs->fs_bsize) > ufsvfsp->vfs_maxacl)
586 		ufsvfsp->vfs_maxacl = acldatalen + fs->fs_bsize;
587 alldone:
588 	/*
589 	 * Common exit point. Mark shadow inode as ISTALE
590 	 * if we detect an internal inconsistency, to
591 	 * prevent stray inodes appearing in the cache.
592 	 */
593 	if (err) {
594 		rw_enter(&sip->i_contents, RW_READER);
595 		mutex_enter(&sip->i_tlock);
596 		sip->i_flag |= ISTALE;
597 		mutex_exit(&sip->i_tlock);
598 		rw_exit(&sip->i_contents);
599 	}
600 	VN_RELE(ITOV(sip));
601 
602 	/*
603 	 * Cleanup of data structures allocated
604 	 * on the fly.
605 	 */
606 	if (acldata)
607 		kmem_free(acldata, acldatalen);
608 
609 	if (vsecattr.vsa_aclentp)
610 		kmem_free(vsecattr.vsa_aclentp,
611 		    vsecattr.vsa_aclcnt * sizeof (aclent_t));
612 	if (vsecattr.vsa_dfaclentp)
613 		kmem_free(vsecattr.vsa_dfaclentp,
614 		    vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
615 	return (err);
616 }
617 
618 /*
619  * Check the inode's ACL's to see if this mode of access is
620  * allowed; return 0 if allowed, EACCES if not.
621  *
622  * We follow the procedure defined in Sec. 3.3.5, ACL Access
623  * Check Algorithm, of the POSIX 1003.6 Draft Standard.
624  */
625 /*
626  *	ip	parent inode
627  *	mode	mode of access read, write, execute/examine
628  *	cr	credentials
629  */
630 int
631 ufs_acl_access(struct inode *ip, int mode, cred_t *cr)
632 {
633 	ufs_ic_acl_t *acl;
634 	int ismask, mask = 0;
635 	int gperm = 0;
636 	int ngroup = 0;
637 	si_t	*sp = NULL;
638 	uid_t uid = crgetuid(cr);
639 	uid_t owner;
640 
641 	ASSERT(ip->i_ufs_acl != NULL);
642 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
643 
644 	sp = ip->i_ufs_acl;
645 
646 	ismask = sp->aclass.acl_ismask ?
647 	    sp->aclass.acl_ismask : 0;
648 
649 	if (ismask)
650 		mask = sp->aclass.acl_maskbits;
651 	else
652 		mask = -1;
653 
654 	/*
655 	 * (1) If user owns the file, obey user mode bits
656 	 */
657 	owner = sp->aowner->acl_ic_who;
658 	if (uid == owner) {
659 		return (MODE_CHECK(owner, mode, (sp->aowner->acl_ic_perm << 6),
660 		    cr, ip));
661 	}
662 
663 	/*
664 	 * (2) Obey any matching ACL_USER entry
665 	 */
666 	if (sp->ausers)
667 		for (acl = sp->ausers; acl != NULL; acl = acl->acl_ic_next) {
668 			if (acl->acl_ic_who == uid) {
669 				return (MODE_CHECK(owner, mode,
670 				    (mask & acl->acl_ic_perm) << 6, cr, ip));
671 			}
672 		}
673 
674 	/*
675 	 * (3) If user belongs to file's group, obey group mode bits
676 	 * if no ACL mask is defined; if there is an ACL mask, we look
677 	 * at both the group mode bits and any ACL_GROUP entries.
678 	 */
679 	if (groupmember((uid_t)sp->agroup->acl_ic_who, cr)) {
680 		ngroup++;
681 		gperm = (sp->agroup->acl_ic_perm);
682 		if (!ismask)
683 			return (MODE_CHECK(owner, mode, (gperm << 6), cr, ip));
684 	}
685 
686 	/*
687 	 * (4) Accumulate the permissions in matching ACL_GROUP entries
688 	 */
689 	if (sp->agroups)
690 		for (acl = sp->agroups; acl != NULL; acl = acl->acl_ic_next) {
691 			if (groupmember(acl->acl_ic_who, cr)) {
692 				ngroup++;
693 				gperm |= acl->acl_ic_perm;
694 			}
695 		}
696 
697 	if (ngroup != 0)
698 		return (MODE_CHECK(owner, mode, ((gperm & mask) << 6), cr, ip));
699 
700 	/*
701 	 * (5) Finally, use the "other" mode bits
702 	 */
703 	return (MODE_CHECK(owner, mode, sp->aother->acl_ic_perm << 6, cr, ip));
704 }
705 
706 /*ARGSUSED2*/
707 int
708 ufs_acl_get(struct inode *ip, vsecattr_t *vsap, int flag, cred_t *cr)
709 {
710 	aclent_t	*aclentp;
711 
712 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
713 
714 	/* XXX Range check, sanity check, shadow check */
715 	/* If an ACL is present, get the data from the shadow inode info */
716 	if (ip->i_ufs_acl)
717 		return (aclentry2vsecattr(ip->i_ufs_acl, vsap));
718 
719 	/*
720 	 * If no ACLs are present, fabricate one from the mode bits.
721 	 * This code is almost identical to fs_fab_acl(), but we
722 	 * already have the mode bits handy, so we'll avoid going
723 	 * through VOP_GETATTR() again.
724 	 */
725 
726 	vsap->vsa_aclcnt    = 0;
727 	vsap->vsa_aclentp   = NULL;
728 	vsap->vsa_dfaclcnt  = 0;	/* Default ACLs are not fabricated */
729 	vsap->vsa_dfaclentp = NULL;
730 
731 	if (vsap->vsa_mask & (VSA_ACLCNT | VSA_ACL))
732 		vsap->vsa_aclcnt    = 4;  /* USER, GROUP, OTHER, and CLASS */
733 
734 	if (vsap->vsa_mask & VSA_ACL) {
735 		vsap->vsa_aclentp = kmem_zalloc(4 * sizeof (aclent_t),
736 		    KM_SLEEP);
737 		if (vsap->vsa_aclentp == NULL)
738 			return (ENOMEM);
739 		aclentp = vsap->vsa_aclentp;
740 
741 		/* Owner */
742 		aclentp->a_type = USER_OBJ;
743 		aclentp->a_perm = ((ushort_t)(ip->i_mode & 0700)) >> 6;
744 		aclentp->a_id = ip->i_uid;	/* Really undefined */
745 		aclentp++;
746 
747 		/* Group */
748 		aclentp->a_type = GROUP_OBJ;
749 		aclentp->a_perm = ((ushort_t)(ip->i_mode & 0070)) >> 3;
750 		aclentp->a_id = ip->i_gid;	/* Really undefined */
751 		aclentp++;
752 
753 		/* Other */
754 		aclentp->a_type = OTHER_OBJ;
755 		aclentp->a_perm = ip->i_mode & 0007;
756 		aclentp->a_id = 0;		/* Really undefined */
757 		aclentp++;
758 
759 		/* Class */
760 		aclentp->a_type = CLASS_OBJ;
761 		aclentp->a_perm = ((ushort_t)(ip->i_mode & 0070)) >> 3;
762 		aclentp->a_id = 0;		/* Really undefined */
763 		ksort((caddr_t)vsap->vsa_aclentp, vsap->vsa_aclcnt,
764 		    sizeof (aclent_t), cmp2acls);
765 	}
766 
767 	return (0);
768 }
769 
770 /*ARGSUSED2*/
771 int
772 ufs_acl_set(struct inode *ip, vsecattr_t *vsap, int flag, cred_t *cr)
773 {
774 	si_t	*sp;
775 	int	err;
776 
777 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
778 
779 	if (!CHECK_ACL_ALLOWED(ip->i_mode & IFMT))
780 		return (ENOSYS);
781 
782 	/*
783 	 * only the owner of the file or privileged users can change the ACLs
784 	 */
785 	if (secpolicy_vnode_setdac(cr, ip->i_uid) != 0)
786 		return (EPERM);
787 
788 	/* Convert from vsecattr struct to ufs_acl_entry struct */
789 	if ((err = vsecattr2aclentry(vsap, &sp)) != 0)
790 		return (err);
791 	sp->s_dev = ip->i_dev;
792 
793 	/*
794 	 * Make the user & group objs in the acl list follow what's
795 	 * in the inode.
796 	 */
797 #ifdef DEBUG
798 	if (vsap->vsa_mask == VSA_ACL) {
799 		ASSERT(sp->aowner);
800 		ASSERT(sp->agroup);
801 		ASSERT(sp->aother);
802 	}
803 #endif	/* DEBUG */
804 
805 	if (sp->aowner)
806 		sp->aowner->acl_ic_who = ip->i_uid;
807 	if (sp->agroup)
808 		sp->agroup->acl_ic_who = ip->i_gid;
809 
810 	/*
811 	 * Write and cache the new acl list
812 	 */
813 	err = ufs_si_store(ip, sp, 1, cr);
814 
815 	return (err);
816 }
817 
818 /*
819  * XXX Scan sorted array of acl's, checking for:
820  * 1) Any duplicate/conflicting entries (same type and id)
821  * 2) More than 1 of USER_OBJ, GROUP_OBJ, OTHER_OBJ, CLASS_OBJ
822  * 3) More than 1 of DEF_USER_OBJ, DEF_GROUP_OBJ, DEF_OTHER_OBJ, DEF_CLASS_OBJ
823  *
824  * Parameters:
825  * aclentp - ptr to sorted list of acl entries.
826  * nentries - # acl entries on the list
827  * flag - Bitmap (ACL_CHECK and/or DEF_ACL_CHECK) indicating whether the
828  * list contains regular acls, default acls, or both.
829  *
830  * Returns:	0 - Success
831  * EINVAL - Invalid list (dups or multiple entries of type USER_OBJ, etc)
832  */
833 static int
834 acl_validate(aclent_t *aclentp, int nentries, int flag)
835 {
836 	int	i;
837 	int	nuser_objs = 0;
838 	int	ngroup_objs = 0;
839 	int	nother_objs = 0;
840 	int	nclass_objs = 0;
841 	int	ndef_user_objs = 0;
842 	int	ndef_group_objs = 0;
843 	int	ndef_other_objs = 0;
844 	int	ndef_class_objs = 0;
845 	int	nusers = 0;
846 	int	ngroups = 0;
847 	int	ndef_users = 0;
848 	int	ndef_groups = 0;
849 	int	numdefs = 0;
850 
851 	/* Null list or list of one */
852 	if (aclentp == NULL)
853 		return (0);
854 
855 	if (nentries <= 0)
856 		return (EINVAL);
857 
858 	for (i = 1; i < nentries; i++) {
859 		if (((aclentp[i - 1].a_type == aclentp[i].a_type) &&
860 		    (aclentp[i - 1].a_id   == aclentp[i].a_id)) ||
861 		    (aclentp[i - 1].a_perm > 07)) {
862 			return (EINVAL);
863 		}
864 	}
865 
866 	if (flag == 0 || (flag != ACL_CHECK && flag != DEF_ACL_CHECK))
867 		return (EINVAL);
868 
869 	/* Count types */
870 	for (i = 0; i < nentries; i++) {
871 		switch (aclentp[i].a_type) {
872 		case USER_OBJ:		/* Owner */
873 			nuser_objs++;
874 			break;
875 		case GROUP_OBJ:		/* Group */
876 			ngroup_objs++;
877 			break;
878 		case OTHER_OBJ:		/* Other */
879 			nother_objs++;
880 			break;
881 		case CLASS_OBJ:		/* Mask */
882 			nclass_objs++;
883 			break;
884 		case DEF_USER_OBJ:	/* Default Owner */
885 			ndef_user_objs++;
886 			break;
887 		case DEF_GROUP_OBJ:	/* Default Group */
888 			ndef_group_objs++;
889 			break;
890 		case DEF_OTHER_OBJ:	/* Default Other */
891 			ndef_other_objs++;
892 			break;
893 		case DEF_CLASS_OBJ:	/* Default Mask */
894 			ndef_class_objs++;
895 			break;
896 		case USER:		/* Users */
897 			nusers++;
898 			break;
899 		case GROUP:		/* Groups */
900 			ngroups++;
901 			break;
902 		case DEF_USER:		/* Default Users */
903 			ndef_users++;
904 			break;
905 		case DEF_GROUP:		/* Default Groups */
906 			ndef_groups++;
907 			break;
908 		default:		/* Unknown type */
909 			return (EINVAL);
910 		}
911 	}
912 
913 	/*
914 	 * For normal acl's, we require there be one (and only one)
915 	 * USER_OBJ, GROUP_OBJ and OTHER_OBJ.  There is either zero
916 	 * or one CLASS_OBJ.
917 	 */
918 	if (flag & ACL_CHECK) {
919 		if (nuser_objs != 1 || ngroup_objs != 1 ||
920 		    nother_objs != 1 || nclass_objs > 1) {
921 			return (EINVAL);
922 		}
923 		/*
924 		 * If there are ANY group acls, there MUST be a
925 		 * class_obj(mask) acl (1003.6/D12 p. 29 lines 75-80).
926 		 */
927 		if (ngroups && !nclass_objs) {
928 			return (EINVAL);
929 		}
930 		if (nuser_objs + ngroup_objs + nother_objs + nclass_objs +
931 		    ngroups + nusers > MAX_ACL_ENTRIES)
932 			return (EINVAL);
933 	}
934 
935 	/*
936 	 * For default acl's, we require that there be either one (and only one)
937 	 * DEF_USER_OBJ, DEF_GROUP_OBJ and DEF_OTHER_OBJ
938 	 * or  there be none of them.
939 	 */
940 	if (flag & DEF_ACL_CHECK) {
941 		if (ndef_other_objs > 1 || ndef_user_objs > 1 ||
942 		    ndef_group_objs > 1 || ndef_class_objs > 1) {
943 			return (EINVAL);
944 		}
945 
946 		numdefs = ndef_other_objs + ndef_user_objs + ndef_group_objs;
947 
948 		if (numdefs != 0 && numdefs != 3) {
949 			return (EINVAL);
950 		}
951 		/*
952 		 * If there are ANY def_group acls, there MUST be a
953 		 * def_class_obj(mask) acl (1003.6/D12 P. 29 lines 75-80).
954 		 * XXX(jimh) This is inferred.
955 		 */
956 		if (ndef_groups && !ndef_class_objs) {
957 			return (EINVAL);
958 		}
959 		if ((ndef_users || ndef_groups) &&
960 		    ((numdefs != 3) && !ndef_class_objs)) {
961 			return (EINVAL);
962 		}
963 		if (ndef_user_objs + ndef_group_objs + ndef_other_objs +
964 		    ndef_class_objs + ndef_users + ndef_groups >
965 		    MAX_ACL_ENTRIES)
966 			return (EINVAL);
967 	}
968 	return (0);
969 }
970 
971 static int
972 formacl(ufs_ic_acl_t **aclpp, aclent_t *aclentp)
973 {
974 	ufs_ic_acl_t *uaclp;
975 
976 	uaclp = kmem_alloc(sizeof (ufs_ic_acl_t), KM_SLEEP);
977 	uaclp->acl_ic_perm = aclentp->a_perm;
978 	uaclp->acl_ic_who = aclentp->a_id;
979 	uaclp->acl_ic_next = *aclpp;
980 	*aclpp = uaclp;
981 	return (0);
982 }
983 
984 /*
985  * XXX - Make more efficient
986  * Convert from the vsecattr struct, used by the VOP interface, to
987  * the ufs_acl_entry struct used for in-core storage of acl's.
988  *
989  * Parameters:
990  * vsap - Ptr to array of security attributes.
991  * spp - Ptr to ptr to si struct for the results
992  *
993  * Returns:	0 - Success
994  *		N - From errno.h
995  */
996 static int
997 vsecattr2aclentry(vsecattr_t *vsap, si_t **spp)
998 {
999 	aclent_t	*aclentp, *aclp;
1000 	si_t		*sp;
1001 	int		err;
1002 	int		i;
1003 
1004 	/* Sort & validate the lists on the vsap */
1005 	ksort((caddr_t)vsap->vsa_aclentp, vsap->vsa_aclcnt,
1006 	    sizeof (aclent_t), cmp2acls);
1007 	ksort((caddr_t)vsap->vsa_dfaclentp, vsap->vsa_dfaclcnt,
1008 	    sizeof (aclent_t), cmp2acls);
1009 	if ((err = acl_validate(vsap->vsa_aclentp,
1010 	    vsap->vsa_aclcnt, ACL_CHECK)) != 0)
1011 		return (err);
1012 	if ((err = acl_validate(vsap->vsa_dfaclentp,
1013 	    vsap->vsa_dfaclcnt, DEF_ACL_CHECK)) != 0)
1014 		return (err);
1015 
1016 	/* Create new si struct and hang acl's off it */
1017 	sp = kmem_zalloc(sizeof (si_t), KM_SLEEP);
1018 	rw_init(&sp->s_lock, NULL, RW_DEFAULT, NULL);
1019 
1020 	/* Process acl list */
1021 	aclp = (aclent_t *)vsap->vsa_aclentp;
1022 	aclentp = aclp + vsap->vsa_aclcnt - 1;
1023 	for (i = 0; i < vsap->vsa_aclcnt; i++) {
1024 		switch (aclentp->a_type) {
1025 		case USER_OBJ:		/* Owner */
1026 			if (err = formacl(&sp->aowner, aclentp))
1027 				goto error;
1028 			break;
1029 		case GROUP_OBJ:		/* Group */
1030 			if (err = formacl(&sp->agroup, aclentp))
1031 				goto error;
1032 			break;
1033 		case OTHER_OBJ:		/* Other */
1034 			if (err = formacl(&sp->aother, aclentp))
1035 				goto error;
1036 			break;
1037 		case USER:
1038 			if (err = formacl(&sp->ausers, aclentp))
1039 				goto error;
1040 			break;
1041 		case CLASS_OBJ:		/* Mask */
1042 			sp->aclass.acl_ismask = 1;
1043 			sp->aclass.acl_maskbits = aclentp->a_perm;
1044 			break;
1045 		case GROUP:
1046 			if (err = formacl(&sp->agroups, aclentp))
1047 				goto error;
1048 			break;
1049 		default:
1050 			break;
1051 		}
1052 		aclentp--;
1053 	}
1054 
1055 	/* Process default acl list */
1056 	aclp = (aclent_t *)vsap->vsa_dfaclentp;
1057 	aclentp = aclp + vsap->vsa_dfaclcnt - 1;
1058 	for (i = 0; i < vsap->vsa_dfaclcnt; i++) {
1059 		switch (aclentp->a_type) {
1060 		case DEF_USER_OBJ:	/* Default Owner */
1061 			if (err = formacl(&sp->downer, aclentp))
1062 				goto error;
1063 			break;
1064 		case DEF_GROUP_OBJ:	/* Default Group */
1065 			if (err = formacl(&sp->dgroup, aclentp))
1066 				goto error;
1067 			break;
1068 		case DEF_OTHER_OBJ:	/* Default Other */
1069 			if (err = formacl(&sp->dother, aclentp))
1070 				goto error;
1071 			break;
1072 		case DEF_USER:
1073 			if (err = formacl(&sp->dusers, aclentp))
1074 				goto error;
1075 			break;
1076 		case DEF_CLASS_OBJ:	/* Default Mask */
1077 			sp->dclass.acl_ismask = 1;
1078 			sp->dclass.acl_maskbits = aclentp->a_perm;
1079 			break;
1080 		case DEF_GROUP:
1081 			if (err = formacl(&sp->dgroups, aclentp))
1082 				goto error;
1083 			break;
1084 		default:
1085 			break;
1086 		}
1087 		aclentp--;
1088 	}
1089 	*spp = sp;
1090 	return (0);
1091 
1092 error:
1093 	ufs_si_free_mem(sp);
1094 	return (err);
1095 }
1096 
1097 void
1098 formvsec(int obj_type, ufs_ic_acl_t *aclp, aclent_t **aclentpp)
1099 {
1100 	for (; aclp; aclp = aclp->acl_ic_next) {
1101 		(*aclentpp)->a_type = obj_type;
1102 		(*aclentpp)->a_perm = aclp->acl_ic_perm;
1103 		(*aclentpp)->a_id = aclp->acl_ic_who;
1104 		(*aclentpp)++;
1105 	}
1106 }
1107 
1108 /*
1109  * XXX - Make more efficient
1110  * Convert from the ufs_acl_entry struct used for in-core storage of acl's
1111  * to the vsecattr struct,  used by the VOP interface.
1112  *
1113  * Parameters:
1114  * sp - Ptr to si struct with the acls
1115  * vsap - Ptr to a vsecattr struct which will take the results.
1116  *
1117  * Returns:	0 - Success
1118  *		N - From errno table
1119  */
1120 static int
1121 aclentry2vsecattr(si_t *sp, vsecattr_t *vsap)
1122 {
1123 	aclent_t	*aclentp;
1124 	int		numacls = 0;
1125 	int		err;
1126 
1127 	vsap->vsa_aclentp = vsap->vsa_dfaclentp = NULL;
1128 
1129 	numacls = acl_count(sp->aowner) +
1130 	    acl_count(sp->agroup) +
1131 	    acl_count(sp->aother) +
1132 	    acl_count(sp->ausers) +
1133 	    acl_count(sp->agroups);
1134 	if (sp->aclass.acl_ismask)
1135 		numacls++;
1136 
1137 	if (vsap->vsa_mask & (VSA_ACLCNT | VSA_ACL))
1138 		vsap->vsa_aclcnt = numacls;
1139 
1140 	if (numacls == 0)
1141 		goto do_defaults;
1142 
1143 	if (vsap->vsa_mask & VSA_ACL) {
1144 		vsap->vsa_aclentp = kmem_zalloc(numacls * sizeof (aclent_t),
1145 		    KM_SLEEP);
1146 		aclentp = vsap->vsa_aclentp;
1147 
1148 		formvsec(USER_OBJ, sp->aowner, &aclentp);
1149 		formvsec(USER, sp->ausers, &aclentp);
1150 		formvsec(GROUP_OBJ, sp->agroup, &aclentp);
1151 		formvsec(GROUP, sp->agroups, &aclentp);
1152 		formvsec(OTHER_OBJ, sp->aother, &aclentp);
1153 
1154 		if (sp->aclass.acl_ismask) {
1155 			aclentp->a_type = CLASS_OBJ;		/* Mask */
1156 			aclentp->a_perm = sp->aclass.acl_maskbits;
1157 			aclentp->a_id = 0;
1158 			aclentp++;
1159 		}
1160 
1161 		/* Sort the acl list */
1162 		ksort((caddr_t)vsap->vsa_aclentp, vsap->vsa_aclcnt,
1163 		    sizeof (aclent_t), cmp2acls);
1164 		/* Check the acl list */
1165 		if ((err = acl_validate(vsap->vsa_aclentp,
1166 		    vsap->vsa_aclcnt, ACL_CHECK)) != 0) {
1167 			kmem_free(vsap->vsa_aclentp,
1168 			    numacls * sizeof (aclent_t));
1169 			vsap->vsa_aclentp = NULL;
1170 			return (err);
1171 		}
1172 
1173 	}
1174 do_defaults:
1175 	/* Process Defaults */
1176 
1177 	numacls = acl_count(sp->downer) +
1178 	    acl_count(sp->dgroup) +
1179 	    acl_count(sp->dother) +
1180 	    acl_count(sp->dusers) +
1181 	    acl_count(sp->dgroups);
1182 	if (sp->dclass.acl_ismask)
1183 		numacls++;
1184 
1185 	if (vsap->vsa_mask & (VSA_DFACLCNT | VSA_DFACL))
1186 		vsap->vsa_dfaclcnt = numacls;
1187 
1188 	if (numacls == 0)
1189 		goto do_others;
1190 
1191 	if (vsap->vsa_mask & VSA_DFACL) {
1192 		vsap->vsa_dfaclentp =
1193 		    kmem_zalloc(numacls * sizeof (aclent_t), KM_SLEEP);
1194 		aclentp = vsap->vsa_dfaclentp;
1195 		formvsec(DEF_USER_OBJ, sp->downer, &aclentp);
1196 		formvsec(DEF_USER, sp->dusers, &aclentp);
1197 		formvsec(DEF_GROUP_OBJ, sp->dgroup, &aclentp);
1198 		formvsec(DEF_GROUP, sp->dgroups, &aclentp);
1199 		formvsec(DEF_OTHER_OBJ, sp->dother, &aclentp);
1200 
1201 		if (sp->dclass.acl_ismask) {
1202 			aclentp->a_type = DEF_CLASS_OBJ;	/* Mask */
1203 			aclentp->a_perm = sp->dclass.acl_maskbits;
1204 			aclentp->a_id = 0;
1205 			aclentp++;
1206 		}
1207 
1208 		/* Sort the default acl list */
1209 		ksort((caddr_t)vsap->vsa_dfaclentp, vsap->vsa_dfaclcnt,
1210 		    sizeof (aclent_t), cmp2acls);
1211 		if ((err = acl_validate(vsap->vsa_dfaclentp,
1212 		    vsap->vsa_dfaclcnt, DEF_ACL_CHECK)) != 0) {
1213 			if (vsap->vsa_aclentp != NULL)
1214 				kmem_free(vsap->vsa_aclentp,
1215 				    vsap->vsa_aclcnt * sizeof (aclent_t));
1216 			kmem_free(vsap->vsa_dfaclentp,
1217 			    vsap->vsa_dfaclcnt * sizeof (aclent_t));
1218 			vsap->vsa_aclentp = vsap->vsa_dfaclentp = NULL;
1219 			return (err);
1220 		}
1221 	}
1222 
1223 do_others:
1224 	return (0);
1225 }
1226 
1227 static void
1228 acl_free(ufs_ic_acl_t *aclp)
1229 {
1230 	while (aclp != NULL) {
1231 		ufs_ic_acl_t *nextaclp = aclp->acl_ic_next;
1232 		kmem_free(aclp, sizeof (ufs_ic_acl_t));
1233 		aclp = nextaclp;
1234 	}
1235 }
1236 
1237 /*
1238  * ufs_si_free_mem will discard the sp, and the acl hanging off of the
1239  * sp.  It is required that the sp not be locked, and not be in the
1240  * cache.
1241  *
1242  * input: pointer to sp to discard.
1243  *
1244  * return - nothing.
1245  *
1246  */
1247 static void
1248 ufs_si_free_mem(si_t *sp)
1249 {
1250 	ASSERT(!(sp->s_flags & SI_CACHED));
1251 	ASSERT(!RW_LOCK_HELD(&sp->s_lock));
1252 	/*
1253 	 *	remove from the cache
1254 	 *	free the acl entries
1255 	 */
1256 	acl_free(sp->aowner);
1257 	acl_free(sp->agroup);
1258 	acl_free(sp->aother);
1259 	acl_free(sp->ausers);
1260 	acl_free(sp->agroups);
1261 
1262 	acl_free(sp->downer);
1263 	acl_free(sp->dgroup);
1264 	acl_free(sp->dother);
1265 	acl_free(sp->dusers);
1266 	acl_free(sp->dgroups);
1267 
1268 	rw_destroy(&sp->s_lock);
1269 	kmem_free(sp, sizeof (si_t));
1270 }
1271 
1272 void
1273 acl_cpy(ufs_ic_acl_t *saclp, ufs_ic_acl_t *daclp)
1274 {
1275 	ufs_ic_acl_t  *aclp, *prev_aclp = NULL, *aclp1;
1276 
1277 	if (saclp == NULL) {
1278 		daclp = NULL;
1279 		return;
1280 	}
1281 	prev_aclp = daclp;
1282 
1283 	for (aclp = saclp; aclp != NULL; aclp = aclp->acl_ic_next) {
1284 		aclp1 = kmem_alloc(sizeof (ufs_ic_acl_t), KM_SLEEP);
1285 		aclp1->acl_ic_next = NULL;
1286 		aclp1->acl_ic_who = aclp->acl_ic_who;
1287 		aclp1->acl_ic_perm = aclp->acl_ic_perm;
1288 		prev_aclp->acl_ic_next = aclp1;
1289 		prev_aclp = (ufs_ic_acl_t *)&aclp1->acl_ic_next;
1290 	}
1291 }
1292 
1293 /*
1294  *	ufs_si_inherit takes a parent acl structure (saclp) and the inode
1295  *	of the object that is inheriting an acl and returns the inode
1296  *	with the acl linked to it.  It also writes the acl to disk if
1297  *	it is a unique inode.
1298  *
1299  *	ip - pointer to inode of object inheriting the acl (contents lock)
1300  *	tdp - parent inode (rw_lock and contents lock)
1301  *	mode - creation modes
1302  *	cr - credentials pointer
1303  */
1304 int
1305 ufs_si_inherit(struct inode *ip, struct inode *tdp, o_mode_t mode, cred_t *cr)
1306 {
1307 	si_t *tsp, *sp = tdp->i_ufs_acl;
1308 	int error;
1309 	o_mode_t old_modes, old_uid, old_gid;
1310 	int mask;
1311 
1312 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
1313 	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
1314 	ASSERT(RW_WRITE_HELD(&tdp->i_contents));
1315 
1316 	/*
1317 	 * if links/symbolic links, or other invalid acl objects are copied
1318 	 * or moved to a directory with a default acl do not allow inheritance
1319 	 * just return.
1320 	 */
1321 	if (!CHECK_ACL_ALLOWED(ip->i_mode & IFMT))
1322 		return (0);
1323 
1324 	/* lock the parent security information */
1325 	rw_enter(&sp->s_lock, RW_READER);
1326 
1327 	ASSERT(((tdp->i_mode & IFMT) == IFDIR) ||
1328 	    ((tdp->i_mode & IFMT) == IFATTRDIR));
1329 
1330 	mask = ((sp->downer != NULL) ? 1 : 0) |
1331 	    ((sp->dgroup != NULL) ? 2 : 0) |
1332 	    ((sp->dother != NULL) ? 4 : 0);
1333 
1334 	if (mask == 0) {
1335 		rw_exit(&sp->s_lock);
1336 		return (0);
1337 	}
1338 
1339 	if (mask != 7) {
1340 		rw_exit(&sp->s_lock);
1341 		return (EINVAL);
1342 	}
1343 
1344 	tsp = kmem_zalloc(sizeof (si_t), KM_SLEEP);
1345 	rw_init(&tsp->s_lock, NULL, RW_DEFAULT, NULL);
1346 
1347 	/* copy the default acls */
1348 
1349 	ASSERT(RW_READ_HELD(&sp->s_lock));
1350 	acl_cpy(sp->downer, (ufs_ic_acl_t *)&tsp->aowner);
1351 	acl_cpy(sp->dgroup, (ufs_ic_acl_t *)&tsp->agroup);
1352 	acl_cpy(sp->dother, (ufs_ic_acl_t *)&tsp->aother);
1353 	acl_cpy(sp->dusers, (ufs_ic_acl_t *)&tsp->ausers);
1354 	acl_cpy(sp->dgroups, (ufs_ic_acl_t *)&tsp->agroups);
1355 	tsp->aclass.acl_ismask = sp->dclass.acl_ismask;
1356 	tsp->aclass.acl_maskbits = sp->dclass.acl_maskbits;
1357 
1358 	/*
1359 	 * set the owner, group, and other values from the master
1360 	 * inode.
1361 	 */
1362 
1363 	MODE2ACL(tsp->aowner, (mode >> 6), ip->i_uid);
1364 	MODE2ACL(tsp->agroup, (mode >> 3), ip->i_gid);
1365 	MODE2ACL(tsp->aother, (mode), 0);
1366 
1367 	if (tsp->aclass.acl_ismask) {
1368 		tsp->aclass.acl_maskbits &= mode >> 3;
1369 	}
1370 
1371 
1372 	/* copy default acl if necessary */
1373 
1374 	if (((ip->i_mode & IFMT) == IFDIR) ||
1375 	    ((ip->i_mode & IFMT) == IFATTRDIR)) {
1376 		acl_cpy(sp->downer, (ufs_ic_acl_t *)&tsp->downer);
1377 		acl_cpy(sp->dgroup, (ufs_ic_acl_t *)&tsp->dgroup);
1378 		acl_cpy(sp->dother, (ufs_ic_acl_t *)&tsp->dother);
1379 		acl_cpy(sp->dusers, (ufs_ic_acl_t *)&tsp->dusers);
1380 		acl_cpy(sp->dgroups, (ufs_ic_acl_t *)&tsp->dgroups);
1381 		tsp->dclass.acl_ismask = sp->dclass.acl_ismask;
1382 		tsp->dclass.acl_maskbits = sp->dclass.acl_maskbits;
1383 	}
1384 	/*
1385 	 * save the new 9 mode bits in the inode (ip->ic_smode) for
1386 	 * ufs_getattr.  Be sure the mode can be recovered if the store
1387 	 * fails.
1388 	 */
1389 	old_modes = ip->i_mode;
1390 	old_uid = ip->i_uid;
1391 	old_gid = ip->i_gid;
1392 	/*
1393 	 * store the acl, and get back a new security anchor if
1394 	 * it is a duplicate.
1395 	 */
1396 	rw_exit(&sp->s_lock);
1397 	rw_enter(&ip->i_rwlock, RW_WRITER);
1398 
1399 	/*
1400 	 * Suppress out of inodes messages if instructed in the
1401 	 * tdp inode.
1402 	 */
1403 	ip->i_flag |= tdp->i_flag & IQUIET;
1404 
1405 	if ((error = ufs_si_store(ip, tsp, 0, cr)) != 0) {
1406 		ip->i_mode = old_modes;
1407 		ip->i_uid = old_uid;
1408 		ip->i_gid = old_gid;
1409 	}
1410 	ip->i_flag &= ~IQUIET;
1411 	rw_exit(&ip->i_rwlock);
1412 	return (error);
1413 }
1414 
1415 si_t *
1416 ufs_acl_cp(si_t *sp)
1417 {
1418 
1419 	si_t *dsp;
1420 
1421 	ASSERT(RW_READ_HELD(&sp->s_lock));
1422 	ASSERT(sp->s_ref && sp->s_use);
1423 
1424 	dsp = kmem_zalloc(sizeof (si_t), KM_SLEEP);
1425 	rw_init(&dsp->s_lock, NULL, RW_DEFAULT, NULL);
1426 
1427 	acl_cpy(sp->aowner, (ufs_ic_acl_t *)&dsp->aowner);
1428 	acl_cpy(sp->agroup, (ufs_ic_acl_t *)&dsp->agroup);
1429 	acl_cpy(sp->aother, (ufs_ic_acl_t *)&dsp->aother);
1430 	acl_cpy(sp->ausers, (ufs_ic_acl_t *)&dsp->ausers);
1431 	acl_cpy(sp->agroups, (ufs_ic_acl_t *)&dsp->agroups);
1432 
1433 	dsp->aclass.acl_ismask = sp->aclass.acl_ismask;
1434 	dsp->aclass.acl_maskbits = sp->aclass.acl_maskbits;
1435 
1436 	acl_cpy(sp->downer, (ufs_ic_acl_t *)&dsp->downer);
1437 	acl_cpy(sp->dgroup, (ufs_ic_acl_t *)&dsp->dgroup);
1438 	acl_cpy(sp->dother, (ufs_ic_acl_t *)&dsp->dother);
1439 	acl_cpy(sp->dusers, (ufs_ic_acl_t *)&dsp->dusers);
1440 	acl_cpy(sp->dgroups, (ufs_ic_acl_t *)&dsp->dgroups);
1441 
1442 	dsp->dclass.acl_ismask = sp->dclass.acl_ismask;
1443 	dsp->dclass.acl_maskbits = sp->dclass.acl_maskbits;
1444 
1445 	return (dsp);
1446 
1447 }
1448 
1449 int
1450 ufs_acl_setattr(struct inode *ip, struct vattr *vap, cred_t *cr)
1451 {
1452 
1453 	si_t *sp;
1454 	int mask = vap->va_mask;
1455 	int error = 0;
1456 
1457 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
1458 
1459 	if (!(mask & (AT_MODE|AT_UID|AT_GID)))
1460 		return (0);
1461 
1462 	/*
1463 	 * if no regular acl's, nothing to do, so let's get out
1464 	 */
1465 	if (!(ip->i_ufs_acl) || !(ip->i_ufs_acl->aowner))
1466 		return (0);
1467 
1468 	rw_enter(&ip->i_ufs_acl->s_lock, RW_READER);
1469 	sp = ufs_acl_cp(ip->i_ufs_acl);
1470 	ASSERT(sp != ip->i_ufs_acl);
1471 
1472 	/*
1473 	 * set the mask to the group permissions if a mask entry
1474 	 * exists.  Otherwise, set the group obj bits to the group
1475 	 * permissions.  Since non-trivial ACLs always have a mask,
1476 	 * and the mask is the final arbiter of group permissions,
1477 	 * setting the mask has the effect of changing the effective
1478 	 * group permissions, even if the group_obj permissions in
1479 	 * the ACL aren't changed.  Posix P1003.1e states that when
1480 	 * an ACL mask exists, chmod(2) must set the acl mask (NOT the
1481 	 * group_obj permissions) to the requested group permissions.
1482 	 */
1483 	if (mask & AT_MODE) {
1484 		sp->aowner->acl_ic_perm = (o_mode_t)(ip->i_mode & 0700) >> 6;
1485 		if (sp->aclass.acl_ismask)
1486 			sp->aclass.acl_maskbits =
1487 			    (o_mode_t)(ip->i_mode & 070) >> 3;
1488 		else
1489 			sp->agroup->acl_ic_perm =
1490 			    (o_mode_t)(ip->i_mode & 070) >> 3;
1491 		sp->aother->acl_ic_perm = (o_mode_t)(ip->i_mode & 07);
1492 	}
1493 
1494 	if (mask & AT_UID) {
1495 		/* Caller has verified our privileges */
1496 		sp->aowner->acl_ic_who = ip->i_uid;
1497 	}
1498 
1499 	if (mask & AT_GID) {
1500 		sp->agroup->acl_ic_who = ip->i_gid;
1501 	}
1502 
1503 	rw_exit(&ip->i_ufs_acl->s_lock);
1504 	error = ufs_si_store(ip, sp, 0, cr);
1505 	return (error);
1506 }
1507 
1508 static int
1509 acl_count(ufs_ic_acl_t *p)
1510 {
1511 	ufs_ic_acl_t	*acl;
1512 	int		count;
1513 
1514 	for (count = 0, acl = p; acl; acl = acl->acl_ic_next, count++)
1515 		;
1516 	return (count);
1517 }
1518 
1519 /*
1520  *	Takes as input a security structure and generates a buffer
1521  *	with fsd's in a form which be written to the shadow inode.
1522  */
1523 static int
1524 ufs_sectobuf(si_t *sp, caddr_t *buf, size_t *len)
1525 {
1526 	size_t		acl_size;
1527 	size_t		def_acl_size;
1528 	caddr_t		buffer;
1529 	struct ufs_fsd	*fsdp;
1530 	ufs_acl_t	*bufaclp;
1531 
1532 	/*
1533 	 * Calc size of buffer to hold all the acls
1534 	 */
1535 	acl_size = acl_count(sp->aowner) +		/* owner */
1536 	    acl_count(sp->agroup) +			/* owner group */
1537 	    acl_count(sp->aother) +			/* owner other */
1538 	    acl_count(sp->ausers) +			/* acl list */
1539 	    acl_count(sp->agroups);			/* group alcs */
1540 	if (sp->aclass.acl_ismask)
1541 		acl_size++;
1542 
1543 	/* Convert to bytes */
1544 	acl_size *= sizeof (ufs_acl_t);
1545 
1546 	/* Add fsd header */
1547 	if (acl_size)
1548 		acl_size += 2 * sizeof (int);
1549 
1550 	/*
1551 	 * Calc size of buffer to hold all the default acls
1552 	 */
1553 	def_acl_size =
1554 	    acl_count(sp->downer) +	/* def owner */
1555 	    acl_count(sp->dgroup) +	/* def owner group */
1556 	    acl_count(sp->dother) +	/* def owner other */
1557 	    acl_count(sp->dusers) +	/* def users  */
1558 	    acl_count(sp->dgroups);	/* def group acls */
1559 	if (sp->dclass.acl_ismask)
1560 		def_acl_size++;
1561 
1562 	/*
1563 	 * Convert to bytes
1564 	 */
1565 	def_acl_size *= sizeof (ufs_acl_t);
1566 
1567 	/*
1568 	 * Add fsd header
1569 	 */
1570 	if (def_acl_size)
1571 		def_acl_size += 2 * sizeof (int);
1572 
1573 	if (acl_size + def_acl_size == 0)
1574 		return (0);
1575 
1576 	buffer = kmem_zalloc((acl_size + def_acl_size), KM_SLEEP);
1577 	bufaclp = (ufs_acl_t *)buffer;
1578 
1579 	if (acl_size == 0)
1580 		goto wrtdefs;
1581 
1582 	/* create fsd and copy acls */
1583 	fsdp = (struct ufs_fsd *)bufaclp;
1584 	fsdp->fsd_type = FSD_ACL;
1585 	bufaclp = (ufs_acl_t *)&fsdp->fsd_data[0];
1586 
1587 	ACL_MOVE(sp->aowner, USER_OBJ, bufaclp);
1588 	ACL_MOVE(sp->agroup, GROUP_OBJ, bufaclp);
1589 	ACL_MOVE(sp->aother, OTHER_OBJ, bufaclp);
1590 	ACL_MOVE(sp->ausers, USER, bufaclp);
1591 	ACL_MOVE(sp->agroups, GROUP, bufaclp);
1592 
1593 	if (sp->aclass.acl_ismask) {
1594 		bufaclp->acl_tag = CLASS_OBJ;
1595 		bufaclp->acl_who = (uid_t)sp->aclass.acl_ismask;
1596 		bufaclp->acl_perm = (o_mode_t)sp->aclass.acl_maskbits;
1597 		bufaclp++;
1598 	}
1599 	ASSERT(acl_size <= INT_MAX);
1600 	fsdp->fsd_size = (int)acl_size;
1601 
1602 wrtdefs:
1603 	if (def_acl_size == 0)
1604 		goto alldone;
1605 
1606 	/* if defaults exist then create fsd and copy default acls */
1607 	fsdp = (struct ufs_fsd *)bufaclp;
1608 	fsdp->fsd_type = FSD_DFACL;
1609 	bufaclp = (ufs_acl_t *)&fsdp->fsd_data[0];
1610 
1611 	ACL_MOVE(sp->downer, DEF_USER_OBJ, bufaclp);
1612 	ACL_MOVE(sp->dgroup, DEF_GROUP_OBJ, bufaclp);
1613 	ACL_MOVE(sp->dother, DEF_OTHER_OBJ, bufaclp);
1614 	ACL_MOVE(sp->dusers, DEF_USER, bufaclp);
1615 	ACL_MOVE(sp->dgroups, DEF_GROUP, bufaclp);
1616 	if (sp->dclass.acl_ismask) {
1617 		bufaclp->acl_tag = DEF_CLASS_OBJ;
1618 		bufaclp->acl_who = (uid_t)sp->dclass.acl_ismask;
1619 		bufaclp->acl_perm = (o_mode_t)sp->dclass.acl_maskbits;
1620 		bufaclp++;
1621 	}
1622 	ASSERT(def_acl_size <= INT_MAX);
1623 	fsdp->fsd_size = (int)def_acl_size;
1624 
1625 alldone:
1626 	*buf = buffer;
1627 	*len = acl_size + def_acl_size;
1628 
1629 	return (0);
1630 }
1631 
1632 /*
1633  *  free a shadow inode  on disk and in memory
1634  */
1635 int
1636 ufs_si_free(si_t *sp, struct vfs *vfsp, cred_t *cr)
1637 {
1638 	struct inode	*sip;
1639 	int		shadow;
1640 	int		err = 0;
1641 	int		refcnt;
1642 	int		signature;
1643 
1644 	ASSERT(vfsp);
1645 	ASSERT(sp);
1646 
1647 	rw_enter(&sp->s_lock, RW_READER);
1648 	ASSERT(sp->s_shadow <= INT_MAX);
1649 	shadow = (int)sp->s_shadow;
1650 	ASSERT(sp->s_ref);
1651 	rw_exit(&sp->s_lock);
1652 
1653 	/*
1654 	 * Decrement link count on the shadow inode,
1655 	 * and decrement reference count on the sip.
1656 	 */
1657 	if ((err = ufs_iget_alloced(vfsp, shadow, &sip, cr)) == 0) {
1658 		rw_enter(&sip->i_contents, RW_WRITER);
1659 		rw_enter(&sp->s_lock, RW_WRITER);
1660 		ASSERT(sp->s_shadow == shadow);
1661 		ASSERT(sip->i_dquot == 0);
1662 		/* Decrement link count */
1663 		ASSERT(sip->i_nlink > 0);
1664 		/*
1665 		 * bug #1264710 assertion failure below
1666 		 */
1667 		sp->s_use = --sip->i_nlink;
1668 		ufs_setreclaim(sip);
1669 		TRANS_INODE(sip->i_ufsvfs, sip);
1670 		sip->i_flag |= ICHG | IMOD;
1671 		sip->i_seq++;
1672 		ITIMES_NOLOCK(sip);
1673 		/* Dec ref counts on si referenced by this ip */
1674 		refcnt = --sp->s_ref;
1675 		signature = sp->s_signature;
1676 		ASSERT(sp->s_ref >= 0 && sp->s_ref <= sp->s_use);
1677 		/*
1678 		 * Release s_lock before calling VN_RELE
1679 		 * (which may want to acquire i_contents).
1680 		 */
1681 		rw_exit(&sp->s_lock);
1682 		rw_exit(&sip->i_contents);
1683 		VN_RELE(ITOV(sip));
1684 	} else {
1685 		rw_enter(&sp->s_lock, RW_WRITER);
1686 		/* Dec ref counts on si referenced by this ip */
1687 		refcnt = --sp->s_ref;
1688 		signature = sp->s_signature;
1689 		ASSERT(sp->s_ref >= 0 && sp->s_ref <= sp->s_use);
1690 		rw_exit(&sp->s_lock);
1691 	}
1692 
1693 	if (refcnt == 0)
1694 		si_cache_del(sp, signature);
1695 	return (err);
1696 }
1697 
1698 /*
1699  * Seach the si cache for an si structure by inode #.
1700  * Returns a locked si structure.
1701  *
1702  * Parameters:
1703  * ip - Ptr to an inode on this fs
1704  * spp - Ptr to ptr to si struct for the results, if found.
1705  *
1706  * Returns:	0 - Success (results in spp)
1707  *		1 - Failure (spp undefined)
1708  */
1709 static int
1710 si_cachei_get(struct inode *ip, si_t **spp)
1711 {
1712 	si_t	*sp;
1713 
1714 	rw_enter(&si_cache_lock, RW_READER);
1715 loop:
1716 	for (sp = si_cachei[SI_HASH(ip->i_shadow)]; sp; sp = sp->s_forw)
1717 		if (sp->s_shadow == ip->i_shadow && sp->s_dev == ip->i_dev)
1718 			break;
1719 
1720 	if (sp == NULL) {
1721 		/* Not in cache */
1722 		rw_exit(&si_cache_lock);
1723 		return (1);
1724 	}
1725 	/* Found it */
1726 	rw_enter(&sp->s_lock, RW_WRITER);
1727 alldone:
1728 	rw_exit(&si_cache_lock);
1729 	*spp = sp;
1730 	return (0);
1731 }
1732 
1733 /*
1734  * Seach the si cache by si structure (ie duplicate of the one passed in).
1735  * In order for a match the signatures must be the same and
1736  * the devices must be the same, the acls must match and
1737  * link count of the cached shadow must be less than the
1738  * size of ic_nlink - 1.  MAXLINK - 1 is used to allow the count
1739  * to be incremented one more time by the caller.
1740  * Returns a locked si structure.
1741  *
1742  * Parameters:
1743  * ip - Ptr to an inode on this fs
1744  * spi - Ptr to si the struct we're searching the cache for.
1745  * spp - Ptr to ptr to si struct for the results, if found.
1746  *
1747  * Returns:	0 - Success (results in spp)
1748  *		1 - Failure (spp undefined)
1749  */
1750 static int
1751 si_cachea_get(struct inode *ip, si_t *spi, si_t **spp)
1752 {
1753 	si_t	*sp;
1754 
1755 	spi->s_dev = ip->i_dev;
1756 	spi->s_signature = si_signature(spi);
1757 	rw_enter(&si_cache_lock, RW_READER);
1758 loop:
1759 	for (sp = si_cachea[SI_HASH(spi->s_signature)]; sp; sp = sp->s_next) {
1760 		if (sp->s_signature == spi->s_signature &&
1761 		    sp->s_dev == spi->s_dev &&
1762 		    sp->s_use > 0 &&			/* deleting */
1763 		    sp->s_use <= (MAXLINK - 1) &&	/* Too many links */
1764 		    !si_cmp(sp, spi))
1765 			break;
1766 	}
1767 
1768 	if (sp == NULL) {
1769 		/* Cache miss */
1770 		rw_exit(&si_cache_lock);
1771 		return (1);
1772 	}
1773 	/* Found it */
1774 	rw_enter(&sp->s_lock, RW_WRITER);
1775 alldone:
1776 	spi->s_shadow = sp->s_shadow; /* XXX For debugging */
1777 	rw_exit(&si_cache_lock);
1778 	*spp = sp;
1779 	return (0);
1780 }
1781 
1782 /*
1783  * Place an si structure in the si cache.  May cause duplicates.
1784  *
1785  * Parameters:
1786  * sp - Ptr to the si struct to add to the cache.
1787  *
1788  * Returns: Nothing (void)
1789  */
1790 static void
1791 si_cache_put(si_t *sp)
1792 {
1793 	si_t	**tspp;
1794 
1795 	ASSERT(sp->s_fore == NULL);
1796 	rw_enter(&si_cache_lock, RW_WRITER);
1797 	if (!sp->s_signature)
1798 		sp->s_signature = si_signature(sp);
1799 	sp->s_flags |= SI_CACHED;
1800 	sp->s_fore = NULL;
1801 
1802 	/* The 'by acl' chains */
1803 	tspp = &si_cachea[SI_HASH(sp->s_signature)];
1804 	sp->s_next = *tspp;
1805 	*tspp = sp;
1806 
1807 	/* The 'by inode' chains */
1808 	tspp = &si_cachei[SI_HASH(sp->s_shadow)];
1809 	sp->s_forw = *tspp;
1810 	*tspp = sp;
1811 
1812 	rw_exit(&si_cache_lock);
1813 }
1814 
1815 /*
1816  * The sp passed in is a candidate for deletion from the cache.  We acquire
1817  * the cache lock first, so no cache searches can be done.  Then we search
1818  * for the acl in the cache, and if we find it we can lock it and check that
1819  * nobody else attached to it while we were acquiring the locks.  If the acl
1820  * is in the cache and still has a zero reference count, then we remove it
1821  * from the cache and deallocate it.  If the reference count is non-zero or
1822  * it is not found in the cache, then someone else attached to it or has
1823  * already freed it, so we just return.
1824  *
1825  * Parameters:
1826  * sp - Ptr to the sp struct which is the candicate for deletion.
1827  * signature - the signature for the acl for lookup in the hash table
1828  *
1829  * Returns: Nothing (void)
1830  */
1831 void
1832 si_cache_del(si_t *sp, int signature)
1833 {
1834 	si_t	**tspp;
1835 	int	hash;
1836 	int	foundacl = 0;
1837 
1838 	/*
1839 	 * Unlink & free the sp from the other queues, then destroy it.
1840 	 * Search the 'by acl' chain first, then the 'by inode' chain
1841 	 * after the acl is locked.
1842 	 */
1843 	rw_enter(&si_cache_lock, RW_WRITER);
1844 	hash = SI_HASH(signature);
1845 	for (tspp = &si_cachea[hash]; *tspp; tspp = &(*tspp)->s_next) {
1846 		if (*tspp == sp) {
1847 			/*
1848 			 * Wait to grab the acl lock until after the acl has
1849 			 * been found in the cache.  Otherwise it might try to
1850 			 * grab a lock that has already been destroyed, or
1851 			 * delete an acl that has already been freed.
1852 			 */
1853 			rw_enter(&sp->s_lock, RW_WRITER);
1854 			/* See if someone else attached to it */
1855 			if (sp->s_ref) {
1856 				rw_exit(&sp->s_lock);
1857 				rw_exit(&si_cache_lock);
1858 				return;
1859 			}
1860 			ASSERT(sp->s_fore == NULL);
1861 			ASSERT(sp->s_flags & SI_CACHED);
1862 			foundacl = 1;
1863 			*tspp = sp->s_next;
1864 			break;
1865 		}
1866 	}
1867 
1868 	/*
1869 	 * If the acl was not in the cache, we assume another thread has
1870 	 * deleted it already. This could happen if another thread attaches to
1871 	 * the acl and then releases it after this thread has already found the
1872 	 * reference count to be zero but has not yet taken the cache lock.
1873 	 * Both threads end up seeing a reference count of zero, and call into
1874 	 * si_cache_del.  See bug 4244827 for details on the race condition.
1875 	 */
1876 	if (foundacl == 0) {
1877 		rw_exit(&si_cache_lock);
1878 		return;
1879 	}
1880 
1881 	/* Now check the 'by inode' chain */
1882 	hash = SI_HASH(sp->s_shadow);
1883 	for (tspp = &si_cachei[hash]; *tspp; tspp = &(*tspp)->s_forw) {
1884 		if (*tspp == sp) {
1885 			*tspp = sp->s_forw;
1886 			break;
1887 		}
1888 	}
1889 
1890 	/*
1891 	 * At this point, we can unlock everything because this si
1892 	 * is no longer in the cache, thus cannot be attached to.
1893 	 */
1894 	rw_exit(&sp->s_lock);
1895 	rw_exit(&si_cache_lock);
1896 	sp->s_flags &= ~SI_CACHED;
1897 	(void) ufs_si_free_mem(sp);
1898 }
1899 
1900 /*
1901  * Alloc the hash buckets for the si cache & initialize
1902  * the unreferenced anchor and the cache lock.
1903  */
1904 void
1905 si_cache_init(void)
1906 {
1907 	rw_init(&si_cache_lock, NULL, RW_DEFAULT, NULL);
1908 
1909 	/* The 'by acl' headers */
1910 	si_cachea = kmem_zalloc(si_cachecnt * sizeof (si_t *), KM_SLEEP);
1911 	/* The 'by inode' headers */
1912 	si_cachei = kmem_zalloc(si_cachecnt * sizeof (si_t *), KM_SLEEP);
1913 }
1914 
1915 /*
1916  *  aclcksum takes an acl and generates a checksum.  It takes as input
1917  *  the acl to start at.
1918  *
1919  *  s_aclp - pointer to starting acl
1920  *
1921  *  returns checksum
1922  */
1923 static int
1924 aclcksum(ufs_ic_acl_t *s_aclp)
1925 {
1926 	ufs_ic_acl_t *aclp;
1927 	int signature = 0;
1928 	for (aclp = s_aclp; aclp; aclp = aclp->acl_ic_next) {
1929 		signature += aclp->acl_ic_perm;
1930 		signature += aclp->acl_ic_who;
1931 	}
1932 	return (signature);
1933 }
1934 
1935 /*
1936  * Generate a unique signature for an si structure.  Used by the
1937  * search routine si_cachea_get() to quickly identify candidates
1938  * prior to calling si_cmp().
1939  * Parameters:
1940  * sp - Ptr to the si struct to generate the signature for.
1941  *
1942  * Returns:  A signature for the si struct (really a checksum)
1943  */
1944 static int
1945 si_signature(si_t *sp)
1946 {
1947 	int signature = sp->s_dev;
1948 
1949 	signature += aclcksum(sp->aowner) + aclcksum(sp->agroup) +
1950 	    aclcksum(sp->aother) + aclcksum(sp->ausers) +
1951 	    aclcksum(sp->agroups) + aclcksum(sp->downer) +
1952 	    aclcksum(sp->dgroup) + aclcksum(sp->dother) +
1953 	    aclcksum(sp->dusers) + aclcksum(sp->dgroups);
1954 	if (sp->aclass.acl_ismask)
1955 		signature += sp->aclass.acl_maskbits;
1956 	if (sp->dclass.acl_ismask)
1957 		signature += sp->dclass.acl_maskbits;
1958 
1959 	return (signature);
1960 }
1961 
1962 /*
1963  * aclcmp compares to acls to see if they are identical.
1964  *
1965  * sp1 is source
1966  * sp2 is sourceb
1967  *
1968  * returns 0 if equal and 1 if not equal
1969  */
1970 static int
1971 aclcmp(ufs_ic_acl_t *aclin1p, ufs_ic_acl_t *aclin2p)
1972 {
1973 	ufs_ic_acl_t *aclp1;
1974 	ufs_ic_acl_t *aclp2;
1975 
1976 	/*
1977 	 * if the starting pointers are equal then they are equal so
1978 	 * just return.
1979 	 */
1980 	if (aclin1p == aclin2p)
1981 		return (0);
1982 	/*
1983 	 * check element by element
1984 	 */
1985 	for (aclp1 = aclin1p, aclp2 = aclin2p; aclp1 && aclp2;
1986 	    aclp1 = aclp1->acl_ic_next, aclp2 = aclp2->acl_ic_next) {
1987 		if (aclp1->acl_ic_perm != aclp2->acl_ic_perm ||
1988 		    aclp1->acl_ic_who != aclp2->acl_ic_who)
1989 			return (1);
1990 	}
1991 	/*
1992 	 * both must be zero (at the end of the acl)
1993 	 */
1994 	if (aclp1 || aclp2)
1995 		return (1);
1996 
1997 	return (0);
1998 }
1999 
2000 /*
2001  * Do extensive, field-by-field compare of two si structures.  Returns
2002  * 0 if they are exactly identical, 1 otherwise.
2003  *
2004  * Paramters:
2005  * sp1 - Ptr to 1st si struct
2006  * sp2 - Ptr to 2nd si struct
2007  *
2008  * Returns:
2009  *		0 - Not identical
2010  *		1 - Identical
2011  */
2012 static int
2013 si_cmp(si_t *sp1, si_t *sp2)
2014 {
2015 	if (sp1->s_dev != sp2->s_dev)
2016 		return (1);
2017 	if (aclcmp(sp1->aowner, sp2->aowner) ||
2018 	    aclcmp(sp1->agroup, sp2->agroup) ||
2019 	    aclcmp(sp1->aother, sp2->aother) ||
2020 	    aclcmp(sp1->ausers, sp2->ausers) ||
2021 	    aclcmp(sp1->agroups, sp2->agroups) ||
2022 	    aclcmp(sp1->downer, sp2->downer) ||
2023 	    aclcmp(sp1->dgroup, sp2->dgroup) ||
2024 	    aclcmp(sp1->dother, sp2->dother) ||
2025 	    aclcmp(sp1->dusers, sp2->dusers) ||
2026 	    aclcmp(sp1->dgroups, sp2->dgroups))
2027 		return (1);
2028 	if (sp1->aclass.acl_ismask != sp2->aclass.acl_ismask)
2029 		return (1);
2030 	if (sp1->dclass.acl_ismask != sp2->dclass.acl_ismask)
2031 		return (1);
2032 	if (sp1->aclass.acl_ismask &&
2033 	    sp1->aclass.acl_maskbits != sp2->aclass.acl_maskbits)
2034 		return (1);
2035 	if (sp1->dclass.acl_ismask &&
2036 	    sp1->dclass.acl_maskbits != sp2->dclass.acl_maskbits)
2037 		return (1);
2038 
2039 	return (0);
2040 }
2041 
2042 /*
2043  * Remove all acls associated with a device.  All acls must have
2044  * a reference count of zero.
2045  *
2046  * inputs:
2047  *	device - device to remove from the cache
2048  *
2049  * outputs:
2050  *	none
2051  */
2052 void
2053 ufs_si_cache_flush(dev_t dev)
2054 {
2055 	si_t *tsp, **tspp;
2056 	int i;
2057 
2058 	rw_enter(&si_cache_lock, RW_WRITER);
2059 	for (i = 0; i < si_cachecnt; i++) {
2060 		tspp = &si_cachea[i];
2061 		while (*tspp) {
2062 			if ((*tspp)->s_dev == dev) {
2063 				*tspp = (*tspp)->s_next;
2064 			} else {
2065 				tspp = &(*tspp)->s_next;
2066 			}
2067 		}
2068 	}
2069 	for (i = 0; i < si_cachecnt; i++) {
2070 		tspp = &si_cachei[i];
2071 		while (*tspp) {
2072 			if ((*tspp)->s_dev == dev) {
2073 				tsp = *tspp;
2074 				*tspp = (*tspp)->s_forw;
2075 				tsp->s_flags &= ~SI_CACHED;
2076 				ufs_si_free_mem(tsp);
2077 			} else {
2078 				tspp = &(*tspp)->s_forw;
2079 			}
2080 		}
2081 	}
2082 	rw_exit(&si_cache_lock);
2083 }
2084 
2085 /*
2086  * ufs_si_del is used to unhook a sp from a inode in memory
2087  *
2088  * ip is the inode to remove the sp from.
2089  */
2090 void
2091 ufs_si_del(struct inode *ip)
2092 {
2093 	si_t    *sp = ip->i_ufs_acl;
2094 	int	refcnt;
2095 	int	signature;
2096 
2097 	if (sp) {
2098 		rw_enter(&sp->s_lock, RW_WRITER);
2099 		refcnt = --sp->s_ref;
2100 		signature = sp->s_signature;
2101 		ASSERT(sp->s_ref >= 0 && sp->s_ref <= sp->s_use);
2102 		rw_exit(&sp->s_lock);
2103 		if (refcnt == 0)
2104 			si_cache_del(sp, signature);
2105 		ip->i_ufs_acl = NULL;
2106 	}
2107 }
2108