xref: /titanic_41/usr/src/uts/common/fs/zfs/zfs_acl.c (revision 3a3e8d7acddcf5f846fdd54de49bd37c17e44d43)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/time.h>
32 #include <sys/systm.h>
33 #include <sys/sysmacros.h>
34 #include <sys/resource.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/file.h>
38 #include <sys/stat.h>
39 #include <sys/kmem.h>
40 #include <sys/cmn_err.h>
41 #include <sys/errno.h>
42 #include <sys/unistd.h>
43 #include <sys/fs/zfs.h>
44 #include <sys/mode.h>
45 #include <sys/policy.h>
46 #include <sys/zfs_znode.h>
47 #include <sys/zfs_acl.h>
48 #include <sys/zfs_dir.h>
49 #include <sys/zfs_vfsops.h>
50 #include <sys/dmu.h>
51 #include <sys/zap.h>
52 #include <util/qsort.h>
53 #include "fs/fs_subr.h"
54 #include <acl/acl_common.h>
55 
/* Shorthand for the two ACE types that this file evaluates. */
#define	ALLOW	ACE_ACCESS_ALLOWED_ACE_TYPE
#define	DENY	ACE_ACCESS_DENIED_ACE_TYPE

/* a_flags entry type used for the owning group (group@) ACEs. */
#define	OWNING_GROUP	(ACE_GROUP | ACE_IDENTIFIER_GROUP)

/* Access masks used for the canonical everyone@ allow and deny ACEs. */
#define	EVERYONE_ALLOW_MASK \
	(ACE_READ_ACL | ACE_READ_ATTRIBUTES | \
	ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE)
#define	EVERYONE_DENY_MASK \
	(ACE_WRITE_ACL | ACE_WRITE_OWNER | \
	ACE_WRITE_ATTRIBUTES | ACE_WRITE_NAMED_ATTRS)

/* Access mask used for the canonical owner@ allow ACE. */
#define	OWNER_ALLOW_MASK \
	(ACE_WRITE_ACL | ACE_WRITE_OWNER | \
	ACE_WRITE_ATTRIBUTES | ACE_WRITE_NAMED_ATTRS)

/* Every access bit that implies a modification of the object. */
#define	WRITE_MASK \
	(ACE_WRITE_DATA | ACE_APPEND_DATA | ACE_WRITE_NAMED_ATTRS | \
	ACE_WRITE_ATTRIBUTES | ACE_WRITE_ACL | ACE_WRITE_OWNER)

/* Bits cleared from owner@/group@/everyone@ ACEs by the chmod algorithm. */
#define	OGE_CLEAR \
	(ACE_READ_DATA | ACE_LIST_DIRECTORY | ACE_WRITE_DATA | \
	ACE_ADD_FILE | ACE_APPEND_DATA | ACE_ADD_SUBDIRECTORY | ACE_EXECUTE)

/* Bits a preceding DENY ACE may carry and still be reused by chmod. */
#define	OKAY_MASK_BITS \
	(ACE_READ_DATA | ACE_LIST_DIRECTORY | ACE_WRITE_DATA | \
	ACE_ADD_FILE | ACE_APPEND_DATA | ACE_ADD_SUBDIRECTORY | ACE_EXECUTE)

/* All ACE inheritance flags. */
#define	ALL_INHERIT \
	(ACE_FILE_INHERIT_ACE | ACE_DIRECTORY_INHERIT_ACE | \
	ACE_NO_PROPAGATE_INHERIT_ACE | ACE_INHERIT_ONLY_ACE)

/* Bits stripped from inherited ALLOW ACEs when aclinherit is SECURE. */
#define	SECURE_NO_INHERIT	(ACE_WRITE_ACL | ACE_WRITE_OWNER)

#define	OGE_PAD	6		/* traditional owner/group/everyone ACES */
81 
82 static int zfs_ace_can_use(znode_t *zp, ace_t *);
83 
84 static zfs_acl_t *
85 zfs_acl_alloc(int slots)
86 {
87 	zfs_acl_t *aclp;
88 
89 	aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
90 	if (slots != 0) {
91 		aclp->z_acl = kmem_alloc(ZFS_ACL_SIZE(slots), KM_SLEEP);
92 		aclp->z_acl_count = 0;
93 		aclp->z_state = ACL_DATA_ALLOCED;
94 	} else {
95 		aclp->z_state = 0;
96 	}
97 	aclp->z_slots = slots;
98 	return (aclp);
99 }
100 
101 void
102 zfs_acl_free(zfs_acl_t *aclp)
103 {
104 	if (aclp->z_state == ACL_DATA_ALLOCED) {
105 		kmem_free(aclp->z_acl, ZFS_ACL_SIZE(aclp->z_slots));
106 	}
107 	kmem_free(aclp, sizeof (zfs_acl_t));
108 }
109 
110 static uint32_t
111 zfs_v4_to_unix(uint32_t access_mask)
112 {
113 	uint32_t new_mask = 0;
114 
115 	if (access_mask & (ACE_READ_DATA | ACE_LIST_DIRECTORY))
116 		new_mask |= S_IROTH;
117 	if (access_mask & (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_ADD_FILE))
118 		new_mask |= S_IWOTH;
119 	if (access_mask & (ACE_EXECUTE|ACE_READ_NAMED_ATTRS))
120 		new_mask |= S_IXOTH;
121 
122 	return (new_mask);
123 }
124 
125 /*
126  * Convert unix access mask to v4 access mask
127  */
128 static uint32_t
129 zfs_unix_to_v4(uint32_t access_mask)
130 {
131 	uint32_t new_mask = 0;
132 
133 	if (access_mask & 01)
134 		new_mask |= (ACE_EXECUTE);
135 	if (access_mask & 02) {
136 		new_mask |= (ACE_WRITE_DATA);
137 	} if (access_mask & 04) {
138 		new_mask |= ACE_READ_DATA;
139 	}
140 	return (new_mask);
141 }
142 
143 static void
144 zfs_set_ace(ace_t *zacep, uint32_t access_mask, int access_type,
145     uid_t uid, int entry_type)
146 {
147 	zacep->a_access_mask = access_mask;
148 	zacep->a_type = access_type;
149 	zacep->a_who = uid;
150 	zacep->a_flags = entry_type;
151 }
152 
153 static uint64_t
154 zfs_mode_compute(znode_t *zp, zfs_acl_t *aclp)
155 {
156 	int 	i;
157 	int	entry_type;
158 	mode_t	mode = (zp->z_phys->zp_mode &
159 	    (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
160 	mode_t	 seen = 0;
161 	ace_t 	*acep;
162 
163 	for (i = 0, acep = aclp->z_acl;
164 	    i != aclp->z_acl_count; i++, acep++) {
165 		entry_type = (acep->a_flags & 0xf040);
166 		if (entry_type == ACE_OWNER) {
167 			if ((acep->a_access_mask & ACE_READ_DATA) &&
168 			    (!(seen & S_IRUSR))) {
169 				seen |= S_IRUSR;
170 				if (acep->a_type == ALLOW) {
171 					mode |= S_IRUSR;
172 				}
173 			}
174 			if ((acep->a_access_mask & ACE_WRITE_DATA) &&
175 			    (!(seen & S_IWUSR))) {
176 				seen |= S_IWUSR;
177 				if (acep->a_type == ALLOW) {
178 					mode |= S_IWUSR;
179 				}
180 			}
181 			if ((acep->a_access_mask & ACE_EXECUTE) &&
182 			    (!(seen & S_IXUSR))) {
183 				seen |= S_IXUSR;
184 				if (acep->a_type == ALLOW) {
185 					mode |= S_IXUSR;
186 				}
187 			}
188 		} else if (entry_type == OWNING_GROUP) {
189 			if ((acep->a_access_mask & ACE_READ_DATA) &&
190 			    (!(seen & S_IRGRP))) {
191 				seen |= S_IRGRP;
192 				if (acep->a_type == ALLOW) {
193 					mode |= S_IRGRP;
194 				}
195 			}
196 			if ((acep->a_access_mask & ACE_WRITE_DATA) &&
197 			    (!(seen & S_IWGRP))) {
198 				seen |= S_IWGRP;
199 				if (acep->a_type == ALLOW) {
200 					mode |= S_IWGRP;
201 				}
202 			}
203 			if ((acep->a_access_mask & ACE_EXECUTE) &&
204 			    (!(seen & S_IXGRP))) {
205 				seen |= S_IXGRP;
206 				if (acep->a_type == ALLOW) {
207 					mode |= S_IXGRP;
208 				}
209 			}
210 		} else if (entry_type == ACE_EVERYONE) {
211 			if ((acep->a_access_mask & ACE_READ_DATA)) {
212 				if (!(seen & S_IRUSR)) {
213 					seen |= S_IRUSR;
214 					if (acep->a_type == ALLOW) {
215 						mode |= S_IRUSR;
216 					}
217 				}
218 				if (!(seen & S_IRGRP)) {
219 					seen |= S_IRGRP;
220 					if (acep->a_type == ALLOW) {
221 						mode |= S_IRGRP;
222 					}
223 				}
224 				if (!(seen & S_IROTH)) {
225 					seen |= S_IROTH;
226 					if (acep->a_type == ALLOW) {
227 						mode |= S_IROTH;
228 					}
229 				}
230 			}
231 			if ((acep->a_access_mask & ACE_WRITE_DATA)) {
232 				if (!(seen & S_IWUSR)) {
233 					seen |= S_IWUSR;
234 					if (acep->a_type == ALLOW) {
235 						mode |= S_IWUSR;
236 					}
237 				}
238 				if (!(seen & S_IWGRP)) {
239 					seen |= S_IWGRP;
240 					if (acep->a_type == ALLOW) {
241 						mode |= S_IWGRP;
242 					}
243 				}
244 				if (!(seen & S_IWOTH)) {
245 					seen |= S_IWOTH;
246 					if (acep->a_type == ALLOW) {
247 						mode |= S_IWOTH;
248 					}
249 				}
250 			}
251 			if ((acep->a_access_mask & ACE_EXECUTE)) {
252 				if (!(seen & S_IXUSR)) {
253 					seen |= S_IXUSR;
254 					if (acep->a_type == ALLOW) {
255 						mode |= S_IXUSR;
256 					}
257 				}
258 				if (!(seen & S_IXGRP)) {
259 					seen |= S_IXGRP;
260 					if (acep->a_type == ALLOW) {
261 						mode |= S_IXGRP;
262 					}
263 				}
264 				if (!(seen & S_IXOTH)) {
265 					seen |= S_IXOTH;
266 					if (acep->a_type == ALLOW) {
267 						mode |= S_IXOTH;
268 					}
269 				}
270 			}
271 		}
272 	}
273 	return (mode);
274 }
275 
276 static zfs_acl_t *
277 zfs_acl_node_read_internal(znode_t *zp)
278 {
279 	zfs_acl_t	*aclp;
280 
281 	aclp = zfs_acl_alloc(0);
282 	aclp->z_acl_count = zp->z_phys->zp_acl.z_acl_count;
283 	aclp->z_acl = &zp->z_phys->zp_acl.z_ace_data[0];
284 
285 	return (aclp);
286 }
287 
288 /*
289  * Read an external acl object.
290  */
291 zfs_acl_t *
292 zfs_acl_node_read(znode_t *zp)
293 {
294 	uint64_t extacl = zp->z_phys->zp_acl.z_acl_extern_obj;
295 	zfs_acl_t	*aclp;
296 
297 	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
298 
299 	if (zp->z_phys->zp_acl.z_acl_extern_obj == 0)
300 		return (zfs_acl_node_read_internal(zp));
301 
302 	aclp = zfs_acl_alloc(zp->z_phys->zp_acl.z_acl_count);
303 
304 	dmu_read(zp->z_zfsvfs->z_os, extacl, 0,
305 	    ZFS_ACL_SIZE(zp->z_phys->zp_acl.z_acl_count), aclp->z_acl);
306 
307 	aclp->z_acl_count = zp->z_phys->zp_acl.z_acl_count;
308 
309 	return (aclp);
310 }
311 
312 static boolean_t
313 zfs_acl_valid(znode_t *zp, ace_t *uace, int aclcnt, int *inherit)
314 {
315 	ace_t 	*acep;
316 	int i;
317 
318 	*inherit = 0;
319 
320 	if (aclcnt > MAX_ACL_ENTRIES || aclcnt <= 0) {
321 		return (B_FALSE);
322 	}
323 
324 	for (i = 0, acep = uace; i != aclcnt; i++, acep++) {
325 
326 		/*
327 		 * first check type of entry
328 		 */
329 
330 		switch (acep->a_flags & 0xf040) {
331 		case ACE_OWNER:
332 			acep->a_who = -1;
333 			break;
334 		case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
335 		case ACE_IDENTIFIER_GROUP:
336 			if (acep->a_flags & ACE_GROUP) {
337 				acep->a_who = -1;
338 			}
339 			break;
340 		case ACE_EVERYONE:
341 			acep->a_who = -1;
342 			break;
343 		}
344 
345 		/*
346 		 * next check inheritance level flags
347 		 */
348 
349 		if (acep->a_type != ALLOW && acep->a_type != DENY)
350 			return (B_FALSE);
351 
352 		/*
353 		 * Only directories should have inheritance flags.
354 		 */
355 		if (ZTOV(zp)->v_type != VDIR && (acep->a_flags &
356 		    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE|
357 		    ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE))) {
358 			return (B_FALSE);
359 		}
360 
361 		if (acep->a_flags &
362 		    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))
363 			*inherit = 1;
364 
365 		if (acep->a_flags &
366 		    (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) {
367 			if ((acep->a_flags & (ACE_FILE_INHERIT_ACE|
368 			    ACE_DIRECTORY_INHERIT_ACE)) == 0) {
369 				return (B_FALSE);
370 			}
371 		}
372 	}
373 
374 	return (B_TRUE);
375 }
376 /*
377  * common code for setting acl's.
378  *
379  * This function is called from zfs_mode_update, zfs_perm_init, and zfs_setacl.
380  * zfs_setacl passes a non-NULL inherit pointer (ihp) to indicate that it's
381  * already checked the acl and knows whether to inherit.
382  */
383 int
384 zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, dmu_tx_t *tx, int *ihp)
385 {
386 	int 		inherit = 0;
387 	int		error;
388 	znode_phys_t	*zphys = zp->z_phys;
389 	zfs_znode_acl_t	*zacl = &zphys->zp_acl;
390 	uint32_t	acl_phys_size = ZFS_ACL_SIZE(aclp->z_acl_count);
391 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
392 	uint64_t	aoid = zphys->zp_acl.z_acl_extern_obj;
393 
394 	ASSERT(MUTEX_HELD(&zp->z_lock));
395 	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
396 
397 	if (ihp)
398 		inherit = *ihp;		/* already determined by caller */
399 	else if (!zfs_acl_valid(zp, aclp->z_acl,
400 	    aclp->z_acl_count, &inherit)) {
401 		return (EINVAL);
402 	}
403 
404 	dmu_buf_will_dirty(zp->z_dbuf, tx);
405 
406 	/*
407 	 * Will ACL fit internally?
408 	 */
409 	if (aclp->z_acl_count > ACE_SLOT_CNT) {
410 		if (aoid == 0) {
411 			aoid = dmu_object_alloc(zfsvfs->z_os,
412 			    DMU_OT_ACL, acl_phys_size, DMU_OT_NONE, 0, tx);
413 		} else {
414 			(void) dmu_object_set_blocksize(zfsvfs->z_os, aoid,
415 			    acl_phys_size, 0, tx);
416 		}
417 		zphys->zp_acl.z_acl_extern_obj = aoid;
418 		zphys->zp_acl.z_acl_count = aclp->z_acl_count;
419 		dmu_write(zfsvfs->z_os, aoid, 0,
420 		    acl_phys_size, aclp->z_acl, tx);
421 	} else {
422 		/*
423 		 * Migrating back embedded?
424 		 */
425 		if (zphys->zp_acl.z_acl_extern_obj) {
426 			error = dmu_object_free(zfsvfs->z_os,
427 				zp->z_phys->zp_acl.z_acl_extern_obj, tx);
428 			if (error)
429 				return (error);
430 			zphys->zp_acl.z_acl_extern_obj = 0;
431 		}
432 		bcopy(aclp->z_acl, zacl->z_ace_data,
433 		    aclp->z_acl_count * sizeof (ace_t));
434 		zacl->z_acl_count = aclp->z_acl_count;
435 	}
436 
437 	zp->z_phys->zp_flags &= ~(ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE);
438 	if (inherit) {
439 		zp->z_phys->zp_flags |= ZFS_INHERIT_ACE;
440 	} else if (ace_trivial(zacl->z_ace_data, zacl->z_acl_count) == 0) {
441 		zp->z_phys->zp_flags |= ZFS_ACL_TRIVIAL;
442 	}
443 
444 	zphys->zp_mode = zfs_mode_compute(zp, aclp);
445 	zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
446 
447 	return (0);
448 }
449 
450 /*
451  * Create space for slots_needed ACEs to be append
452  * to aclp.
453  */
454 static void
455 zfs_acl_append(zfs_acl_t *aclp, int slots_needed)
456 {
457 	ace_t	*newacep;
458 	ace_t	*oldaclp;
459 	int	slot_cnt;
460 	int 	slots_left = aclp->z_slots - aclp->z_acl_count;
461 
462 	if (aclp->z_state == ACL_DATA_ALLOCED)
463 		ASSERT(aclp->z_slots >= aclp->z_acl_count);
464 	if (slots_left < slots_needed || aclp->z_state != ACL_DATA_ALLOCED) {
465 		slot_cnt = aclp->z_slots +  1 + (slots_needed - slots_left);
466 		newacep = kmem_alloc(ZFS_ACL_SIZE(slot_cnt), KM_SLEEP);
467 		bcopy(aclp->z_acl, newacep,
468 		    ZFS_ACL_SIZE(aclp->z_acl_count));
469 		oldaclp = aclp->z_acl;
470 		if (aclp->z_state == ACL_DATA_ALLOCED)
471 			kmem_free(oldaclp, ZFS_ACL_SIZE(aclp->z_slots));
472 		aclp->z_acl = newacep;
473 		aclp->z_slots = slot_cnt;
474 		aclp->z_state = ACL_DATA_ALLOCED;
475 	}
476 }
477 
478 /*
479  * Remove "slot" ACE from aclp
480  */
481 static void
482 zfs_ace_remove(zfs_acl_t *aclp, int slot)
483 {
484 	if (aclp->z_acl_count > 1) {
485 		(void) memmove(&aclp->z_acl[slot],
486 		    &aclp->z_acl[slot +1], sizeof (ace_t) *
487 		    (--aclp->z_acl_count - slot));
488 	} else
489 		aclp->z_acl_count--;
490 }
491 
492 /*
493  * Update access mask for prepended ACE
494  *
495  * This applies the "groupmask" value for aclmode property.
496  */
497 static void
498 zfs_acl_prepend_fixup(ace_t *acep, ace_t *origacep, mode_t mode, uid_t owner)
499 {
500 
501 	int	rmask, wmask, xmask;
502 	int	user_ace;
503 
504 	user_ace = (!(acep->a_flags &
505 	    (ACE_OWNER|ACE_GROUP|ACE_IDENTIFIER_GROUP)));
506 
507 	if (user_ace && (acep->a_who == owner)) {
508 		rmask = S_IRUSR;
509 		wmask = S_IWUSR;
510 		xmask = S_IXUSR;
511 	} else {
512 		rmask = S_IRGRP;
513 		wmask = S_IWGRP;
514 		xmask = S_IXGRP;
515 	}
516 
517 	if (origacep->a_access_mask & ACE_READ_DATA) {
518 		if (mode & rmask)
519 			acep->a_access_mask &= ~ACE_READ_DATA;
520 		else
521 			acep->a_access_mask |= ACE_READ_DATA;
522 	}
523 
524 	if (origacep->a_access_mask & ACE_WRITE_DATA) {
525 		if (mode & wmask)
526 			acep->a_access_mask &= ~ACE_WRITE_DATA;
527 		else
528 			acep->a_access_mask |= ACE_WRITE_DATA;
529 	}
530 
531 	if (origacep->a_access_mask & ACE_APPEND_DATA) {
532 		if (mode & wmask)
533 			acep->a_access_mask &= ~ACE_APPEND_DATA;
534 		else
535 			acep->a_access_mask |= ACE_APPEND_DATA;
536 	}
537 
538 	if (origacep->a_access_mask & ACE_EXECUTE) {
539 		if (mode & xmask)
540 			acep->a_access_mask &= ~ACE_EXECUTE;
541 		else
542 			acep->a_access_mask |= ACE_EXECUTE;
543 	}
544 }
545 
546 /*
547  * Apply mode to canonical six ACEs.
548  */
549 static void
550 zfs_acl_fixup_canonical_six(zfs_acl_t *aclp, mode_t mode)
551 {
552 	int	cnt;
553 	ace_t	*acep;
554 
555 	cnt = aclp->z_acl_count -1;
556 	acep = aclp->z_acl;
557 
558 	/*
559 	 * Fixup final ACEs to match the mode
560 	 */
561 
562 	ASSERT(cnt >= 5);
563 	adjust_ace_pair(&acep[cnt - 1], mode);	/* everyone@ */
564 	adjust_ace_pair(&acep[cnt - 3], (mode & 0070) >> 3);	/* group@ */
565 	adjust_ace_pair(&acep[cnt - 5], (mode & 0700) >> 6);	/* owner@ */
566 }
567 
568 
569 static int
570 zfs_acl_ace_match(ace_t *acep, int allow_deny, int type, int mask)
571 {
572 	return (acep->a_access_mask == mask && acep->a_type == allow_deny &&
573 	    ((acep->a_flags & 0xf040) == type));
574 }
575 
576 /*
577  * Can prepended ACE be reused?
578  */
579 static int
580 zfs_reuse_deny(ace_t *acep, int i)
581 {
582 	int okay_masks;
583 
584 	if (i < 1)
585 		return (B_FALSE);
586 
587 	if (acep[i-1].a_type != DENY)
588 		return (B_FALSE);
589 
590 	if (acep[i-1].a_flags != (acep[i].a_flags & ACE_IDENTIFIER_GROUP))
591 		return (B_FALSE);
592 
593 	okay_masks = (acep[i].a_access_mask & OKAY_MASK_BITS);
594 
595 	if (acep[i-1].a_access_mask & ~okay_masks)
596 		return (B_FALSE);
597 
598 	return (B_TRUE);
599 }
600 
601 /*
602  * Create space to prepend an ACE
603  */
604 static void
605 zfs_acl_prepend(zfs_acl_t *aclp, int i)
606 {
607 	ace_t	*oldaclp = NULL;
608 	ace_t	*to, *from;
609 	int	slots_left = aclp->z_slots - aclp->z_acl_count;
610 	int	oldslots;
611 	int	need_free = 0;
612 
613 	if (aclp->z_state == ACL_DATA_ALLOCED)
614 		ASSERT(aclp->z_slots >= aclp->z_acl_count);
615 
616 	if (slots_left == 0 || aclp->z_state != ACL_DATA_ALLOCED) {
617 
618 		to = kmem_alloc(ZFS_ACL_SIZE(aclp->z_acl_count +
619 		    OGE_PAD), KM_SLEEP);
620 		if (aclp->z_state == ACL_DATA_ALLOCED)
621 			need_free++;
622 		from = aclp->z_acl;
623 		oldaclp = aclp->z_acl;
624 		(void) memmove(to, from,
625 		    sizeof (ace_t) * aclp->z_acl_count);
626 		aclp->z_state = ACL_DATA_ALLOCED;
627 	} else {
628 		from = aclp->z_acl;
629 		to = aclp->z_acl;
630 	}
631 
632 
633 	(void) memmove(&to[i + 1], &from[i],
634 	    sizeof (ace_t) * (aclp->z_acl_count - i));
635 
636 	if (oldaclp) {
637 		aclp->z_acl = to;
638 		oldslots = aclp->z_slots;
639 		aclp->z_slots = aclp->z_acl_count + OGE_PAD;
640 		if (need_free)
641 			kmem_free(oldaclp, ZFS_ACL_SIZE(oldslots));
642 	}
643 
644 }
645 
646 /*
647  * Prepend deny ACE
648  */
649 static void
650 zfs_acl_prepend_deny(znode_t *zp, zfs_acl_t *aclp, int i,
651     mode_t mode)
652 {
653 	ace_t	*acep;
654 
655 	zfs_acl_prepend(aclp, i);
656 
657 	acep = aclp->z_acl;
658 	zfs_set_ace(&acep[i], 0, DENY, acep[i + 1].a_who,
659 	    (acep[i + 1].a_flags & 0xf040));
660 	zfs_acl_prepend_fixup(&acep[i], &acep[i+1], mode, zp->z_phys->zp_uid);
661 	aclp->z_acl_count++;
662 }
663 
664 /*
665  * Split an inherited ACE into inherit_only ACE
666  * and original ACE with inheritance flags stripped off.
667  */
668 static void
669 zfs_acl_split_ace(zfs_acl_t *aclp, int i)
670 {
671 	ace_t *acep = aclp->z_acl;
672 
673 	zfs_acl_prepend(aclp, i);
674 	acep = aclp->z_acl;
675 	acep[i] = acep[i + 1];
676 	acep[i].a_flags |= ACE_INHERIT_ONLY_ACE;
677 	acep[i + 1].a_flags &= ~ALL_INHERIT;
678 	aclp->z_acl_count++;
679 }
680 
681 /*
682  * Are ACES started at index i, the canonical six ACES?
683  */
684 static int
685 zfs_have_canonical_six(zfs_acl_t *aclp, int i)
686 {
687 	ace_t *acep = aclp->z_acl;
688 
689 	if ((zfs_acl_ace_match(&acep[i],
690 	    DENY, ACE_OWNER, 0) &&
691 	    zfs_acl_ace_match(&acep[i + 1], ALLOW, ACE_OWNER,
692 	    OWNER_ALLOW_MASK) && zfs_acl_ace_match(&acep[i + 2],
693 	    DENY, OWNING_GROUP, 0) && zfs_acl_ace_match(&acep[i + 3],
694 	    ALLOW, OWNING_GROUP, 0) && zfs_acl_ace_match(&acep[i + 4],
695 	    DENY, ACE_EVERYONE, EVERYONE_DENY_MASK) &&
696 	    zfs_acl_ace_match(&acep[i + 5], ALLOW, ACE_EVERYONE,
697 	    EVERYONE_ALLOW_MASK))) {
698 		return (1);
699 	} else {
700 		return (0);
701 	}
702 }
703 
704 /*
705  * Apply step 1g, to group entries
706  *
707  * Need to deal with corner case where group may have
708  * greater permissions than owner.  If so then limit
709  * group permissions, based on what extra permissions
710  * group has.
711  */
712 static void
713 zfs_fixup_group_entries(ace_t *acep, mode_t mode)
714 {
715 	mode_t extramode = (mode >> 3) & 07;
716 	mode_t ownermode = (mode >> 6);
717 
718 	if (acep[0].a_flags & ACE_IDENTIFIER_GROUP) {
719 
720 		extramode &= ~ownermode;
721 
722 		if (extramode) {
723 			if (extramode & 04) {
724 				acep[0].a_access_mask &= ~ACE_READ_DATA;
725 				acep[1].a_access_mask &= ~ACE_READ_DATA;
726 			}
727 			if (extramode & 02) {
728 				acep[0].a_access_mask &=
729 				    ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
730 				acep[1].a_access_mask &=
731 				    ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
732 			}
733 			if (extramode & 01) {
734 				acep[0].a_access_mask &= ~ACE_EXECUTE;
735 				acep[1].a_access_mask &= ~ACE_EXECUTE;
736 			}
737 		}
738 	}
739 }
740 
741 /*
742  * Apply the chmod algorithm as described
743  * in PSARC/2002/240
744  */
745 static int
746 zfs_acl_chmod(znode_t *zp, uint64_t mode, zfs_acl_t *aclp,
747     dmu_tx_t *tx)
748 {
749 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
750 	ace_t 		*acep;
751 	int 		i;
752 	int		error;
753 	int 		entry_type;
754 	int 		reuse_deny;
755 	int 		need_canonical_six = 1;
756 	int		inherit = 0;
757 	int		iflags;
758 
759 	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
760 	ASSERT(MUTEX_HELD(&zp->z_lock));
761 
762 	i = 0;
763 	while (i < aclp->z_acl_count) {
764 		acep = aclp->z_acl;
765 		entry_type = (acep[i].a_flags & 0xf040);
766 		iflags = (acep[i].a_flags & ALL_INHERIT);
767 
768 		if ((acep[i].a_type != ALLOW && acep[i].a_type != DENY) ||
769 		    (iflags & ACE_INHERIT_ONLY_ACE)) {
770 			i++;
771 			if (iflags)
772 				inherit = 1;
773 			continue;
774 		}
775 
776 
777 		if (zfsvfs->z_acl_mode == DISCARD) {
778 			zfs_ace_remove(aclp, i);
779 			continue;
780 		}
781 
782 		/*
783 		 * Need to split ace into two?
784 		 */
785 		if ((iflags & (ACE_FILE_INHERIT_ACE|
786 		    ACE_DIRECTORY_INHERIT_ACE)) &&
787 		    (!(iflags & ACE_INHERIT_ONLY_ACE))) {
788 			zfs_acl_split_ace(aclp, i);
789 			i++;
790 			inherit = 1;
791 			continue;
792 		}
793 
794 		if (entry_type == ACE_OWNER || entry_type == ACE_EVERYONE ||
795 		    (entry_type == OWNING_GROUP)) {
796 			acep[i].a_access_mask &= ~OGE_CLEAR;
797 			i++;
798 			continue;
799 
800 		} else {
801 			if (acep[i].a_type == ALLOW) {
802 
803 				/*
804 				 * Check preceding ACE if any, to see
805 				 * if we need to prepend a DENY ACE.
806 				 * This is only applicable when the acl_mode
807 				 * property == groupmask.
808 				 */
809 				if (zfsvfs->z_acl_mode == GROUPMASK) {
810 
811 					reuse_deny = zfs_reuse_deny(acep, i);
812 
813 					if (reuse_deny == B_FALSE) {
814 						zfs_acl_prepend_deny(zp, aclp,
815 						    i, mode);
816 						i++;
817 						acep = aclp->z_acl;
818 					} else {
819 						zfs_acl_prepend_fixup(
820 						    &acep[i - 1],
821 						    &acep[i], mode,
822 						    zp->z_phys->zp_uid);
823 					}
824 					zfs_fixup_group_entries(&acep[i - 1],
825 					    mode);
826 				}
827 			}
828 			i++;
829 		}
830 	}
831 
832 	/*
833 	 * Check out last six aces, if we have six.
834 	 */
835 
836 	if (aclp->z_acl_count >= 6) {
837 		i = aclp->z_acl_count - 6;
838 
839 		if (zfs_have_canonical_six(aclp, i)) {
840 			need_canonical_six = 0;
841 		}
842 	}
843 
844 	if (need_canonical_six) {
845 
846 		zfs_acl_append(aclp, 6);
847 		i = aclp->z_acl_count;
848 		acep = aclp->z_acl;
849 		zfs_set_ace(&acep[i++], 0, DENY, -1, ACE_OWNER);
850 		zfs_set_ace(&acep[i++], OWNER_ALLOW_MASK, ALLOW, -1, ACE_OWNER);
851 		zfs_set_ace(&acep[i++], 0, DENY, -1, OWNING_GROUP);
852 		zfs_set_ace(&acep[i++], 0, ALLOW, -1, OWNING_GROUP);
853 		zfs_set_ace(&acep[i++], EVERYONE_DENY_MASK,
854 		    DENY, -1, ACE_EVERYONE);
855 		zfs_set_ace(&acep[i++], EVERYONE_ALLOW_MASK,
856 		    ALLOW, -1, ACE_EVERYONE);
857 		aclp->z_acl_count += 6;
858 	}
859 
860 	zfs_acl_fixup_canonical_six(aclp, mode);
861 
862 	zp->z_phys->zp_mode = mode;
863 	error = zfs_aclset_common(zp, aclp, tx, &inherit);
864 	return (error);
865 }
866 
867 
868 int
869 zfs_acl_chmod_setattr(znode_t *zp, uint64_t mode, dmu_tx_t *tx)
870 {
871 	zfs_acl_t *aclp;
872 	int error;
873 
874 	ASSERT(MUTEX_HELD(&zp->z_lock));
875 	mutex_enter(&zp->z_acl_lock);
876 	aclp = zfs_acl_node_read(zp);
877 	error = zfs_acl_chmod(zp, mode, aclp, tx);
878 	mutex_exit(&zp->z_acl_lock);
879 	zfs_acl_free(aclp);
880 	return (error);
881 }
882 
883 /*
884  * strip off write_owner and write_acl
885  */
886 static void
887 zfs_securemode_update(zfsvfs_t *zfsvfs, ace_t *acep)
888 {
889 	if ((zfsvfs->z_acl_inherit == SECURE) &&
890 	    acep->a_type == ALLOW)
891 		acep->a_access_mask &= ~SECURE_NO_INHERIT;
892 }
893 
894 /*
895  * inherit inheritable ACEs from parent
896  */
897 static zfs_acl_t *
898 zfs_acl_inherit(znode_t *zp, zfs_acl_t *paclp)
899 {
900 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
901 	ace_t 		*pacep;
902 	ace_t		*acep;
903 	int 		ace_cnt = 0;
904 	int		pace_cnt;
905 	int 		i, j;
906 	zfs_acl_t	*aclp = NULL;
907 
908 	i = j = 0;
909 	pace_cnt = paclp->z_acl_count;
910 	pacep = paclp->z_acl;
911 	if (zfsvfs->z_acl_inherit != DISCARD) {
912 		for (i = 0; i != pace_cnt; i++) {
913 
914 			if (zfsvfs->z_acl_inherit == NOALLOW &&
915 			    pacep[i].a_type == ALLOW)
916 				continue;
917 
918 			if (zfs_ace_can_use(zp, &pacep[i])) {
919 				ace_cnt++;
920 				if (!(pacep[i].a_flags &
921 				    ACE_NO_PROPAGATE_INHERIT_ACE))
922 					ace_cnt++;
923 			}
924 		}
925 	}
926 
927 	aclp = zfs_acl_alloc(ace_cnt + OGE_PAD);
928 	if (ace_cnt && zfsvfs->z_acl_inherit != DISCARD) {
929 		acep = aclp->z_acl;
930 		pacep = paclp->z_acl;
931 		for (i = 0; i != pace_cnt; i++) {
932 
933 			if (zfsvfs->z_acl_inherit == NOALLOW &&
934 			    pacep[i].a_type == ALLOW)
935 				continue;
936 
937 			if (zfs_ace_can_use(zp, &pacep[i])) {
938 				/*
939 				 * Now create entry for inherited ace
940 				 */
941 				acep[j] = pacep[i];
942 
943 				if (pacep[i].a_flags &
944 				    ACE_NO_PROPAGATE_INHERIT_ACE) {
945 					acep[j].a_flags &= ~ALL_INHERIT;
946 					j++;
947 					continue;
948 				}
949 
950 				if (pacep[i].a_type != ALLOW &&
951 				    pacep[i].a_type != DENY) {
952 					zfs_securemode_update(zfsvfs, &acep[j]);
953 					j++;
954 					continue;
955 				}
956 
957 				if (ZTOV(zp)->v_type != VDIR) {
958 					acep[j].a_flags &= ~ALL_INHERIT;
959 					zfs_securemode_update(zfsvfs, &acep[j]);
960 					j++;
961 					continue;
962 				}
963 
964 				ASSERT(ZTOV(zp)->v_type == VDIR);
965 
966 				/*
967 				 * If we are inheriting an ACE targeted for
968 				 * only files, then make sure inherit_only
969 				 * is on for future propagation.
970 				 */
971 				if ((acep[j].a_flags & (ACE_FILE_INHERIT_ACE |
972 				    ACE_DIRECTORY_INHERIT_ACE)) ==
973 				    ACE_FILE_INHERIT_ACE) {
974 					acep[j].a_flags |= ACE_INHERIT_ONLY_ACE;
975 				} else {
976 					acep[j].a_flags &=
977 					    ~ACE_INHERIT_ONLY_ACE;
978 				}
979 
980 				zfs_securemode_update(zfsvfs, &acep[j]);
981 				j++;
982 			}
983 		}
984 	}
985 	aclp->z_acl_count = j;
986 	ASSERT(aclp->z_slots >= aclp->z_acl_count);
987 
988 	return (aclp);
989 }
990 
991 /*
992  * Create file system object initial permissions
993  * including inheritable ACEs.
994  */
995 void
996 zfs_perm_init(znode_t *zp, znode_t *parent, int flag,
997     vattr_t *vap, dmu_tx_t *tx, cred_t *cr)
998 {
999 	uint64_t	mode;
1000 	uid_t		uid;
1001 	gid_t		gid;
1002 	int		error;
1003 	int		pull_down;
1004 	zfs_acl_t	*aclp, *paclp;
1005 
1006 	mode = MAKEIMODE(vap->va_type, vap->va_mode);
1007 
1008 	/*
1009 	 * Determine uid and gid.
1010 	 */
1011 	if ((flag & (IS_ROOT_NODE | IS_REPLAY)) ||
1012 	    ((flag & IS_XATTR) && (vap->va_type == VDIR))) {
1013 		uid = vap->va_uid;
1014 		gid = vap->va_gid;
1015 	} else {
1016 		uid = crgetuid(cr);
1017 		if ((vap->va_mask & AT_GID) &&
1018 		    ((vap->va_gid == parent->z_phys->zp_gid) ||
1019 		    groupmember(vap->va_gid, cr) ||
1020 		    secpolicy_vnode_create_gid(cr)))
1021 			gid = vap->va_gid;
1022 		else
1023 			gid = (parent->z_phys->zp_mode & S_ISGID) ?
1024 			    parent->z_phys->zp_gid : crgetgid(cr);
1025 	}
1026 
1027 	/*
1028 	 * If we're creating a directory, and the parent directory has the
1029 	 * set-GID bit set, set in on the new directory.
1030 	 * Otherwise, if the user is neither privileged nor a member of the
1031 	 * file's new group, clear the file's set-GID bit.
1032 	 */
1033 
1034 	if ((parent->z_phys->zp_mode & S_ISGID) && (vap->va_type == VDIR))
1035 		mode |= S_ISGID;
1036 	else {
1037 		if ((mode & S_ISGID) &&
1038 		    secpolicy_vnode_setids_setgids(cr, gid) != 0)
1039 			mode &= ~S_ISGID;
1040 	}
1041 
1042 	zp->z_phys->zp_uid = uid;
1043 	zp->z_phys->zp_gid = gid;
1044 	zp->z_phys->zp_mode = mode;
1045 
1046 	mutex_enter(&parent->z_lock);
1047 	pull_down = (parent->z_phys->zp_flags & ZFS_INHERIT_ACE);
1048 	if (pull_down) {
1049 		mutex_enter(&parent->z_acl_lock);
1050 		paclp = zfs_acl_node_read(parent);
1051 		mutex_exit(&parent->z_acl_lock);
1052 		aclp = zfs_acl_inherit(zp, paclp);
1053 		zfs_acl_free(paclp);
1054 	} else {
1055 		aclp = zfs_acl_alloc(6);
1056 	}
1057 	mutex_exit(&parent->z_lock);
1058 	mutex_enter(&zp->z_lock);
1059 	mutex_enter(&zp->z_acl_lock);
1060 	error = zfs_acl_chmod(zp, mode, aclp, tx);
1061 	mutex_exit(&zp->z_lock);
1062 	mutex_exit(&zp->z_acl_lock);
1063 	ASSERT3U(error, ==, 0);
1064 	zfs_acl_free(aclp);
1065 }
1066 
1067 /*
1068  * Should ACE be inherited?
1069  */
1070 static int
1071 zfs_ace_can_use(znode_t *zp, ace_t *acep)
1072 {
1073 	int vtype = ZTOV(zp)->v_type;
1074 
1075 	int	iflags = (acep->a_flags & 0xf);
1076 
1077 	if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
1078 		return (1);
1079 	else if (iflags & ACE_FILE_INHERIT_ACE)
1080 		return (!((vtype == VDIR) &&
1081 		    (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
1082 	return (0);
1083 }
1084 
1085 /*
1086  * Retrieve a files ACL
1087  */
1088 int
1089 zfs_getacl(znode_t *zp, vsecattr_t  *vsecp, cred_t *cr)
1090 {
1091 	zfs_acl_t	*aclp;
1092 	ulong_t		mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
1093 	int		error;
1094 
1095 	if (error = zfs_zaccess(zp, ACE_READ_ACL, cr)) {
1096 		/*
1097 		 * If owner of file then allow reading of the
1098 		 * ACL.
1099 		 */
1100 		if (crgetuid(cr) != zp->z_phys->zp_uid)
1101 			return (error);
1102 	}
1103 
1104 	if (mask == 0)
1105 		return (ENOSYS);
1106 
1107 	mutex_enter(&zp->z_acl_lock);
1108 
1109 	aclp = zfs_acl_node_read(zp);
1110 
1111 	if (mask & VSA_ACECNT) {
1112 		vsecp->vsa_aclcnt = aclp->z_acl_count;
1113 	}
1114 
1115 	if (mask & VSA_ACE) {
1116 		vsecp->vsa_aclentp = kmem_alloc(aclp->z_acl_count *
1117 		    sizeof (ace_t), KM_SLEEP);
1118 		bcopy(aclp->z_acl, vsecp->vsa_aclentp,
1119 		    aclp->z_acl_count * sizeof (ace_t));
1120 	}
1121 
1122 	mutex_exit(&zp->z_acl_lock);
1123 
1124 	zfs_acl_free(aclp);
1125 
1126 	return (0);
1127 }
1128 
1129 /*
1130  * Set a files ACL
1131  */
1132 int
1133 zfs_setacl(znode_t *zp, vsecattr_t *vsecp, cred_t *cr)
1134 {
1135 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
1136 	zilog_t		*zilog = zfsvfs->z_log;
1137 	ace_t		*acep = vsecp->vsa_aclentp;
1138 	int		aclcnt = vsecp->vsa_aclcnt;
1139 	ulong_t		mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
1140 	dmu_tx_t	*tx;
1141 	int		error;
1142 	int		inherit;
1143 	zfs_acl_t	*aclp;
1144 	uint64_t	seq = 0;
1145 
1146 	if (mask == 0)
1147 		return (EINVAL);
1148 
1149 	if (!zfs_acl_valid(zp, acep, aclcnt, &inherit))
1150 		return (EINVAL);
1151 top:
1152 	error = zfs_zaccess_v4_perm(zp, ACE_WRITE_ACL, cr);
1153 	if (error == EACCES || error == ACCESS_UNDETERMINED) {
1154 		if ((error = secpolicy_vnode_setdac(cr,
1155 		    zp->z_phys->zp_uid)) != 0) {
1156 			return (error);
1157 		}
1158 	} else if (error) {
1159 		return (error == EROFS ? error : EPERM);
1160 	}
1161 
1162 	mutex_enter(&zp->z_lock);
1163 	mutex_enter(&zp->z_acl_lock);
1164 
1165 	tx = dmu_tx_create(zfsvfs->z_os);
1166 	dmu_tx_hold_bonus(tx, zp->z_id);
1167 
1168 	if (zp->z_phys->zp_acl.z_acl_extern_obj) {
1169 		dmu_tx_hold_write(tx, zp->z_phys->zp_acl.z_acl_extern_obj,
1170 		    0, ZFS_ACL_SIZE(aclcnt));
1171 	} else if (aclcnt > ACE_SLOT_CNT) {
1172 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, ZFS_ACL_SIZE(aclcnt));
1173 	}
1174 
1175 	error = dmu_tx_assign(tx, zfsvfs->z_assign);
1176 	if (error) {
1177 		dmu_tx_abort(tx);
1178 
1179 		mutex_exit(&zp->z_acl_lock);
1180 		mutex_exit(&zp->z_lock);
1181 
1182 		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
1183 			txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
1184 			goto top;
1185 		}
1186 		return (error);
1187 	}
1188 
1189 	aclp = zfs_acl_alloc(aclcnt);
1190 	bcopy(acep, aclp->z_acl, sizeof (ace_t) * aclcnt);
1191 	aclp->z_acl_count = aclcnt;
1192 	error = zfs_aclset_common(zp, aclp, tx, &inherit);
1193 	ASSERT(error == 0);
1194 
1195 	zfs_acl_free(aclp);
1196 	seq = zfs_log_acl(zilog, tx, TX_ACL, zp, aclcnt, acep);
1197 	dmu_tx_commit(tx);
1198 done:
1199 	mutex_exit(&zp->z_acl_lock);
1200 	mutex_exit(&zp->z_lock);
1201 
1202 	zil_commit(zilog, seq, 0);
1203 
1204 	return (error);
1205 }
1206 
1207 static int
1208 zfs_ace_access(ace_t *zacep, int mode_wanted, int *working_mode)
1209 {
1210 	if ((*working_mode & mode_wanted) == mode_wanted) {
1211 		return (0);
1212 	}
1213 
1214 	if (zacep->a_access_mask & mode_wanted) {
1215 		if (zacep->a_type == ALLOW) {
1216 			*working_mode |= (mode_wanted & zacep->a_access_mask);
1217 			if ((*working_mode & mode_wanted) == mode_wanted)
1218 				return (0);
1219 		} else if (zacep->a_type == DENY) {
1220 			return (EACCES);
1221 		}
1222 	}
1223 
1224 	/*
1225 	 * haven't been specifcally denied at this point
1226 	 * so return UNDETERMINED.
1227 	 */
1228 
1229 	return (ACCESS_UNDETERMINED);
1230 }
1231 
1232 
1233 static int
1234 zfs_zaccess_common(znode_t *zp, int v4_mode, int *working_mode, cred_t *cr)
1235 {
1236 	zfs_acl_t	*aclp;
1237 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
1238 	ace_t		*zacep;
1239 	gid_t		gid;
1240 	int		mode_wanted = v4_mode;
1241 	int		cnt;
1242 	int		i;
1243 	int		access_deny = ACCESS_UNDETERMINED;
1244 	uint_t		entry_type;
1245 	uid_t		uid = crgetuid(cr);
1246 
1247 	*working_mode = 0;
1248 
1249 	if (zfsvfs->z_assign >= TXG_INITIAL)		/* ZIL replay */
1250 		return (0);
1251 
1252 	if ((v4_mode & WRITE_MASK) &&
1253 	    (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
1254 	    (!IS_DEVVP(ZTOV(zp)))) {
1255 		return (EROFS);
1256 	}
1257 
1258 	mutex_enter(&zp->z_acl_lock);
1259 
1260 	aclp = zfs_acl_node_read(zp);
1261 
1262 	zacep = aclp->z_acl;
1263 	cnt = aclp->z_acl_count;
1264 
1265 	for (i = 0; i != cnt; i++) {
1266 
1267 		if (zacep[i].a_flags & ACE_INHERIT_ONLY_ACE)
1268 			continue;
1269 
1270 		entry_type = (zacep[i].a_flags & 0xf040);
1271 		switch (entry_type) {
1272 		case ACE_OWNER:
1273 			if (uid == zp->z_phys->zp_uid) {
1274 				access_deny = zfs_ace_access(&zacep[i],
1275 				    mode_wanted, working_mode);
1276 			}
1277 			break;
1278 		case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
1279 		case ACE_IDENTIFIER_GROUP:
1280 			/*
1281 			 * Owning group gid is in znode not ACL
1282 			 */
1283 			if (entry_type == (ACE_IDENTIFIER_GROUP | ACE_GROUP))
1284 				gid = zp->z_phys->zp_gid;
1285 			else
1286 				gid = zacep[i].a_who;
1287 
1288 			if (groupmember(gid, cr)) {
1289 				access_deny = zfs_ace_access(&zacep[i],
1290 				    mode_wanted, working_mode);
1291 			}
1292 			break;
1293 		case ACE_EVERYONE:
1294 			access_deny = zfs_ace_access(&zacep[i],
1295 			    mode_wanted, working_mode);
1296 			break;
1297 
1298 		/* USER Entry */
1299 		default:
1300 			if (entry_type == 0) {
1301 				if (uid == zacep[i].a_who) {
1302 					access_deny = zfs_ace_access(&zacep[i],
1303 					    mode_wanted, working_mode);
1304 				}
1305 				break;
1306 			}
1307 			zfs_acl_free(aclp);
1308 			mutex_exit(&zp->z_acl_lock);
1309 			return (EIO);
1310 		}
1311 
1312 		if (access_deny != ACCESS_UNDETERMINED)
1313 			break;
1314 
1315 	}
1316 
1317 	mutex_exit(&zp->z_acl_lock);
1318 	zfs_acl_free(aclp);
1319 
1320 	return (access_deny);
1321 }
1322 
1323 
1324 /*
1325  * Determine whether Access should be granted/denied, invoking least
1326  * priv subsytem when a deny is determined.
1327  */
1328 int
1329 zfs_zaccess(znode_t *zp, int mode, cred_t *cr)
1330 {
1331 	int	working_mode = 0;
1332 	int	error;
1333 	int	is_attr;
1334 	znode_t	*xzp;
1335 	znode_t *check_zp = zp;
1336 
1337 	is_attr = ((zp->z_phys->zp_flags & ZFS_XATTR) &&
1338 	    (ZTOV(zp)->v_type == VDIR));
1339 
1340 	/*
1341 	 * If attribute then validate against base file
1342 	 */
1343 	if (is_attr) {
1344 		if ((error = zfs_zget(zp->z_zfsvfs,
1345 		    zp->z_phys->zp_parent, &xzp)) != 0)	{
1346 			return (error);
1347 		}
1348 		check_zp = xzp;
1349 		/*
1350 		 * fixup mode to map to xattr perms
1351 		 */
1352 
1353 		if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
1354 			mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
1355 			mode |= ACE_WRITE_NAMED_ATTRS;
1356 		}
1357 
1358 		if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
1359 			mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
1360 			mode |= ACE_READ_NAMED_ATTRS;
1361 		}
1362 	}
1363 
1364 	error = zfs_zaccess_common(check_zp, mode, &working_mode, cr);
1365 
1366 	if (error == EROFS) {
1367 		if (is_attr)
1368 			VN_RELE(ZTOV(xzp));
1369 		return (error);
1370 	}
1371 
1372 	if (error || (working_mode != mode)) {
1373 		error = secpolicy_vnode_access(cr, ZTOV(check_zp),
1374 		    check_zp->z_phys->zp_uid, ~zfs_v4_to_unix(working_mode));
1375 	}
1376 
1377 	if (is_attr)
1378 		VN_RELE(ZTOV(xzp));
1379 
1380 	return (error);
1381 }
1382 
1383 /*
1384  * Special zaccess function to check for special nfsv4 perm.
1385  * doesn't call secpolicy_vnode_access() for failure, since that
1386  * would probably be the wrong policy function to call.
1387  * instead its up to the caller to handle that situation.
1388  */
1389 
1390 int
1391 zfs_zaccess_v4_perm(znode_t *zp, int mode, cred_t *cr)
1392 {
1393 	int working_mode = 0;
1394 	return (zfs_zaccess_common(zp, mode, &working_mode, cr));
1395 }
1396 
1397 /*
1398  * Translate tradition unix VREAD/VWRITE/VEXEC mode into
1399  * native ACL format and call zfs_zaccess()
1400  */
1401 int
1402 zfs_zaccess_rwx(znode_t *zp, mode_t mode, cred_t *cr)
1403 {
1404 	int v4_mode = zfs_unix_to_v4(mode >> 6);
1405 
1406 	return (zfs_zaccess(zp, v4_mode, cr));
1407 }
1408 
1409 /*
1410  * Determine whether Access should be granted/deny, without
1411  * consulting least priv subsystem.
1412  *
1413  *
1414  * The following chart is the recommended NFSv4 enforcement for
1415  * ability to delete an object.
1416  *
1417  *      -------------------------------------------------------
1418  *      |   Parent Dir  |           Target Object Permissions |
1419  *      |  permissions  |                                     |
1420  *      -------------------------------------------------------
1421  *      |               | ACL Allows | ACL Denies| Delete     |
1422  *      |               |  Delete    |  Delete   | unspecified|
1423  *      -------------------------------------------------------
1424  *      |  ACL Allows   | Permit     | Permit    | Permit     |
1425  *      |  DELETE_CHILD |                                     |
1426  *      -------------------------------------------------------
1427  *      |  ACL Denies   | Permit     | Deny      | Deny       |
1428  *      |  DELETE_CHILD |            |           |            |
1429  *      -------------------------------------------------------
1430  *      | ACL specifies |            |           |            |
1431  *      | only allow    | Permit     | Permit    | Permit     |
1432  *      | write and     |            |           |            |
1433  *      | execute       |            |           |            |
1434  *      -------------------------------------------------------
1435  *      | ACL denies    |            |           |            |
1436  *      | write and     | Permit     | Deny      | Deny       |
1437  *      | execute       |            |           |            |
1438  *      -------------------------------------------------------
1439  *         ^
1440  *         |
1441  *         No search privilege, can't even look up file?
1442  *
1443  */
1444 int
1445 zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
1446 {
1447 	int dzp_working_mode = 0;
1448 	int zp_working_mode = 0;
1449 	int dzp_error, zp_error;
1450 	int error;
1451 
1452 	/*
1453 	 * Arghh, this check is going to require a couple of questions
1454 	 * to be asked.  We want specific DELETE permissions to
1455 	 * take precedence over WRITE/EXECUTE.  We don't
1456 	 * want an ACL such as this to mess us up.
1457 	 * user:sloar:write_data:deny,user:sloar:delete:allow
1458 	 *
1459 	 * However, deny permissions may ultimately be overridden
1460 	 * by secpolicy_vnode_access().
1461 	 */
1462 
1463 	dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD,
1464 	    &dzp_working_mode, cr);
1465 	zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode, cr);
1466 
1467 	if (dzp_error == EROFS || zp_error == EROFS)
1468 		return (dzp_error);
1469 
1470 	/*
1471 	 * First handle the first row
1472 	 */
1473 	if (dzp_working_mode & ACE_DELETE_CHILD)
1474 		return (0);
1475 
1476 	/*
1477 	 * Second row
1478 	 */
1479 
1480 	if (zp_working_mode & ACE_DELETE)
1481 		return (0);
1482 
1483 	/*
1484 	 * Third Row
1485 	 */
1486 
1487 	dzp_error = zfs_zaccess_common(dzp, ACE_WRITE_DATA|ACE_EXECUTE,
1488 	    &dzp_working_mode, cr);
1489 
1490 	if (dzp_error == EROFS)
1491 		return (dzp_error);
1492 
1493 	if (dzp_working_mode & (ACE_WRITE_DATA|ACE_EXECUTE))
1494 		goto sticky;
1495 
1496 	/*
1497 	 * Fourth Row
1498 	 */
1499 
1500 	if (((dzp_working_mode & (ACE_WRITE_DATA|ACE_EXECUTE)) == 0) &&
1501 	    (zp_working_mode & ACE_DELETE))
1502 		goto sticky;
1503 
1504 	error = secpolicy_vnode_access(cr, ZTOV(zp),
1505 	    dzp->z_phys->zp_uid, S_IWRITE|S_IEXEC);
1506 
1507 	if (error)
1508 		return (error);
1509 
1510 sticky:
1511 	error = zfs_sticky_remove_access(dzp, zp, cr);
1512 
1513 	return (error);
1514 }
1515 
1516 int
1517 zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
1518     znode_t *tzp, cred_t *cr)
1519 {
1520 	int add_perm;
1521 	int error;
1522 
1523 	add_perm = (ZTOV(szp)->v_type == VDIR) ?
1524 	    ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;
1525 
1526 	/*
1527 	 * Rename permissions are combination of delete permission +
1528 	 * add file/subdir permission.
1529 	 */
1530 
1531 	/*
1532 	 * first make sure we do the delete portion.
1533 	 *
1534 	 * If that succeeds then check for add_file/add_subdir permissions
1535 	 */
1536 
1537 	if (error = zfs_zaccess_delete(sdzp, szp, cr))
1538 		return (error);
1539 
1540 	/*
1541 	 * If we have a tzp, see if we can delete it?
1542 	 */
1543 	if (tzp) {
1544 		if (error = zfs_zaccess_delete(tdzp, tzp, cr))
1545 			return (error);
1546 	}
1547 
1548 	/*
1549 	 * Now check for add permissions
1550 	 */
1551 	error = zfs_zaccess(tdzp, add_perm, cr);
1552 
1553 	return (error);
1554 }
1555