xref: /titanic_51/usr/src/uts/common/fs/zfs/zfs_acl.c (revision 30a5e8fa1253cb33980ee4514743cf683f584b4e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/time.h>
31 #include <sys/systm.h>
32 #include <sys/sysmacros.h>
33 #include <sys/resource.h>
34 #include <sys/vfs.h>
35 #include <sys/vnode.h>
36 #include <sys/file.h>
37 #include <sys/stat.h>
38 #include <sys/kmem.h>
39 #include <sys/cmn_err.h>
40 #include <sys/errno.h>
41 #include <sys/unistd.h>
42 #include <sys/sdt.h>
43 #include <sys/fs/zfs.h>
44 #include <sys/mode.h>
45 #include <sys/policy.h>
46 #include <sys/zfs_znode.h>
47 #include <sys/zfs_acl.h>
48 #include <sys/zfs_dir.h>
49 #include <sys/zfs_vfsops.h>
50 #include <sys/dmu.h>
51 #include <sys/zap.h>
52 #include <util/qsort.h>
53 #include "fs/fs_subr.h"
54 #include <acl/acl_common.h>
55 
56 #define	ALLOW	ACE_ACCESS_ALLOWED_ACE_TYPE
57 #define	DENY	ACE_ACCESS_DENIED_ACE_TYPE
58 
59 #define	OWNING_GROUP		(ACE_GROUP|ACE_IDENTIFIER_GROUP)
60 #define	EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
61     ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
62 #define	EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
63     ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
64 #define	OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
65     ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
66 #define	WRITE_MASK (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS| \
67     ACE_WRITE_ATTRIBUTES|ACE_WRITE_ACL|ACE_WRITE_OWNER)
68 
69 #define	OGE_CLEAR	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
70     ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
71 
72 #define	OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
73     ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
74 
75 #define	ALL_INHERIT	(ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
76     ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE)
77 
78 #define	SECURE_CLEAR	(ACE_WRITE_ACL|ACE_WRITE_OWNER)
79 
80 #define	OGE_PAD	6		/* traditional owner/group/everyone ACES */
81 
82 static int zfs_ace_can_use(znode_t *zp, ace_t *);
83 
84 static zfs_acl_t *
85 zfs_acl_alloc(int slots)
86 {
87 	zfs_acl_t *aclp;
88 
89 	aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
90 	if (slots != 0) {
91 		aclp->z_acl = kmem_alloc(ZFS_ACL_SIZE(slots), KM_SLEEP);
92 		aclp->z_acl_count = 0;
93 		aclp->z_state = ACL_DATA_ALLOCED;
94 	} else {
95 		aclp->z_state = 0;
96 	}
97 	aclp->z_slots = slots;
98 	return (aclp);
99 }
100 
101 void
102 zfs_acl_free(zfs_acl_t *aclp)
103 {
104 	if (aclp->z_state == ACL_DATA_ALLOCED) {
105 		kmem_free(aclp->z_acl, ZFS_ACL_SIZE(aclp->z_slots));
106 	}
107 	kmem_free(aclp, sizeof (zfs_acl_t));
108 }
109 
110 static uint32_t
111 zfs_v4_to_unix(uint32_t access_mask)
112 {
113 	uint32_t new_mask = 0;
114 
115 	/*
116 	 * This is used for mapping v4 permissions into permissions
117 	 * that can be passed to secpolicy_vnode_access()
118 	 */
119 	if (access_mask & (ACE_READ_DATA | ACE_LIST_DIRECTORY |
120 	    ACE_READ_ATTRIBUTES | ACE_READ_ACL))
121 		new_mask |= S_IROTH;
122 	if (access_mask & (ACE_WRITE_DATA | ACE_APPEND_DATA |
123 	    ACE_WRITE_ATTRIBUTES | ACE_ADD_FILE | ACE_WRITE_NAMED_ATTRS))
124 		new_mask |= S_IWOTH;
125 	if (access_mask & (ACE_EXECUTE | ACE_READ_NAMED_ATTRS))
126 		new_mask |= S_IXOTH;
127 
128 	return (new_mask);
129 }
130 
131 /*
132  * Convert unix access mask to v4 access mask
133  */
134 static uint32_t
135 zfs_unix_to_v4(uint32_t access_mask)
136 {
137 	uint32_t new_mask = 0;
138 
139 	if (access_mask & 01)
140 		new_mask |= (ACE_EXECUTE);
141 	if (access_mask & 02) {
142 		new_mask |= (ACE_WRITE_DATA);
143 	} if (access_mask & 04) {
144 		new_mask |= ACE_READ_DATA;
145 	}
146 	return (new_mask);
147 }
148 
149 static void
150 zfs_set_ace(ace_t *zacep, uint32_t access_mask, int access_type,
151     uid_t uid, int entry_type)
152 {
153 	zacep->a_access_mask = access_mask;
154 	zacep->a_type = access_type;
155 	zacep->a_who = uid;
156 	zacep->a_flags = entry_type;
157 }
158 
159 static uint64_t
160 zfs_mode_compute(znode_t *zp, zfs_acl_t *aclp)
161 {
162 	int 	i;
163 	int	entry_type;
164 	mode_t	mode = (zp->z_phys->zp_mode &
165 	    (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
166 	mode_t	 seen = 0;
167 	ace_t 	*acep;
168 
169 	for (i = 0, acep = aclp->z_acl;
170 	    i != aclp->z_acl_count; i++, acep++) {
171 
172 		/*
173 		 * Skip over inherit only ACEs
174 		 */
175 		if (acep->a_flags & ACE_INHERIT_ONLY_ACE)
176 			continue;
177 
178 		entry_type = (acep->a_flags & ACE_TYPE_FLAGS);
179 		if (entry_type == ACE_OWNER) {
180 			if ((acep->a_access_mask & ACE_READ_DATA) &&
181 			    (!(seen & S_IRUSR))) {
182 				seen |= S_IRUSR;
183 				if (acep->a_type == ALLOW) {
184 					mode |= S_IRUSR;
185 				}
186 			}
187 			if ((acep->a_access_mask & ACE_WRITE_DATA) &&
188 			    (!(seen & S_IWUSR))) {
189 				seen |= S_IWUSR;
190 				if (acep->a_type == ALLOW) {
191 					mode |= S_IWUSR;
192 				}
193 			}
194 			if ((acep->a_access_mask & ACE_EXECUTE) &&
195 			    (!(seen & S_IXUSR))) {
196 				seen |= S_IXUSR;
197 				if (acep->a_type == ALLOW) {
198 					mode |= S_IXUSR;
199 				}
200 			}
201 		} else if (entry_type == OWNING_GROUP) {
202 			if ((acep->a_access_mask & ACE_READ_DATA) &&
203 			    (!(seen & S_IRGRP))) {
204 				seen |= S_IRGRP;
205 				if (acep->a_type == ALLOW) {
206 					mode |= S_IRGRP;
207 				}
208 			}
209 			if ((acep->a_access_mask & ACE_WRITE_DATA) &&
210 			    (!(seen & S_IWGRP))) {
211 				seen |= S_IWGRP;
212 				if (acep->a_type == ALLOW) {
213 					mode |= S_IWGRP;
214 				}
215 			}
216 			if ((acep->a_access_mask & ACE_EXECUTE) &&
217 			    (!(seen & S_IXGRP))) {
218 				seen |= S_IXGRP;
219 				if (acep->a_type == ALLOW) {
220 					mode |= S_IXGRP;
221 				}
222 			}
223 		} else if (entry_type == ACE_EVERYONE) {
224 			if ((acep->a_access_mask & ACE_READ_DATA)) {
225 				if (!(seen & S_IRUSR)) {
226 					seen |= S_IRUSR;
227 					if (acep->a_type == ALLOW) {
228 						mode |= S_IRUSR;
229 					}
230 				}
231 				if (!(seen & S_IRGRP)) {
232 					seen |= S_IRGRP;
233 					if (acep->a_type == ALLOW) {
234 						mode |= S_IRGRP;
235 					}
236 				}
237 				if (!(seen & S_IROTH)) {
238 					seen |= S_IROTH;
239 					if (acep->a_type == ALLOW) {
240 						mode |= S_IROTH;
241 					}
242 				}
243 			}
244 			if ((acep->a_access_mask & ACE_WRITE_DATA)) {
245 				if (!(seen & S_IWUSR)) {
246 					seen |= S_IWUSR;
247 					if (acep->a_type == ALLOW) {
248 						mode |= S_IWUSR;
249 					}
250 				}
251 				if (!(seen & S_IWGRP)) {
252 					seen |= S_IWGRP;
253 					if (acep->a_type == ALLOW) {
254 						mode |= S_IWGRP;
255 					}
256 				}
257 				if (!(seen & S_IWOTH)) {
258 					seen |= S_IWOTH;
259 					if (acep->a_type == ALLOW) {
260 						mode |= S_IWOTH;
261 					}
262 				}
263 			}
264 			if ((acep->a_access_mask & ACE_EXECUTE)) {
265 				if (!(seen & S_IXUSR)) {
266 					seen |= S_IXUSR;
267 					if (acep->a_type == ALLOW) {
268 						mode |= S_IXUSR;
269 					}
270 				}
271 				if (!(seen & S_IXGRP)) {
272 					seen |= S_IXGRP;
273 					if (acep->a_type == ALLOW) {
274 						mode |= S_IXGRP;
275 					}
276 				}
277 				if (!(seen & S_IXOTH)) {
278 					seen |= S_IXOTH;
279 					if (acep->a_type == ALLOW) {
280 						mode |= S_IXOTH;
281 					}
282 				}
283 			}
284 		}
285 	}
286 	return (mode);
287 }
288 
289 static zfs_acl_t *
290 zfs_acl_node_read_internal(znode_t *zp)
291 {
292 	zfs_acl_t	*aclp;
293 
294 	aclp = zfs_acl_alloc(0);
295 	aclp->z_acl_count = zp->z_phys->zp_acl.z_acl_count;
296 	aclp->z_acl = &zp->z_phys->zp_acl.z_ace_data[0];
297 
298 	return (aclp);
299 }
300 
301 /*
302  * Read an external acl object.
303  */
304 static int
305 zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp)
306 {
307 	uint64_t extacl = zp->z_phys->zp_acl.z_acl_extern_obj;
308 	zfs_acl_t	*aclp;
309 	int error;
310 
311 	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
312 
313 	if (zp->z_phys->zp_acl.z_acl_extern_obj == 0) {
314 		*aclpp = zfs_acl_node_read_internal(zp);
315 		return (0);
316 	}
317 
318 	aclp = zfs_acl_alloc(zp->z_phys->zp_acl.z_acl_count);
319 
320 	error = dmu_read(zp->z_zfsvfs->z_os, extacl, 0,
321 	    ZFS_ACL_SIZE(zp->z_phys->zp_acl.z_acl_count), aclp->z_acl);
322 	if (error != 0) {
323 		zfs_acl_free(aclp);
324 		return (error);
325 	}
326 
327 	aclp->z_acl_count = zp->z_phys->zp_acl.z_acl_count;
328 
329 	*aclpp = aclp;
330 	return (0);
331 }
332 
333 static boolean_t
334 zfs_acl_valid(znode_t *zp, ace_t *uace, int aclcnt, int *inherit)
335 {
336 	ace_t 	*acep;
337 	int i;
338 
339 	*inherit = 0;
340 
341 	if (aclcnt > MAX_ACL_ENTRIES || aclcnt <= 0) {
342 		return (B_FALSE);
343 	}
344 
345 	for (i = 0, acep = uace; i != aclcnt; i++, acep++) {
346 
347 		/*
348 		 * first check type of entry
349 		 */
350 
351 		switch (acep->a_flags & ACE_TYPE_FLAGS) {
352 		case ACE_OWNER:
353 			acep->a_who = (uid_t)-1;
354 			break;
355 		case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
356 		case ACE_IDENTIFIER_GROUP:
357 			if (acep->a_flags & ACE_GROUP) {
358 				acep->a_who = (uid_t)-1;
359 			}
360 			break;
361 		case ACE_EVERYONE:
362 			acep->a_who = (uid_t)-1;
363 			break;
364 		}
365 
366 		/*
367 		 * next check inheritance level flags
368 		 */
369 
370 		if (acep->a_type != ALLOW && acep->a_type != DENY)
371 			return (B_FALSE);
372 
373 		/*
374 		 * Only directories should have inheritance flags.
375 		 */
376 		if (ZTOV(zp)->v_type != VDIR && (acep->a_flags &
377 		    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE|
378 		    ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE))) {
379 			return (B_FALSE);
380 		}
381 
382 		if (acep->a_flags &
383 		    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))
384 			*inherit = 1;
385 
386 		if (acep->a_flags &
387 		    (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) {
388 			if ((acep->a_flags & (ACE_FILE_INHERIT_ACE|
389 			    ACE_DIRECTORY_INHERIT_ACE)) == 0) {
390 				return (B_FALSE);
391 			}
392 		}
393 	}
394 
395 	return (B_TRUE);
396 }
397 /*
398  * common code for setting acl's.
399  *
400  * This function is called from zfs_mode_update, zfs_perm_init, and zfs_setacl.
401  * zfs_setacl passes a non-NULL inherit pointer (ihp) to indicate that it's
402  * already checked the acl and knows whether to inherit.
403  */
404 int
405 zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, dmu_tx_t *tx, int *ihp)
406 {
407 	int 		inherit = 0;
408 	int		error;
409 	znode_phys_t	*zphys = zp->z_phys;
410 	zfs_znode_acl_t	*zacl = &zphys->zp_acl;
411 	uint32_t	acl_phys_size = ZFS_ACL_SIZE(aclp->z_acl_count);
412 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
413 	uint64_t	aoid = zphys->zp_acl.z_acl_extern_obj;
414 
415 	ASSERT(MUTEX_HELD(&zp->z_lock));
416 	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
417 
418 	if (ihp)
419 		inherit = *ihp;		/* already determined by caller */
420 	else if (!zfs_acl_valid(zp, aclp->z_acl,
421 	    aclp->z_acl_count, &inherit)) {
422 		return (EINVAL);
423 	}
424 
425 	dmu_buf_will_dirty(zp->z_dbuf, tx);
426 
427 	/*
428 	 * Will ACL fit internally?
429 	 */
430 	if (aclp->z_acl_count > ACE_SLOT_CNT) {
431 		if (aoid == 0) {
432 			aoid = dmu_object_alloc(zfsvfs->z_os,
433 			    DMU_OT_ACL, acl_phys_size, DMU_OT_NONE, 0, tx);
434 		} else {
435 			(void) dmu_object_set_blocksize(zfsvfs->z_os, aoid,
436 			    acl_phys_size, 0, tx);
437 		}
438 		zphys->zp_acl.z_acl_extern_obj = aoid;
439 		zphys->zp_acl.z_acl_count = aclp->z_acl_count;
440 		dmu_write(zfsvfs->z_os, aoid, 0,
441 		    acl_phys_size, aclp->z_acl, tx);
442 	} else {
443 		/*
444 		 * Migrating back embedded?
445 		 */
446 		if (zphys->zp_acl.z_acl_extern_obj) {
447 			error = dmu_object_free(zfsvfs->z_os,
448 			    zp->z_phys->zp_acl.z_acl_extern_obj, tx);
449 			if (error)
450 				return (error);
451 			zphys->zp_acl.z_acl_extern_obj = 0;
452 		}
453 		bcopy(aclp->z_acl, zacl->z_ace_data,
454 		    aclp->z_acl_count * sizeof (ace_t));
455 		zacl->z_acl_count = aclp->z_acl_count;
456 	}
457 
458 	zp->z_phys->zp_flags &= ~(ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE);
459 	if (inherit) {
460 		zp->z_phys->zp_flags |= ZFS_INHERIT_ACE;
461 	} else if (ace_trivial(zacl->z_ace_data, zacl->z_acl_count) == 0) {
462 		zp->z_phys->zp_flags |= ZFS_ACL_TRIVIAL;
463 	}
464 
465 	zphys->zp_mode = zfs_mode_compute(zp, aclp);
466 	zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
467 
468 	return (0);
469 }
470 
471 /*
472  * Create space for slots_needed ACEs to be append
473  * to aclp.
474  */
475 static void
476 zfs_acl_append(zfs_acl_t *aclp, int slots_needed)
477 {
478 	ace_t	*newacep;
479 	ace_t	*oldaclp;
480 	int	slot_cnt;
481 	int 	slots_left = aclp->z_slots - aclp->z_acl_count;
482 
483 	if (aclp->z_state == ACL_DATA_ALLOCED)
484 		ASSERT(aclp->z_slots >= aclp->z_acl_count);
485 	if (slots_left < slots_needed || aclp->z_state != ACL_DATA_ALLOCED) {
486 		slot_cnt = aclp->z_slots +  1 + (slots_needed - slots_left);
487 		newacep = kmem_alloc(ZFS_ACL_SIZE(slot_cnt), KM_SLEEP);
488 		bcopy(aclp->z_acl, newacep,
489 		    ZFS_ACL_SIZE(aclp->z_acl_count));
490 		oldaclp = aclp->z_acl;
491 		if (aclp->z_state == ACL_DATA_ALLOCED)
492 			kmem_free(oldaclp, ZFS_ACL_SIZE(aclp->z_slots));
493 		aclp->z_acl = newacep;
494 		aclp->z_slots = slot_cnt;
495 		aclp->z_state = ACL_DATA_ALLOCED;
496 	}
497 }
498 
499 /*
500  * Remove "slot" ACE from aclp
501  */
502 static void
503 zfs_ace_remove(zfs_acl_t *aclp, int slot)
504 {
505 	if (aclp->z_acl_count > 1) {
506 		(void) memmove(&aclp->z_acl[slot],
507 		    &aclp->z_acl[slot +1], sizeof (ace_t) *
508 		    (--aclp->z_acl_count - slot));
509 	} else
510 		aclp->z_acl_count--;
511 }
512 
513 /*
514  * Update access mask for prepended ACE
515  *
516  * This applies the "groupmask" value for aclmode property.
517  */
518 static void
519 zfs_acl_prepend_fixup(ace_t *acep, ace_t *origacep, mode_t mode, uid_t owner)
520 {
521 
522 	int	rmask, wmask, xmask;
523 	int	user_ace;
524 
525 	user_ace = (!(acep->a_flags &
526 	    (ACE_OWNER|ACE_GROUP|ACE_IDENTIFIER_GROUP)));
527 
528 	if (user_ace && (acep->a_who == owner)) {
529 		rmask = S_IRUSR;
530 		wmask = S_IWUSR;
531 		xmask = S_IXUSR;
532 	} else {
533 		rmask = S_IRGRP;
534 		wmask = S_IWGRP;
535 		xmask = S_IXGRP;
536 	}
537 
538 	if (origacep->a_access_mask & ACE_READ_DATA) {
539 		if (mode & rmask)
540 			acep->a_access_mask &= ~ACE_READ_DATA;
541 		else
542 			acep->a_access_mask |= ACE_READ_DATA;
543 	}
544 
545 	if (origacep->a_access_mask & ACE_WRITE_DATA) {
546 		if (mode & wmask)
547 			acep->a_access_mask &= ~ACE_WRITE_DATA;
548 		else
549 			acep->a_access_mask |= ACE_WRITE_DATA;
550 	}
551 
552 	if (origacep->a_access_mask & ACE_APPEND_DATA) {
553 		if (mode & wmask)
554 			acep->a_access_mask &= ~ACE_APPEND_DATA;
555 		else
556 			acep->a_access_mask |= ACE_APPEND_DATA;
557 	}
558 
559 	if (origacep->a_access_mask & ACE_EXECUTE) {
560 		if (mode & xmask)
561 			acep->a_access_mask &= ~ACE_EXECUTE;
562 		else
563 			acep->a_access_mask |= ACE_EXECUTE;
564 	}
565 }
566 
567 /*
568  * Apply mode to canonical six ACEs.
569  */
570 static void
571 zfs_acl_fixup_canonical_six(zfs_acl_t *aclp, mode_t mode)
572 {
573 	int	cnt;
574 	ace_t	*acep;
575 
576 	cnt = aclp->z_acl_count -1;
577 	acep = aclp->z_acl;
578 
579 	/*
580 	 * Fixup final ACEs to match the mode
581 	 */
582 
583 	ASSERT(cnt >= 5);
584 	adjust_ace_pair(&acep[cnt - 1], mode);	/* everyone@ */
585 	adjust_ace_pair(&acep[cnt - 3], (mode & 0070) >> 3);	/* group@ */
586 	adjust_ace_pair(&acep[cnt - 5], (mode & 0700) >> 6);	/* owner@ */
587 }
588 
589 
590 static int
591 zfs_acl_ace_match(ace_t *acep, int allow_deny, int type, int mask)
592 {
593 	return (acep->a_access_mask == mask && acep->a_type == allow_deny &&
594 	    ((acep->a_flags & ACE_TYPE_FLAGS) == type));
595 }
596 
597 /*
598  * Can prepended ACE be reused?
599  */
600 static int
601 zfs_reuse_deny(ace_t *acep, int i)
602 {
603 	int okay_masks;
604 
605 	if (i < 1)
606 		return (B_FALSE);
607 
608 	if (acep[i-1].a_type != DENY)
609 		return (B_FALSE);
610 
611 	if (acep[i-1].a_flags != (acep[i].a_flags & ACE_IDENTIFIER_GROUP))
612 		return (B_FALSE);
613 
614 	okay_masks = (acep[i].a_access_mask & OKAY_MASK_BITS);
615 
616 	if (acep[i-1].a_access_mask & ~okay_masks)
617 		return (B_FALSE);
618 
619 	return (B_TRUE);
620 }
621 
622 /*
623  * Create space to prepend an ACE
624  */
625 static void
626 zfs_acl_prepend(zfs_acl_t *aclp, int i)
627 {
628 	ace_t	*oldaclp = NULL;
629 	ace_t	*to, *from;
630 	int	slots_left = aclp->z_slots - aclp->z_acl_count;
631 	int	oldslots;
632 	int	need_free = 0;
633 
634 	if (aclp->z_state == ACL_DATA_ALLOCED)
635 		ASSERT(aclp->z_slots >= aclp->z_acl_count);
636 
637 	if (slots_left == 0 || aclp->z_state != ACL_DATA_ALLOCED) {
638 
639 		to = kmem_alloc(ZFS_ACL_SIZE(aclp->z_acl_count +
640 		    OGE_PAD), KM_SLEEP);
641 		if (aclp->z_state == ACL_DATA_ALLOCED)
642 			need_free++;
643 		from = aclp->z_acl;
644 		oldaclp = aclp->z_acl;
645 		(void) memmove(to, from,
646 		    sizeof (ace_t) * aclp->z_acl_count);
647 		aclp->z_state = ACL_DATA_ALLOCED;
648 	} else {
649 		from = aclp->z_acl;
650 		to = aclp->z_acl;
651 	}
652 
653 
654 	(void) memmove(&to[i + 1], &from[i],
655 	    sizeof (ace_t) * (aclp->z_acl_count - i));
656 
657 	if (oldaclp) {
658 		aclp->z_acl = to;
659 		oldslots = aclp->z_slots;
660 		aclp->z_slots = aclp->z_acl_count + OGE_PAD;
661 		if (need_free)
662 			kmem_free(oldaclp, ZFS_ACL_SIZE(oldslots));
663 	}
664 
665 }
666 
667 /*
668  * Prepend deny ACE
669  */
670 static void
671 zfs_acl_prepend_deny(znode_t *zp, zfs_acl_t *aclp, int i,
672     mode_t mode)
673 {
674 	ace_t	*acep;
675 
676 	zfs_acl_prepend(aclp, i);
677 
678 	acep = aclp->z_acl;
679 	zfs_set_ace(&acep[i], 0, DENY, acep[i + 1].a_who,
680 	    (acep[i + 1].a_flags & ACE_TYPE_FLAGS));
681 	zfs_acl_prepend_fixup(&acep[i], &acep[i+1], mode, zp->z_phys->zp_uid);
682 	aclp->z_acl_count++;
683 }
684 
685 /*
686  * Split an inherited ACE into inherit_only ACE
687  * and original ACE with inheritance flags stripped off.
688  */
689 static void
690 zfs_acl_split_ace(zfs_acl_t *aclp, int i)
691 {
692 	ace_t *acep = aclp->z_acl;
693 
694 	zfs_acl_prepend(aclp, i);
695 	acep = aclp->z_acl;
696 	acep[i] = acep[i + 1];
697 	acep[i].a_flags |= ACE_INHERIT_ONLY_ACE;
698 	acep[i + 1].a_flags &= ~ALL_INHERIT;
699 	aclp->z_acl_count++;
700 }
701 
702 /*
703  * Are ACES started at index i, the canonical six ACES?
704  */
705 static int
706 zfs_have_canonical_six(zfs_acl_t *aclp, int i)
707 {
708 	ace_t *acep = aclp->z_acl;
709 
710 	if ((zfs_acl_ace_match(&acep[i],
711 	    DENY, ACE_OWNER, 0) &&
712 	    zfs_acl_ace_match(&acep[i + 1], ALLOW, ACE_OWNER,
713 	    OWNER_ALLOW_MASK) && zfs_acl_ace_match(&acep[i + 2],
714 	    DENY, OWNING_GROUP, 0) && zfs_acl_ace_match(&acep[i + 3],
715 	    ALLOW, OWNING_GROUP, 0) && zfs_acl_ace_match(&acep[i + 4],
716 	    DENY, ACE_EVERYONE, EVERYONE_DENY_MASK) &&
717 	    zfs_acl_ace_match(&acep[i + 5], ALLOW, ACE_EVERYONE,
718 	    EVERYONE_ALLOW_MASK))) {
719 		return (1);
720 	} else {
721 		return (0);
722 	}
723 }
724 
725 /*
726  * Apply step 1g, to group entries
727  *
728  * Need to deal with corner case where group may have
729  * greater permissions than owner.  If so then limit
730  * group permissions, based on what extra permissions
731  * group has.
732  */
733 static void
734 zfs_fixup_group_entries(ace_t *acep, mode_t mode)
735 {
736 	mode_t extramode = (mode >> 3) & 07;
737 	mode_t ownermode = (mode >> 6);
738 
739 	if (acep[0].a_flags & ACE_IDENTIFIER_GROUP) {
740 
741 		extramode &= ~ownermode;
742 
743 		if (extramode) {
744 			if (extramode & 04) {
745 				acep[0].a_access_mask &= ~ACE_READ_DATA;
746 				acep[1].a_access_mask &= ~ACE_READ_DATA;
747 			}
748 			if (extramode & 02) {
749 				acep[0].a_access_mask &=
750 				    ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
751 				acep[1].a_access_mask &=
752 				    ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
753 			}
754 			if (extramode & 01) {
755 				acep[0].a_access_mask &= ~ACE_EXECUTE;
756 				acep[1].a_access_mask &= ~ACE_EXECUTE;
757 			}
758 		}
759 	}
760 }
761 
762 /*
763  * Apply the chmod algorithm as described
764  * in PSARC/2002/240
765  */
766 static int
767 zfs_acl_chmod(znode_t *zp, uint64_t mode, zfs_acl_t *aclp,
768     dmu_tx_t *tx)
769 {
770 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
771 	ace_t 		*acep;
772 	int 		i;
773 	int		error;
774 	int 		entry_type;
775 	int 		reuse_deny;
776 	int 		need_canonical_six = 1;
777 	int		inherit = 0;
778 	int		iflags;
779 
780 	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
781 	ASSERT(MUTEX_HELD(&zp->z_lock));
782 
783 	i = 0;
784 	while (i < aclp->z_acl_count) {
785 		acep = aclp->z_acl;
786 		entry_type = (acep[i].a_flags & ACE_TYPE_FLAGS);
787 		iflags = (acep[i].a_flags & ALL_INHERIT);
788 
789 		if ((acep[i].a_type != ALLOW && acep[i].a_type != DENY) ||
790 		    (iflags & ACE_INHERIT_ONLY_ACE)) {
791 			i++;
792 			if (iflags)
793 				inherit = 1;
794 			continue;
795 		}
796 
797 
798 		if (zfsvfs->z_acl_mode == ZFS_ACL_DISCARD) {
799 			zfs_ace_remove(aclp, i);
800 			continue;
801 		}
802 
803 		/*
804 		 * Need to split ace into two?
805 		 */
806 		if ((iflags & (ACE_FILE_INHERIT_ACE|
807 		    ACE_DIRECTORY_INHERIT_ACE)) &&
808 		    (!(iflags & ACE_INHERIT_ONLY_ACE))) {
809 			zfs_acl_split_ace(aclp, i);
810 			i++;
811 			inherit = 1;
812 			continue;
813 		}
814 
815 		if (entry_type == ACE_OWNER || entry_type == ACE_EVERYONE ||
816 		    (entry_type == OWNING_GROUP)) {
817 			acep[i].a_access_mask &= ~OGE_CLEAR;
818 			i++;
819 			continue;
820 
821 		} else {
822 			if (acep[i].a_type == ALLOW) {
823 
824 				/*
825 				 * Check preceding ACE if any, to see
826 				 * if we need to prepend a DENY ACE.
827 				 * This is only applicable when the acl_mode
828 				 * property == groupmask.
829 				 */
830 				if (zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK) {
831 
832 					reuse_deny = zfs_reuse_deny(acep, i);
833 
834 					if (reuse_deny == B_FALSE) {
835 						zfs_acl_prepend_deny(zp, aclp,
836 						    i, mode);
837 						i++;
838 						acep = aclp->z_acl;
839 					} else {
840 						zfs_acl_prepend_fixup(
841 						    &acep[i - 1],
842 						    &acep[i], mode,
843 						    zp->z_phys->zp_uid);
844 					}
845 					zfs_fixup_group_entries(&acep[i - 1],
846 					    mode);
847 				}
848 			}
849 			i++;
850 		}
851 	}
852 
853 	/*
854 	 * Check out last six aces, if we have six.
855 	 */
856 
857 	if (aclp->z_acl_count >= 6) {
858 		i = aclp->z_acl_count - 6;
859 
860 		if (zfs_have_canonical_six(aclp, i)) {
861 			need_canonical_six = 0;
862 		}
863 	}
864 
865 	if (need_canonical_six) {
866 
867 		zfs_acl_append(aclp, 6);
868 		i = aclp->z_acl_count;
869 		acep = aclp->z_acl;
870 		zfs_set_ace(&acep[i++], 0, DENY, -1, ACE_OWNER);
871 		zfs_set_ace(&acep[i++], OWNER_ALLOW_MASK, ALLOW, -1, ACE_OWNER);
872 		zfs_set_ace(&acep[i++], 0, DENY, -1, OWNING_GROUP);
873 		zfs_set_ace(&acep[i++], 0, ALLOW, -1, OWNING_GROUP);
874 		zfs_set_ace(&acep[i++], EVERYONE_DENY_MASK,
875 		    DENY, -1, ACE_EVERYONE);
876 		zfs_set_ace(&acep[i++], EVERYONE_ALLOW_MASK,
877 		    ALLOW, -1, ACE_EVERYONE);
878 		aclp->z_acl_count += 6;
879 	}
880 
881 	zfs_acl_fixup_canonical_six(aclp, mode);
882 
883 	zp->z_phys->zp_mode = mode;
884 	error = zfs_aclset_common(zp, aclp, tx, &inherit);
885 	return (error);
886 }
887 
888 
889 int
890 zfs_acl_chmod_setattr(znode_t *zp, uint64_t mode, dmu_tx_t *tx)
891 {
892 	zfs_acl_t *aclp = NULL;
893 	int error;
894 
895 	ASSERT(MUTEX_HELD(&zp->z_lock));
896 	mutex_enter(&zp->z_acl_lock);
897 	error = zfs_acl_node_read(zp, &aclp);
898 	if (error == 0)
899 		error = zfs_acl_chmod(zp, mode, aclp, tx);
900 	mutex_exit(&zp->z_acl_lock);
901 	if (aclp)
902 		zfs_acl_free(aclp);
903 	return (error);
904 }
905 
906 /*
907  * strip off write_owner and write_acl
908  */
909 static void
910 zfs_securemode_update(zfsvfs_t *zfsvfs, ace_t *acep)
911 {
912 	if ((zfsvfs->z_acl_inherit == ZFS_ACL_SECURE) &&
913 	    (acep->a_type == ALLOW))
914 		acep->a_access_mask &= ~SECURE_CLEAR;
915 }
916 
917 /*
918  * inherit inheritable ACEs from parent
919  */
920 static zfs_acl_t *
921 zfs_acl_inherit(znode_t *zp, zfs_acl_t *paclp)
922 {
923 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
924 	ace_t 		*pacep;
925 	ace_t		*acep;
926 	int 		ace_cnt = 0;
927 	int		pace_cnt;
928 	int 		i, j;
929 	zfs_acl_t	*aclp = NULL;
930 
931 	i = j = 0;
932 	pace_cnt = paclp->z_acl_count;
933 	pacep = paclp->z_acl;
934 	if (zfsvfs->z_acl_inherit != ZFS_ACL_DISCARD) {
935 		for (i = 0; i != pace_cnt; i++) {
936 
937 			if (zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW &&
938 			    pacep[i].a_type == ALLOW)
939 				continue;
940 
941 			if (zfs_ace_can_use(zp, &pacep[i])) {
942 				ace_cnt++;
943 				if (!(pacep[i].a_flags &
944 				    ACE_NO_PROPAGATE_INHERIT_ACE))
945 					ace_cnt++;
946 			}
947 		}
948 	}
949 
950 	aclp = zfs_acl_alloc(ace_cnt + OGE_PAD);
951 	if (ace_cnt && zfsvfs->z_acl_inherit != ZFS_ACL_DISCARD) {
952 		acep = aclp->z_acl;
953 		pacep = paclp->z_acl;
954 		for (i = 0; i != pace_cnt; i++) {
955 
956 			if (zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW &&
957 			    pacep[i].a_type == ALLOW)
958 				continue;
959 
960 			if (zfs_ace_can_use(zp, &pacep[i])) {
961 
962 				/*
963 				 * Now create entry for inherited ace
964 				 */
965 
966 				acep[j] = pacep[i];
967 
968 				/*
969 				 * When AUDIT/ALARM a_types are supported
970 				 * they should be inherited here.
971 				 */
972 
973 				if ((pacep[i].a_flags &
974 				    ACE_NO_PROPAGATE_INHERIT_ACE) ||
975 				    (ZTOV(zp)->v_type != VDIR)) {
976 					acep[j].a_flags &= ~ALL_INHERIT;
977 					zfs_securemode_update(zfsvfs, &acep[j]);
978 					j++;
979 					continue;
980 				}
981 
982 				ASSERT(ZTOV(zp)->v_type == VDIR);
983 
984 				/*
985 				 * If we are inheriting an ACE targeted for
986 				 * only files, then make sure inherit_only
987 				 * is on for future propagation.
988 				 */
989 				if ((pacep[i].a_flags & (ACE_FILE_INHERIT_ACE |
990 				    ACE_DIRECTORY_INHERIT_ACE)) !=
991 				    ACE_FILE_INHERIT_ACE) {
992 					j++;
993 					acep[j] = acep[j-1];
994 					acep[j-1].a_flags |=
995 					    ACE_INHERIT_ONLY_ACE;
996 					acep[j].a_flags &= ~ALL_INHERIT;
997 				} else {
998 					acep[j].a_flags |= ACE_INHERIT_ONLY_ACE;
999 				}
1000 				zfs_securemode_update(zfsvfs, &acep[j]);
1001 				j++;
1002 			}
1003 		}
1004 	}
1005 	aclp->z_acl_count = j;
1006 	ASSERT(aclp->z_slots >= aclp->z_acl_count);
1007 
1008 	return (aclp);
1009 }
1010 
1011 /*
1012  * Create file system object initial permissions
1013  * including inheritable ACEs.
1014  */
1015 void
1016 zfs_perm_init(znode_t *zp, znode_t *parent, int flag,
1017     vattr_t *vap, dmu_tx_t *tx, cred_t *cr)
1018 {
1019 	uint64_t	mode;
1020 	uid_t		uid;
1021 	gid_t		gid;
1022 	int		error;
1023 	int		pull_down;
1024 	zfs_acl_t	*aclp, *paclp;
1025 
1026 	mode = MAKEIMODE(vap->va_type, vap->va_mode);
1027 
1028 	/*
1029 	 * Determine uid and gid.
1030 	 */
1031 	if ((flag & (IS_ROOT_NODE | IS_REPLAY)) ||
1032 	    ((flag & IS_XATTR) && (vap->va_type == VDIR))) {
1033 		uid = vap->va_uid;
1034 		gid = vap->va_gid;
1035 	} else {
1036 		uid = crgetuid(cr);
1037 		if ((vap->va_mask & AT_GID) &&
1038 		    ((vap->va_gid == parent->z_phys->zp_gid) ||
1039 		    groupmember(vap->va_gid, cr) ||
1040 		    secpolicy_vnode_create_gid(cr) == 0))
1041 			gid = vap->va_gid;
1042 		else
1043 			gid = (parent->z_phys->zp_mode & S_ISGID) ?
1044 			    parent->z_phys->zp_gid : crgetgid(cr);
1045 	}
1046 
1047 	/*
1048 	 * If we're creating a directory, and the parent directory has the
1049 	 * set-GID bit set, set in on the new directory.
1050 	 * Otherwise, if the user is neither privileged nor a member of the
1051 	 * file's new group, clear the file's set-GID bit.
1052 	 */
1053 
1054 	if ((parent->z_phys->zp_mode & S_ISGID) && (vap->va_type == VDIR))
1055 		mode |= S_ISGID;
1056 	else {
1057 		if ((mode & S_ISGID) &&
1058 		    secpolicy_vnode_setids_setgids(cr, gid) != 0)
1059 			mode &= ~S_ISGID;
1060 	}
1061 
1062 	zp->z_phys->zp_uid = uid;
1063 	zp->z_phys->zp_gid = gid;
1064 	zp->z_phys->zp_mode = mode;
1065 
1066 	mutex_enter(&parent->z_lock);
1067 	pull_down = (parent->z_phys->zp_flags & ZFS_INHERIT_ACE);
1068 	if (pull_down) {
1069 		mutex_enter(&parent->z_acl_lock);
1070 		VERIFY(0 == zfs_acl_node_read(parent, &paclp));
1071 		mutex_exit(&parent->z_acl_lock);
1072 		aclp = zfs_acl_inherit(zp, paclp);
1073 		zfs_acl_free(paclp);
1074 	} else {
1075 		aclp = zfs_acl_alloc(6);
1076 	}
1077 	mutex_exit(&parent->z_lock);
1078 	mutex_enter(&zp->z_lock);
1079 	mutex_enter(&zp->z_acl_lock);
1080 	error = zfs_acl_chmod(zp, mode, aclp, tx);
1081 	mutex_exit(&zp->z_lock);
1082 	mutex_exit(&zp->z_acl_lock);
1083 	ASSERT3U(error, ==, 0);
1084 	zfs_acl_free(aclp);
1085 }
1086 
1087 /*
1088  * Should ACE be inherited?
1089  */
1090 static int
1091 zfs_ace_can_use(znode_t *zp, ace_t *acep)
1092 {
1093 	int vtype = ZTOV(zp)->v_type;
1094 
1095 	int	iflags = (acep->a_flags & 0xf);
1096 
1097 	if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
1098 		return (1);
1099 	else if (iflags & ACE_FILE_INHERIT_ACE)
1100 		return (!((vtype == VDIR) &&
1101 		    (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
1102 	return (0);
1103 }
1104 
1105 /*
1106  * Retrieve a files ACL
1107  */
1108 int
1109 zfs_getacl(znode_t *zp, vsecattr_t  *vsecp, cred_t *cr)
1110 {
1111 	zfs_acl_t	*aclp;
1112 	ulong_t		mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
1113 	int		error;
1114 
1115 	if (error = zfs_zaccess(zp, ACE_READ_ACL, cr)) {
1116 		/*
1117 		 * If owner of file then allow reading of the
1118 		 * ACL.
1119 		 */
1120 		if (crgetuid(cr) != zp->z_phys->zp_uid)
1121 			return (error);
1122 	}
1123 
1124 	if (mask == 0)
1125 		return (ENOSYS);
1126 
1127 	mutex_enter(&zp->z_acl_lock);
1128 
1129 	error = zfs_acl_node_read(zp, &aclp);
1130 	if (error != 0) {
1131 		mutex_exit(&zp->z_acl_lock);
1132 		return (error);
1133 	}
1134 
1135 
1136 	if (mask & VSA_ACECNT) {
1137 		vsecp->vsa_aclcnt = aclp->z_acl_count;
1138 	}
1139 
1140 	if (mask & VSA_ACE) {
1141 		vsecp->vsa_aclentp = kmem_alloc(aclp->z_acl_count *
1142 		    sizeof (ace_t), KM_SLEEP);
1143 		bcopy(aclp->z_acl, vsecp->vsa_aclentp,
1144 		    aclp->z_acl_count * sizeof (ace_t));
1145 	}
1146 
1147 	mutex_exit(&zp->z_acl_lock);
1148 
1149 	zfs_acl_free(aclp);
1150 
1151 	return (0);
1152 }
1153 
1154 /*
1155  * Set a files ACL
1156  */
1157 int
1158 zfs_setacl(znode_t *zp, vsecattr_t *vsecp, cred_t *cr)
1159 {
1160 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
1161 	zilog_t		*zilog = zfsvfs->z_log;
1162 	ace_t		*acep = vsecp->vsa_aclentp;
1163 	int		aclcnt = vsecp->vsa_aclcnt;
1164 	ulong_t		mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
1165 	dmu_tx_t	*tx;
1166 	int		error;
1167 	int		inherit;
1168 	zfs_acl_t	*aclp;
1169 
1170 	if (mask == 0)
1171 		return (ENOSYS);
1172 
1173 	if (!zfs_acl_valid(zp, acep, aclcnt, &inherit))
1174 		return (EINVAL);
1175 top:
1176 	error = zfs_zaccess_v4_perm(zp, ACE_WRITE_ACL, cr);
1177 	if (error == EACCES || error == ACCESS_UNDETERMINED) {
1178 		if ((error = secpolicy_vnode_setdac(cr,
1179 		    zp->z_phys->zp_uid)) != 0) {
1180 			return (error);
1181 		}
1182 	} else if (error) {
1183 		return (error == EROFS ? error : EPERM);
1184 	}
1185 
1186 	mutex_enter(&zp->z_lock);
1187 	mutex_enter(&zp->z_acl_lock);
1188 
1189 	tx = dmu_tx_create(zfsvfs->z_os);
1190 	dmu_tx_hold_bonus(tx, zp->z_id);
1191 
1192 	if (zp->z_phys->zp_acl.z_acl_extern_obj) {
1193 		dmu_tx_hold_write(tx, zp->z_phys->zp_acl.z_acl_extern_obj,
1194 		    0, ZFS_ACL_SIZE(aclcnt));
1195 	} else if (aclcnt > ACE_SLOT_CNT) {
1196 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, ZFS_ACL_SIZE(aclcnt));
1197 	}
1198 
1199 	error = dmu_tx_assign(tx, zfsvfs->z_assign);
1200 	if (error) {
1201 		mutex_exit(&zp->z_acl_lock);
1202 		mutex_exit(&zp->z_lock);
1203 
1204 		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
1205 			dmu_tx_wait(tx);
1206 			dmu_tx_abort(tx);
1207 			goto top;
1208 		}
1209 		dmu_tx_abort(tx);
1210 		return (error);
1211 	}
1212 
1213 	aclp = zfs_acl_alloc(aclcnt);
1214 	bcopy(acep, aclp->z_acl, sizeof (ace_t) * aclcnt);
1215 	aclp->z_acl_count = aclcnt;
1216 	error = zfs_aclset_common(zp, aclp, tx, &inherit);
1217 	ASSERT(error == 0);
1218 
1219 	zfs_acl_free(aclp);
1220 	zfs_log_acl(zilog, tx, TX_ACL, zp, aclcnt, acep);
1221 	dmu_tx_commit(tx);
1222 done:
1223 	mutex_exit(&zp->z_acl_lock);
1224 	mutex_exit(&zp->z_lock);
1225 
1226 	return (error);
1227 }
1228 
1229 static int
1230 zfs_ace_access(ace_t *zacep, int *working_mode)
1231 {
1232 	if (*working_mode == 0) {
1233 		return (0);
1234 	}
1235 
1236 	if (zacep->a_access_mask & *working_mode) {
1237 		if (zacep->a_type == ALLOW) {
1238 			*working_mode &=
1239 			    ~(*working_mode & zacep->a_access_mask);
1240 			if (*working_mode == 0)
1241 				return (0);
1242 		} else if (zacep->a_type == DENY) {
1243 			return (EACCES);
1244 		}
1245 	}
1246 
1247 	/*
1248 	 * haven't been specifcally denied at this point
1249 	 * so return UNDETERMINED.
1250 	 */
1251 
1252 	return (ACCESS_UNDETERMINED);
1253 }
1254 
1255 
1256 static int
1257 zfs_zaccess_common(znode_t *zp, int v4_mode, int *working_mode, cred_t *cr)
1258 {
1259 	zfs_acl_t	*aclp;
1260 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
1261 	ace_t		*zacep;
1262 	gid_t		gid;
1263 	int		cnt;
1264 	int		i;
1265 	int		error;
1266 	int		access_deny = ACCESS_UNDETERMINED;
1267 	uint_t		entry_type;
1268 	uid_t		uid = crgetuid(cr);
1269 
1270 	if (zfsvfs->z_assign >= TXG_INITIAL) {		/* ZIL replay */
1271 		*working_mode = 0;
1272 		return (0);
1273 	}
1274 
1275 	*working_mode = v4_mode;
1276 
1277 	if ((v4_mode & WRITE_MASK) &&
1278 	    (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
1279 	    (!IS_DEVVP(ZTOV(zp)))) {
1280 		return (EROFS);
1281 	}
1282 
1283 	mutex_enter(&zp->z_acl_lock);
1284 
1285 	error = zfs_acl_node_read(zp, &aclp);
1286 	if (error != 0) {
1287 		mutex_exit(&zp->z_acl_lock);
1288 		return (error);
1289 	}
1290 
1291 
1292 	zacep = aclp->z_acl;
1293 	cnt = aclp->z_acl_count;
1294 
1295 	for (i = 0; i != cnt; i++) {
1296 
1297 		DTRACE_PROBE2(zfs__access__common,
1298 		    ace_t *, &zacep[i], int, *working_mode);
1299 
1300 		if (zacep[i].a_flags & ACE_INHERIT_ONLY_ACE)
1301 			continue;
1302 
1303 		entry_type = (zacep[i].a_flags & ACE_TYPE_FLAGS);
1304 		switch (entry_type) {
1305 		case ACE_OWNER:
1306 			if (uid == zp->z_phys->zp_uid) {
1307 				access_deny = zfs_ace_access(&zacep[i],
1308 				    working_mode);
1309 			}
1310 			break;
1311 		case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
1312 		case ACE_IDENTIFIER_GROUP:
1313 			/*
1314 			 * Owning group gid is in znode not ACL
1315 			 */
1316 			if (entry_type == (ACE_IDENTIFIER_GROUP | ACE_GROUP))
1317 				gid = zp->z_phys->zp_gid;
1318 			else
1319 				gid = zacep[i].a_who;
1320 
1321 			if (groupmember(gid, cr)) {
1322 				access_deny = zfs_ace_access(&zacep[i],
1323 				    working_mode);
1324 			}
1325 			break;
1326 		case ACE_EVERYONE:
1327 			access_deny = zfs_ace_access(&zacep[i], working_mode);
1328 			break;
1329 
1330 		/* USER Entry */
1331 		default:
1332 			if (entry_type == 0) {
1333 				if (uid == zacep[i].a_who) {
1334 					access_deny = zfs_ace_access(&zacep[i],
1335 					    working_mode);
1336 				}
1337 				break;
1338 			}
1339 			zfs_acl_free(aclp);
1340 			mutex_exit(&zp->z_acl_lock);
1341 			return (EIO);
1342 		}
1343 
1344 		if (access_deny != ACCESS_UNDETERMINED)
1345 			break;
1346 	}
1347 
1348 	mutex_exit(&zp->z_acl_lock);
1349 	zfs_acl_free(aclp);
1350 
1351 	return (access_deny);
1352 }
1353 
1354 
1355 /*
1356  * Determine whether Access should be granted/denied, invoking least
1357  * priv subsytem when a deny is determined.
1358  */
1359 int
1360 zfs_zaccess(znode_t *zp, int mode, cred_t *cr)
1361 {
1362 	int	working_mode;
1363 	int	error;
1364 	int	is_attr;
1365 	znode_t	*xzp;
1366 	znode_t *check_zp = zp;
1367 
1368 	is_attr = ((zp->z_phys->zp_flags & ZFS_XATTR) &&
1369 	    (ZTOV(zp)->v_type == VDIR));
1370 
1371 	/*
1372 	 * If attribute then validate against base file
1373 	 */
1374 	if (is_attr) {
1375 		if ((error = zfs_zget(zp->z_zfsvfs,
1376 		    zp->z_phys->zp_parent, &xzp)) != 0)	{
1377 			return (error);
1378 		}
1379 		check_zp = xzp;
1380 		/*
1381 		 * fixup mode to map to xattr perms
1382 		 */
1383 
1384 		if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
1385 			mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
1386 			mode |= ACE_WRITE_NAMED_ATTRS;
1387 		}
1388 
1389 		if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
1390 			mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
1391 			mode |= ACE_READ_NAMED_ATTRS;
1392 		}
1393 	}
1394 
1395 	error = zfs_zaccess_common(check_zp, mode, &working_mode, cr);
1396 
1397 	if (error == EROFS) {
1398 		if (is_attr)
1399 			VN_RELE(ZTOV(xzp));
1400 		return (error);
1401 	}
1402 
1403 	if (error || working_mode) {
1404 		working_mode = (zfs_v4_to_unix(working_mode) << 6);
1405 		error = secpolicy_vnode_access(cr, ZTOV(check_zp),
1406 		    check_zp->z_phys->zp_uid, working_mode);
1407 	}
1408 
1409 	if (is_attr)
1410 		VN_RELE(ZTOV(xzp));
1411 
1412 	return (error);
1413 }
1414 
1415 /*
1416  * Special zaccess function to check for special nfsv4 perm.
1417  * doesn't call secpolicy_vnode_access() for failure, since that
1418  * would probably be the wrong policy function to call.
1419  * instead its up to the caller to handle that situation.
1420  */
1421 
1422 int
1423 zfs_zaccess_v4_perm(znode_t *zp, int mode, cred_t *cr)
1424 {
1425 	int working_mode = 0;
1426 	return (zfs_zaccess_common(zp, mode, &working_mode, cr));
1427 }
1428 
1429 /*
1430  * Translate tradition unix VREAD/VWRITE/VEXEC mode into
1431  * native ACL format and call zfs_zaccess()
1432  */
1433 int
1434 zfs_zaccess_rwx(znode_t *zp, mode_t mode, cred_t *cr)
1435 {
1436 	int v4_mode = zfs_unix_to_v4(mode >> 6);
1437 
1438 	return (zfs_zaccess(zp, v4_mode, cr));
1439 }
1440 
1441 static int
1442 zfs_delete_final_check(znode_t *zp, znode_t *dzp, cred_t *cr)
1443 {
1444 	int error;
1445 
1446 	error = secpolicy_vnode_access(cr, ZTOV(zp),
1447 	    dzp->z_phys->zp_uid, S_IWRITE|S_IEXEC);
1448 
1449 	if (error == 0)
1450 		error = zfs_sticky_remove_access(dzp, zp, cr);
1451 
1452 	return (error);
1453 }
1454 
1455 /*
1456  * Determine whether Access should be granted/deny, without
1457  * consulting least priv subsystem.
1458  *
1459  *
1460  * The following chart is the recommended NFSv4 enforcement for
1461  * ability to delete an object.
1462  *
1463  *      -------------------------------------------------------
1464  *      |   Parent Dir  |           Target Object Permissions |
1465  *      |  permissions  |                                     |
1466  *      -------------------------------------------------------
1467  *      |               | ACL Allows | ACL Denies| Delete     |
1468  *      |               |  Delete    |  Delete   | unspecified|
1469  *      -------------------------------------------------------
1470  *      |  ACL Allows   | Permit     | Permit    | Permit     |
1471  *      |  DELETE_CHILD |                                     |
1472  *      -------------------------------------------------------
1473  *      |  ACL Denies   | Permit     | Deny      | Deny       |
1474  *      |  DELETE_CHILD |            |           |            |
1475  *      -------------------------------------------------------
1476  *      | ACL specifies |            |           |            |
1477  *      | only allow    | Permit     | Permit    | Permit     |
1478  *      | write and     |            |           |            |
1479  *      | execute       |            |           |            |
1480  *      -------------------------------------------------------
1481  *      | ACL denies    |            |           |            |
1482  *      | write and     | Permit     | Deny      | Deny       |
1483  *      | execute       |            |           |            |
1484  *      -------------------------------------------------------
1485  *         ^
1486  *         |
1487  *         No search privilege, can't even look up file?
1488  *
1489  */
1490 int
1491 zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
1492 {
1493 	int dzp_working_mode = 0;
1494 	int zp_working_mode = 0;
1495 	int dzp_error, zp_error;
1496 
1497 	/*
1498 	 * Arghh, this check is going to require a couple of questions
1499 	 * to be asked.  We want specific DELETE permissions to
1500 	 * take precedence over WRITE/EXECUTE.  We don't
1501 	 * want an ACL such as this to mess us up.
1502 	 * user:joe:write_data:deny,user:joe:delete:allow
1503 	 *
1504 	 * However, deny permissions may ultimately be overridden
1505 	 * by secpolicy_vnode_access().
1506 	 */
1507 
1508 	dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD,
1509 	    &dzp_working_mode, cr);
1510 	zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode, cr);
1511 
1512 	if (dzp_error == EROFS || zp_error == EROFS)
1513 		return (dzp_error);
1514 
1515 	/*
1516 	 * First check the first row.
1517 	 * We only need to see if parent Allows delete_child
1518 	 */
1519 	if ((dzp_working_mode & ACE_DELETE_CHILD) == 0)
1520 		return (0);
1521 
1522 	/*
1523 	 * Second row
1524 	 * we already have the necessary information in
1525 	 * zp_working_mode, zp_error and dzp_error.
1526 	 */
1527 
1528 	if ((zp_working_mode & ACE_DELETE) == 0)
1529 		return (0);
1530 
1531 	/*
1532 	 * Now zp_error should either be EACCES which indicates
1533 	 * a "deny" delete entry or ACCESS_UNDETERMINED if the "delete"
1534 	 * entry exists on the target.
1535 	 *
1536 	 * dzp_error should be either EACCES which indicates a "deny"
1537 	 * entry for delete_child or ACCESS_UNDETERMINED if no delete_child
1538 	 * entry exists.  If value is EACCES then we are done
1539 	 * and zfs_delete_final_check() will make the final decision
1540 	 * regarding to allow the delete.
1541 	 */
1542 
1543 	ASSERT(zp_error != 0 && dzp_error != 0);
1544 	if (dzp_error == EACCES)
1545 		return (zfs_delete_final_check(zp, dzp, cr));
1546 
1547 	/*
1548 	 * Third Row
1549 	 * Only need to check for write/execute on parent
1550 	 */
1551 
1552 	dzp_error = zfs_zaccess_common(dzp, ACE_WRITE_DATA|ACE_EXECUTE,
1553 	    &dzp_working_mode, cr);
1554 
1555 	if (dzp_error == EROFS)
1556 		return (dzp_error);
1557 
1558 	if ((dzp_working_mode & (ACE_WRITE_DATA|ACE_EXECUTE)) == 0)
1559 		return (zfs_sticky_remove_access(dzp, zp, cr));
1560 
1561 	/*
1562 	 * Fourth Row
1563 	 */
1564 
1565 	if (((dzp_working_mode & (ACE_WRITE_DATA|ACE_EXECUTE)) != 0) &&
1566 	    ((zp_working_mode & ACE_DELETE) == 0))
1567 		return (zfs_sticky_remove_access(dzp, zp, cr));
1568 
1569 	return (zfs_delete_final_check(zp, dzp, cr));
1570 }
1571 
1572 int
1573 zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
1574     znode_t *tzp, cred_t *cr)
1575 {
1576 	int add_perm;
1577 	int error;
1578 
1579 	add_perm = (ZTOV(szp)->v_type == VDIR) ?
1580 	    ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE;
1581 
1582 	/*
1583 	 * Rename permissions are combination of delete permission +
1584 	 * add file/subdir permission.
1585 	 */
1586 
1587 	/*
1588 	 * first make sure we do the delete portion.
1589 	 *
1590 	 * If that succeeds then check for add_file/add_subdir permissions
1591 	 */
1592 
1593 	if (error = zfs_zaccess_delete(sdzp, szp, cr))
1594 		return (error);
1595 
1596 	/*
1597 	 * If we have a tzp, see if we can delete it?
1598 	 */
1599 	if (tzp) {
1600 		if (error = zfs_zaccess_delete(tdzp, tzp, cr))
1601 			return (error);
1602 	}
1603 
1604 	/*
1605 	 * Now check for add permissions
1606 	 */
1607 	error = zfs_zaccess(tdzp, add_perm, cr);
1608 
1609 	return (error);
1610 }
1611