xref: /freebsd/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c (revision ca53e5aedfebcc1b4091b68e01b2d5cae923f85e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
23  *
24  * Extended attributes (xattr) on Solaris are implemented as files
25  * which exist in a hidden xattr directory.  These extended attributes
26  * can be accessed using the attropen() system call which opens
27  * the extended attribute.  It can then be manipulated just like
28  * a standard file descriptor.  This has a couple advantages such
29  * as practically no size limit on the file, and the extended
30  * attributes permissions may differ from those of the parent file.
31  * This interface is really quite clever, but it's also completely
32  * different than what is supported on Linux.  It also comes with a
33  * steep performance penalty when accessing small xattrs because they
34  * are not stored with the parent file.
35  *
36  * Under Linux extended attributes are manipulated by the system
37  * calls getxattr(2), setxattr(2), and listxattr(2).  They consider
38  * extended attributes to be name/value pairs where the name is a
39  * NULL terminated string.  The name must also include one of the
40  * following namespace prefixes:
41  *
42  *   user     - No restrictions and is available to user applications.
43  *   trusted  - Restricted to kernel and root (CAP_SYS_ADMIN) use.
44  *   system   - Used for access control lists (system.nfs4_acl, etc).
45  *   security - Used by SELinux to store a file's security context.
46  *
47  * The value under Linux is limited to 65536 bytes of binary data.
48  * In practice, individual xattrs tend to be much smaller than this
49  * and are typically less than 100 bytes.  A good example of this
50  * are the security.selinux xattrs which are less than 100 bytes and
51  * exist for every file when xattr labeling is enabled.
52  *
53  * The Linux xattr implementation has been written to take advantage of
54  * this typical usage.  When the dataset property 'xattr=sa' is set,
55  * then xattrs will be preferentially stored as System Attributes (SA).
56  * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
57  * up to 64k of xattrs to be stored in the spill block.  If additional
58  * xattr space is required, which is unlikely under Linux, they will
59  * be stored using the traditional directory approach.
60  *
61  * This optimization results in roughly a 3x performance improvement
62  * when accessing xattrs because it avoids the need to perform a seek
63  * for every xattr value.  When multiple xattrs are stored per-file
64  * the performance improvements are even greater because all of the
65  * xattrs stored in the spill block will be cached.
66  *
67  * However, by default SA based xattrs are disabled in the Linux port
68  * to maximize compatibility with other implementations.  If you do
69  * enable SA based xattrs then they will not be visible on platforms
70  * which do not support this feature.
71  *
72  * NOTE: One additional consequence of the xattr directory implementation
73  * is that when an extended attribute is manipulated an inode is created.
74  * This inode will exist in the Linux inode cache but there will be no
75  * associated entry in the dentry cache which references it.  This is
76  * safe but it may result in some confusion.  Enabling SA based xattrs
77  * largely avoids the issue except in the overflow case.
78  */
79 
80 #include <sys/zfs_znode.h>
81 #include <sys/zfs_vfsops.h>
82 #include <sys/zfs_vnops.h>
83 #include <sys/zap.h>
84 #include <sys/vfs.h>
85 #include <sys/zpl.h>
86 
/*
 * Accumulator shared by the listxattr helpers below.  Field order matters:
 * zpl_xattr_list() fills this structure with a positional initializer.
 */
struct xattr_filldir {
	size_t size;		/* capacity of buf, in bytes */
	size_t offset;		/* bytes of names (with NULs) gathered so far */
	char *buf;		/* destination buffer; NULL means "only count" */
	struct dentry *dentry;	/* dentry whose xattrs are being listed */
};
typedef struct xattr_filldir xattr_filldir_t;

/* Returns the namespace handler responsible for the given xattr name. */
static const struct xattr_handler *zpl_xattr_handler(const char *);
95 
96 static int
97 zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
98 {
99 	static const struct xattr_handler *handler;
100 	struct dentry *d = xf->dentry;
101 
102 	handler = zpl_xattr_handler(name);
103 	if (!handler)
104 		return (0);
105 
106 	if (handler->list) {
107 #if defined(HAVE_XATTR_LIST_SIMPLE)
108 		if (!handler->list(d))
109 			return (0);
110 #elif defined(HAVE_XATTR_LIST_DENTRY)
111 		if (!handler->list(d, NULL, 0, name, name_len, 0))
112 			return (0);
113 #elif defined(HAVE_XATTR_LIST_HANDLER)
114 		if (!handler->list(handler, d, NULL, 0, name, name_len))
115 			return (0);
116 #endif
117 	}
118 
119 	return (1);
120 }
121 
122 /*
123  * Determine is a given xattr name should be visible and if so copy it
124  * in to the provided buffer (xf->buf).
125  */
126 static int
127 zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)
128 {
129 	/* Check permissions using the per-namespace list xattr handler. */
130 	if (!zpl_xattr_permission(xf, name, name_len))
131 		return (0);
132 
133 	/* When xf->buf is NULL only calculate the required size. */
134 	if (xf->buf) {
135 		if (xf->offset + name_len + 1 > xf->size)
136 			return (-ERANGE);
137 
138 		memcpy(xf->buf + xf->offset, name, name_len);
139 		xf->buf[xf->offset + name_len] = '\0';
140 	}
141 
142 	xf->offset += (name_len + 1);
143 
144 	return (0);
145 }
146 
147 /*
148  * Read as many directory entry names as will fit in to the provided buffer,
149  * or when no buffer is provided calculate the required buffer size.
150  */
151 static int
152 zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf)
153 {
154 	zap_cursor_t zc;
155 	zap_attribute_t	zap;
156 	int error;
157 
158 	zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id);
159 
160 	while ((error = -zap_cursor_retrieve(&zc, &zap)) == 0) {
161 
162 		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
163 			error = -ENXIO;
164 			break;
165 		}
166 
167 		error = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name));
168 		if (error)
169 			break;
170 
171 		zap_cursor_advance(&zc);
172 	}
173 
174 	zap_cursor_fini(&zc);
175 
176 	if (error == -ENOENT)
177 		error = 0;
178 
179 	return (error);
180 }
181 
182 static ssize_t
183 zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
184 {
185 	struct inode *ip = xf->dentry->d_inode;
186 	struct inode *dxip = NULL;
187 	znode_t *dxzp;
188 	int error;
189 
190 	/* Lookup the xattr directory */
191 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
192 	    cr, NULL, NULL);
193 	if (error) {
194 		if (error == -ENOENT)
195 			error = 0;
196 
197 		return (error);
198 	}
199 
200 	dxip = ZTOI(dxzp);
201 	error = zpl_xattr_readdir(dxip, xf);
202 	iput(dxip);
203 
204 	return (error);
205 }
206 
207 static ssize_t
208 zpl_xattr_list_sa(xattr_filldir_t *xf)
209 {
210 	znode_t *zp = ITOZ(xf->dentry->d_inode);
211 	nvpair_t *nvp = NULL;
212 	int error = 0;
213 
214 	mutex_enter(&zp->z_lock);
215 	if (zp->z_xattr_cached == NULL)
216 		error = -zfs_sa_get_xattr(zp);
217 	mutex_exit(&zp->z_lock);
218 
219 	if (error)
220 		return (error);
221 
222 	ASSERT(zp->z_xattr_cached);
223 
224 	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
225 		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
226 
227 		error = zpl_xattr_filldir(xf, nvpair_name(nvp),
228 		    strlen(nvpair_name(nvp)));
229 		if (error)
230 			return (error);
231 	}
232 
233 	return (0);
234 }
235 
236 ssize_t
237 zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
238 {
239 	znode_t *zp = ITOZ(dentry->d_inode);
240 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
241 	xattr_filldir_t xf = { buffer_size, 0, buffer, dentry };
242 	cred_t *cr = CRED();
243 	fstrans_cookie_t cookie;
244 	int error = 0;
245 
246 	crhold(cr);
247 	cookie = spl_fstrans_mark();
248 	ZPL_ENTER(zfsvfs);
249 	ZPL_VERIFY_ZP(zp);
250 	rw_enter(&zp->z_xattr_lock, RW_READER);
251 
252 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
253 		error = zpl_xattr_list_sa(&xf);
254 		if (error)
255 			goto out;
256 	}
257 
258 	error = zpl_xattr_list_dir(&xf, cr);
259 	if (error)
260 		goto out;
261 
262 	error = xf.offset;
263 out:
264 
265 	rw_exit(&zp->z_xattr_lock);
266 	ZPL_EXIT(zfsvfs);
267 	spl_fstrans_unmark(cookie);
268 	crfree(cr);
269 
270 	return (error);
271 }
272 
273 static int
274 zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
275     size_t size, cred_t *cr)
276 {
277 	struct inode *xip = NULL;
278 	znode_t *dxzp = NULL;
279 	znode_t *xzp = NULL;
280 	loff_t pos = 0;
281 	int error;
282 
283 	/* Lookup the xattr directory */
284 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
285 	    cr, NULL, NULL);
286 	if (error)
287 		goto out;
288 
289 	/* Lookup a specific xattr name in the directory */
290 	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
291 	if (error)
292 		goto out;
293 
294 	xip = ZTOI(xzp);
295 	if (!size) {
296 		error = i_size_read(xip);
297 		goto out;
298 	}
299 
300 	if (size < i_size_read(xip)) {
301 		error = -ERANGE;
302 		goto out;
303 	}
304 
305 	error = zpl_read_common(xip, value, size, &pos, UIO_SYSSPACE, 0, cr);
306 out:
307 	if (xzp)
308 		zrele(xzp);
309 
310 	if (dxzp)
311 		zrele(dxzp);
312 
313 	return (error);
314 }
315 
316 static int
317 zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
318 {
319 	znode_t *zp = ITOZ(ip);
320 	uchar_t *nv_value;
321 	uint_t nv_size;
322 	int error = 0;
323 
324 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
325 
326 	mutex_enter(&zp->z_lock);
327 	if (zp->z_xattr_cached == NULL)
328 		error = -zfs_sa_get_xattr(zp);
329 	mutex_exit(&zp->z_lock);
330 
331 	if (error)
332 		return (error);
333 
334 	ASSERT(zp->z_xattr_cached);
335 	error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,
336 	    &nv_value, &nv_size);
337 	if (error)
338 		return (error);
339 
340 	if (size == 0 || value == NULL)
341 		return (nv_size);
342 
343 	if (size < nv_size)
344 		return (-ERANGE);
345 
346 	memcpy(value, nv_value, nv_size);
347 
348 	return (nv_size);
349 }
350 
351 static int
352 __zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
353     cred_t *cr)
354 {
355 	znode_t *zp = ITOZ(ip);
356 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
357 	int error;
358 
359 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
360 
361 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
362 		error = zpl_xattr_get_sa(ip, name, value, size);
363 		if (error != -ENOENT)
364 			goto out;
365 	}
366 
367 	error = zpl_xattr_get_dir(ip, name, value, size, cr);
368 out:
369 	if (error == -ENOENT)
370 		error = -ENODATA;
371 
372 	return (error);
373 }
374 
375 #define	XATTR_NOENT	0x0
376 #define	XATTR_IN_SA	0x1
377 #define	XATTR_IN_DIR	0x2
378 /* check where the xattr resides */
379 static int
380 __zpl_xattr_where(struct inode *ip, const char *name, int *where, cred_t *cr)
381 {
382 	znode_t *zp = ITOZ(ip);
383 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
384 	int error;
385 
386 	ASSERT(where);
387 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
388 
389 	*where = XATTR_NOENT;
390 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
391 		error = zpl_xattr_get_sa(ip, name, NULL, 0);
392 		if (error >= 0)
393 			*where |= XATTR_IN_SA;
394 		else if (error != -ENOENT)
395 			return (error);
396 	}
397 
398 	error = zpl_xattr_get_dir(ip, name, NULL, 0, cr);
399 	if (error >= 0)
400 		*where |= XATTR_IN_DIR;
401 	else if (error != -ENOENT)
402 		return (error);
403 
404 	if (*where == (XATTR_IN_SA|XATTR_IN_DIR))
405 		cmn_err(CE_WARN, "ZFS: inode %p has xattr \"%s\""
406 		    " in both SA and dir", ip, name);
407 	if (*where == XATTR_NOENT)
408 		error = -ENODATA;
409 	else
410 		error = 0;
411 	return (error);
412 }
413 
414 static int
415 zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
416 {
417 	znode_t *zp = ITOZ(ip);
418 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
419 	cred_t *cr = CRED();
420 	fstrans_cookie_t cookie;
421 	int error;
422 
423 	crhold(cr);
424 	cookie = spl_fstrans_mark();
425 	ZPL_ENTER(zfsvfs);
426 	ZPL_VERIFY_ZP(zp);
427 	rw_enter(&zp->z_xattr_lock, RW_READER);
428 	error = __zpl_xattr_get(ip, name, value, size, cr);
429 	rw_exit(&zp->z_xattr_lock);
430 	ZPL_EXIT(zfsvfs);
431 	spl_fstrans_unmark(cookie);
432 	crfree(cr);
433 
434 	return (error);
435 }
436 
437 static int
438 zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
439     size_t size, int flags, cred_t *cr)
440 {
441 	znode_t *dxzp = NULL;
442 	znode_t *xzp = NULL;
443 	vattr_t *vap = NULL;
444 	ssize_t wrote;
445 	int lookup_flags, error;
446 	const int xattr_mode = S_IFREG | 0644;
447 	loff_t pos = 0;
448 
449 	/*
450 	 * Lookup the xattr directory.  When we're adding an entry pass
451 	 * CREATE_XATTR_DIR to ensure the xattr directory is created.
452 	 * When removing an entry this flag is not passed to avoid
453 	 * unnecessarily creating a new xattr directory.
454 	 */
455 	lookup_flags = LOOKUP_XATTR;
456 	if (value != NULL)
457 		lookup_flags |= CREATE_XATTR_DIR;
458 
459 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, lookup_flags,
460 	    cr, NULL, NULL);
461 	if (error)
462 		goto out;
463 
464 	/* Lookup a specific xattr name in the directory */
465 	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
466 	if (error && (error != -ENOENT))
467 		goto out;
468 
469 	error = 0;
470 
471 	/* Remove a specific name xattr when value is set to NULL. */
472 	if (value == NULL) {
473 		if (xzp)
474 			error = -zfs_remove(dxzp, (char *)name, cr, 0);
475 
476 		goto out;
477 	}
478 
479 	/* Lookup failed create a new xattr. */
480 	if (xzp == NULL) {
481 		vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
482 		vap->va_mode = xattr_mode;
483 		vap->va_mask = ATTR_MODE;
484 		vap->va_uid = crgetfsuid(cr);
485 		vap->va_gid = crgetfsgid(cr);
486 
487 		error = -zfs_create(dxzp, (char *)name, vap, 0, 0644, &xzp,
488 		    cr, 0, NULL);
489 		if (error)
490 			goto out;
491 	}
492 
493 	ASSERT(xzp != NULL);
494 
495 	error = -zfs_freesp(xzp, 0, 0, xattr_mode, TRUE);
496 	if (error)
497 		goto out;
498 
499 	wrote = zpl_write_common(ZTOI(xzp), value, size, &pos,
500 	    UIO_SYSSPACE, 0, cr);
501 	if (wrote < 0)
502 		error = wrote;
503 
504 out:
505 
506 	if (error == 0) {
507 		ip->i_ctime = current_time(ip);
508 		zfs_mark_inode_dirty(ip);
509 	}
510 
511 	if (vap)
512 		kmem_free(vap, sizeof (vattr_t));
513 
514 	if (xzp)
515 		zrele(xzp);
516 
517 	if (dxzp)
518 		zrele(dxzp);
519 
520 	if (error == -ENOENT)
521 		error = -ENODATA;
522 
523 	ASSERT3S(error, <=, 0);
524 
525 	return (error);
526 }
527 
528 static int
529 zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
530     size_t size, int flags, cred_t *cr)
531 {
532 	znode_t *zp = ITOZ(ip);
533 	nvlist_t *nvl;
534 	size_t sa_size;
535 	int error = 0;
536 
537 	mutex_enter(&zp->z_lock);
538 	if (zp->z_xattr_cached == NULL)
539 		error = -zfs_sa_get_xattr(zp);
540 	mutex_exit(&zp->z_lock);
541 
542 	if (error)
543 		return (error);
544 
545 	ASSERT(zp->z_xattr_cached);
546 	nvl = zp->z_xattr_cached;
547 
548 	if (value == NULL) {
549 		error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
550 		if (error == -ENOENT)
551 			error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
552 	} else {
553 		/* Limited to 32k to keep nvpair memory allocations small */
554 		if (size > DXATTR_MAX_ENTRY_SIZE)
555 			return (-EFBIG);
556 
557 		/* Prevent the DXATTR SA from consuming the entire SA region */
558 		error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
559 		if (error)
560 			return (error);
561 
562 		if (sa_size > DXATTR_MAX_SA_SIZE)
563 			return (-EFBIG);
564 
565 		error = -nvlist_add_byte_array(nvl, name,
566 		    (uchar_t *)value, size);
567 	}
568 
569 	/*
570 	 * Update the SA for additions, modifications, and removals. On
571 	 * error drop the inconsistent cached version of the nvlist, it
572 	 * will be reconstructed from the ARC when next accessed.
573 	 */
574 	if (error == 0)
575 		error = -zfs_sa_set_xattr(zp);
576 
577 	if (error) {
578 		nvlist_free(nvl);
579 		zp->z_xattr_cached = NULL;
580 	}
581 
582 	ASSERT3S(error, <=, 0);
583 
584 	return (error);
585 }
586 
587 static int
588 zpl_xattr_set(struct inode *ip, const char *name, const void *value,
589     size_t size, int flags)
590 {
591 	znode_t *zp = ITOZ(ip);
592 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
593 	cred_t *cr = CRED();
594 	fstrans_cookie_t cookie;
595 	int where;
596 	int error;
597 
598 	crhold(cr);
599 	cookie = spl_fstrans_mark();
600 	ZPL_ENTER(zfsvfs);
601 	ZPL_VERIFY_ZP(zp);
602 	rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER);
603 
604 	/*
605 	 * Before setting the xattr check to see if it already exists.
606 	 * This is done to ensure the following optional flags are honored.
607 	 *
608 	 *   XATTR_CREATE: fail if xattr already exists
609 	 *   XATTR_REPLACE: fail if xattr does not exist
610 	 *
611 	 * We also want to know if it resides in sa or dir, so we can make
612 	 * sure we don't end up with duplicate in both places.
613 	 */
614 	error = __zpl_xattr_where(ip, name, &where, cr);
615 	if (error < 0) {
616 		if (error != -ENODATA)
617 			goto out;
618 		if (flags & XATTR_REPLACE)
619 			goto out;
620 
621 		/* The xattr to be removed already doesn't exist */
622 		error = 0;
623 		if (value == NULL)
624 			goto out;
625 	} else {
626 		error = -EEXIST;
627 		if (flags & XATTR_CREATE)
628 			goto out;
629 	}
630 
631 	/* Preferentially store the xattr as a SA for better performance */
632 	if (zfsvfs->z_use_sa && zp->z_is_sa &&
633 	    (zfsvfs->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
634 		error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
635 		if (error == 0) {
636 			/*
637 			 * Successfully put into SA, we need to clear the one
638 			 * in dir.
639 			 */
640 			if (where & XATTR_IN_DIR)
641 				zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr);
642 			goto out;
643 		}
644 	}
645 
646 	error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
647 	/*
648 	 * Successfully put into dir, we need to clear the one in SA.
649 	 */
650 	if (error == 0 && (where & XATTR_IN_SA))
651 		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
652 out:
653 	rw_exit(&ITOZ(ip)->z_xattr_lock);
654 	ZPL_EXIT(zfsvfs);
655 	spl_fstrans_unmark(cookie);
656 	crfree(cr);
657 	ASSERT3S(error, <=, 0);
658 
659 	return (error);
660 }
661 
662 /*
663  * Extended user attributes
664  *
665  * "Extended user attributes may be assigned to files and directories for
666  * storing arbitrary additional information such as the mime type,
667  * character set or encoding of a file.  The access permissions for user
668  * attributes are defined by the file permission bits: read permission
669  * is required to retrieve the attribute value, and writer permission is
670  * required to change it.
671  *
672  * The file permission bits of regular files and directories are
673  * interpreted differently from the file permission bits of special
674  * files and symbolic links.  For regular files and directories the file
675  * permission bits define access to the file's contents, while for
676  * device special files they define access to the device described by
677  * the special file.  The file permissions of symbolic links are not
678  * used in access checks.  These differences would allow users to
679  * consume filesystem resources in a way not controllable by disk quotas
680  * for group or world writable special files and directories.
681  *
682  * For this reason, extended user attributes are allowed only for
683  * regular files and directories, and access to extended user attributes
684  * is restricted to the owner and to users with appropriate capabilities
685  * for directories with the sticky bit set (see the chmod(1) manual page
686  * for an explanation of the sticky bit)." - xattr(7)
687  *
688  * ZFS allows extended user attributes to be disabled administratively
689  * by setting the 'xattr=off' property on the dataset.
690  */
691 static int
692 __zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,
693     const char *name, size_t name_len)
694 {
695 	return (ITOZSB(ip)->z_flags & ZSB_XATTR);
696 }
697 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_user_list);
698 
699 static int
700 __zpl_xattr_user_get(struct inode *ip, const char *name,
701     void *value, size_t size)
702 {
703 	char *xattr_name;
704 	int error;
705 	/* xattr_resolve_name will do this for us if this is defined */
706 #ifndef HAVE_XATTR_HANDLER_NAME
707 	if (strcmp(name, "") == 0)
708 		return (-EINVAL);
709 #endif
710 	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
711 		return (-EOPNOTSUPP);
712 
713 	xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
714 	error = zpl_xattr_get(ip, xattr_name, value, size);
715 	kmem_strfree(xattr_name);
716 
717 	return (error);
718 }
719 ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
720 
721 static int
722 __zpl_xattr_user_set(struct inode *ip, const char *name,
723     const void *value, size_t size, int flags)
724 {
725 	char *xattr_name;
726 	int error;
727 	/* xattr_resolve_name will do this for us if this is defined */
728 #ifndef HAVE_XATTR_HANDLER_NAME
729 	if (strcmp(name, "") == 0)
730 		return (-EINVAL);
731 #endif
732 	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
733 		return (-EOPNOTSUPP);
734 
735 	xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
736 	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
737 	kmem_strfree(xattr_name);
738 
739 	return (error);
740 }
741 ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
742 
743 xattr_handler_t zpl_xattr_user_handler =
744 {
745 	.prefix	= XATTR_USER_PREFIX,
746 	.list	= zpl_xattr_user_list,
747 	.get	= zpl_xattr_user_get,
748 	.set	= zpl_xattr_user_set,
749 };
750 
751 /*
752  * Trusted extended attributes
753  *
754  * "Trusted extended attributes are visible and accessible only to
755  * processes that have the CAP_SYS_ADMIN capability.  Attributes in this
756  * class are used to implement mechanisms in user space (i.e., outside
757  * the kernel) which keep information in extended attributes to which
758  * ordinary processes should not have access." - xattr(7)
759  */
760 static int
761 __zpl_xattr_trusted_list(struct inode *ip, char *list, size_t list_size,
762     const char *name, size_t name_len)
763 {
764 	return (capable(CAP_SYS_ADMIN));
765 }
766 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_trusted_list);
767 
768 static int
769 __zpl_xattr_trusted_get(struct inode *ip, const char *name,
770     void *value, size_t size)
771 {
772 	char *xattr_name;
773 	int error;
774 
775 	if (!capable(CAP_SYS_ADMIN))
776 		return (-EACCES);
777 	/* xattr_resolve_name will do this for us if this is defined */
778 #ifndef HAVE_XATTR_HANDLER_NAME
779 	if (strcmp(name, "") == 0)
780 		return (-EINVAL);
781 #endif
782 	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
783 	error = zpl_xattr_get(ip, xattr_name, value, size);
784 	kmem_strfree(xattr_name);
785 
786 	return (error);
787 }
788 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
789 
790 static int
791 __zpl_xattr_trusted_set(struct inode *ip, const char *name,
792     const void *value, size_t size, int flags)
793 {
794 	char *xattr_name;
795 	int error;
796 
797 	if (!capable(CAP_SYS_ADMIN))
798 		return (-EACCES);
799 	/* xattr_resolve_name will do this for us if this is defined */
800 #ifndef HAVE_XATTR_HANDLER_NAME
801 	if (strcmp(name, "") == 0)
802 		return (-EINVAL);
803 #endif
804 	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
805 	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
806 	kmem_strfree(xattr_name);
807 
808 	return (error);
809 }
810 ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
811 
812 xattr_handler_t zpl_xattr_trusted_handler =
813 {
814 	.prefix	= XATTR_TRUSTED_PREFIX,
815 	.list	= zpl_xattr_trusted_list,
816 	.get	= zpl_xattr_trusted_get,
817 	.set	= zpl_xattr_trusted_set,
818 };
819 
820 /*
821  * Extended security attributes
822  *
823  * "The security attribute namespace is used by kernel security modules,
824  * such as Security Enhanced Linux, and also to implement file
825  * capabilities (see capabilities(7)).  Read and write access
826  * permissions to security attributes depend on the policy implemented
827  * for each security attribute by the security module.  When no security
828  * module is loaded, all processes have read access to extended security
829  * attributes, and write access is limited to processes that have the
830  * CAP_SYS_ADMIN capability." - xattr(7)
831  */
/*
 * List filter for the security namespace: every name is reported as
 * visible.  None of the arguments are consulted.
 */
static int
__zpl_xattr_security_list(struct inode *ip, char *list, size_t list_size,
    const char *name, size_t name_len)
{
	const int always_visible = 1;

	return (always_visible);
}
ZPL_XATTR_LIST_WRAPPER(zpl_xattr_security_list);
839 
840 static int
841 __zpl_xattr_security_get(struct inode *ip, const char *name,
842     void *value, size_t size)
843 {
844 	char *xattr_name;
845 	int error;
846 	/* xattr_resolve_name will do this for us if this is defined */
847 #ifndef HAVE_XATTR_HANDLER_NAME
848 	if (strcmp(name, "") == 0)
849 		return (-EINVAL);
850 #endif
851 	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
852 	error = zpl_xattr_get(ip, xattr_name, value, size);
853 	kmem_strfree(xattr_name);
854 
855 	return (error);
856 }
857 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
858 
859 static int
860 __zpl_xattr_security_set(struct inode *ip, const char *name,
861     const void *value, size_t size, int flags)
862 {
863 	char *xattr_name;
864 	int error;
865 	/* xattr_resolve_name will do this for us if this is defined */
866 #ifndef HAVE_XATTR_HANDLER_NAME
867 	if (strcmp(name, "") == 0)
868 		return (-EINVAL);
869 #endif
870 	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
871 	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
872 	kmem_strfree(xattr_name);
873 
874 	return (error);
875 }
876 ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
877 
878 static int
879 zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
880     void *fs_info)
881 {
882 	const struct xattr *xattr;
883 	int error = 0;
884 
885 	for (xattr = xattrs; xattr->name != NULL; xattr++) {
886 		error = __zpl_xattr_security_set(ip,
887 		    xattr->name, xattr->value, xattr->value_len, 0);
888 
889 		if (error < 0)
890 			break;
891 	}
892 
893 	return (error);
894 }
895 
896 int
897 zpl_xattr_security_init(struct inode *ip, struct inode *dip,
898     const struct qstr *qstr)
899 {
900 	return security_inode_init_security(ip, dip, qstr,
901 	    &zpl_xattr_security_init_impl, NULL);
902 }
903 
904 /*
905  * Security xattr namespace handlers.
906  */
907 xattr_handler_t zpl_xattr_security_handler = {
908 	.prefix	= XATTR_SECURITY_PREFIX,
909 	.list	= zpl_xattr_security_list,
910 	.get	= zpl_xattr_security_get,
911 	.set	= zpl_xattr_security_set,
912 };
913 
914 /*
915  * Extended system attributes
916  *
917  * "Extended system attributes are used by the kernel to store system
918  * objects such as Access Control Lists.  Read and write access permissions
919  * to system attributes depend on the policy implemented for each system
920  * attribute implemented by filesystems in the kernel." - xattr(7)
921  */
922 #ifdef CONFIG_FS_POSIX_ACL
923 #ifndef HAVE_SET_ACL
924 static
925 #endif
926 int
927 zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
928 {
929 	char *name, *value = NULL;
930 	int error = 0;
931 	size_t size = 0;
932 
933 	if (S_ISLNK(ip->i_mode))
934 		return (-EOPNOTSUPP);
935 
936 	switch (type) {
937 	case ACL_TYPE_ACCESS:
938 		name = XATTR_NAME_POSIX_ACL_ACCESS;
939 		if (acl) {
940 			umode_t mode = ip->i_mode;
941 			error = posix_acl_equiv_mode(acl, &mode);
942 			if (error < 0) {
943 				return (error);
944 			} else {
945 				/*
946 				 * The mode bits will have been set by
947 				 * ->zfs_setattr()->zfs_acl_chmod_setattr()
948 				 * using the ZFS ACL conversion.  If they
949 				 * differ from the Posix ACL conversion dirty
950 				 * the inode to write the Posix mode bits.
951 				 */
952 				if (ip->i_mode != mode) {
953 					ip->i_mode = mode;
954 					ip->i_ctime = current_time(ip);
955 					zfs_mark_inode_dirty(ip);
956 				}
957 
958 				if (error == 0)
959 					acl = NULL;
960 			}
961 		}
962 		break;
963 
964 	case ACL_TYPE_DEFAULT:
965 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
966 		if (!S_ISDIR(ip->i_mode))
967 			return (acl ? -EACCES : 0);
968 		break;
969 
970 	default:
971 		return (-EINVAL);
972 	}
973 
974 	if (acl) {
975 		size = posix_acl_xattr_size(acl->a_count);
976 		value = kmem_alloc(size, KM_SLEEP);
977 
978 		error = zpl_acl_to_xattr(acl, value, size);
979 		if (error < 0) {
980 			kmem_free(value, size);
981 			return (error);
982 		}
983 	}
984 
985 	error = zpl_xattr_set(ip, name, value, size, 0);
986 	if (value)
987 		kmem_free(value, size);
988 
989 	if (!error) {
990 		if (acl)
991 			zpl_set_cached_acl(ip, type, acl);
992 		else
993 			zpl_forget_cached_acl(ip, type);
994 	}
995 
996 	return (error);
997 }
998 
999 struct posix_acl *
1000 zpl_get_acl(struct inode *ip, int type)
1001 {
1002 	struct posix_acl *acl;
1003 	void *value = NULL;
1004 	char *name;
1005 	int size;
1006 
1007 	/*
1008 	 * As of Linux 3.14, the kernel get_acl will check this for us.
1009 	 * Also as of Linux 4.7, comparing against ACL_NOT_CACHED is wrong
1010 	 * as the kernel get_acl will set it to temporary sentinel value.
1011 	 */
1012 #ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
1013 	acl = get_cached_acl(ip, type);
1014 	if (acl != ACL_NOT_CACHED)
1015 		return (acl);
1016 #endif
1017 
1018 	switch (type) {
1019 	case ACL_TYPE_ACCESS:
1020 		name = XATTR_NAME_POSIX_ACL_ACCESS;
1021 		break;
1022 	case ACL_TYPE_DEFAULT:
1023 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
1024 		break;
1025 	default:
1026 		return (ERR_PTR(-EINVAL));
1027 	}
1028 
1029 	size = zpl_xattr_get(ip, name, NULL, 0);
1030 	if (size > 0) {
1031 		value = kmem_alloc(size, KM_SLEEP);
1032 		size = zpl_xattr_get(ip, name, value, size);
1033 	}
1034 
1035 	if (size > 0) {
1036 		acl = zpl_acl_from_xattr(value, size);
1037 	} else if (size == -ENODATA || size == -ENOSYS) {
1038 		acl = NULL;
1039 	} else {
1040 		acl = ERR_PTR(-EIO);
1041 	}
1042 
1043 	if (size > 0)
1044 		kmem_free(value, size);
1045 
1046 	/* As of Linux 4.7, the kernel get_acl will set this for us */
1047 #ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
1048 	if (!IS_ERR(acl))
1049 		zpl_set_cached_acl(ip, type, acl);
1050 #endif
1051 
1052 	return (acl);
1053 }
1054 
1055 int
1056 zpl_init_acl(struct inode *ip, struct inode *dir)
1057 {
1058 	struct posix_acl *acl = NULL;
1059 	int error = 0;
1060 
1061 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1062 		return (0);
1063 
1064 	if (!S_ISLNK(ip->i_mode)) {
1065 		acl = zpl_get_acl(dir, ACL_TYPE_DEFAULT);
1066 		if (IS_ERR(acl))
1067 			return (PTR_ERR(acl));
1068 		if (!acl) {
1069 			ip->i_mode &= ~current_umask();
1070 			ip->i_ctime = current_time(ip);
1071 			zfs_mark_inode_dirty(ip);
1072 			return (0);
1073 		}
1074 	}
1075 
1076 	if (acl) {
1077 		umode_t mode;
1078 
1079 		if (S_ISDIR(ip->i_mode)) {
1080 			error = zpl_set_acl(ip, acl, ACL_TYPE_DEFAULT);
1081 			if (error)
1082 				goto out;
1083 		}
1084 
1085 		mode = ip->i_mode;
1086 		error = __posix_acl_create(&acl, GFP_KERNEL, &mode);
1087 		if (error >= 0) {
1088 			ip->i_mode = mode;
1089 			zfs_mark_inode_dirty(ip);
1090 			if (error > 0)
1091 				error = zpl_set_acl(ip, acl, ACL_TYPE_ACCESS);
1092 		}
1093 	}
1094 out:
1095 	zpl_posix_acl_release(acl);
1096 
1097 	return (error);
1098 }
1099 
1100 int
1101 zpl_chmod_acl(struct inode *ip)
1102 {
1103 	struct posix_acl *acl;
1104 	int error;
1105 
1106 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1107 		return (0);
1108 
1109 	if (S_ISLNK(ip->i_mode))
1110 		return (-EOPNOTSUPP);
1111 
1112 	acl = zpl_get_acl(ip, ACL_TYPE_ACCESS);
1113 	if (IS_ERR(acl) || !acl)
1114 		return (PTR_ERR(acl));
1115 
1116 	error = __posix_acl_chmod(&acl, GFP_KERNEL, ip->i_mode);
1117 	if (!error)
1118 		error = zpl_set_acl(ip, acl, ACL_TYPE_ACCESS);
1119 
1120 	zpl_posix_acl_release(acl);
1121 
1122 	return (error);
1123 }
1124 
1125 static int
1126 __zpl_xattr_acl_list_access(struct inode *ip, char *list, size_t list_size,
1127     const char *name, size_t name_len)
1128 {
1129 	char *xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
1130 	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_ACCESS);
1131 
1132 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1133 		return (0);
1134 
1135 	if (list && xattr_size <= list_size)
1136 		memcpy(list, xattr_name, xattr_size);
1137 
1138 	return (xattr_size);
1139 }
1140 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_access);
1141 
1142 static int
1143 __zpl_xattr_acl_list_default(struct inode *ip, char *list, size_t list_size,
1144     const char *name, size_t name_len)
1145 {
1146 	char *xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
1147 	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_DEFAULT);
1148 
1149 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1150 		return (0);
1151 
1152 	if (list && xattr_size <= list_size)
1153 		memcpy(list, xattr_name, xattr_size);
1154 
1155 	return (xattr_size);
1156 }
1157 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_default);
1158 
1159 static int
1160 __zpl_xattr_acl_get_access(struct inode *ip, const char *name,
1161     void *buffer, size_t size)
1162 {
1163 	struct posix_acl *acl;
1164 	int type = ACL_TYPE_ACCESS;
1165 	int error;
1166 	/* xattr_resolve_name will do this for us if this is defined */
1167 #ifndef HAVE_XATTR_HANDLER_NAME
1168 	if (strcmp(name, "") != 0)
1169 		return (-EINVAL);
1170 #endif
1171 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1172 		return (-EOPNOTSUPP);
1173 
1174 	acl = zpl_get_acl(ip, type);
1175 	if (IS_ERR(acl))
1176 		return (PTR_ERR(acl));
1177 	if (acl == NULL)
1178 		return (-ENODATA);
1179 
1180 	error = zpl_acl_to_xattr(acl, buffer, size);
1181 	zpl_posix_acl_release(acl);
1182 
1183 	return (error);
1184 }
1185 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_access);
1186 
1187 static int
1188 __zpl_xattr_acl_get_default(struct inode *ip, const char *name,
1189     void *buffer, size_t size)
1190 {
1191 	struct posix_acl *acl;
1192 	int type = ACL_TYPE_DEFAULT;
1193 	int error;
1194 	/* xattr_resolve_name will do this for us if this is defined */
1195 #ifndef HAVE_XATTR_HANDLER_NAME
1196 	if (strcmp(name, "") != 0)
1197 		return (-EINVAL);
1198 #endif
1199 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1200 		return (-EOPNOTSUPP);
1201 
1202 	acl = zpl_get_acl(ip, type);
1203 	if (IS_ERR(acl))
1204 		return (PTR_ERR(acl));
1205 	if (acl == NULL)
1206 		return (-ENODATA);
1207 
1208 	error = zpl_acl_to_xattr(acl, buffer, size);
1209 	zpl_posix_acl_release(acl);
1210 
1211 	return (error);
1212 }
1213 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
1214 
1215 static int
1216 __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
1217     const void *value, size_t size, int flags)
1218 {
1219 	struct posix_acl *acl;
1220 	int type = ACL_TYPE_ACCESS;
1221 	int error = 0;
1222 	/* xattr_resolve_name will do this for us if this is defined */
1223 #ifndef HAVE_XATTR_HANDLER_NAME
1224 	if (strcmp(name, "") != 0)
1225 		return (-EINVAL);
1226 #endif
1227 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1228 		return (-EOPNOTSUPP);
1229 
1230 	if (!inode_owner_or_capable(ip))
1231 		return (-EPERM);
1232 
1233 	if (value) {
1234 		acl = zpl_acl_from_xattr(value, size);
1235 		if (IS_ERR(acl))
1236 			return (PTR_ERR(acl));
1237 		else if (acl) {
1238 			error = zpl_posix_acl_valid(ip, acl);
1239 			if (error) {
1240 				zpl_posix_acl_release(acl);
1241 				return (error);
1242 			}
1243 		}
1244 	} else {
1245 		acl = NULL;
1246 	}
1247 
1248 	error = zpl_set_acl(ip, acl, type);
1249 	zpl_posix_acl_release(acl);
1250 
1251 	return (error);
1252 }
1253 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
1254 
1255 static int
1256 __zpl_xattr_acl_set_default(struct inode *ip, const char *name,
1257     const void *value, size_t size, int flags)
1258 {
1259 	struct posix_acl *acl;
1260 	int type = ACL_TYPE_DEFAULT;
1261 	int error = 0;
1262 	/* xattr_resolve_name will do this for us if this is defined */
1263 #ifndef HAVE_XATTR_HANDLER_NAME
1264 	if (strcmp(name, "") != 0)
1265 		return (-EINVAL);
1266 #endif
1267 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1268 		return (-EOPNOTSUPP);
1269 
1270 	if (!inode_owner_or_capable(ip))
1271 		return (-EPERM);
1272 
1273 	if (value) {
1274 		acl = zpl_acl_from_xattr(value, size);
1275 		if (IS_ERR(acl))
1276 			return (PTR_ERR(acl));
1277 		else if (acl) {
1278 			error = zpl_posix_acl_valid(ip, acl);
1279 			if (error) {
1280 				zpl_posix_acl_release(acl);
1281 				return (error);
1282 			}
1283 		}
1284 	} else {
1285 		acl = NULL;
1286 	}
1287 
1288 	error = zpl_set_acl(ip, acl, type);
1289 	zpl_posix_acl_release(acl);
1290 
1291 	return (error);
1292 }
1293 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default);
1294 
1295 /*
1296  * ACL access xattr namespace handlers.
1297  *
1298  * Use .name instead of .prefix when available. xattr_resolve_name will match
1299  * whole name and reject anything that has .name only as prefix.
1300  */
1301 xattr_handler_t zpl_xattr_acl_access_handler =
1302 {
/* Exactly one of .name / .prefix is initialized, chosen at configure time. */
1303 #ifdef HAVE_XATTR_HANDLER_NAME
1304 	.name	= XATTR_NAME_POSIX_ACL_ACCESS,
1305 #else
1306 	.prefix	= XATTR_NAME_POSIX_ACL_ACCESS,
1307 #endif
/* Callbacks named by the ZPL_XATTR_{LIST,GET,SET}_WRAPPER() lines above. */
1308 	.list	= zpl_xattr_acl_list_access,
1309 	.get	= zpl_xattr_acl_get_access,
1310 	.set	= zpl_xattr_acl_set_access,
/* .flags records the ACL type, but only under these HAVE_XATTR_LIST_* cases. */
1311 #if defined(HAVE_XATTR_LIST_SIMPLE) || \
1312     defined(HAVE_XATTR_LIST_DENTRY) || \
1313     defined(HAVE_XATTR_LIST_HANDLER)
1314 	.flags	= ACL_TYPE_ACCESS,
1315 #endif
1316 };
1317 
1318 /*
1319  * ACL default xattr namespace handlers.
1320  *
1321  * Use .name instead of .prefix when available. xattr_resolve_name will match
1322  * whole name and reject anything that has .name only as prefix.
1323  */
1324 xattr_handler_t zpl_xattr_acl_default_handler =
1325 {
/* Exactly one of .name / .prefix is initialized, chosen at configure time. */
1326 #ifdef HAVE_XATTR_HANDLER_NAME
1327 	.name	= XATTR_NAME_POSIX_ACL_DEFAULT,
1328 #else
1329 	.prefix	= XATTR_NAME_POSIX_ACL_DEFAULT,
1330 #endif
/* Callbacks named by the ZPL_XATTR_{LIST,GET,SET}_WRAPPER() lines above. */
1331 	.list	= zpl_xattr_acl_list_default,
1332 	.get	= zpl_xattr_acl_get_default,
1333 	.set	= zpl_xattr_acl_set_default,
/* .flags records the ACL type, but only under these HAVE_XATTR_LIST_* cases. */
1334 #if defined(HAVE_XATTR_LIST_SIMPLE) || \
1335     defined(HAVE_XATTR_LIST_DENTRY) || \
1336     defined(HAVE_XATTR_LIST_HANDLER)
1337 	.flags	= ACL_TYPE_DEFAULT,
1338 #endif
1339 };
1340 
1341 #endif /* CONFIG_FS_POSIX_ACL */
1342 
/*
 * NULL-terminated table of the xattr handlers defined in this file.  The
 * two POSIX ACL handlers are present only when CONFIG_FS_POSIX_ACL is set.
 * NOTE(review): presumably this is what gets installed as the superblock's
 * handler list; the consumer is outside this file - confirm before
 * reordering entries.
 */
1343 xattr_handler_t *zpl_xattr_handlers[] = {
1344 	&zpl_xattr_security_handler,
1345 	&zpl_xattr_trusted_handler,
1346 	&zpl_xattr_user_handler,
1347 #ifdef CONFIG_FS_POSIX_ACL
1348 	&zpl_xattr_acl_access_handler,
1349 	&zpl_xattr_acl_default_handler,
1350 #endif /* CONFIG_FS_POSIX_ACL */
1351 	NULL
1352 };
1353 
1354 static const struct xattr_handler *
1355 zpl_xattr_handler(const char *name)
1356 {
1357 	if (strncmp(name, XATTR_USER_PREFIX,
1358 	    XATTR_USER_PREFIX_LEN) == 0)
1359 		return (&zpl_xattr_user_handler);
1360 
1361 	if (strncmp(name, XATTR_TRUSTED_PREFIX,
1362 	    XATTR_TRUSTED_PREFIX_LEN) == 0)
1363 		return (&zpl_xattr_trusted_handler);
1364 
1365 	if (strncmp(name, XATTR_SECURITY_PREFIX,
1366 	    XATTR_SECURITY_PREFIX_LEN) == 0)
1367 		return (&zpl_xattr_security_handler);
1368 
1369 #ifdef CONFIG_FS_POSIX_ACL
1370 	if (strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
1371 	    sizeof (XATTR_NAME_POSIX_ACL_ACCESS)) == 0)
1372 		return (&zpl_xattr_acl_access_handler);
1373 
1374 	if (strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
1375 	    sizeof (XATTR_NAME_POSIX_ACL_DEFAULT)) == 0)
1376 		return (&zpl_xattr_acl_default_handler);
1377 #endif /* CONFIG_FS_POSIX_ACL */
1378 
1379 	return (NULL);
1380 }
1381 
1382 #if !defined(HAVE_POSIX_ACL_RELEASE) || defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)
/*
 * One entry on the deferred ACL release list: an ACL waiting to be freed
 * by zpl_posix_acl_free() once its grace period has passed.
 */
1383 struct acl_rel_struct {
1384 	struct acl_rel_struct *next;	/* next queued entry, NULL if last */
1385 	struct posix_acl *acl;		/* ACL to kfree() when the entry expires */
1386 	clock_t time;			/* ddi_get_lbolt() value at enqueue time */
1387 };
1388 
/*
 * Timing of the deferred release, in lbolt ticks:
 *   ACL_REL_GRACE  - minimum age an entry must reach before it is freed.
 *   ACL_REL_WINDOW - extra slack added on top of the grace period.
 *   ACL_REL_SCHED  - offset at which the free task is scheduled to run,
 *                    relative to the enqueue time of the oldest entry.
 */
1389 #define	ACL_REL_GRACE	(60*HZ)
1390 #define	ACL_REL_WINDOW	(1*HZ)
1391 #define	ACL_REL_SCHED	(ACL_REL_GRACE+ACL_REL_WINDOW)
1392 
1393 /*
1394  * Lockless multi-producer single-consumer fifo list.
1395  * Nodes are added to tail and removed from head. Tail pointer is our
1396  * synchronization point. It always points to the next pointer of the last
1397  * node, or head if list is empty.
1398  */
/*
 * Oldest queued entry, or NULL when the list is empty.  Advanced by
 * zpl_posix_acl_free(); a producer stores to it only through acl_rel_tail
 * when it enqueues onto an empty list.
 */
1399 static struct acl_rel_struct *acl_rel_head = NULL;
/*
 * Address of the 'next' field of the last entry, or &acl_rel_head when the
 * list is empty.  Only ever updated with xchg()/cmpxchg().
 */
1400 static struct acl_rel_struct **acl_rel_tail = &acl_rel_head;
1401 
/*
 * Delayed task (the single consumer of the release list).
 *
 * Walks the list from acl_rel_head and moves every entry whose age is at
 * least ACL_REL_GRACE ticks onto a private freelist.  The walk stops at the
 * first entry that is still inside its grace period; in that case the task
 * re-dispatches itself for that entry's enqueue time plus ACL_REL_SCHED.
 * Finally each collected entry has its ACL released with kfree() and its
 * node with kmem_free().
 *
 * 'arg' is unused; both dispatch sites pass NULL.
 */
1402 static void
1403 zpl_posix_acl_free(void *arg)
1404 {
1405 	struct acl_rel_struct *freelist = NULL;
1406 	struct acl_rel_struct *a;
1407 	clock_t new_time;
1408 	boolean_t refire = B_FALSE;
1409 
	/* The task is only ever dispatched while something is queued. */
1410 	ASSERT3P(acl_rel_head, !=, NULL);
1411 	while (acl_rel_head) {
1412 		a = acl_rel_head;
1413 		if (ddi_get_lbolt() - a->time >= ACL_REL_GRACE) {
1414 			/*
1415 			 * If a is the last node we need to reset tail, but we
1416 			 * need to use cmpxchg to make sure it is still the
1417 			 * last node.
1418 			 */
1419 			if (acl_rel_tail == &a->next) {
				/*
				 * Clear head before publishing it as the new
				 * tail target: a producer that wins the tail
				 * afterwards asserts *prev == NULL and then
				 * stores its node there.
				 */
1420 				acl_rel_head = NULL;
1421 				if (cmpxchg(&acl_rel_tail, &a->next,
1422 				    &acl_rel_head) == &a->next) {
					/* List is now empty; a is ours. */
1423 					ASSERT3P(a->next, ==, NULL);
1424 					a->next = freelist;
1425 					freelist = a;
1426 					break;
1427 				}
				/*
				 * cmpxchg lost: a producer swapped the tail
				 * away from &a->next in the meantime, so a is
				 * no longer last.  Fall through.
				 */
1428 			}
1429 			/*
1430 			 * a is not last node, make sure next pointer is set
1431 			 * by the adder and advance the head.
1432 			 */
			/*
			 * The producer swaps the tail first and links
			 * a->next afterwards, so a->next may briefly still be
			 * NULL here; spin until the link becomes visible.
			 */
1433 			while (READ_ONCE(a->next) == NULL)
1434 				cpu_relax();
1435 			acl_rel_head = a->next;
1436 			a->next = freelist;
1437 			freelist = a;
1438 		} else {
1439 			/*
1440 			 * a is still in grace period. We are responsible to
1441 			 * reschedule the free task, since adder will only do
1442 			 * so if list is empty.
1443 			 */
1444 			new_time = a->time + ACL_REL_SCHED;
1445 			refire = B_TRUE;
1446 			break;
1447 		}
1448 	}
1449 
	/* new_time is only read when refire was set together with it. */
1450 	if (refire)
1451 		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
1452 		    NULL, TQ_SLEEP, new_time);
1453 
	/* Entries on freelist are unlinked from the shared list; free them. */
1454 	while (freelist) {
1455 		a = freelist;
1456 		freelist = a->next;
1457 		kfree(a->acl);
1458 		kmem_free(a, sizeof (struct acl_rel_struct));
1459 	}
1460 }
1461 
/*
 * Queue 'acl' for deferred freeing (producer side of the release list).
 *
 * A list node is allocated, stamped with the current lbolt and appended to
 * the lockless list.  When the list was empty beforehand, the free task
 * zpl_posix_acl_free() is dispatched to run ACL_REL_SCHED ticks from now;
 * otherwise the already pending task is relied upon to pick the node up.
 */
1462 void
1463 zpl_posix_acl_release_impl(struct posix_acl *acl)
1464 {
1465 	struct acl_rel_struct *a, **prev;
1466 
1467 	a = kmem_alloc(sizeof (struct acl_rel_struct), KM_SLEEP);
1468 	a->next = NULL;
1469 	a->acl = acl;
1470 	a->time = ddi_get_lbolt();
1471 	/* atomically points tail to us and get the previous tail */
1472 	prev = xchg(&acl_rel_tail, &a->next);
	/*
	 * prev is the slot that used to terminate the list (the previous
	 * node's next field, or acl_rel_head).  Until the store below lands
	 * the node is not reachable from head; the consumer spins on this
	 * slot for that reason.
	 */
1473 	ASSERT3P(*prev, ==, NULL);
1474 	*prev = a;
1475 	/* if it was empty before, schedule the free task */
1476 	if (prev == &acl_rel_head)
1477 		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
1478 		    NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
1479 }
1480 #endif
1481