xref: /freebsd/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c (revision 18054d0220cfc8df9c9568c437bd6fbb59d53c3c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
23  *
24  * Extended attributes (xattr) on Solaris are implemented as files
25  * which exist in a hidden xattr directory.  These extended attributes
26  * can be accessed using the attropen() system call which opens
27  * the extended attribute.  It can then be manipulated just like
28  * a standard file descriptor.  This has a couple advantages such
29  * as practically no size limit on the file, and the extended
30  * attributes permissions may differ from those of the parent file.
31  * This interface is really quite clever, but it's also completely
32  * different than what is supported on Linux.  It also comes with a
33  * steep performance penalty when accessing small xattrs because they
34  * are not stored with the parent file.
35  *
36  * Under Linux extended attributes are manipulated by the system
37  * calls getxattr(2), setxattr(2), and listxattr(2).  They consider
38  * extended attributes to be name/value pairs where the name is a
39  * NULL terminated string.  The name must also include one of the
40  * following namespace prefixes:
41  *
42  *   user     - No restrictions and is available to user applications.
43  *   trusted  - Restricted to kernel and root (CAP_SYS_ADMIN) use.
44  *   system   - Used for access control lists (system.nfs4_acl, etc).
 *   security - Used by SELinux to store a file's security context.
46  *
 * The value under Linux is limited to 65536 bytes of binary data.
48  * In practice, individual xattrs tend to be much smaller than this
49  * and are typically less than 100 bytes.  A good example of this
50  * are the security.selinux xattrs which are less than 100 bytes and
51  * exist for every file when xattr labeling is enabled.
52  *
53  * The Linux xattr implementation has been written to take advantage of
54  * this typical usage.  When the dataset property 'xattr=sa' is set,
55  * then xattrs will be preferentially stored as System Attributes (SA).
56  * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
57  * up to 64k of xattrs to be stored in the spill block.  If additional
58  * xattr space is required, which is unlikely under Linux, they will
59  * be stored using the traditional directory approach.
60  *
61  * This optimization results in roughly a 3x performance improvement
62  * when accessing xattrs because it avoids the need to perform a seek
63  * for every xattr value.  When multiple xattrs are stored per-file
64  * the performance improvements are even greater because all of the
65  * xattrs stored in the spill block will be cached.
66  *
67  * However, by default SA based xattrs are disabled in the Linux port
68  * to maximize compatibility with other implementations.  If you do
69  * enable SA based xattrs then they will not be visible on platforms
70  * which do not support this feature.
71  *
72  * NOTE: One additional consequence of the xattr directory implementation
73  * is that when an extended attribute is manipulated an inode is created.
74  * This inode will exist in the Linux inode cache but there will be no
75  * associated entry in the dentry cache which references it.  This is
76  * safe but it may result in some confusion.  Enabling SA based xattrs
77  * largely avoids the issue except in the overflow case.
78  */
79 
80 #include <sys/zfs_znode.h>
81 #include <sys/zfs_vfsops.h>
82 #include <sys/zfs_vnops.h>
83 #include <sys/zap.h>
84 #include <sys/vfs.h>
85 #include <sys/zpl.h>
86 
/*
 * Verdict produced by zpl_xattr_permission() for a stored xattr name and
 * acted upon by zpl_xattr_filldir() while building the name list.
 */
enum xattr_permission {
	XAPERM_DENY,	/* name is skipped, nothing is copied out */
	XAPERM_ALLOW,	/* name is copied out unmodified */
	XAPERM_COMPAT,	/* name is copied out with "user." prepended */
};
92 
/*
 * Cursor over the caller supplied name buffer which is threaded through
 * the xattr list helpers.  When buf is NULL nothing is copied and offset
 * only accumulates the number of bytes which would be required.
 *
 * NOTE: zpl_xattr_list() initializes this structure positionally, so the
 * member order must not change.
 */
typedef struct xattr_filldir {
	size_t size;		/* capacity of buf in bytes */
	size_t offset;		/* bytes written (or required) so far */
	char *buf;		/* destination buffer, may be NULL */
	struct dentry *dentry;	/* dentry whose xattrs are being listed */
} xattr_filldir_t;
99 
/*
 * Forward declaration; the definition is not in this part of the file.
 * Used by zpl_xattr_filldir() to classify each stored name.
 */
static enum xattr_permission zpl_xattr_permission(xattr_filldir_t *,
    const char *, int);

/*
 * Selects the name format written by __zpl_xattr_user_set(): when non-zero
 * user xattrs are stored without the "user." prefix, otherwise they are
 * stored with the prefix.  Both formats are accepted when reading.
 */
static int zfs_xattr_compat = 0;
104 
105 /*
106  * Determine is a given xattr name should be visible and if so copy it
107  * in to the provided buffer (xf->buf).
108  */
109 static int
110 zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)
111 {
112 	enum xattr_permission perm;
113 
114 	/* Check permissions using the per-namespace list xattr handler. */
115 	perm = zpl_xattr_permission(xf, name, name_len);
116 	if (perm == XAPERM_DENY)
117 		return (0);
118 
119 	/* Prefix the name with "user." if it does not have a namespace. */
120 	if (perm == XAPERM_COMPAT) {
121 		if (xf->buf) {
122 			if (xf->offset + XATTR_USER_PREFIX_LEN + 1 > xf->size)
123 				return (-ERANGE);
124 
125 			memcpy(xf->buf + xf->offset, XATTR_USER_PREFIX,
126 			    XATTR_USER_PREFIX_LEN);
127 			xf->buf[xf->offset + XATTR_USER_PREFIX_LEN] = '\0';
128 		}
129 
130 		xf->offset += XATTR_USER_PREFIX_LEN;
131 	}
132 
133 	/* When xf->buf is NULL only calculate the required size. */
134 	if (xf->buf) {
135 		if (xf->offset + name_len + 1 > xf->size)
136 			return (-ERANGE);
137 
138 		memcpy(xf->buf + xf->offset, name, name_len);
139 		xf->buf[xf->offset + name_len] = '\0';
140 	}
141 
142 	xf->offset += (name_len + 1);
143 
144 	return (0);
145 }
146 
147 /*
148  * Read as many directory entry names as will fit in to the provided buffer,
149  * or when no buffer is provided calculate the required buffer size.
150  */
151 static int
152 zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf)
153 {
154 	zap_cursor_t zc;
155 	zap_attribute_t	zap;
156 	int error;
157 
158 	zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id);
159 
160 	while ((error = -zap_cursor_retrieve(&zc, &zap)) == 0) {
161 
162 		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
163 			error = -ENXIO;
164 			break;
165 		}
166 
167 		error = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name));
168 		if (error)
169 			break;
170 
171 		zap_cursor_advance(&zc);
172 	}
173 
174 	zap_cursor_fini(&zc);
175 
176 	if (error == -ENOENT)
177 		error = 0;
178 
179 	return (error);
180 }
181 
182 static ssize_t
183 zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
184 {
185 	struct inode *ip = xf->dentry->d_inode;
186 	struct inode *dxip = NULL;
187 	znode_t *dxzp;
188 	int error;
189 
190 	/* Lookup the xattr directory */
191 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
192 	    cr, NULL, NULL);
193 	if (error) {
194 		if (error == -ENOENT)
195 			error = 0;
196 
197 		return (error);
198 	}
199 
200 	dxip = ZTOI(dxzp);
201 	error = zpl_xattr_readdir(dxip, xf);
202 	iput(dxip);
203 
204 	return (error);
205 }
206 
207 static ssize_t
208 zpl_xattr_list_sa(xattr_filldir_t *xf)
209 {
210 	znode_t *zp = ITOZ(xf->dentry->d_inode);
211 	nvpair_t *nvp = NULL;
212 	int error = 0;
213 
214 	mutex_enter(&zp->z_lock);
215 	if (zp->z_xattr_cached == NULL)
216 		error = -zfs_sa_get_xattr(zp);
217 	mutex_exit(&zp->z_lock);
218 
219 	if (error)
220 		return (error);
221 
222 	ASSERT(zp->z_xattr_cached);
223 
224 	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
225 		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
226 
227 		error = zpl_xattr_filldir(xf, nvpair_name(nvp),
228 		    strlen(nvpair_name(nvp)));
229 		if (error)
230 			return (error);
231 	}
232 
233 	return (0);
234 }
235 
236 ssize_t
237 zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
238 {
239 	znode_t *zp = ITOZ(dentry->d_inode);
240 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
241 	xattr_filldir_t xf = { buffer_size, 0, buffer, dentry };
242 	cred_t *cr = CRED();
243 	fstrans_cookie_t cookie;
244 	int error = 0;
245 
246 	crhold(cr);
247 	cookie = spl_fstrans_mark();
248 	ZPL_ENTER(zfsvfs);
249 	ZPL_VERIFY_ZP(zp);
250 	rw_enter(&zp->z_xattr_lock, RW_READER);
251 
252 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
253 		error = zpl_xattr_list_sa(&xf);
254 		if (error)
255 			goto out;
256 	}
257 
258 	error = zpl_xattr_list_dir(&xf, cr);
259 	if (error)
260 		goto out;
261 
262 	error = xf.offset;
263 out:
264 
265 	rw_exit(&zp->z_xattr_lock);
266 	ZPL_EXIT(zfsvfs);
267 	spl_fstrans_unmark(cookie);
268 	crfree(cr);
269 
270 	return (error);
271 }
272 
273 static int
274 zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
275     size_t size, cred_t *cr)
276 {
277 	fstrans_cookie_t cookie;
278 	struct inode *xip = NULL;
279 	znode_t *dxzp = NULL;
280 	znode_t *xzp = NULL;
281 	int error;
282 
283 	/* Lookup the xattr directory */
284 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
285 	    cr, NULL, NULL);
286 	if (error)
287 		goto out;
288 
289 	/* Lookup a specific xattr name in the directory */
290 	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
291 	if (error)
292 		goto out;
293 
294 	xip = ZTOI(xzp);
295 	if (!size) {
296 		error = i_size_read(xip);
297 		goto out;
298 	}
299 
300 	if (size < i_size_read(xip)) {
301 		error = -ERANGE;
302 		goto out;
303 	}
304 
305 	struct iovec iov;
306 	iov.iov_base = (void *)value;
307 	iov.iov_len = size;
308 
309 	zfs_uio_t uio;
310 	zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0);
311 
312 	cookie = spl_fstrans_mark();
313 	error = -zfs_read(ITOZ(xip), &uio, 0, cr);
314 	spl_fstrans_unmark(cookie);
315 
316 	if (error == 0)
317 		error = size - zfs_uio_resid(&uio);
318 out:
319 	if (xzp)
320 		zrele(xzp);
321 
322 	if (dxzp)
323 		zrele(dxzp);
324 
325 	return (error);
326 }
327 
328 static int
329 zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
330 {
331 	znode_t *zp = ITOZ(ip);
332 	uchar_t *nv_value;
333 	uint_t nv_size;
334 	int error = 0;
335 
336 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
337 
338 	mutex_enter(&zp->z_lock);
339 	if (zp->z_xattr_cached == NULL)
340 		error = -zfs_sa_get_xattr(zp);
341 	mutex_exit(&zp->z_lock);
342 
343 	if (error)
344 		return (error);
345 
346 	ASSERT(zp->z_xattr_cached);
347 	error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,
348 	    &nv_value, &nv_size);
349 	if (error)
350 		return (error);
351 
352 	if (size == 0 || value == NULL)
353 		return (nv_size);
354 
355 	if (size < nv_size)
356 		return (-ERANGE);
357 
358 	memcpy(value, nv_value, nv_size);
359 
360 	return (nv_size);
361 }
362 
363 static int
364 __zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
365     cred_t *cr)
366 {
367 	znode_t *zp = ITOZ(ip);
368 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
369 	int error;
370 
371 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
372 
373 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
374 		error = zpl_xattr_get_sa(ip, name, value, size);
375 		if (error != -ENOENT)
376 			goto out;
377 	}
378 
379 	error = zpl_xattr_get_dir(ip, name, value, size, cr);
380 out:
381 	if (error == -ENOENT)
382 		error = -ENODATA;
383 
384 	return (error);
385 }
386 
387 #define	XATTR_NOENT	0x0
388 #define	XATTR_IN_SA	0x1
389 #define	XATTR_IN_DIR	0x2
390 /* check where the xattr resides */
391 static int
392 __zpl_xattr_where(struct inode *ip, const char *name, int *where, cred_t *cr)
393 {
394 	znode_t *zp = ITOZ(ip);
395 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
396 	int error;
397 
398 	ASSERT(where);
399 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
400 
401 	*where = XATTR_NOENT;
402 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
403 		error = zpl_xattr_get_sa(ip, name, NULL, 0);
404 		if (error >= 0)
405 			*where |= XATTR_IN_SA;
406 		else if (error != -ENOENT)
407 			return (error);
408 	}
409 
410 	error = zpl_xattr_get_dir(ip, name, NULL, 0, cr);
411 	if (error >= 0)
412 		*where |= XATTR_IN_DIR;
413 	else if (error != -ENOENT)
414 		return (error);
415 
416 	if (*where == (XATTR_IN_SA|XATTR_IN_DIR))
417 		cmn_err(CE_WARN, "ZFS: inode %p has xattr \"%s\""
418 		    " in both SA and dir", ip, name);
419 	if (*where == XATTR_NOENT)
420 		error = -ENODATA;
421 	else
422 		error = 0;
423 	return (error);
424 }
425 
426 static int
427 zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
428 {
429 	znode_t *zp = ITOZ(ip);
430 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
431 	cred_t *cr = CRED();
432 	fstrans_cookie_t cookie;
433 	int error;
434 
435 	crhold(cr);
436 	cookie = spl_fstrans_mark();
437 	ZPL_ENTER(zfsvfs);
438 	ZPL_VERIFY_ZP(zp);
439 	rw_enter(&zp->z_xattr_lock, RW_READER);
440 	error = __zpl_xattr_get(ip, name, value, size, cr);
441 	rw_exit(&zp->z_xattr_lock);
442 	ZPL_EXIT(zfsvfs);
443 	spl_fstrans_unmark(cookie);
444 	crfree(cr);
445 
446 	return (error);
447 }
448 
449 static int
450 zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
451     size_t size, int flags, cred_t *cr)
452 {
453 	znode_t *dxzp = NULL;
454 	znode_t *xzp = NULL;
455 	vattr_t *vap = NULL;
456 	int lookup_flags, error;
457 	const int xattr_mode = S_IFREG | 0644;
458 	loff_t pos = 0;
459 
460 	/*
461 	 * Lookup the xattr directory.  When we're adding an entry pass
462 	 * CREATE_XATTR_DIR to ensure the xattr directory is created.
463 	 * When removing an entry this flag is not passed to avoid
464 	 * unnecessarily creating a new xattr directory.
465 	 */
466 	lookup_flags = LOOKUP_XATTR;
467 	if (value != NULL)
468 		lookup_flags |= CREATE_XATTR_DIR;
469 
470 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, lookup_flags,
471 	    cr, NULL, NULL);
472 	if (error)
473 		goto out;
474 
475 	/* Lookup a specific xattr name in the directory */
476 	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
477 	if (error && (error != -ENOENT))
478 		goto out;
479 
480 	error = 0;
481 
482 	/* Remove a specific name xattr when value is set to NULL. */
483 	if (value == NULL) {
484 		if (xzp)
485 			error = -zfs_remove(dxzp, (char *)name, cr, 0);
486 
487 		goto out;
488 	}
489 
490 	/* Lookup failed create a new xattr. */
491 	if (xzp == NULL) {
492 		vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
493 		vap->va_mode = xattr_mode;
494 		vap->va_mask = ATTR_MODE;
495 		vap->va_uid = crgetuid(cr);
496 		vap->va_gid = crgetgid(cr);
497 
498 		error = -zfs_create(dxzp, (char *)name, vap, 0, 0644, &xzp,
499 		    cr, 0, NULL);
500 		if (error)
501 			goto out;
502 	}
503 
504 	ASSERT(xzp != NULL);
505 
506 	error = -zfs_freesp(xzp, 0, 0, xattr_mode, TRUE);
507 	if (error)
508 		goto out;
509 
510 	error = -zfs_write_simple(xzp, value, size, pos, NULL);
511 out:
512 	if (error == 0) {
513 		ip->i_ctime = current_time(ip);
514 		zfs_mark_inode_dirty(ip);
515 	}
516 
517 	if (vap)
518 		kmem_free(vap, sizeof (vattr_t));
519 
520 	if (xzp)
521 		zrele(xzp);
522 
523 	if (dxzp)
524 		zrele(dxzp);
525 
526 	if (error == -ENOENT)
527 		error = -ENODATA;
528 
529 	ASSERT3S(error, <=, 0);
530 
531 	return (error);
532 }
533 
534 static int
535 zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
536     size_t size, int flags, cred_t *cr)
537 {
538 	znode_t *zp = ITOZ(ip);
539 	nvlist_t *nvl;
540 	size_t sa_size;
541 	int error = 0;
542 
543 	mutex_enter(&zp->z_lock);
544 	if (zp->z_xattr_cached == NULL)
545 		error = -zfs_sa_get_xattr(zp);
546 	mutex_exit(&zp->z_lock);
547 
548 	if (error)
549 		return (error);
550 
551 	ASSERT(zp->z_xattr_cached);
552 	nvl = zp->z_xattr_cached;
553 
554 	if (value == NULL) {
555 		error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
556 		if (error == -ENOENT)
557 			error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
558 	} else {
559 		/* Limited to 32k to keep nvpair memory allocations small */
560 		if (size > DXATTR_MAX_ENTRY_SIZE)
561 			return (-EFBIG);
562 
563 		/* Prevent the DXATTR SA from consuming the entire SA region */
564 		error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
565 		if (error)
566 			return (error);
567 
568 		if (sa_size > DXATTR_MAX_SA_SIZE)
569 			return (-EFBIG);
570 
571 		error = -nvlist_add_byte_array(nvl, name,
572 		    (uchar_t *)value, size);
573 	}
574 
575 	/*
576 	 * Update the SA for additions, modifications, and removals. On
577 	 * error drop the inconsistent cached version of the nvlist, it
578 	 * will be reconstructed from the ARC when next accessed.
579 	 */
580 	if (error == 0)
581 		error = -zfs_sa_set_xattr(zp, name, value, size);
582 
583 	if (error) {
584 		nvlist_free(nvl);
585 		zp->z_xattr_cached = NULL;
586 	}
587 
588 	ASSERT3S(error, <=, 0);
589 
590 	return (error);
591 }
592 
593 static int
594 zpl_xattr_set(struct inode *ip, const char *name, const void *value,
595     size_t size, int flags)
596 {
597 	znode_t *zp = ITOZ(ip);
598 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
599 	cred_t *cr = CRED();
600 	fstrans_cookie_t cookie;
601 	int where;
602 	int error;
603 
604 	crhold(cr);
605 	cookie = spl_fstrans_mark();
606 	ZPL_ENTER(zfsvfs);
607 	ZPL_VERIFY_ZP(zp);
608 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
609 
610 	/*
611 	 * Before setting the xattr check to see if it already exists.
612 	 * This is done to ensure the following optional flags are honored.
613 	 *
614 	 *   XATTR_CREATE: fail if xattr already exists
615 	 *   XATTR_REPLACE: fail if xattr does not exist
616 	 *
617 	 * We also want to know if it resides in sa or dir, so we can make
618 	 * sure we don't end up with duplicate in both places.
619 	 */
620 	error = __zpl_xattr_where(ip, name, &where, cr);
621 	if (error < 0) {
622 		if (error != -ENODATA)
623 			goto out;
624 		if (flags & XATTR_REPLACE)
625 			goto out;
626 
627 		/* The xattr to be removed already doesn't exist */
628 		error = 0;
629 		if (value == NULL)
630 			goto out;
631 	} else {
632 		error = -EEXIST;
633 		if (flags & XATTR_CREATE)
634 			goto out;
635 	}
636 
637 	/* Preferentially store the xattr as a SA for better performance */
638 	if (zfsvfs->z_use_sa && zp->z_is_sa &&
639 	    (zfsvfs->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
640 		error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
641 		if (error == 0) {
642 			/*
643 			 * Successfully put into SA, we need to clear the one
644 			 * in dir.
645 			 */
646 			if (where & XATTR_IN_DIR)
647 				zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr);
648 			goto out;
649 		}
650 	}
651 
652 	error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
653 	/*
654 	 * Successfully put into dir, we need to clear the one in SA.
655 	 */
656 	if (error == 0 && (where & XATTR_IN_SA))
657 		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
658 out:
659 	rw_exit(&zp->z_xattr_lock);
660 	ZPL_EXIT(zfsvfs);
661 	spl_fstrans_unmark(cookie);
662 	crfree(cr);
663 	ASSERT3S(error, <=, 0);
664 
665 	return (error);
666 }
667 
668 /*
669  * Extended user attributes
670  *
671  * "Extended user attributes may be assigned to files and directories for
672  * storing arbitrary additional information such as the mime type,
673  * character set or encoding of a file.  The access permissions for user
674  * attributes are defined by the file permission bits: read permission
675  * is required to retrieve the attribute value, and writer permission is
676  * required to change it.
677  *
678  * The file permission bits of regular files and directories are
679  * interpreted differently from the file permission bits of special
680  * files and symbolic links.  For regular files and directories the file
681  * permission bits define access to the file's contents, while for
682  * device special files they define access to the device described by
683  * the special file.  The file permissions of symbolic links are not
684  * used in access checks.  These differences would allow users to
685  * consume filesystem resources in a way not controllable by disk quotas
686  * for group or world writable special files and directories.
687  *
688  * For this reason, extended user attributes are allowed only for
689  * regular files and directories, and access to extended user attributes
690  * is restricted to the owner and to users with appropriate capabilities
691  * for directories with the sticky bit set (see the chmod(1) manual page
692  * for an explanation of the sticky bit)." - xattr(7)
693  *
694  * ZFS allows extended user attributes to be disabled administratively
695  * by setting the 'xattr=off' property on the dataset.
696  */
697 static int
698 __zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,
699     const char *name, size_t name_len)
700 {
701 	return (ITOZSB(ip)->z_flags & ZSB_XATTR);
702 }
703 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_user_list);
704 
705 static int
706 __zpl_xattr_user_get(struct inode *ip, const char *name,
707     void *value, size_t size)
708 {
709 	int error;
710 	/* xattr_resolve_name will do this for us if this is defined */
711 #ifndef HAVE_XATTR_HANDLER_NAME
712 	if (strcmp(name, "") == 0)
713 		return (-EINVAL);
714 #endif
715 	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
716 		return (-EINVAL);
717 	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
718 		return (-EOPNOTSUPP);
719 
720 	/*
721 	 * Try to look up the name with the namespace prefix first for
722 	 * compatibility with xattrs from this platform.  If that fails,
723 	 * try again without the namespace prefix for compatibility with
724 	 * other platforms.
725 	 */
726 	char *xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
727 	error = zpl_xattr_get(ip, xattr_name, value, size);
728 	kmem_strfree(xattr_name);
729 	if (error == -ENODATA)
730 		error = zpl_xattr_get(ip, name, value, size);
731 
732 	return (error);
733 }
734 ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
735 
736 static int
737 __zpl_xattr_user_set(struct inode *ip, const char *name,
738     const void *value, size_t size, int flags)
739 {
740 	int error = 0;
741 	/* xattr_resolve_name will do this for us if this is defined */
742 #ifndef HAVE_XATTR_HANDLER_NAME
743 	if (strcmp(name, "") == 0)
744 		return (-EINVAL);
745 #endif
746 	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
747 		return (-EINVAL);
748 	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
749 		return (-EOPNOTSUPP);
750 
751 	/*
752 	 * Remove alternate compat version of the xattr so we only set the
753 	 * version specified by the zfs_xattr_compat tunable.
754 	 *
755 	 * The following flags must be handled correctly:
756 	 *
757 	 *   XATTR_CREATE: fail if xattr already exists
758 	 *   XATTR_REPLACE: fail if xattr does not exist
759 	 */
760 	char *prefixed_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
761 	const char *clear_name, *set_name;
762 	if (zfs_xattr_compat) {
763 		clear_name = prefixed_name;
764 		set_name = name;
765 	} else {
766 		clear_name = name;
767 		set_name = prefixed_name;
768 	}
769 	/*
770 	 * Clear the old value with the alternative name format, if it exists.
771 	 */
772 	error = zpl_xattr_set(ip, clear_name, NULL, 0, flags);
773 	/*
774 	 * XATTR_CREATE was specified and we failed to clear the xattr
775 	 * because it already exists.  Stop here.
776 	 */
777 	if (error == -EEXIST)
778 		goto out;
779 	/*
780 	 * If XATTR_REPLACE was specified and we succeeded to clear
781 	 * an xattr, we don't need to replace anything when setting
782 	 * the new value.  If we failed with -ENODATA that's fine,
783 	 * there was nothing to be cleared and we can ignore the error.
784 	 */
785 	if (error == 0)
786 		flags &= ~XATTR_REPLACE;
787 	/*
788 	 * Set the new value with the configured name format.
789 	 */
790 	error = zpl_xattr_set(ip, set_name, value, size, flags);
791 out:
792 	kmem_strfree(prefixed_name);
793 	return (error);
794 }
795 ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
796 
797 static xattr_handler_t zpl_xattr_user_handler =
798 {
799 	.prefix	= XATTR_USER_PREFIX,
800 	.list	= zpl_xattr_user_list,
801 	.get	= zpl_xattr_user_get,
802 	.set	= zpl_xattr_user_set,
803 };
804 
805 /*
806  * Trusted extended attributes
807  *
808  * "Trusted extended attributes are visible and accessible only to
809  * processes that have the CAP_SYS_ADMIN capability.  Attributes in this
810  * class are used to implement mechanisms in user space (i.e., outside
811  * the kernel) which keep information in extended attributes to which
812  * ordinary processes should not have access." - xattr(7)
813  */
814 static int
815 __zpl_xattr_trusted_list(struct inode *ip, char *list, size_t list_size,
816     const char *name, size_t name_len)
817 {
818 	return (capable(CAP_SYS_ADMIN));
819 }
820 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_trusted_list);
821 
822 static int
823 __zpl_xattr_trusted_get(struct inode *ip, const char *name,
824     void *value, size_t size)
825 {
826 	char *xattr_name;
827 	int error;
828 
829 	if (!capable(CAP_SYS_ADMIN))
830 		return (-EACCES);
831 	/* xattr_resolve_name will do this for us if this is defined */
832 #ifndef HAVE_XATTR_HANDLER_NAME
833 	if (strcmp(name, "") == 0)
834 		return (-EINVAL);
835 #endif
836 	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
837 	error = zpl_xattr_get(ip, xattr_name, value, size);
838 	kmem_strfree(xattr_name);
839 
840 	return (error);
841 }
842 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
843 
844 static int
845 __zpl_xattr_trusted_set(struct inode *ip, const char *name,
846     const void *value, size_t size, int flags)
847 {
848 	char *xattr_name;
849 	int error;
850 
851 	if (!capable(CAP_SYS_ADMIN))
852 		return (-EACCES);
853 	/* xattr_resolve_name will do this for us if this is defined */
854 #ifndef HAVE_XATTR_HANDLER_NAME
855 	if (strcmp(name, "") == 0)
856 		return (-EINVAL);
857 #endif
858 	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
859 	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
860 	kmem_strfree(xattr_name);
861 
862 	return (error);
863 }
864 ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
865 
/*
 * Trusted xattr namespace handlers.
 */
static xattr_handler_t zpl_xattr_trusted_handler = {
	.prefix	= XATTR_TRUSTED_PREFIX,
	.list	= zpl_xattr_trusted_list,
	.get	= zpl_xattr_trusted_get,
	.set	= zpl_xattr_trusted_set,
};
872 
873 /*
874  * Extended security attributes
875  *
876  * "The security attribute namespace is used by kernel security modules,
877  * such as Security Enhanced Linux, and also to implement file
878  * capabilities (see capabilities(7)).  Read and write access
879  * permissions to security attributes depend on the policy implemented
880  * for each security attribute by the security module.  When no security
881  * module is loaded, all processes have read access to extended security
882  * attributes, and write access is limited to processes that have the
883  * CAP_SYS_ADMIN capability." - xattr(7)
884  */
/*
 * List filter for the security namespace.  Unconditionally returns 1
 * (presumably "visible", by analogy with the user and trusted filters);
 * none of the arguments are used.
 */
static int
__zpl_xattr_security_list(struct inode *ip, char *list, size_t list_size,
    const char *name, size_t name_len)
{
	return (1);
}
ZPL_XATTR_LIST_WRAPPER(zpl_xattr_security_list);
892 
893 static int
894 __zpl_xattr_security_get(struct inode *ip, const char *name,
895     void *value, size_t size)
896 {
897 	char *xattr_name;
898 	int error;
899 	/* xattr_resolve_name will do this for us if this is defined */
900 #ifndef HAVE_XATTR_HANDLER_NAME
901 	if (strcmp(name, "") == 0)
902 		return (-EINVAL);
903 #endif
904 	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
905 	error = zpl_xattr_get(ip, xattr_name, value, size);
906 	kmem_strfree(xattr_name);
907 
908 	return (error);
909 }
910 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
911 
912 static int
913 __zpl_xattr_security_set(struct inode *ip, const char *name,
914     const void *value, size_t size, int flags)
915 {
916 	char *xattr_name;
917 	int error;
918 	/* xattr_resolve_name will do this for us if this is defined */
919 #ifndef HAVE_XATTR_HANDLER_NAME
920 	if (strcmp(name, "") == 0)
921 		return (-EINVAL);
922 #endif
923 	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
924 	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
925 	kmem_strfree(xattr_name);
926 
927 	return (error);
928 }
929 ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
930 
931 static int
932 zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
933     void *fs_info)
934 {
935 	const struct xattr *xattr;
936 	int error = 0;
937 
938 	for (xattr = xattrs; xattr->name != NULL; xattr++) {
939 		error = __zpl_xattr_security_set(ip,
940 		    xattr->name, xattr->value, xattr->value_len, 0);
941 
942 		if (error < 0)
943 			break;
944 	}
945 
946 	return (error);
947 }
948 
949 int
950 zpl_xattr_security_init(struct inode *ip, struct inode *dip,
951     const struct qstr *qstr)
952 {
953 	return security_inode_init_security(ip, dip, qstr,
954 	    &zpl_xattr_security_init_impl, NULL);
955 }
956 
/*
 * Security xattr namespace handlers.
 */
static xattr_handler_t zpl_xattr_security_handler = {
	.prefix	= XATTR_SECURITY_PREFIX,
	.list	= zpl_xattr_security_list,
	.get	= zpl_xattr_security_get,
	.set	= zpl_xattr_security_set,
};
966 
967 /*
968  * Extended system attributes
969  *
970  * "Extended system attributes are used by the kernel to store system
971  * objects such as Access Control Lists.  Read and write access permissions
972  * to system attributes depend on the policy implemented for each system
973  * attribute implemented by filesystems in the kernel." - xattr(7)
974  */
975 #ifdef CONFIG_FS_POSIX_ACL
976 static int
977 zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type)
978 {
979 	char *name, *value = NULL;
980 	int error = 0;
981 	size_t size = 0;
982 
983 	if (S_ISLNK(ip->i_mode))
984 		return (-EOPNOTSUPP);
985 
986 	switch (type) {
987 	case ACL_TYPE_ACCESS:
988 		name = XATTR_NAME_POSIX_ACL_ACCESS;
989 		if (acl) {
990 			umode_t mode = ip->i_mode;
991 			error = posix_acl_equiv_mode(acl, &mode);
992 			if (error < 0) {
993 				return (error);
994 			} else {
995 				/*
996 				 * The mode bits will have been set by
997 				 * ->zfs_setattr()->zfs_acl_chmod_setattr()
998 				 * using the ZFS ACL conversion.  If they
999 				 * differ from the Posix ACL conversion dirty
1000 				 * the inode to write the Posix mode bits.
1001 				 */
1002 				if (ip->i_mode != mode) {
1003 					ip->i_mode = mode;
1004 					ip->i_ctime = current_time(ip);
1005 					zfs_mark_inode_dirty(ip);
1006 				}
1007 
1008 				if (error == 0)
1009 					acl = NULL;
1010 			}
1011 		}
1012 		break;
1013 
1014 	case ACL_TYPE_DEFAULT:
1015 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
1016 		if (!S_ISDIR(ip->i_mode))
1017 			return (acl ? -EACCES : 0);
1018 		break;
1019 
1020 	default:
1021 		return (-EINVAL);
1022 	}
1023 
1024 	if (acl) {
1025 		size = posix_acl_xattr_size(acl->a_count);
1026 		value = kmem_alloc(size, KM_SLEEP);
1027 
1028 		error = zpl_acl_to_xattr(acl, value, size);
1029 		if (error < 0) {
1030 			kmem_free(value, size);
1031 			return (error);
1032 		}
1033 	}
1034 
1035 	error = zpl_xattr_set(ip, name, value, size, 0);
1036 	if (value)
1037 		kmem_free(value, size);
1038 
1039 	if (!error) {
1040 		if (acl)
1041 			zpl_set_cached_acl(ip, type, acl);
1042 		else
1043 			zpl_forget_cached_acl(ip, type);
1044 	}
1045 
1046 	return (error);
1047 }
1048 
#ifdef HAVE_SET_ACL
/*
 * Entry point for setting a POSIX ACL; forwards to zpl_set_acl_impl().
 * The signature depends on whether the kernel passes a user namespace,
 * which is not used here.
 */
#ifdef HAVE_SET_ACL_USERNS
int
zpl_set_acl(struct user_namespace *userns, struct inode *ip,
    struct posix_acl *acl, int type)
{
	return (zpl_set_acl_impl(ip, acl, type));
}
#else
int
zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
{
	return (zpl_set_acl_impl(ip, acl, type));
}
#endif /* HAVE_SET_ACL_USERNS */
#endif /* HAVE_SET_ACL */
1061 
1062 static struct posix_acl *
1063 zpl_get_acl_impl(struct inode *ip, int type)
1064 {
1065 	struct posix_acl *acl;
1066 	void *value = NULL;
1067 	char *name;
1068 
1069 	/*
1070 	 * As of Linux 3.14, the kernel get_acl will check this for us.
1071 	 * Also as of Linux 4.7, comparing against ACL_NOT_CACHED is wrong
1072 	 * as the kernel get_acl will set it to temporary sentinel value.
1073 	 */
1074 #ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
1075 	acl = get_cached_acl(ip, type);
1076 	if (acl != ACL_NOT_CACHED)
1077 		return (acl);
1078 #endif
1079 
1080 	switch (type) {
1081 	case ACL_TYPE_ACCESS:
1082 		name = XATTR_NAME_POSIX_ACL_ACCESS;
1083 		break;
1084 	case ACL_TYPE_DEFAULT:
1085 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
1086 		break;
1087 	default:
1088 		return (ERR_PTR(-EINVAL));
1089 	}
1090 
1091 	int size = zpl_xattr_get(ip, name, NULL, 0);
1092 	if (size > 0) {
1093 		value = kmem_alloc(size, KM_SLEEP);
1094 		size = zpl_xattr_get(ip, name, value, size);
1095 	}
1096 
1097 	if (size > 0) {
1098 		acl = zpl_acl_from_xattr(value, size);
1099 	} else if (size == -ENODATA || size == -ENOSYS) {
1100 		acl = NULL;
1101 	} else {
1102 		acl = ERR_PTR(-EIO);
1103 	}
1104 
1105 	if (size > 0)
1106 		kmem_free(value, size);
1107 
1108 	/* As of Linux 4.7, the kernel get_acl will set this for us */
1109 #ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
1110 	if (!IS_ERR(acl))
1111 		zpl_set_cached_acl(ip, type, acl);
1112 #endif
1113 
1114 	return (acl);
1115 }
1116 
1117 #if defined(HAVE_GET_ACL_RCU)
1118 struct posix_acl *
1119 zpl_get_acl(struct inode *ip, int type, bool rcu)
1120 {
1121 	if (rcu)
1122 		return (ERR_PTR(-ECHILD));
1123 
1124 	return (zpl_get_acl_impl(ip, type));
1125 }
1126 #elif defined(HAVE_GET_ACL)
1127 struct posix_acl *
1128 zpl_get_acl(struct inode *ip, int type)
1129 {
1130 	return (zpl_get_acl_impl(ip, type));
1131 }
1132 #else
1133 #error "Unsupported iops->get_acl() implementation"
1134 #endif /* HAVE_GET_ACL_RCU */
1135 
1136 int
1137 zpl_init_acl(struct inode *ip, struct inode *dir)
1138 {
1139 	struct posix_acl *acl = NULL;
1140 	int error = 0;
1141 
1142 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1143 		return (0);
1144 
1145 	if (!S_ISLNK(ip->i_mode)) {
1146 		acl = zpl_get_acl_impl(dir, ACL_TYPE_DEFAULT);
1147 		if (IS_ERR(acl))
1148 			return (PTR_ERR(acl));
1149 		if (!acl) {
1150 			ip->i_mode &= ~current_umask();
1151 			ip->i_ctime = current_time(ip);
1152 			zfs_mark_inode_dirty(ip);
1153 			return (0);
1154 		}
1155 	}
1156 
1157 	if (acl) {
1158 		umode_t mode;
1159 
1160 		if (S_ISDIR(ip->i_mode)) {
1161 			error = zpl_set_acl_impl(ip, acl, ACL_TYPE_DEFAULT);
1162 			if (error)
1163 				goto out;
1164 		}
1165 
1166 		mode = ip->i_mode;
1167 		error = __posix_acl_create(&acl, GFP_KERNEL, &mode);
1168 		if (error >= 0) {
1169 			ip->i_mode = mode;
1170 			zfs_mark_inode_dirty(ip);
1171 			if (error > 0) {
1172 				error = zpl_set_acl_impl(ip, acl,
1173 				    ACL_TYPE_ACCESS);
1174 			}
1175 		}
1176 	}
1177 out:
1178 	zpl_posix_acl_release(acl);
1179 
1180 	return (error);
1181 }
1182 
1183 int
1184 zpl_chmod_acl(struct inode *ip)
1185 {
1186 	struct posix_acl *acl;
1187 	int error;
1188 
1189 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1190 		return (0);
1191 
1192 	if (S_ISLNK(ip->i_mode))
1193 		return (-EOPNOTSUPP);
1194 
1195 	acl = zpl_get_acl_impl(ip, ACL_TYPE_ACCESS);
1196 	if (IS_ERR(acl) || !acl)
1197 		return (PTR_ERR(acl));
1198 
1199 	error = __posix_acl_chmod(&acl, GFP_KERNEL, ip->i_mode);
1200 	if (!error)
1201 		error = zpl_set_acl_impl(ip, acl, ACL_TYPE_ACCESS);
1202 
1203 	zpl_posix_acl_release(acl);
1204 
1205 	return (error);
1206 }
1207 
1208 static int
1209 __zpl_xattr_acl_list_access(struct inode *ip, char *list, size_t list_size,
1210     const char *name, size_t name_len)
1211 {
1212 	char *xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
1213 	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_ACCESS);
1214 
1215 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1216 		return (0);
1217 
1218 	if (list && xattr_size <= list_size)
1219 		memcpy(list, xattr_name, xattr_size);
1220 
1221 	return (xattr_size);
1222 }
1223 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_access);
1224 
1225 static int
1226 __zpl_xattr_acl_list_default(struct inode *ip, char *list, size_t list_size,
1227     const char *name, size_t name_len)
1228 {
1229 	char *xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
1230 	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_DEFAULT);
1231 
1232 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1233 		return (0);
1234 
1235 	if (list && xattr_size <= list_size)
1236 		memcpy(list, xattr_name, xattr_size);
1237 
1238 	return (xattr_size);
1239 }
1240 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_default);
1241 
1242 static int
1243 __zpl_xattr_acl_get_access(struct inode *ip, const char *name,
1244     void *buffer, size_t size)
1245 {
1246 	struct posix_acl *acl;
1247 	int type = ACL_TYPE_ACCESS;
1248 	int error;
1249 	/* xattr_resolve_name will do this for us if this is defined */
1250 #ifndef HAVE_XATTR_HANDLER_NAME
1251 	if (strcmp(name, "") != 0)
1252 		return (-EINVAL);
1253 #endif
1254 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1255 		return (-EOPNOTSUPP);
1256 
1257 	acl = zpl_get_acl_impl(ip, type);
1258 	if (IS_ERR(acl))
1259 		return (PTR_ERR(acl));
1260 	if (acl == NULL)
1261 		return (-ENODATA);
1262 
1263 	error = zpl_acl_to_xattr(acl, buffer, size);
1264 	zpl_posix_acl_release(acl);
1265 
1266 	return (error);
1267 }
1268 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_access);
1269 
1270 static int
1271 __zpl_xattr_acl_get_default(struct inode *ip, const char *name,
1272     void *buffer, size_t size)
1273 {
1274 	struct posix_acl *acl;
1275 	int type = ACL_TYPE_DEFAULT;
1276 	int error;
1277 	/* xattr_resolve_name will do this for us if this is defined */
1278 #ifndef HAVE_XATTR_HANDLER_NAME
1279 	if (strcmp(name, "") != 0)
1280 		return (-EINVAL);
1281 #endif
1282 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1283 		return (-EOPNOTSUPP);
1284 
1285 	acl = zpl_get_acl_impl(ip, type);
1286 	if (IS_ERR(acl))
1287 		return (PTR_ERR(acl));
1288 	if (acl == NULL)
1289 		return (-ENODATA);
1290 
1291 	error = zpl_acl_to_xattr(acl, buffer, size);
1292 	zpl_posix_acl_release(acl);
1293 
1294 	return (error);
1295 }
1296 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
1297 
1298 static int
1299 __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
1300     const void *value, size_t size, int flags)
1301 {
1302 	struct posix_acl *acl;
1303 	int type = ACL_TYPE_ACCESS;
1304 	int error = 0;
1305 	/* xattr_resolve_name will do this for us if this is defined */
1306 #ifndef HAVE_XATTR_HANDLER_NAME
1307 	if (strcmp(name, "") != 0)
1308 		return (-EINVAL);
1309 #endif
1310 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1311 		return (-EOPNOTSUPP);
1312 
1313 	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
1314 		return (-EPERM);
1315 
1316 	if (value) {
1317 		acl = zpl_acl_from_xattr(value, size);
1318 		if (IS_ERR(acl))
1319 			return (PTR_ERR(acl));
1320 		else if (acl) {
1321 			error = zpl_posix_acl_valid(ip, acl);
1322 			if (error) {
1323 				zpl_posix_acl_release(acl);
1324 				return (error);
1325 			}
1326 		}
1327 	} else {
1328 		acl = NULL;
1329 	}
1330 	error = zpl_set_acl_impl(ip, acl, type);
1331 	zpl_posix_acl_release(acl);
1332 
1333 	return (error);
1334 }
1335 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
1336 
1337 static int
1338 __zpl_xattr_acl_set_default(struct inode *ip, const char *name,
1339     const void *value, size_t size, int flags)
1340 {
1341 	struct posix_acl *acl;
1342 	int type = ACL_TYPE_DEFAULT;
1343 	int error = 0;
1344 	/* xattr_resolve_name will do this for us if this is defined */
1345 #ifndef HAVE_XATTR_HANDLER_NAME
1346 	if (strcmp(name, "") != 0)
1347 		return (-EINVAL);
1348 #endif
1349 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1350 		return (-EOPNOTSUPP);
1351 
1352 	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
1353 		return (-EPERM);
1354 
1355 	if (value) {
1356 		acl = zpl_acl_from_xattr(value, size);
1357 		if (IS_ERR(acl))
1358 			return (PTR_ERR(acl));
1359 		else if (acl) {
1360 			error = zpl_posix_acl_valid(ip, acl);
1361 			if (error) {
1362 				zpl_posix_acl_release(acl);
1363 				return (error);
1364 			}
1365 		}
1366 	} else {
1367 		acl = NULL;
1368 	}
1369 
1370 	error = zpl_set_acl_impl(ip, acl, type);
1371 	zpl_posix_acl_release(acl);
1372 
1373 	return (error);
1374 }
1375 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default);
1376 
1377 /*
1378  * ACL access xattr namespace handlers.
1379  *
1380  * Use .name instead of .prefix when available. xattr_resolve_name will match
1381  * whole name and reject anything that has .name only as prefix.
1382  */
1383 static xattr_handler_t zpl_xattr_acl_access_handler = {
1384 #ifdef HAVE_XATTR_HANDLER_NAME
1385 	.name	= XATTR_NAME_POSIX_ACL_ACCESS,
1386 #else
1387 	.prefix	= XATTR_NAME_POSIX_ACL_ACCESS,
1388 #endif
1389 	.list	= zpl_xattr_acl_list_access,
1390 	.get	= zpl_xattr_acl_get_access,
1391 	.set	= zpl_xattr_acl_set_access,
1392 #if defined(HAVE_XATTR_LIST_SIMPLE) || \
1393     defined(HAVE_XATTR_LIST_DENTRY) || \
1394     defined(HAVE_XATTR_LIST_HANDLER)
1395 	.flags	= ACL_TYPE_ACCESS,
1396 #endif
1397 };
1398 
1399 /*
1400  * ACL default xattr namespace handlers.
1401  *
1402  * Use .name instead of .prefix when available. xattr_resolve_name will match
1403  * whole name and reject anything that has .name only as prefix.
1404  */
1405 static xattr_handler_t zpl_xattr_acl_default_handler = {
1406 #ifdef HAVE_XATTR_HANDLER_NAME
1407 	.name	= XATTR_NAME_POSIX_ACL_DEFAULT,
1408 #else
1409 	.prefix	= XATTR_NAME_POSIX_ACL_DEFAULT,
1410 #endif
1411 	.list	= zpl_xattr_acl_list_default,
1412 	.get	= zpl_xattr_acl_get_default,
1413 	.set	= zpl_xattr_acl_set_default,
1414 #if defined(HAVE_XATTR_LIST_SIMPLE) || \
1415     defined(HAVE_XATTR_LIST_DENTRY) || \
1416     defined(HAVE_XATTR_LIST_HANDLER)
1417 	.flags	= ACL_TYPE_DEFAULT,
1418 #endif
1419 };
1420 
1421 #endif /* CONFIG_FS_POSIX_ACL */
1422 
1423 xattr_handler_t *zpl_xattr_handlers[] = {
1424 	&zpl_xattr_security_handler,
1425 	&zpl_xattr_trusted_handler,
1426 	&zpl_xattr_user_handler,
1427 #ifdef CONFIG_FS_POSIX_ACL
1428 	&zpl_xattr_acl_access_handler,
1429 	&zpl_xattr_acl_default_handler,
1430 #endif /* CONFIG_FS_POSIX_ACL */
1431 	NULL
1432 };
1433 
1434 static const struct xattr_handler *
1435 zpl_xattr_handler(const char *name)
1436 {
1437 	if (strncmp(name, XATTR_USER_PREFIX,
1438 	    XATTR_USER_PREFIX_LEN) == 0)
1439 		return (&zpl_xattr_user_handler);
1440 
1441 	if (strncmp(name, XATTR_TRUSTED_PREFIX,
1442 	    XATTR_TRUSTED_PREFIX_LEN) == 0)
1443 		return (&zpl_xattr_trusted_handler);
1444 
1445 	if (strncmp(name, XATTR_SECURITY_PREFIX,
1446 	    XATTR_SECURITY_PREFIX_LEN) == 0)
1447 		return (&zpl_xattr_security_handler);
1448 
1449 #ifdef CONFIG_FS_POSIX_ACL
1450 	if (strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
1451 	    sizeof (XATTR_NAME_POSIX_ACL_ACCESS)) == 0)
1452 		return (&zpl_xattr_acl_access_handler);
1453 
1454 	if (strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
1455 	    sizeof (XATTR_NAME_POSIX_ACL_DEFAULT)) == 0)
1456 		return (&zpl_xattr_acl_default_handler);
1457 #endif /* CONFIG_FS_POSIX_ACL */
1458 
1459 	return (NULL);
1460 }
1461 
1462 static enum xattr_permission
1463 zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
1464 {
1465 	const struct xattr_handler *handler;
1466 	struct dentry *d __maybe_unused = xf->dentry;
1467 	enum xattr_permission perm = XAPERM_ALLOW;
1468 
1469 	handler = zpl_xattr_handler(name);
1470 	if (handler == NULL) {
1471 		/* Do not expose FreeBSD system namespace xattrs. */
1472 		if (ZFS_XA_NS_PREFIX_MATCH(FREEBSD, name))
1473 			return (XAPERM_DENY);
1474 		/*
1475 		 * Anything that doesn't match a known namespace gets put in the
1476 		 * user namespace for compatibility with other platforms.
1477 		 */
1478 		perm = XAPERM_COMPAT;
1479 		handler = &zpl_xattr_user_handler;
1480 	}
1481 
1482 	if (handler->list) {
1483 #if defined(HAVE_XATTR_LIST_SIMPLE)
1484 		if (!handler->list(d))
1485 			return (XAPERM_DENY);
1486 #elif defined(HAVE_XATTR_LIST_DENTRY)
1487 		if (!handler->list(d, NULL, 0, name, name_len, 0))
1488 			return (XAPERM_DENY);
1489 #elif defined(HAVE_XATTR_LIST_HANDLER)
1490 		if (!handler->list(handler, d, NULL, 0, name, name_len))
1491 			return (XAPERM_DENY);
1492 #endif
1493 	}
1494 
1495 	return (perm);
1496 }
1497 
1498 #if !defined(HAVE_POSIX_ACL_RELEASE) || defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)
1499 struct acl_rel_struct {
1500 	struct acl_rel_struct *next;
1501 	struct posix_acl *acl;
1502 	clock_t time;
1503 };
1504 
1505 #define	ACL_REL_GRACE	(60*HZ)
1506 #define	ACL_REL_WINDOW	(1*HZ)
1507 #define	ACL_REL_SCHED	(ACL_REL_GRACE+ACL_REL_WINDOW)
1508 
1509 /*
1510  * Lockless multi-producer single-consumer fifo list.
1511  * Nodes are added to tail and removed from head. Tail pointer is our
1512  * synchronization point. It always points to the next pointer of the last
1513  * node, or head if list is empty.
1514  */
1515 static struct acl_rel_struct *acl_rel_head = NULL;
1516 static struct acl_rel_struct **acl_rel_tail = &acl_rel_head;
1517 
1518 static void
1519 zpl_posix_acl_free(void *arg)
1520 {
1521 	struct acl_rel_struct *freelist = NULL;
1522 	struct acl_rel_struct *a;
1523 	clock_t new_time;
1524 	boolean_t refire = B_FALSE;
1525 
1526 	ASSERT3P(acl_rel_head, !=, NULL);
1527 	while (acl_rel_head) {
1528 		a = acl_rel_head;
1529 		if (ddi_get_lbolt() - a->time >= ACL_REL_GRACE) {
1530 			/*
1531 			 * If a is the last node we need to reset tail, but we
1532 			 * need to use cmpxchg to make sure it is still the
1533 			 * last node.
1534 			 */
1535 			if (acl_rel_tail == &a->next) {
1536 				acl_rel_head = NULL;
1537 				if (cmpxchg(&acl_rel_tail, &a->next,
1538 				    &acl_rel_head) == &a->next) {
1539 					ASSERT3P(a->next, ==, NULL);
1540 					a->next = freelist;
1541 					freelist = a;
1542 					break;
1543 				}
1544 			}
1545 			/*
1546 			 * a is not last node, make sure next pointer is set
1547 			 * by the adder and advance the head.
1548 			 */
1549 			while (READ_ONCE(a->next) == NULL)
1550 				cpu_relax();
1551 			acl_rel_head = a->next;
1552 			a->next = freelist;
1553 			freelist = a;
1554 		} else {
1555 			/*
1556 			 * a is still in grace period. We are responsible to
1557 			 * reschedule the free task, since adder will only do
1558 			 * so if list is empty.
1559 			 */
1560 			new_time = a->time + ACL_REL_SCHED;
1561 			refire = B_TRUE;
1562 			break;
1563 		}
1564 	}
1565 
1566 	if (refire)
1567 		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
1568 		    NULL, TQ_SLEEP, new_time);
1569 
1570 	while (freelist) {
1571 		a = freelist;
1572 		freelist = a->next;
1573 		kfree(a->acl);
1574 		kmem_free(a, sizeof (struct acl_rel_struct));
1575 	}
1576 }
1577 
1578 void
1579 zpl_posix_acl_release_impl(struct posix_acl *acl)
1580 {
1581 	struct acl_rel_struct *a, **prev;
1582 
1583 	a = kmem_alloc(sizeof (struct acl_rel_struct), KM_SLEEP);
1584 	a->next = NULL;
1585 	a->acl = acl;
1586 	a->time = ddi_get_lbolt();
1587 	/* atomically points tail to us and get the previous tail */
1588 	prev = xchg(&acl_rel_tail, &a->next);
1589 	ASSERT3P(*prev, ==, NULL);
1590 	*prev = a;
1591 	/* if it was empty before, schedule the free task */
1592 	if (prev == &acl_rel_head)
1593 		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
1594 		    NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
1595 }
1596 #endif
1597 
1598 ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,
1599 	"Use legacy ZFS xattr naming for writing new user namespace xattrs");
1600