xref: /freebsd/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c (revision 35c0a8c449fd2b7f75029ebed5e10852240f0865)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
23  *
24  * Extended attributes (xattr) on Solaris are implemented as files
25  * which exist in a hidden xattr directory.  These extended attributes
26  * can be accessed using the attropen() system call which opens
27  * the extended attribute.  It can then be manipulated just like
28  * a standard file descriptor.  This has a couple advantages such
29  * as practically no size limit on the file, and the extended
30  * attributes permissions may differ from those of the parent file.
31  * This interface is really quite clever, but it's also completely
32  * different than what is supported on Linux.  It also comes with a
33  * steep performance penalty when accessing small xattrs because they
34  * are not stored with the parent file.
35  *
36  * Under Linux extended attributes are manipulated by the system
37  * calls getxattr(2), setxattr(2), and listxattr(2).  They consider
38  * extended attributes to be name/value pairs where the name is a
39  * NULL terminated string.  The name must also include one of the
40  * following namespace prefixes:
41  *
42  *   user     - No restrictions and is available to user applications.
43  *   trusted  - Restricted to kernel and root (CAP_SYS_ADMIN) use.
44  *   system   - Used for access control lists (system.nfs4_acl, etc).
45  *   security - Used by SELinux to store a file's security context.
46  *
47  * The value under Linux is limited to 65536 bytes of binary data.
48  * In practice, individual xattrs tend to be much smaller than this
49  * and are typically less than 100 bytes.  A good example of this
50  * are the security.selinux xattrs which are less than 100 bytes and
51  * exist for every file when xattr labeling is enabled.
52  *
53  * The Linux xattr implementation has been written to take advantage of
54  * this typical usage.  When the dataset property 'xattr=sa' is set,
55  * then xattrs will be preferentially stored as System Attributes (SA).
56  * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
57  * up to 64k of xattrs to be stored in the spill block.  If additional
58  * xattr space is required, which is unlikely under Linux, they will
59  * be stored using the traditional directory approach.
60  *
61  * This optimization results in roughly a 3x performance improvement
62  * when accessing xattrs because it avoids the need to perform a seek
63  * for every xattr value.  When multiple xattrs are stored per-file
64  * the performance improvements are even greater because all of the
65  * xattrs stored in the spill block will be cached.
66  *
67  * However, by default SA based xattrs are disabled in the Linux port
68  * to maximize compatibility with other implementations.  If you do
69  * enable SA based xattrs then they will not be visible on platforms
70  * which do not support this feature.
71  *
72  * NOTE: One additional consequence of the xattr directory implementation
73  * is that when an extended attribute is manipulated an inode is created.
74  * This inode will exist in the Linux inode cache but there will be no
75  * associated entry in the dentry cache which references it.  This is
76  * safe but it may result in some confusion.  Enabling SA based xattrs
77  * largely avoids the issue except in the overflow case.
78  */
79 
80 #include <sys/zfs_znode.h>
81 #include <sys/zfs_vfsops.h>
82 #include <sys/zfs_vnops.h>
83 #include <sys/zap.h>
84 #include <sys/vfs.h>
85 #include <sys/zpl.h>
86 #include <linux/vfs_compat.h>
87 
/*
 * Result of the per-namespace visibility check applied to each xattr
 * name while building a listing (see zpl_xattr_filldir()).
 */
enum xattr_permission {
	XAPERM_DENY = 0,	/* name is hidden from the listing */
	XAPERM_ALLOW = 1,	/* name is listed exactly as stored */
	XAPERM_COMPAT = 2,	/* name is listed with "user." prepended */
};
93 
94 typedef struct xattr_filldir {
95 	size_t size;
96 	size_t offset;
97 	char *buf;
98 	struct dentry *dentry;
99 } xattr_filldir_t;
100 
101 static enum xattr_permission zpl_xattr_permission(xattr_filldir_t *,
102     const char *, int);
103 
104 static int zfs_xattr_compat = 0;
105 
106 /*
107  * Determine is a given xattr name should be visible and if so copy it
108  * in to the provided buffer (xf->buf).
109  */
110 static int
111 zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)
112 {
113 	enum xattr_permission perm;
114 
115 	/* Check permissions using the per-namespace list xattr handler. */
116 	perm = zpl_xattr_permission(xf, name, name_len);
117 	if (perm == XAPERM_DENY)
118 		return (0);
119 
120 	/* Prefix the name with "user." if it does not have a namespace. */
121 	if (perm == XAPERM_COMPAT) {
122 		if (xf->buf) {
123 			if (xf->offset + XATTR_USER_PREFIX_LEN + 1 > xf->size)
124 				return (-ERANGE);
125 
126 			memcpy(xf->buf + xf->offset, XATTR_USER_PREFIX,
127 			    XATTR_USER_PREFIX_LEN);
128 			xf->buf[xf->offset + XATTR_USER_PREFIX_LEN] = '\0';
129 		}
130 
131 		xf->offset += XATTR_USER_PREFIX_LEN;
132 	}
133 
134 	/* When xf->buf is NULL only calculate the required size. */
135 	if (xf->buf) {
136 		if (xf->offset + name_len + 1 > xf->size)
137 			return (-ERANGE);
138 
139 		memcpy(xf->buf + xf->offset, name, name_len);
140 		xf->buf[xf->offset + name_len] = '\0';
141 	}
142 
143 	xf->offset += (name_len + 1);
144 
145 	return (0);
146 }
147 
148 /*
149  * Read as many directory entry names as will fit in to the provided buffer,
150  * or when no buffer is provided calculate the required buffer size.
151  */
152 static int
153 zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf)
154 {
155 	zap_cursor_t zc;
156 	zap_attribute_t	*zap = zap_attribute_alloc();
157 	int error;
158 
159 	zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id);
160 
161 	while ((error = -zap_cursor_retrieve(&zc, zap)) == 0) {
162 
163 		if (zap->za_integer_length != 8 || zap->za_num_integers != 1) {
164 			error = -ENXIO;
165 			break;
166 		}
167 
168 		error = zpl_xattr_filldir(xf, zap->za_name,
169 		    strlen(zap->za_name));
170 		if (error)
171 			break;
172 
173 		zap_cursor_advance(&zc);
174 	}
175 
176 	zap_cursor_fini(&zc);
177 	zap_attribute_free(zap);
178 
179 	if (error == -ENOENT)
180 		error = 0;
181 
182 	return (error);
183 }
184 
185 static ssize_t
186 zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
187 {
188 	struct inode *ip = xf->dentry->d_inode;
189 	struct inode *dxip = NULL;
190 	znode_t *dxzp;
191 	int error;
192 
193 	/* Lookup the xattr directory */
194 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
195 	    cr, NULL, NULL);
196 	if (error) {
197 		if (error == -ENOENT)
198 			error = 0;
199 
200 		return (error);
201 	}
202 
203 	dxip = ZTOI(dxzp);
204 	error = zpl_xattr_readdir(dxip, xf);
205 	iput(dxip);
206 
207 	return (error);
208 }
209 
210 static ssize_t
211 zpl_xattr_list_sa(xattr_filldir_t *xf)
212 {
213 	znode_t *zp = ITOZ(xf->dentry->d_inode);
214 	nvpair_t *nvp = NULL;
215 	int error = 0;
216 
217 	mutex_enter(&zp->z_lock);
218 	if (zp->z_xattr_cached == NULL)
219 		error = -zfs_sa_get_xattr(zp);
220 	mutex_exit(&zp->z_lock);
221 
222 	if (error)
223 		return (error);
224 
225 	ASSERT(zp->z_xattr_cached);
226 
227 	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
228 		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
229 
230 		error = zpl_xattr_filldir(xf, nvpair_name(nvp),
231 		    strlen(nvpair_name(nvp)));
232 		if (error)
233 			return (error);
234 	}
235 
236 	return (0);
237 }
238 
239 ssize_t
240 zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
241 {
242 	znode_t *zp = ITOZ(dentry->d_inode);
243 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
244 	xattr_filldir_t xf = { buffer_size, 0, buffer, dentry };
245 	cred_t *cr = CRED();
246 	fstrans_cookie_t cookie;
247 	int error = 0;
248 
249 	crhold(cr);
250 	cookie = spl_fstrans_mark();
251 	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
252 		goto out1;
253 	rw_enter(&zp->z_xattr_lock, RW_READER);
254 
255 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
256 		error = zpl_xattr_list_sa(&xf);
257 		if (error)
258 			goto out;
259 	}
260 
261 	error = zpl_xattr_list_dir(&xf, cr);
262 	if (error)
263 		goto out;
264 
265 	error = xf.offset;
266 out:
267 
268 	rw_exit(&zp->z_xattr_lock);
269 	zpl_exit(zfsvfs, FTAG);
270 out1:
271 	spl_fstrans_unmark(cookie);
272 	crfree(cr);
273 
274 	return (error);
275 }
276 
277 static int
278 zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
279     size_t size, cred_t *cr)
280 {
281 	fstrans_cookie_t cookie;
282 	struct inode *xip = NULL;
283 	znode_t *dxzp = NULL;
284 	znode_t *xzp = NULL;
285 	int error;
286 
287 	/* Lookup the xattr directory */
288 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
289 	    cr, NULL, NULL);
290 	if (error)
291 		goto out;
292 
293 	/* Lookup a specific xattr name in the directory */
294 	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
295 	if (error)
296 		goto out;
297 
298 	xip = ZTOI(xzp);
299 	if (!size) {
300 		error = i_size_read(xip);
301 		goto out;
302 	}
303 
304 	if (size < i_size_read(xip)) {
305 		error = -ERANGE;
306 		goto out;
307 	}
308 
309 	struct iovec iov;
310 	iov.iov_base = (void *)value;
311 	iov.iov_len = size;
312 
313 	zfs_uio_t uio;
314 	zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0);
315 
316 	cookie = spl_fstrans_mark();
317 	error = -zfs_read(ITOZ(xip), &uio, 0, cr);
318 	spl_fstrans_unmark(cookie);
319 
320 	if (error == 0)
321 		error = size - zfs_uio_resid(&uio);
322 out:
323 	if (xzp)
324 		zrele(xzp);
325 
326 	if (dxzp)
327 		zrele(dxzp);
328 
329 	return (error);
330 }
331 
332 static int
333 zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
334 {
335 	znode_t *zp = ITOZ(ip);
336 	uchar_t *nv_value;
337 	uint_t nv_size;
338 	int error = 0;
339 
340 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
341 
342 	mutex_enter(&zp->z_lock);
343 	if (zp->z_xattr_cached == NULL)
344 		error = -zfs_sa_get_xattr(zp);
345 	mutex_exit(&zp->z_lock);
346 
347 	if (error)
348 		return (error);
349 
350 	ASSERT(zp->z_xattr_cached);
351 	error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,
352 	    &nv_value, &nv_size);
353 	if (error)
354 		return (error);
355 
356 	if (size == 0 || value == NULL)
357 		return (nv_size);
358 
359 	if (size < nv_size)
360 		return (-ERANGE);
361 
362 	memcpy(value, nv_value, nv_size);
363 
364 	return (nv_size);
365 }
366 
367 static int
368 __zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
369     cred_t *cr)
370 {
371 	znode_t *zp = ITOZ(ip);
372 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
373 	int error;
374 
375 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
376 
377 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
378 		error = zpl_xattr_get_sa(ip, name, value, size);
379 		if (error != -ENOENT)
380 			goto out;
381 	}
382 
383 	error = zpl_xattr_get_dir(ip, name, value, size, cr);
384 out:
385 	if (error == -ENOENT)
386 		error = -ENODATA;
387 
388 	return (error);
389 }
390 
391 #define	XATTR_NOENT	0x0
392 #define	XATTR_IN_SA	0x1
393 #define	XATTR_IN_DIR	0x2
394 /* check where the xattr resides */
395 static int
396 __zpl_xattr_where(struct inode *ip, const char *name, int *where, cred_t *cr)
397 {
398 	znode_t *zp = ITOZ(ip);
399 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
400 	int error;
401 
402 	ASSERT(where);
403 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
404 
405 	*where = XATTR_NOENT;
406 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
407 		error = zpl_xattr_get_sa(ip, name, NULL, 0);
408 		if (error >= 0)
409 			*where |= XATTR_IN_SA;
410 		else if (error != -ENOENT)
411 			return (error);
412 	}
413 
414 	error = zpl_xattr_get_dir(ip, name, NULL, 0, cr);
415 	if (error >= 0)
416 		*where |= XATTR_IN_DIR;
417 	else if (error != -ENOENT)
418 		return (error);
419 
420 	if (*where == (XATTR_IN_SA|XATTR_IN_DIR))
421 		cmn_err(CE_WARN, "ZFS: inode %p has xattr \"%s\""
422 		    " in both SA and dir", ip, name);
423 	if (*where == XATTR_NOENT)
424 		error = -ENODATA;
425 	else
426 		error = 0;
427 	return (error);
428 }
429 
430 static int
431 zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
432 {
433 	znode_t *zp = ITOZ(ip);
434 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
435 	cred_t *cr = CRED();
436 	fstrans_cookie_t cookie;
437 	int error;
438 
439 	crhold(cr);
440 	cookie = spl_fstrans_mark();
441 	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
442 		goto out;
443 	rw_enter(&zp->z_xattr_lock, RW_READER);
444 	error = __zpl_xattr_get(ip, name, value, size, cr);
445 	rw_exit(&zp->z_xattr_lock);
446 	zpl_exit(zfsvfs, FTAG);
447 out:
448 	spl_fstrans_unmark(cookie);
449 	crfree(cr);
450 
451 	return (error);
452 }
453 
454 static int
455 zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
456     size_t size, int flags, cred_t *cr)
457 {
458 	znode_t *dxzp = NULL;
459 	znode_t *xzp = NULL;
460 	vattr_t *vap = NULL;
461 	int lookup_flags, error;
462 	const int xattr_mode = S_IFREG | 0644;
463 	loff_t pos = 0;
464 
465 	/*
466 	 * Lookup the xattr directory.  When we're adding an entry pass
467 	 * CREATE_XATTR_DIR to ensure the xattr directory is created.
468 	 * When removing an entry this flag is not passed to avoid
469 	 * unnecessarily creating a new xattr directory.
470 	 */
471 	lookup_flags = LOOKUP_XATTR;
472 	if (value != NULL)
473 		lookup_flags |= CREATE_XATTR_DIR;
474 
475 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, lookup_flags,
476 	    cr, NULL, NULL);
477 	if (error)
478 		goto out;
479 
480 	/* Lookup a specific xattr name in the directory */
481 	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
482 	if (error && (error != -ENOENT))
483 		goto out;
484 
485 	error = 0;
486 
487 	/* Remove a specific name xattr when value is set to NULL. */
488 	if (value == NULL) {
489 		if (xzp)
490 			error = -zfs_remove(dxzp, (char *)name, cr, 0);
491 
492 		goto out;
493 	}
494 
495 	/* Lookup failed create a new xattr. */
496 	if (xzp == NULL) {
497 		vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
498 		vap->va_mode = xattr_mode;
499 		vap->va_mask = ATTR_MODE;
500 		vap->va_uid = crgetuid(cr);
501 		vap->va_gid = crgetgid(cr);
502 
503 		error = -zfs_create(dxzp, (char *)name, vap, 0, 0644, &xzp,
504 		    cr, ATTR_NOACLCHECK, NULL, zfs_init_idmap);
505 		if (error)
506 			goto out;
507 	}
508 
509 	ASSERT(xzp != NULL);
510 
511 	error = -zfs_freesp(xzp, 0, 0, xattr_mode, TRUE);
512 	if (error)
513 		goto out;
514 
515 	error = -zfs_write_simple(xzp, value, size, pos, NULL);
516 out:
517 	if (error == 0) {
518 		zpl_inode_set_ctime_to_ts(ip, current_time(ip));
519 		zfs_mark_inode_dirty(ip);
520 	}
521 
522 	if (vap)
523 		kmem_free(vap, sizeof (vattr_t));
524 
525 	if (xzp)
526 		zrele(xzp);
527 
528 	if (dxzp)
529 		zrele(dxzp);
530 
531 	if (error == -ENOENT)
532 		error = -ENODATA;
533 
534 	ASSERT3S(error, <=, 0);
535 
536 	return (error);
537 }
538 
539 static int
540 zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
541     size_t size, int flags, cred_t *cr)
542 {
543 	znode_t *zp = ITOZ(ip);
544 	nvlist_t *nvl;
545 	size_t sa_size;
546 	int error = 0;
547 
548 	mutex_enter(&zp->z_lock);
549 	if (zp->z_xattr_cached == NULL)
550 		error = -zfs_sa_get_xattr(zp);
551 	mutex_exit(&zp->z_lock);
552 
553 	if (error)
554 		return (error);
555 
556 	ASSERT(zp->z_xattr_cached);
557 	nvl = zp->z_xattr_cached;
558 
559 	if (value == NULL) {
560 		error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
561 		if (error == -ENOENT)
562 			error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
563 	} else {
564 		/* Limited to 32k to keep nvpair memory allocations small */
565 		if (size > DXATTR_MAX_ENTRY_SIZE)
566 			return (-EFBIG);
567 
568 		/* Prevent the DXATTR SA from consuming the entire SA region */
569 		error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
570 		if (error)
571 			return (error);
572 
573 		if (sa_size > DXATTR_MAX_SA_SIZE)
574 			return (-EFBIG);
575 
576 		error = -nvlist_add_byte_array(nvl, name,
577 		    (uchar_t *)value, size);
578 	}
579 
580 	/*
581 	 * Update the SA for additions, modifications, and removals. On
582 	 * error drop the inconsistent cached version of the nvlist, it
583 	 * will be reconstructed from the ARC when next accessed.
584 	 */
585 	if (error == 0)
586 		error = -zfs_sa_set_xattr(zp, name, value, size);
587 
588 	if (error) {
589 		nvlist_free(nvl);
590 		zp->z_xattr_cached = NULL;
591 	}
592 
593 	ASSERT3S(error, <=, 0);
594 
595 	return (error);
596 }
597 
598 static int
599 zpl_xattr_set(struct inode *ip, const char *name, const void *value,
600     size_t size, int flags)
601 {
602 	znode_t *zp = ITOZ(ip);
603 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
604 	cred_t *cr = CRED();
605 	fstrans_cookie_t cookie;
606 	int where;
607 	int error;
608 
609 	crhold(cr);
610 	cookie = spl_fstrans_mark();
611 	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
612 		goto out1;
613 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
614 
615 	/*
616 	 * Before setting the xattr check to see if it already exists.
617 	 * This is done to ensure the following optional flags are honored.
618 	 *
619 	 *   XATTR_CREATE: fail if xattr already exists
620 	 *   XATTR_REPLACE: fail if xattr does not exist
621 	 *
622 	 * We also want to know if it resides in sa or dir, so we can make
623 	 * sure we don't end up with duplicate in both places.
624 	 */
625 	error = __zpl_xattr_where(ip, name, &where, cr);
626 	if (error < 0) {
627 		if (error != -ENODATA)
628 			goto out;
629 		if (flags & XATTR_REPLACE)
630 			goto out;
631 
632 		/* The xattr to be removed already doesn't exist */
633 		error = 0;
634 		if (value == NULL)
635 			goto out;
636 	} else {
637 		error = -EEXIST;
638 		if (flags & XATTR_CREATE)
639 			goto out;
640 	}
641 
642 	/* Preferentially store the xattr as a SA for better performance */
643 	if (zfsvfs->z_use_sa && zp->z_is_sa &&
644 	    (zfsvfs->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
645 		error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
646 		if (error == 0) {
647 			/*
648 			 * Successfully put into SA, we need to clear the one
649 			 * in dir.
650 			 */
651 			if (where & XATTR_IN_DIR)
652 				zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr);
653 			goto out;
654 		}
655 	}
656 
657 	error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
658 	/*
659 	 * Successfully put into dir, we need to clear the one in SA.
660 	 */
661 	if (error == 0 && (where & XATTR_IN_SA))
662 		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
663 out:
664 	rw_exit(&zp->z_xattr_lock);
665 	zpl_exit(zfsvfs, FTAG);
666 out1:
667 	spl_fstrans_unmark(cookie);
668 	crfree(cr);
669 	ASSERT3S(error, <=, 0);
670 
671 	return (error);
672 }
673 
674 /*
675  * Extended user attributes
676  *
677  * "Extended user attributes may be assigned to files and directories for
678  * storing arbitrary additional information such as the mime type,
679  * character set or encoding of a file.  The access permissions for user
680  * attributes are defined by the file permission bits: read permission
681  * is required to retrieve the attribute value, and writer permission is
682  * required to change it.
683  *
684  * The file permission bits of regular files and directories are
685  * interpreted differently from the file permission bits of special
686  * files and symbolic links.  For regular files and directories the file
687  * permission bits define access to the file's contents, while for
688  * device special files they define access to the device described by
689  * the special file.  The file permissions of symbolic links are not
690  * used in access checks.  These differences would allow users to
691  * consume filesystem resources in a way not controllable by disk quotas
692  * for group or world writable special files and directories.
693  *
694  * For this reason, extended user attributes are allowed only for
695  * regular files and directories, and access to extended user attributes
696  * is restricted to the owner and to users with appropriate capabilities
697  * for directories with the sticky bit set (see the chmod(1) manual page
698  * for an explanation of the sticky bit)." - xattr(7)
699  *
700  * ZFS allows extended user attributes to be disabled administratively
701  * by setting the 'xattr=off' property on the dataset.
702  */
703 static int
704 __zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,
705     const char *name, size_t name_len)
706 {
707 	return (ITOZSB(ip)->z_flags & ZSB_XATTR);
708 }
709 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_user_list);
710 
711 static int
712 __zpl_xattr_user_get(struct inode *ip, const char *name,
713     void *value, size_t size)
714 {
715 	int error;
716 	/* xattr_resolve_name will do this for us if this is defined */
717 	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
718 		return (-EINVAL);
719 	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
720 		return (-EOPNOTSUPP);
721 
722 	/*
723 	 * Try to look up the name with the namespace prefix first for
724 	 * compatibility with xattrs from this platform.  If that fails,
725 	 * try again without the namespace prefix for compatibility with
726 	 * other platforms.
727 	 */
728 	char *xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
729 	error = zpl_xattr_get(ip, xattr_name, value, size);
730 	kmem_strfree(xattr_name);
731 	if (error == -ENODATA)
732 		error = zpl_xattr_get(ip, name, value, size);
733 
734 	return (error);
735 }
736 ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
737 
738 static int
739 __zpl_xattr_user_set(zidmap_t *user_ns,
740     struct inode *ip, const char *name,
741     const void *value, size_t size, int flags)
742 {
743 	(void) user_ns;
744 	int error = 0;
745 	/* xattr_resolve_name will do this for us if this is defined */
746 	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
747 		return (-EINVAL);
748 	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
749 		return (-EOPNOTSUPP);
750 
751 	/*
752 	 * Remove alternate compat version of the xattr so we only set the
753 	 * version specified by the zfs_xattr_compat tunable.
754 	 *
755 	 * The following flags must be handled correctly:
756 	 *
757 	 *   XATTR_CREATE: fail if xattr already exists
758 	 *   XATTR_REPLACE: fail if xattr does not exist
759 	 */
760 	char *prefixed_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
761 	const char *clear_name, *set_name;
762 	if (zfs_xattr_compat) {
763 		clear_name = prefixed_name;
764 		set_name = name;
765 	} else {
766 		clear_name = name;
767 		set_name = prefixed_name;
768 	}
769 	/*
770 	 * Clear the old value with the alternative name format, if it exists.
771 	 */
772 	error = zpl_xattr_set(ip, clear_name, NULL, 0, flags);
773 	/*
774 	 * XATTR_CREATE was specified and we failed to clear the xattr
775 	 * because it already exists.  Stop here.
776 	 */
777 	if (error == -EEXIST)
778 		goto out;
779 	/*
780 	 * If XATTR_REPLACE was specified and we succeeded to clear
781 	 * an xattr, we don't need to replace anything when setting
782 	 * the new value.  If we failed with -ENODATA that's fine,
783 	 * there was nothing to be cleared and we can ignore the error.
784 	 */
785 	if (error == 0)
786 		flags &= ~XATTR_REPLACE;
787 	/*
788 	 * Set the new value with the configured name format.
789 	 */
790 	error = zpl_xattr_set(ip, set_name, value, size, flags);
791 out:
792 	kmem_strfree(prefixed_name);
793 	return (error);
794 }
795 ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
796 
797 static xattr_handler_t zpl_xattr_user_handler =
798 {
799 	.prefix	= XATTR_USER_PREFIX,
800 	.list	= zpl_xattr_user_list,
801 	.get	= zpl_xattr_user_get,
802 	.set	= zpl_xattr_user_set,
803 };
804 
805 /*
806  * Trusted extended attributes
807  *
808  * "Trusted extended attributes are visible and accessible only to
809  * processes that have the CAP_SYS_ADMIN capability.  Attributes in this
810  * class are used to implement mechanisms in user space (i.e., outside
811  * the kernel) which keep information in extended attributes to which
812  * ordinary processes should not have access." - xattr(7)
813  */
814 static int
815 __zpl_xattr_trusted_list(struct inode *ip, char *list, size_t list_size,
816     const char *name, size_t name_len)
817 {
818 	return (capable(CAP_SYS_ADMIN));
819 }
820 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_trusted_list);
821 
822 static int
823 __zpl_xattr_trusted_get(struct inode *ip, const char *name,
824     void *value, size_t size)
825 {
826 	char *xattr_name;
827 	int error;
828 
829 	if (!capable(CAP_SYS_ADMIN))
830 		return (-EACCES);
831 	/* xattr_resolve_name will do this for us if this is defined */
832 	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
833 	error = zpl_xattr_get(ip, xattr_name, value, size);
834 	kmem_strfree(xattr_name);
835 
836 	return (error);
837 }
838 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
839 
840 static int
841 __zpl_xattr_trusted_set(zidmap_t *user_ns,
842     struct inode *ip, const char *name,
843     const void *value, size_t size, int flags)
844 {
845 	(void) user_ns;
846 	char *xattr_name;
847 	int error;
848 
849 	if (!capable(CAP_SYS_ADMIN))
850 		return (-EACCES);
851 	/* xattr_resolve_name will do this for us if this is defined */
852 	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
853 	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
854 	kmem_strfree(xattr_name);
855 
856 	return (error);
857 }
858 ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
859 
860 static xattr_handler_t zpl_xattr_trusted_handler = {
861 	.prefix	= XATTR_TRUSTED_PREFIX,
862 	.list	= zpl_xattr_trusted_list,
863 	.get	= zpl_xattr_trusted_get,
864 	.set	= zpl_xattr_trusted_set,
865 };
866 
867 /*
868  * Extended security attributes
869  *
870  * "The security attribute namespace is used by kernel security modules,
871  * such as Security Enhanced Linux, and also to implement file
872  * capabilities (see capabilities(7)).  Read and write access
873  * permissions to security attributes depend on the policy implemented
874  * for each security attribute by the security module.  When no security
875  * module is loaded, all processes have read access to extended security
876  * attributes, and write access is limited to processes that have the
877  * CAP_SYS_ADMIN capability." - xattr(7)
878  */
879 static int
880 __zpl_xattr_security_list(struct inode *ip, char *list, size_t list_size,
881     const char *name, size_t name_len)
882 {
883 	return (1);
884 }
885 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_security_list);
886 
887 static int
888 __zpl_xattr_security_get(struct inode *ip, const char *name,
889     void *value, size_t size)
890 {
891 	char *xattr_name;
892 	int error;
893 	/* xattr_resolve_name will do this for us if this is defined */
894 	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
895 	error = zpl_xattr_get(ip, xattr_name, value, size);
896 	kmem_strfree(xattr_name);
897 
898 	return (error);
899 }
900 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
901 
902 static int
903 __zpl_xattr_security_set(zidmap_t *user_ns,
904     struct inode *ip, const char *name,
905     const void *value, size_t size, int flags)
906 {
907 	(void) user_ns;
908 	char *xattr_name;
909 	int error;
910 	/* xattr_resolve_name will do this for us if this is defined */
911 	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
912 	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
913 	kmem_strfree(xattr_name);
914 
915 	return (error);
916 }
917 ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
918 
919 static int
920 zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
921     void *fs_info)
922 {
923 	const struct xattr *xattr;
924 	int error = 0;
925 
926 	for (xattr = xattrs; xattr->name != NULL; xattr++) {
927 		error = __zpl_xattr_security_set(NULL, ip,
928 		    xattr->name, xattr->value, xattr->value_len, 0);
929 
930 		if (error < 0)
931 			break;
932 	}
933 
934 	return (error);
935 }
936 
937 int
938 zpl_xattr_security_init(struct inode *ip, struct inode *dip,
939     const struct qstr *qstr)
940 {
941 	return security_inode_init_security(ip, dip, qstr,
942 	    &zpl_xattr_security_init_impl, NULL);
943 }
944 
945 /*
946  * Security xattr namespace handlers.
947  */
948 static xattr_handler_t zpl_xattr_security_handler = {
949 	.prefix	= XATTR_SECURITY_PREFIX,
950 	.list	= zpl_xattr_security_list,
951 	.get	= zpl_xattr_security_get,
952 	.set	= zpl_xattr_security_set,
953 };
954 
955 /*
956  * Extended system attributes
957  *
958  * "Extended system attributes are used by the kernel to store system
959  * objects such as Access Control Lists.  Read and write access permissions
960  * to system attributes depend on the policy implemented for each system
961  * attribute implemented by filesystems in the kernel." - xattr(7)
962  */
963 #ifdef CONFIG_FS_POSIX_ACL
964 static int
965 zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type)
966 {
967 	char *name, *value = NULL;
968 	int error = 0;
969 	size_t size = 0;
970 
971 	if (S_ISLNK(ip->i_mode))
972 		return (-EOPNOTSUPP);
973 
974 	switch (type) {
975 	case ACL_TYPE_ACCESS:
976 		name = XATTR_NAME_POSIX_ACL_ACCESS;
977 		if (acl) {
978 			umode_t mode = ip->i_mode;
979 			error = posix_acl_equiv_mode(acl, &mode);
980 			if (error < 0) {
981 				return (error);
982 			} else {
983 				/*
984 				 * The mode bits will have been set by
985 				 * ->zfs_setattr()->zfs_acl_chmod_setattr()
986 				 * using the ZFS ACL conversion.  If they
987 				 * differ from the Posix ACL conversion dirty
988 				 * the inode to write the Posix mode bits.
989 				 */
990 				if (ip->i_mode != mode) {
991 					ip->i_mode = ITOZ(ip)->z_mode = mode;
992 					zpl_inode_set_ctime_to_ts(ip,
993 					    current_time(ip));
994 					zfs_mark_inode_dirty(ip);
995 				}
996 
997 				if (error == 0)
998 					acl = NULL;
999 			}
1000 		}
1001 		break;
1002 
1003 	case ACL_TYPE_DEFAULT:
1004 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
1005 		if (!S_ISDIR(ip->i_mode))
1006 			return (acl ? -EACCES : 0);
1007 		break;
1008 
1009 	default:
1010 		return (-EINVAL);
1011 	}
1012 
1013 	if (acl) {
1014 		size = posix_acl_xattr_size(acl->a_count);
1015 		value = kmem_alloc(size, KM_SLEEP);
1016 
1017 		error = zpl_acl_to_xattr(acl, value, size);
1018 		if (error < 0) {
1019 			kmem_free(value, size);
1020 			return (error);
1021 		}
1022 	}
1023 
1024 	error = zpl_xattr_set(ip, name, value, size, 0);
1025 	if (value)
1026 		kmem_free(value, size);
1027 
1028 	if (!error) {
1029 		if (acl)
1030 			set_cached_acl(ip, type, acl);
1031 		else
1032 			forget_cached_acl(ip, type);
1033 	}
1034 
1035 	return (error);
1036 }
1037 
/*
 * Entry point for setting a POSIX ACL.  The signature depends on the
 * interface detected at build time; every variant resolves the target
 * inode and forwards to zpl_set_acl_impl().
 */
int
#ifdef HAVE_SET_ACL_USERNS
zpl_set_acl(struct user_namespace *userns, struct inode *ip,
    struct posix_acl *acl, int type)
#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
zpl_set_acl(struct mnt_idmap *userns, struct dentry *dentry,
    struct posix_acl *acl, int type)
#elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
zpl_set_acl(struct user_namespace *userns, struct dentry *dentry,
    struct posix_acl *acl, int type)
#else
zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
#endif /* HAVE_SET_ACL_USERNS */
{
#if defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2) || \
	defined(HAVE_SET_ACL_IDMAP_DENTRY)
	/* Dentry based variants: take the inode from the dentry. */
	return (zpl_set_acl_impl(d_inode(dentry), acl, type));
#else
	return (zpl_set_acl_impl(ip, acl, type));
#endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 || HAVE_SET_ACL_IDMAP_DENTRY */
}
1060 
1061 static struct posix_acl *
1062 zpl_get_acl_impl(struct inode *ip, int type)
1063 {
1064 	struct posix_acl *acl;
1065 	void *value = NULL;
1066 	char *name;
1067 
1068 	switch (type) {
1069 	case ACL_TYPE_ACCESS:
1070 		name = XATTR_NAME_POSIX_ACL_ACCESS;
1071 		break;
1072 	case ACL_TYPE_DEFAULT:
1073 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
1074 		break;
1075 	default:
1076 		return (ERR_PTR(-EINVAL));
1077 	}
1078 
1079 	int size = zpl_xattr_get(ip, name, NULL, 0);
1080 	if (size > 0) {
1081 		value = kmem_alloc(size, KM_SLEEP);
1082 		size = zpl_xattr_get(ip, name, value, size);
1083 	}
1084 
1085 	if (size > 0) {
1086 		acl = zpl_acl_from_xattr(value, size);
1087 	} else if (size == -ENODATA || size == -ENOSYS) {
1088 		acl = NULL;
1089 	} else {
1090 		acl = ERR_PTR(-EIO);
1091 	}
1092 
1093 	if (size > 0)
1094 		kmem_free(value, size);
1095 
1096 	return (acl);
1097 }
1098 
1099 #if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL)
1100 struct posix_acl *
1101 zpl_get_acl(struct inode *ip, int type, bool rcu)
1102 {
1103 	if (rcu)
1104 		return (ERR_PTR(-ECHILD));
1105 
1106 	return (zpl_get_acl_impl(ip, type));
1107 }
1108 #elif defined(HAVE_GET_ACL)
1109 struct posix_acl *
1110 zpl_get_acl(struct inode *ip, int type)
1111 {
1112 	return (zpl_get_acl_impl(ip, type));
1113 }
1114 #else
1115 #error "Unsupported iops->get_acl() implementation"
1116 #endif /* HAVE_GET_ACL_RCU */
1117 
1118 int
1119 zpl_init_acl(struct inode *ip, struct inode *dir)
1120 {
1121 	struct posix_acl *acl = NULL;
1122 	int error = 0;
1123 
1124 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1125 		return (0);
1126 
1127 	if (!S_ISLNK(ip->i_mode)) {
1128 		acl = zpl_get_acl_impl(dir, ACL_TYPE_DEFAULT);
1129 		if (IS_ERR(acl))
1130 			return (PTR_ERR(acl));
1131 		if (!acl) {
1132 			ITOZ(ip)->z_mode = (ip->i_mode &= ~current_umask());
1133 			zpl_inode_set_ctime_to_ts(ip, current_time(ip));
1134 			zfs_mark_inode_dirty(ip);
1135 			return (0);
1136 		}
1137 	}
1138 
1139 	if (acl) {
1140 		umode_t mode;
1141 
1142 		if (S_ISDIR(ip->i_mode)) {
1143 			error = zpl_set_acl_impl(ip, acl, ACL_TYPE_DEFAULT);
1144 			if (error)
1145 				goto out;
1146 		}
1147 
1148 		mode = ip->i_mode;
1149 		error = __posix_acl_create(&acl, GFP_KERNEL, &mode);
1150 		if (error >= 0) {
1151 			ip->i_mode = ITOZ(ip)->z_mode = mode;
1152 			zfs_mark_inode_dirty(ip);
1153 			if (error > 0) {
1154 				error = zpl_set_acl_impl(ip, acl,
1155 				    ACL_TYPE_ACCESS);
1156 			}
1157 		}
1158 	}
1159 out:
1160 	zpl_posix_acl_release(acl);
1161 
1162 	return (error);
1163 }
1164 
1165 int
1166 zpl_chmod_acl(struct inode *ip)
1167 {
1168 	struct posix_acl *acl;
1169 	int error;
1170 
1171 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1172 		return (0);
1173 
1174 	if (S_ISLNK(ip->i_mode))
1175 		return (-EOPNOTSUPP);
1176 
1177 	acl = zpl_get_acl_impl(ip, ACL_TYPE_ACCESS);
1178 	if (IS_ERR(acl) || !acl)
1179 		return (PTR_ERR(acl));
1180 
1181 	error = __posix_acl_chmod(&acl, GFP_KERNEL, ip->i_mode);
1182 	if (!error)
1183 		error = zpl_set_acl_impl(ip, acl, ACL_TYPE_ACCESS);
1184 
1185 	zpl_posix_acl_release(acl);
1186 
1187 	return (error);
1188 }
1189 
1190 static int
1191 __zpl_xattr_acl_list_access(struct inode *ip, char *list, size_t list_size,
1192     const char *name, size_t name_len)
1193 {
1194 	char *xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
1195 	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_ACCESS);
1196 
1197 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1198 		return (0);
1199 
1200 	if (list && xattr_size <= list_size)
1201 		memcpy(list, xattr_name, xattr_size);
1202 
1203 	return (xattr_size);
1204 }
1205 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_access);
1206 
1207 static int
1208 __zpl_xattr_acl_list_default(struct inode *ip, char *list, size_t list_size,
1209     const char *name, size_t name_len)
1210 {
1211 	char *xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
1212 	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_DEFAULT);
1213 
1214 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1215 		return (0);
1216 
1217 	if (list && xattr_size <= list_size)
1218 		memcpy(list, xattr_name, xattr_size);
1219 
1220 	return (xattr_size);
1221 }
1222 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_default);
1223 
1224 static int
1225 __zpl_xattr_acl_get_access(struct inode *ip, const char *name,
1226     void *buffer, size_t size)
1227 {
1228 	struct posix_acl *acl;
1229 	int type = ACL_TYPE_ACCESS;
1230 	int error;
1231 	/* xattr_resolve_name will do this for us if this is defined */
1232 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1233 		return (-EOPNOTSUPP);
1234 
1235 	acl = zpl_get_acl_impl(ip, type);
1236 	if (IS_ERR(acl))
1237 		return (PTR_ERR(acl));
1238 	if (acl == NULL)
1239 		return (-ENODATA);
1240 
1241 	error = zpl_acl_to_xattr(acl, buffer, size);
1242 	zpl_posix_acl_release(acl);
1243 
1244 	return (error);
1245 }
1246 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_access);
1247 
1248 static int
1249 __zpl_xattr_acl_get_default(struct inode *ip, const char *name,
1250     void *buffer, size_t size)
1251 {
1252 	struct posix_acl *acl;
1253 	int type = ACL_TYPE_DEFAULT;
1254 	int error;
1255 	/* xattr_resolve_name will do this for us if this is defined */
1256 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1257 		return (-EOPNOTSUPP);
1258 
1259 	acl = zpl_get_acl_impl(ip, type);
1260 	if (IS_ERR(acl))
1261 		return (PTR_ERR(acl));
1262 	if (acl == NULL)
1263 		return (-ENODATA);
1264 
1265 	error = zpl_acl_to_xattr(acl, buffer, size);
1266 	zpl_posix_acl_release(acl);
1267 
1268 	return (error);
1269 }
1270 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
1271 
1272 static int
1273 __zpl_xattr_acl_set_access(zidmap_t *mnt_ns,
1274     struct inode *ip, const char *name,
1275     const void *value, size_t size, int flags)
1276 {
1277 	struct posix_acl *acl;
1278 	int type = ACL_TYPE_ACCESS;
1279 	int error = 0;
1280 	/* xattr_resolve_name will do this for us if this is defined */
1281 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1282 		return (-EOPNOTSUPP);
1283 
1284 #if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
1285 	if (!zpl_inode_owner_or_capable(mnt_ns, ip))
1286 		return (-EPERM);
1287 #else
1288 	(void) mnt_ns;
1289 	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
1290 		return (-EPERM);
1291 #endif
1292 
1293 	if (value) {
1294 		acl = zpl_acl_from_xattr(value, size);
1295 		if (IS_ERR(acl))
1296 			return (PTR_ERR(acl));
1297 		else if (acl) {
1298 			error = posix_acl_valid(ip->i_sb->s_user_ns, acl);
1299 			if (error) {
1300 				zpl_posix_acl_release(acl);
1301 				return (error);
1302 			}
1303 		}
1304 	} else {
1305 		acl = NULL;
1306 	}
1307 	error = zpl_set_acl_impl(ip, acl, type);
1308 	zpl_posix_acl_release(acl);
1309 
1310 	return (error);
1311 }
1312 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
1313 
1314 static int
1315 __zpl_xattr_acl_set_default(zidmap_t *mnt_ns,
1316     struct inode *ip, const char *name,
1317     const void *value, size_t size, int flags)
1318 {
1319 	struct posix_acl *acl;
1320 	int type = ACL_TYPE_DEFAULT;
1321 	int error = 0;
1322 	/* xattr_resolve_name will do this for us if this is defined */
1323 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1324 		return (-EOPNOTSUPP);
1325 
1326 #if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
1327 	if (!zpl_inode_owner_or_capable(mnt_ns, ip))
1328 		return (-EPERM);
1329 #else
1330 	(void) mnt_ns;
1331 	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
1332 		return (-EPERM);
1333 #endif
1334 
1335 	if (value) {
1336 		acl = zpl_acl_from_xattr(value, size);
1337 		if (IS_ERR(acl))
1338 			return (PTR_ERR(acl));
1339 		else if (acl) {
1340 			error = posix_acl_valid(ip->i_sb->s_user_ns, acl);
1341 			if (error) {
1342 				zpl_posix_acl_release(acl);
1343 				return (error);
1344 			}
1345 		}
1346 	} else {
1347 		acl = NULL;
1348 	}
1349 
1350 	error = zpl_set_acl_impl(ip, acl, type);
1351 	zpl_posix_acl_release(acl);
1352 
1353 	return (error);
1354 }
1355 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default);
1356 
1357 /*
1358  * ACL access xattr namespace handlers.
1359  *
1360  * Use .name instead of .prefix when available. xattr_resolve_name will match
1361  * whole name and reject anything that has .name only as prefix.
1362  */
1363 static xattr_handler_t zpl_xattr_acl_access_handler = {
1364 	.name	= XATTR_NAME_POSIX_ACL_ACCESS,
1365 	.list	= zpl_xattr_acl_list_access,
1366 	.get	= zpl_xattr_acl_get_access,
1367 	.set	= zpl_xattr_acl_set_access,
1368 	.flags	= ACL_TYPE_ACCESS,
1369 };
1370 
1371 /*
1372  * ACL default xattr namespace handlers.
1373  *
1374  * Use .name instead of .prefix. xattr_resolve_name will match whole name and
1375  * reject anything that has .name only as prefix.
1376  */
1377 static xattr_handler_t zpl_xattr_acl_default_handler = {
1378 	.name	= XATTR_NAME_POSIX_ACL_DEFAULT,
1379 	.list	= zpl_xattr_acl_list_default,
1380 	.get	= zpl_xattr_acl_get_default,
1381 	.set	= zpl_xattr_acl_set_default,
1382 	.flags	= ACL_TYPE_DEFAULT,
1383 };
1384 
1385 #endif /* CONFIG_FS_POSIX_ACL */
1386 
/*
 * NULL-terminated table of the xattr handlers defined in this file.
 * The two POSIX ACL handlers are only present when CONFIG_FS_POSIX_ACL
 * is defined.
 */
xattr_handler_t *zpl_xattr_handlers[] = {
	&zpl_xattr_security_handler,
	&zpl_xattr_trusted_handler,
	&zpl_xattr_user_handler,
#ifdef CONFIG_FS_POSIX_ACL
	&zpl_xattr_acl_access_handler,
	&zpl_xattr_acl_default_handler,
#endif /* CONFIG_FS_POSIX_ACL */
	NULL
};
1397 
1398 static const struct xattr_handler *
1399 zpl_xattr_handler(const char *name)
1400 {
1401 	if (strncmp(name, XATTR_USER_PREFIX,
1402 	    XATTR_USER_PREFIX_LEN) == 0)
1403 		return (&zpl_xattr_user_handler);
1404 
1405 	if (strncmp(name, XATTR_TRUSTED_PREFIX,
1406 	    XATTR_TRUSTED_PREFIX_LEN) == 0)
1407 		return (&zpl_xattr_trusted_handler);
1408 
1409 	if (strncmp(name, XATTR_SECURITY_PREFIX,
1410 	    XATTR_SECURITY_PREFIX_LEN) == 0)
1411 		return (&zpl_xattr_security_handler);
1412 
1413 #ifdef CONFIG_FS_POSIX_ACL
1414 	if (strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
1415 	    sizeof (XATTR_NAME_POSIX_ACL_ACCESS)) == 0)
1416 		return (&zpl_xattr_acl_access_handler);
1417 
1418 	if (strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
1419 	    sizeof (XATTR_NAME_POSIX_ACL_DEFAULT)) == 0)
1420 		return (&zpl_xattr_acl_default_handler);
1421 #endif /* CONFIG_FS_POSIX_ACL */
1422 
1423 	return (NULL);
1424 }
1425 
1426 static enum xattr_permission
1427 zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
1428 {
1429 	const struct xattr_handler *handler;
1430 	struct dentry *d __maybe_unused = xf->dentry;
1431 	enum xattr_permission perm = XAPERM_ALLOW;
1432 
1433 	handler = zpl_xattr_handler(name);
1434 	if (handler == NULL) {
1435 		/* Do not expose FreeBSD system namespace xattrs. */
1436 		if (ZFS_XA_NS_PREFIX_MATCH(FREEBSD, name))
1437 			return (XAPERM_DENY);
1438 		/*
1439 		 * Anything that doesn't match a known namespace gets put in the
1440 		 * user namespace for compatibility with other platforms.
1441 		 */
1442 		perm = XAPERM_COMPAT;
1443 		handler = &zpl_xattr_user_handler;
1444 	}
1445 
1446 	if (handler->list) {
1447 		if (!handler->list(d))
1448 			return (XAPERM_DENY);
1449 	}
1450 
1451 	return (perm);
1452 }
1453 
1454 #ifdef CONFIG_FS_POSIX_ACL
1455 
/*
 * One queued ACL awaiting deferred release; nodes are linked into the
 * list rooted at acl_rel_head.
 */
struct acl_rel_struct {
	struct acl_rel_struct *next;	/* following node, NULL at the tail */
	struct posix_acl *acl;		/* ACL to kfree() once released */
	clock_t time;			/* ddi_get_lbolt() when queued */
};
1461 
/*
 * Timing of deferred ACL frees, expressed in multiples of HZ and compared
 * against ddi_get_lbolt() values.
 *
 * ACL_REL_GRACE  - minimum age of a queued entry before
 *                  zpl_posix_acl_free() will free it.
 * ACL_REL_WINDOW - extra margin added on top of the grace period when
 *                  scheduling the free task.  NOTE(review): presumably so
 *                  entries queued close together are freed in one pass --
 *                  confirm.
 * ACL_REL_SCHED  - delay, relative to an entry's queue time, at which the
 *                  free task is scheduled.
 */
#define	ACL_REL_GRACE	(60*HZ)
#define	ACL_REL_WINDOW	(1*HZ)
#define	ACL_REL_SCHED	(ACL_REL_GRACE+ACL_REL_WINDOW)
1465 
/*
 * Lockless multi-producer single-consumer fifo list.
 * Nodes are added to the tail (zpl_posix_acl_release_impl()) and removed
 * from the head (zpl_posix_acl_free()). The tail pointer is our
 * synchronization point. It always points to the next pointer of the last
 * node, or to head if the list is empty.
 */
static struct acl_rel_struct *acl_rel_head = NULL;
static struct acl_rel_struct **acl_rel_tail = &acl_rel_head;
1474 
/*
 * Consumer side of the deferred ACL release list; dispatched through
 * taskq_dispatch_delay() with a NULL argument (arg is not used).
 *
 * Walks the list from the head.  Every node that has been queued for at
 * least ACL_REL_GRACE is unlinked onto a private freelist.  The walk stops
 * at the first node that is still inside its grace period, in which case
 * the task re-dispatches itself for that node's queue time plus
 * ACL_REL_SCHED.  Finally the ACL and the node itself are freed for
 * everything collected on the freelist.
 */
static void
zpl_posix_acl_free(void *arg)
{
	struct acl_rel_struct *freelist = NULL;
	struct acl_rel_struct *a;
	clock_t new_time;	/* only set, and only read, when refire */
	boolean_t refire = B_FALSE;

	ASSERT3P(acl_rel_head, !=, NULL);
	while (acl_rel_head) {
		a = acl_rel_head;
		if (ddi_get_lbolt() - a->time >= ACL_REL_GRACE) {
			/*
			 * If a is the last node we need to reset tail, but we
			 * need to use cmpxchg to make sure it is still the
			 * last node.
			 */
			if (acl_rel_tail == &a->next) {
				acl_rel_head = NULL;
				if (cmpxchg(&acl_rel_tail, &a->next,
				    &acl_rel_head) == &a->next) {
					ASSERT3P(a->next, ==, NULL);
					a->next = freelist;
					freelist = a;
					break;
				}
			}
			/*
			 * a is not last node, make sure next pointer is set
			 * by the adder and advance the head.
			 */
			while (READ_ONCE(a->next) == NULL)
				cpu_relax();
			acl_rel_head = a->next;
			a->next = freelist;
			freelist = a;
		} else {
			/*
			 * a is still in grace period. We are responsible to
			 * reschedule the free task, since adder will only do
			 * so if list is empty.
			 */
			new_time = a->time + ACL_REL_SCHED;
			refire = B_TRUE;
			break;
		}
	}

	if (refire)
		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
		    NULL, TQ_SLEEP, new_time);

	/* Release everything that was unlinked above. */
	while (freelist) {
		a = freelist;
		freelist = a->next;
		kfree(a->acl);
		kmem_free(a, sizeof (struct acl_rel_struct));
	}
}
1534 
/*
 * Producer side of the deferred ACL release list.
 *
 * Wraps acl in a freshly allocated node stamped with the current
 * ddi_get_lbolt() and appends it to the list.  If the list was empty
 * beforehand, zpl_posix_acl_free() is scheduled to run ACL_REL_SCHED
 * from now; otherwise no task is dispatched here.
 */
void
zpl_posix_acl_release_impl(struct posix_acl *acl)
{
	struct acl_rel_struct *a, **prev;

	a = kmem_alloc(sizeof (struct acl_rel_struct), KM_SLEEP);
	a->next = NULL;
	a->acl = acl;
	a->time = ddi_get_lbolt();
	/* atomically points tail to us and get the previous tail */
	prev = xchg(&acl_rel_tail, &a->next);
	ASSERT3P(*prev, ==, NULL);
	/* link the new node behind the previous tail (or into head) */
	*prev = a;
	/* if it was empty before, schedule the free task */
	if (prev == &acl_rel_head)
		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
		    NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
}
1553 #endif
1554 
/*
 * Module parameter declaration for xattr_compat (integer, ZMOD_RW); the
 * description string below states what the setting controls.
 */
ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,
	"Use legacy ZFS xattr naming for writing new user namespace xattrs");
1557