xref: /freebsd/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c (revision 02e9120893770924227138ba49df1edb3896112a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
23  *
24  * Extended attributes (xattr) on Solaris are implemented as files
25  * which exist in a hidden xattr directory.  These extended attributes
26  * can be accessed using the attropen() system call which opens
27  * the extended attribute.  It can then be manipulated just like
28  * a standard file descriptor.  This has a couple advantages such
29  * as practically no size limit on the file, and the extended
30  * attributes permissions may differ from those of the parent file.
31  * This interface is really quite clever, but it's also completely
32  * different than what is supported on Linux.  It also comes with a
33  * steep performance penalty when accessing small xattrs because they
34  * are not stored with the parent file.
35  *
36  * Under Linux extended attributes are manipulated by the system
37  * calls getxattr(2), setxattr(2), and listxattr(2).  They consider
38  * extended attributes to be name/value pairs where the name is a
39  * NULL terminated string.  The name must also include one of the
40  * following namespace prefixes:
41  *
42  *   user     - No restrictions and is available to user applications.
43  *   trusted  - Restricted to kernel and root (CAP_SYS_ADMIN) use.
44  *   system   - Used for access control lists (system.nfs4_acl, etc).
45  *   security - Used by SELinux to store a file's security context.
46  *
47  * The value under Linux is limited to 65536 bytes of binary data.
48  * In practice, individual xattrs tend to be much smaller than this
49  * and are typically less than 100 bytes.  A good example of this
50  * are the security.selinux xattrs which are less than 100 bytes and
51  * exist for every file when xattr labeling is enabled.
52  *
53  * The Linux xattr implementation has been written to take advantage of
54  * this typical usage.  When the dataset property 'xattr=sa' is set,
55  * then xattrs will be preferentially stored as System Attributes (SA).
56  * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
57  * up to 64k of xattrs to be stored in the spill block.  If additional
58  * xattr space is required, which is unlikely under Linux, they will
59  * be stored using the traditional directory approach.
60  *
61  * This optimization results in roughly a 3x performance improvement
62  * when accessing xattrs because it avoids the need to perform a seek
63  * for every xattr value.  When multiple xattrs are stored per-file
64  * the performance improvements are even greater because all of the
65  * xattrs stored in the spill block will be cached.
66  *
67  * However, by default SA based xattrs are disabled in the Linux port
68  * to maximize compatibility with other implementations.  If you do
69  * enable SA based xattrs then they will not be visible on platforms
70  * which do not support this feature.
71  *
72  * NOTE: One additional consequence of the xattr directory implementation
73  * is that when an extended attribute is manipulated an inode is created.
74  * This inode will exist in the Linux inode cache but there will be no
75  * associated entry in the dentry cache which references it.  This is
76  * safe but it may result in some confusion.  Enabling SA based xattrs
77  * largely avoids the issue except in the overflow case.
78  */
79 
80 #include <sys/zfs_znode.h>
81 #include <sys/zfs_vfsops.h>
82 #include <sys/zfs_vnops.h>
83 #include <sys/zap.h>
84 #include <sys/vfs.h>
85 #include <sys/zpl.h>
86 #include <linux/vfs_compat.h>
87 
/*
 * Verdict returned by zpl_xattr_permission() for a stored xattr name
 * while a listing is being built by zpl_xattr_filldir().
 */
enum xattr_permission {
	XAPERM_DENY = 0,	/* name is skipped, nothing is emitted */
	XAPERM_ALLOW = 1,	/* name is emitted exactly as stored */
	XAPERM_COMPAT = 2,	/* name is emitted with "user." prepended */
};
93 
/*
 * State threaded through the xattr listing helpers.  The member order
 * is relied upon by positional initializers, do not rearrange it.
 */
struct xattr_filldir {
	size_t size;		/* capacity of buf, in bytes */
	size_t offset;		/* bytes emitted (or required) so far */
	char *buf;		/* output buffer, NULL to only compute size */
	struct dentry *dentry;	/* dentry whose xattrs are being listed */
};
typedef struct xattr_filldir xattr_filldir_t;
100 
101 static enum xattr_permission zpl_xattr_permission(xattr_filldir_t *,
102     const char *, int);
103 
104 static int zfs_xattr_compat = 0;
105 
106 /*
107  * Determine is a given xattr name should be visible and if so copy it
108  * in to the provided buffer (xf->buf).
109  */
110 static int
111 zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)
112 {
113 	enum xattr_permission perm;
114 
115 	/* Check permissions using the per-namespace list xattr handler. */
116 	perm = zpl_xattr_permission(xf, name, name_len);
117 	if (perm == XAPERM_DENY)
118 		return (0);
119 
120 	/* Prefix the name with "user." if it does not have a namespace. */
121 	if (perm == XAPERM_COMPAT) {
122 		if (xf->buf) {
123 			if (xf->offset + XATTR_USER_PREFIX_LEN + 1 > xf->size)
124 				return (-ERANGE);
125 
126 			memcpy(xf->buf + xf->offset, XATTR_USER_PREFIX,
127 			    XATTR_USER_PREFIX_LEN);
128 			xf->buf[xf->offset + XATTR_USER_PREFIX_LEN] = '\0';
129 		}
130 
131 		xf->offset += XATTR_USER_PREFIX_LEN;
132 	}
133 
134 	/* When xf->buf is NULL only calculate the required size. */
135 	if (xf->buf) {
136 		if (xf->offset + name_len + 1 > xf->size)
137 			return (-ERANGE);
138 
139 		memcpy(xf->buf + xf->offset, name, name_len);
140 		xf->buf[xf->offset + name_len] = '\0';
141 	}
142 
143 	xf->offset += (name_len + 1);
144 
145 	return (0);
146 }
147 
148 /*
149  * Read as many directory entry names as will fit in to the provided buffer,
150  * or when no buffer is provided calculate the required buffer size.
151  */
152 static int
153 zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf)
154 {
155 	zap_cursor_t zc;
156 	zap_attribute_t	zap;
157 	int error;
158 
159 	zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id);
160 
161 	while ((error = -zap_cursor_retrieve(&zc, &zap)) == 0) {
162 
163 		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
164 			error = -ENXIO;
165 			break;
166 		}
167 
168 		error = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name));
169 		if (error)
170 			break;
171 
172 		zap_cursor_advance(&zc);
173 	}
174 
175 	zap_cursor_fini(&zc);
176 
177 	if (error == -ENOENT)
178 		error = 0;
179 
180 	return (error);
181 }
182 
183 static ssize_t
184 zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr)
185 {
186 	struct inode *ip = xf->dentry->d_inode;
187 	struct inode *dxip = NULL;
188 	znode_t *dxzp;
189 	int error;
190 
191 	/* Lookup the xattr directory */
192 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
193 	    cr, NULL, NULL);
194 	if (error) {
195 		if (error == -ENOENT)
196 			error = 0;
197 
198 		return (error);
199 	}
200 
201 	dxip = ZTOI(dxzp);
202 	error = zpl_xattr_readdir(dxip, xf);
203 	iput(dxip);
204 
205 	return (error);
206 }
207 
208 static ssize_t
209 zpl_xattr_list_sa(xattr_filldir_t *xf)
210 {
211 	znode_t *zp = ITOZ(xf->dentry->d_inode);
212 	nvpair_t *nvp = NULL;
213 	int error = 0;
214 
215 	mutex_enter(&zp->z_lock);
216 	if (zp->z_xattr_cached == NULL)
217 		error = -zfs_sa_get_xattr(zp);
218 	mutex_exit(&zp->z_lock);
219 
220 	if (error)
221 		return (error);
222 
223 	ASSERT(zp->z_xattr_cached);
224 
225 	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
226 		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
227 
228 		error = zpl_xattr_filldir(xf, nvpair_name(nvp),
229 		    strlen(nvpair_name(nvp)));
230 		if (error)
231 			return (error);
232 	}
233 
234 	return (0);
235 }
236 
237 ssize_t
238 zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
239 {
240 	znode_t *zp = ITOZ(dentry->d_inode);
241 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
242 	xattr_filldir_t xf = { buffer_size, 0, buffer, dentry };
243 	cred_t *cr = CRED();
244 	fstrans_cookie_t cookie;
245 	int error = 0;
246 
247 	crhold(cr);
248 	cookie = spl_fstrans_mark();
249 	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
250 		goto out1;
251 	rw_enter(&zp->z_xattr_lock, RW_READER);
252 
253 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
254 		error = zpl_xattr_list_sa(&xf);
255 		if (error)
256 			goto out;
257 	}
258 
259 	error = zpl_xattr_list_dir(&xf, cr);
260 	if (error)
261 		goto out;
262 
263 	error = xf.offset;
264 out:
265 
266 	rw_exit(&zp->z_xattr_lock);
267 	zpl_exit(zfsvfs, FTAG);
268 out1:
269 	spl_fstrans_unmark(cookie);
270 	crfree(cr);
271 
272 	return (error);
273 }
274 
275 static int
276 zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
277     size_t size, cred_t *cr)
278 {
279 	fstrans_cookie_t cookie;
280 	struct inode *xip = NULL;
281 	znode_t *dxzp = NULL;
282 	znode_t *xzp = NULL;
283 	int error;
284 
285 	/* Lookup the xattr directory */
286 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, LOOKUP_XATTR,
287 	    cr, NULL, NULL);
288 	if (error)
289 		goto out;
290 
291 	/* Lookup a specific xattr name in the directory */
292 	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
293 	if (error)
294 		goto out;
295 
296 	xip = ZTOI(xzp);
297 	if (!size) {
298 		error = i_size_read(xip);
299 		goto out;
300 	}
301 
302 	if (size < i_size_read(xip)) {
303 		error = -ERANGE;
304 		goto out;
305 	}
306 
307 	struct iovec iov;
308 	iov.iov_base = (void *)value;
309 	iov.iov_len = size;
310 
311 	zfs_uio_t uio;
312 	zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0);
313 
314 	cookie = spl_fstrans_mark();
315 	error = -zfs_read(ITOZ(xip), &uio, 0, cr);
316 	spl_fstrans_unmark(cookie);
317 
318 	if (error == 0)
319 		error = size - zfs_uio_resid(&uio);
320 out:
321 	if (xzp)
322 		zrele(xzp);
323 
324 	if (dxzp)
325 		zrele(dxzp);
326 
327 	return (error);
328 }
329 
330 static int
331 zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
332 {
333 	znode_t *zp = ITOZ(ip);
334 	uchar_t *nv_value;
335 	uint_t nv_size;
336 	int error = 0;
337 
338 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
339 
340 	mutex_enter(&zp->z_lock);
341 	if (zp->z_xattr_cached == NULL)
342 		error = -zfs_sa_get_xattr(zp);
343 	mutex_exit(&zp->z_lock);
344 
345 	if (error)
346 		return (error);
347 
348 	ASSERT(zp->z_xattr_cached);
349 	error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name,
350 	    &nv_value, &nv_size);
351 	if (error)
352 		return (error);
353 
354 	if (size == 0 || value == NULL)
355 		return (nv_size);
356 
357 	if (size < nv_size)
358 		return (-ERANGE);
359 
360 	memcpy(value, nv_value, nv_size);
361 
362 	return (nv_size);
363 }
364 
365 static int
366 __zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
367     cred_t *cr)
368 {
369 	znode_t *zp = ITOZ(ip);
370 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
371 	int error;
372 
373 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
374 
375 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
376 		error = zpl_xattr_get_sa(ip, name, value, size);
377 		if (error != -ENOENT)
378 			goto out;
379 	}
380 
381 	error = zpl_xattr_get_dir(ip, name, value, size, cr);
382 out:
383 	if (error == -ENOENT)
384 		error = -ENODATA;
385 
386 	return (error);
387 }
388 
389 #define	XATTR_NOENT	0x0
390 #define	XATTR_IN_SA	0x1
391 #define	XATTR_IN_DIR	0x2
392 /* check where the xattr resides */
393 static int
394 __zpl_xattr_where(struct inode *ip, const char *name, int *where, cred_t *cr)
395 {
396 	znode_t *zp = ITOZ(ip);
397 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
398 	int error;
399 
400 	ASSERT(where);
401 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
402 
403 	*where = XATTR_NOENT;
404 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
405 		error = zpl_xattr_get_sa(ip, name, NULL, 0);
406 		if (error >= 0)
407 			*where |= XATTR_IN_SA;
408 		else if (error != -ENOENT)
409 			return (error);
410 	}
411 
412 	error = zpl_xattr_get_dir(ip, name, NULL, 0, cr);
413 	if (error >= 0)
414 		*where |= XATTR_IN_DIR;
415 	else if (error != -ENOENT)
416 		return (error);
417 
418 	if (*where == (XATTR_IN_SA|XATTR_IN_DIR))
419 		cmn_err(CE_WARN, "ZFS: inode %p has xattr \"%s\""
420 		    " in both SA and dir", ip, name);
421 	if (*where == XATTR_NOENT)
422 		error = -ENODATA;
423 	else
424 		error = 0;
425 	return (error);
426 }
427 
428 static int
429 zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
430 {
431 	znode_t *zp = ITOZ(ip);
432 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
433 	cred_t *cr = CRED();
434 	fstrans_cookie_t cookie;
435 	int error;
436 
437 	crhold(cr);
438 	cookie = spl_fstrans_mark();
439 	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
440 		goto out;
441 	rw_enter(&zp->z_xattr_lock, RW_READER);
442 	error = __zpl_xattr_get(ip, name, value, size, cr);
443 	rw_exit(&zp->z_xattr_lock);
444 	zpl_exit(zfsvfs, FTAG);
445 out:
446 	spl_fstrans_unmark(cookie);
447 	crfree(cr);
448 
449 	return (error);
450 }
451 
452 static int
453 zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
454     size_t size, int flags, cred_t *cr)
455 {
456 	znode_t *dxzp = NULL;
457 	znode_t *xzp = NULL;
458 	vattr_t *vap = NULL;
459 	int lookup_flags, error;
460 	const int xattr_mode = S_IFREG | 0644;
461 	loff_t pos = 0;
462 
463 	/*
464 	 * Lookup the xattr directory.  When we're adding an entry pass
465 	 * CREATE_XATTR_DIR to ensure the xattr directory is created.
466 	 * When removing an entry this flag is not passed to avoid
467 	 * unnecessarily creating a new xattr directory.
468 	 */
469 	lookup_flags = LOOKUP_XATTR;
470 	if (value != NULL)
471 		lookup_flags |= CREATE_XATTR_DIR;
472 
473 	error = -zfs_lookup(ITOZ(ip), NULL, &dxzp, lookup_flags,
474 	    cr, NULL, NULL);
475 	if (error)
476 		goto out;
477 
478 	/* Lookup a specific xattr name in the directory */
479 	error = -zfs_lookup(dxzp, (char *)name, &xzp, 0, cr, NULL, NULL);
480 	if (error && (error != -ENOENT))
481 		goto out;
482 
483 	error = 0;
484 
485 	/* Remove a specific name xattr when value is set to NULL. */
486 	if (value == NULL) {
487 		if (xzp)
488 			error = -zfs_remove(dxzp, (char *)name, cr, 0);
489 
490 		goto out;
491 	}
492 
493 	/* Lookup failed create a new xattr. */
494 	if (xzp == NULL) {
495 		vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
496 		vap->va_mode = xattr_mode;
497 		vap->va_mask = ATTR_MODE;
498 		vap->va_uid = crgetuid(cr);
499 		vap->va_gid = crgetgid(cr);
500 
501 		error = -zfs_create(dxzp, (char *)name, vap, 0, 0644, &xzp,
502 		    cr, ATTR_NOACLCHECK, NULL, zfs_init_idmap);
503 		if (error)
504 			goto out;
505 	}
506 
507 	ASSERT(xzp != NULL);
508 
509 	error = -zfs_freesp(xzp, 0, 0, xattr_mode, TRUE);
510 	if (error)
511 		goto out;
512 
513 	error = -zfs_write_simple(xzp, value, size, pos, NULL);
514 out:
515 	if (error == 0) {
516 		zpl_inode_set_ctime_to_ts(ip, current_time(ip));
517 		zfs_mark_inode_dirty(ip);
518 	}
519 
520 	if (vap)
521 		kmem_free(vap, sizeof (vattr_t));
522 
523 	if (xzp)
524 		zrele(xzp);
525 
526 	if (dxzp)
527 		zrele(dxzp);
528 
529 	if (error == -ENOENT)
530 		error = -ENODATA;
531 
532 	ASSERT3S(error, <=, 0);
533 
534 	return (error);
535 }
536 
537 static int
538 zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
539     size_t size, int flags, cred_t *cr)
540 {
541 	znode_t *zp = ITOZ(ip);
542 	nvlist_t *nvl;
543 	size_t sa_size;
544 	int error = 0;
545 
546 	mutex_enter(&zp->z_lock);
547 	if (zp->z_xattr_cached == NULL)
548 		error = -zfs_sa_get_xattr(zp);
549 	mutex_exit(&zp->z_lock);
550 
551 	if (error)
552 		return (error);
553 
554 	ASSERT(zp->z_xattr_cached);
555 	nvl = zp->z_xattr_cached;
556 
557 	if (value == NULL) {
558 		error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
559 		if (error == -ENOENT)
560 			error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr);
561 	} else {
562 		/* Limited to 32k to keep nvpair memory allocations small */
563 		if (size > DXATTR_MAX_ENTRY_SIZE)
564 			return (-EFBIG);
565 
566 		/* Prevent the DXATTR SA from consuming the entire SA region */
567 		error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
568 		if (error)
569 			return (error);
570 
571 		if (sa_size > DXATTR_MAX_SA_SIZE)
572 			return (-EFBIG);
573 
574 		error = -nvlist_add_byte_array(nvl, name,
575 		    (uchar_t *)value, size);
576 	}
577 
578 	/*
579 	 * Update the SA for additions, modifications, and removals. On
580 	 * error drop the inconsistent cached version of the nvlist, it
581 	 * will be reconstructed from the ARC when next accessed.
582 	 */
583 	if (error == 0)
584 		error = -zfs_sa_set_xattr(zp, name, value, size);
585 
586 	if (error) {
587 		nvlist_free(nvl);
588 		zp->z_xattr_cached = NULL;
589 	}
590 
591 	ASSERT3S(error, <=, 0);
592 
593 	return (error);
594 }
595 
596 static int
597 zpl_xattr_set(struct inode *ip, const char *name, const void *value,
598     size_t size, int flags)
599 {
600 	znode_t *zp = ITOZ(ip);
601 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
602 	cred_t *cr = CRED();
603 	fstrans_cookie_t cookie;
604 	int where;
605 	int error;
606 
607 	crhold(cr);
608 	cookie = spl_fstrans_mark();
609 	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
610 		goto out1;
611 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
612 
613 	/*
614 	 * Before setting the xattr check to see if it already exists.
615 	 * This is done to ensure the following optional flags are honored.
616 	 *
617 	 *   XATTR_CREATE: fail if xattr already exists
618 	 *   XATTR_REPLACE: fail if xattr does not exist
619 	 *
620 	 * We also want to know if it resides in sa or dir, so we can make
621 	 * sure we don't end up with duplicate in both places.
622 	 */
623 	error = __zpl_xattr_where(ip, name, &where, cr);
624 	if (error < 0) {
625 		if (error != -ENODATA)
626 			goto out;
627 		if (flags & XATTR_REPLACE)
628 			goto out;
629 
630 		/* The xattr to be removed already doesn't exist */
631 		error = 0;
632 		if (value == NULL)
633 			goto out;
634 	} else {
635 		error = -EEXIST;
636 		if (flags & XATTR_CREATE)
637 			goto out;
638 	}
639 
640 	/* Preferentially store the xattr as a SA for better performance */
641 	if (zfsvfs->z_use_sa && zp->z_is_sa &&
642 	    (zfsvfs->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
643 		error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
644 		if (error == 0) {
645 			/*
646 			 * Successfully put into SA, we need to clear the one
647 			 * in dir.
648 			 */
649 			if (where & XATTR_IN_DIR)
650 				zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr);
651 			goto out;
652 		}
653 	}
654 
655 	error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
656 	/*
657 	 * Successfully put into dir, we need to clear the one in SA.
658 	 */
659 	if (error == 0 && (where & XATTR_IN_SA))
660 		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
661 out:
662 	rw_exit(&zp->z_xattr_lock);
663 	zpl_exit(zfsvfs, FTAG);
664 out1:
665 	spl_fstrans_unmark(cookie);
666 	crfree(cr);
667 	ASSERT3S(error, <=, 0);
668 
669 	return (error);
670 }
671 
672 /*
673  * Extended user attributes
674  *
675  * "Extended user attributes may be assigned to files and directories for
676  * storing arbitrary additional information such as the mime type,
677  * character set or encoding of a file.  The access permissions for user
678  * attributes are defined by the file permission bits: read permission
679  * is required to retrieve the attribute value, and write permission is
680  * required to change it.
681  *
682  * The file permission bits of regular files and directories are
683  * interpreted differently from the file permission bits of special
684  * files and symbolic links.  For regular files and directories the file
685  * permission bits define access to the file's contents, while for
686  * device special files they define access to the device described by
687  * the special file.  The file permissions of symbolic links are not
688  * used in access checks.  These differences would allow users to
689  * consume filesystem resources in a way not controllable by disk quotas
690  * for group or world writable special files and directories.
691  *
692  * For this reason, extended user attributes are allowed only for
693  * regular files and directories, and access to extended user attributes
694  * is restricted to the owner and to users with appropriate capabilities
695  * for directories with the sticky bit set (see the chmod(1) manual page
696  * for an explanation of the sticky bit)." - xattr(7)
697  *
698  * ZFS allows extended user attributes to be disabled administratively
699  * by setting the 'xattr=off' property on the dataset.
700  */
701 static int
702 __zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,
703     const char *name, size_t name_len)
704 {
705 	return (ITOZSB(ip)->z_flags & ZSB_XATTR);
706 }
707 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_user_list);
708 
709 static int
710 __zpl_xattr_user_get(struct inode *ip, const char *name,
711     void *value, size_t size)
712 {
713 	int error;
714 	/* xattr_resolve_name will do this for us if this is defined */
715 #ifndef HAVE_XATTR_HANDLER_NAME
716 	if (strcmp(name, "") == 0)
717 		return (-EINVAL);
718 #endif
719 	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
720 		return (-EINVAL);
721 	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
722 		return (-EOPNOTSUPP);
723 
724 	/*
725 	 * Try to look up the name with the namespace prefix first for
726 	 * compatibility with xattrs from this platform.  If that fails,
727 	 * try again without the namespace prefix for compatibility with
728 	 * other platforms.
729 	 */
730 	char *xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
731 	error = zpl_xattr_get(ip, xattr_name, value, size);
732 	kmem_strfree(xattr_name);
733 	if (error == -ENODATA)
734 		error = zpl_xattr_get(ip, name, value, size);
735 
736 	return (error);
737 }
738 ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
739 
740 static int
741 __zpl_xattr_user_set(zidmap_t *user_ns,
742     struct inode *ip, const char *name,
743     const void *value, size_t size, int flags)
744 {
745 	(void) user_ns;
746 	int error = 0;
747 	/* xattr_resolve_name will do this for us if this is defined */
748 #ifndef HAVE_XATTR_HANDLER_NAME
749 	if (strcmp(name, "") == 0)
750 		return (-EINVAL);
751 #endif
752 	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
753 		return (-EINVAL);
754 	if (!(ITOZSB(ip)->z_flags & ZSB_XATTR))
755 		return (-EOPNOTSUPP);
756 
757 	/*
758 	 * Remove alternate compat version of the xattr so we only set the
759 	 * version specified by the zfs_xattr_compat tunable.
760 	 *
761 	 * The following flags must be handled correctly:
762 	 *
763 	 *   XATTR_CREATE: fail if xattr already exists
764 	 *   XATTR_REPLACE: fail if xattr does not exist
765 	 */
766 	char *prefixed_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
767 	const char *clear_name, *set_name;
768 	if (zfs_xattr_compat) {
769 		clear_name = prefixed_name;
770 		set_name = name;
771 	} else {
772 		clear_name = name;
773 		set_name = prefixed_name;
774 	}
775 	/*
776 	 * Clear the old value with the alternative name format, if it exists.
777 	 */
778 	error = zpl_xattr_set(ip, clear_name, NULL, 0, flags);
779 	/*
780 	 * XATTR_CREATE was specified and we failed to clear the xattr
781 	 * because it already exists.  Stop here.
782 	 */
783 	if (error == -EEXIST)
784 		goto out;
785 	/*
786 	 * If XATTR_REPLACE was specified and we succeeded to clear
787 	 * an xattr, we don't need to replace anything when setting
788 	 * the new value.  If we failed with -ENODATA that's fine,
789 	 * there was nothing to be cleared and we can ignore the error.
790 	 */
791 	if (error == 0)
792 		flags &= ~XATTR_REPLACE;
793 	/*
794 	 * Set the new value with the configured name format.
795 	 */
796 	error = zpl_xattr_set(ip, set_name, value, size, flags);
797 out:
798 	kmem_strfree(prefixed_name);
799 	return (error);
800 }
801 ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
802 
803 static xattr_handler_t zpl_xattr_user_handler =
804 {
805 	.prefix	= XATTR_USER_PREFIX,
806 	.list	= zpl_xattr_user_list,
807 	.get	= zpl_xattr_user_get,
808 	.set	= zpl_xattr_user_set,
809 };
810 
811 /*
812  * Trusted extended attributes
813  *
814  * "Trusted extended attributes are visible and accessible only to
815  * processes that have the CAP_SYS_ADMIN capability.  Attributes in this
816  * class are used to implement mechanisms in user space (i.e., outside
817  * the kernel) which keep information in extended attributes to which
818  * ordinary processes should not have access." - xattr(7)
819  */
820 static int
821 __zpl_xattr_trusted_list(struct inode *ip, char *list, size_t list_size,
822     const char *name, size_t name_len)
823 {
824 	return (capable(CAP_SYS_ADMIN));
825 }
826 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_trusted_list);
827 
828 static int
829 __zpl_xattr_trusted_get(struct inode *ip, const char *name,
830     void *value, size_t size)
831 {
832 	char *xattr_name;
833 	int error;
834 
835 	if (!capable(CAP_SYS_ADMIN))
836 		return (-EACCES);
837 	/* xattr_resolve_name will do this for us if this is defined */
838 #ifndef HAVE_XATTR_HANDLER_NAME
839 	if (strcmp(name, "") == 0)
840 		return (-EINVAL);
841 #endif
842 	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
843 	error = zpl_xattr_get(ip, xattr_name, value, size);
844 	kmem_strfree(xattr_name);
845 
846 	return (error);
847 }
848 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
849 
850 static int
851 __zpl_xattr_trusted_set(zidmap_t *user_ns,
852     struct inode *ip, const char *name,
853     const void *value, size_t size, int flags)
854 {
855 	(void) user_ns;
856 	char *xattr_name;
857 	int error;
858 
859 	if (!capable(CAP_SYS_ADMIN))
860 		return (-EACCES);
861 	/* xattr_resolve_name will do this for us if this is defined */
862 #ifndef HAVE_XATTR_HANDLER_NAME
863 	if (strcmp(name, "") == 0)
864 		return (-EINVAL);
865 #endif
866 	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
867 	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
868 	kmem_strfree(xattr_name);
869 
870 	return (error);
871 }
872 ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
873 
874 static xattr_handler_t zpl_xattr_trusted_handler = {
875 	.prefix	= XATTR_TRUSTED_PREFIX,
876 	.list	= zpl_xattr_trusted_list,
877 	.get	= zpl_xattr_trusted_get,
878 	.set	= zpl_xattr_trusted_set,
879 };
880 
881 /*
882  * Extended security attributes
883  *
884  * "The security attribute namespace is used by kernel security modules,
885  * such as Security Enhanced Linux, and also to implement file
886  * capabilities (see capabilities(7)).  Read and write access
887  * permissions to security attributes depend on the policy implemented
888  * for each security attribute by the security module.  When no security
889  * module is loaded, all processes have read access to extended security
890  * attributes, and write access is limited to processes that have the
891  * CAP_SYS_ADMIN capability." - xattr(7)
892  */
/*
 * List filter for the "security." namespace.  Always answers 1, so
 * names in this namespace are never hidden by this filter; none of the
 * arguments are consulted.
 */
static int
__zpl_xattr_security_list(struct inode *ip, char *list, size_t list_size,
    const char *name, size_t name_len)
{
	const int visible = 1;

	return (visible);
}
ZPL_XATTR_LIST_WRAPPER(zpl_xattr_security_list);
900 
901 static int
902 __zpl_xattr_security_get(struct inode *ip, const char *name,
903     void *value, size_t size)
904 {
905 	char *xattr_name;
906 	int error;
907 	/* xattr_resolve_name will do this for us if this is defined */
908 #ifndef HAVE_XATTR_HANDLER_NAME
909 	if (strcmp(name, "") == 0)
910 		return (-EINVAL);
911 #endif
912 	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
913 	error = zpl_xattr_get(ip, xattr_name, value, size);
914 	kmem_strfree(xattr_name);
915 
916 	return (error);
917 }
918 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
919 
920 static int
921 __zpl_xattr_security_set(zidmap_t *user_ns,
922     struct inode *ip, const char *name,
923     const void *value, size_t size, int flags)
924 {
925 	(void) user_ns;
926 	char *xattr_name;
927 	int error;
928 	/* xattr_resolve_name will do this for us if this is defined */
929 #ifndef HAVE_XATTR_HANDLER_NAME
930 	if (strcmp(name, "") == 0)
931 		return (-EINVAL);
932 #endif
933 	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
934 	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
935 	kmem_strfree(xattr_name);
936 
937 	return (error);
938 }
939 ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
940 
941 static int
942 zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
943     void *fs_info)
944 {
945 	const struct xattr *xattr;
946 	int error = 0;
947 
948 	for (xattr = xattrs; xattr->name != NULL; xattr++) {
949 		error = __zpl_xattr_security_set(NULL, ip,
950 		    xattr->name, xattr->value, xattr->value_len, 0);
951 
952 		if (error < 0)
953 			break;
954 	}
955 
956 	return (error);
957 }
958 
959 int
960 zpl_xattr_security_init(struct inode *ip, struct inode *dip,
961     const struct qstr *qstr)
962 {
963 	return security_inode_init_security(ip, dip, qstr,
964 	    &zpl_xattr_security_init_impl, NULL);
965 }
966 
967 /*
968  * Security xattr namespace handlers.
969  */
970 static xattr_handler_t zpl_xattr_security_handler = {
971 	.prefix	= XATTR_SECURITY_PREFIX,
972 	.list	= zpl_xattr_security_list,
973 	.get	= zpl_xattr_security_get,
974 	.set	= zpl_xattr_security_set,
975 };
976 
977 /*
978  * Extended system attributes
979  *
980  * "Extended system attributes are used by the kernel to store system
981  * objects such as Access Control Lists.  Read and write access permissions
982  * to system attributes depend on the policy implemented for each system
983  * attribute implemented by filesystems in the kernel." - xattr(7)
984  */
985 #ifdef CONFIG_FS_POSIX_ACL
986 static int
987 zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type)
988 {
989 	char *name, *value = NULL;
990 	int error = 0;
991 	size_t size = 0;
992 
993 	if (S_ISLNK(ip->i_mode))
994 		return (-EOPNOTSUPP);
995 
996 	switch (type) {
997 	case ACL_TYPE_ACCESS:
998 		name = XATTR_NAME_POSIX_ACL_ACCESS;
999 		if (acl) {
1000 			umode_t mode = ip->i_mode;
1001 			error = posix_acl_equiv_mode(acl, &mode);
1002 			if (error < 0) {
1003 				return (error);
1004 			} else {
1005 				/*
1006 				 * The mode bits will have been set by
1007 				 * ->zfs_setattr()->zfs_acl_chmod_setattr()
1008 				 * using the ZFS ACL conversion.  If they
1009 				 * differ from the Posix ACL conversion dirty
1010 				 * the inode to write the Posix mode bits.
1011 				 */
1012 				if (ip->i_mode != mode) {
1013 					ip->i_mode = ITOZ(ip)->z_mode = mode;
1014 					zpl_inode_set_ctime_to_ts(ip,
1015 					    current_time(ip));
1016 					zfs_mark_inode_dirty(ip);
1017 				}
1018 
1019 				if (error == 0)
1020 					acl = NULL;
1021 			}
1022 		}
1023 		break;
1024 
1025 	case ACL_TYPE_DEFAULT:
1026 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
1027 		if (!S_ISDIR(ip->i_mode))
1028 			return (acl ? -EACCES : 0);
1029 		break;
1030 
1031 	default:
1032 		return (-EINVAL);
1033 	}
1034 
1035 	if (acl) {
1036 		size = posix_acl_xattr_size(acl->a_count);
1037 		value = kmem_alloc(size, KM_SLEEP);
1038 
1039 		error = zpl_acl_to_xattr(acl, value, size);
1040 		if (error < 0) {
1041 			kmem_free(value, size);
1042 			return (error);
1043 		}
1044 	}
1045 
1046 	error = zpl_xattr_set(ip, name, value, size, 0);
1047 	if (value)
1048 		kmem_free(value, size);
1049 
1050 	if (!error) {
1051 		if (acl)
1052 			zpl_set_cached_acl(ip, type, acl);
1053 		else
1054 			zpl_forget_cached_acl(ip, type);
1055 	}
1056 
1057 	return (error);
1058 }
1059 
#ifdef HAVE_SET_ACL
/*
 * set_acl entry point.  The prototype depends on which kernel interface was
 * detected at configure time; every variant resolves the target inode and
 * forwards to zpl_set_acl_impl().  The user namespace / idmap argument,
 * where present, is accepted but not used here.
 */
int
#if defined(HAVE_SET_ACL_USERNS)
zpl_set_acl(struct user_namespace *userns, struct inode *ip,
    struct posix_acl *acl, int type)
#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
zpl_set_acl(struct mnt_idmap *userns, struct dentry *dentry,
    struct posix_acl *acl, int type)
#elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
zpl_set_acl(struct user_namespace *userns, struct dentry *dentry,
    struct posix_acl *acl, int type)
#else
zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
#endif /* HAVE_SET_ACL_USERNS */
{
#if defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2) || \
    defined(HAVE_SET_ACL_IDMAP_DENTRY)
	/* Dentry-based prototypes: the inode comes from the dentry. */
	struct inode *ip = d_inode(dentry);
#endif

	return (zpl_set_acl_impl(ip, acl, type));
}
#endif /* HAVE_SET_ACL */
1084 
1085 static struct posix_acl *
1086 zpl_get_acl_impl(struct inode *ip, int type)
1087 {
1088 	struct posix_acl *acl;
1089 	void *value = NULL;
1090 	char *name;
1091 
1092 	/*
1093 	 * As of Linux 3.14, the kernel get_acl will check this for us.
1094 	 * Also as of Linux 4.7, comparing against ACL_NOT_CACHED is wrong
1095 	 * as the kernel get_acl will set it to temporary sentinel value.
1096 	 */
1097 #ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
1098 	acl = get_cached_acl(ip, type);
1099 	if (acl != ACL_NOT_CACHED)
1100 		return (acl);
1101 #endif
1102 
1103 	switch (type) {
1104 	case ACL_TYPE_ACCESS:
1105 		name = XATTR_NAME_POSIX_ACL_ACCESS;
1106 		break;
1107 	case ACL_TYPE_DEFAULT:
1108 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
1109 		break;
1110 	default:
1111 		return (ERR_PTR(-EINVAL));
1112 	}
1113 
1114 	int size = zpl_xattr_get(ip, name, NULL, 0);
1115 	if (size > 0) {
1116 		value = kmem_alloc(size, KM_SLEEP);
1117 		size = zpl_xattr_get(ip, name, value, size);
1118 	}
1119 
1120 	if (size > 0) {
1121 		acl = zpl_acl_from_xattr(value, size);
1122 	} else if (size == -ENODATA || size == -ENOSYS) {
1123 		acl = NULL;
1124 	} else {
1125 		acl = ERR_PTR(-EIO);
1126 	}
1127 
1128 	if (size > 0)
1129 		kmem_free(value, size);
1130 
1131 	/* As of Linux 4.7, the kernel get_acl will set this for us */
1132 #ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
1133 	if (!IS_ERR(acl))
1134 		zpl_set_cached_acl(ip, type, acl);
1135 #endif
1136 
1137 	return (acl);
1138 }
1139 
1140 #if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL)
1141 struct posix_acl *
1142 zpl_get_acl(struct inode *ip, int type, bool rcu)
1143 {
1144 	if (rcu)
1145 		return (ERR_PTR(-ECHILD));
1146 
1147 	return (zpl_get_acl_impl(ip, type));
1148 }
1149 #elif defined(HAVE_GET_ACL)
1150 struct posix_acl *
1151 zpl_get_acl(struct inode *ip, int type)
1152 {
1153 	return (zpl_get_acl_impl(ip, type));
1154 }
1155 #else
1156 #error "Unsupported iops->get_acl() implementation"
1157 #endif /* HAVE_GET_ACL_RCU */
1158 
1159 int
1160 zpl_init_acl(struct inode *ip, struct inode *dir)
1161 {
1162 	struct posix_acl *acl = NULL;
1163 	int error = 0;
1164 
1165 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1166 		return (0);
1167 
1168 	if (!S_ISLNK(ip->i_mode)) {
1169 		acl = zpl_get_acl_impl(dir, ACL_TYPE_DEFAULT);
1170 		if (IS_ERR(acl))
1171 			return (PTR_ERR(acl));
1172 		if (!acl) {
1173 			ITOZ(ip)->z_mode = (ip->i_mode &= ~current_umask());
1174 			zpl_inode_set_ctime_to_ts(ip, current_time(ip));
1175 			zfs_mark_inode_dirty(ip);
1176 			return (0);
1177 		}
1178 	}
1179 
1180 	if (acl) {
1181 		umode_t mode;
1182 
1183 		if (S_ISDIR(ip->i_mode)) {
1184 			error = zpl_set_acl_impl(ip, acl, ACL_TYPE_DEFAULT);
1185 			if (error)
1186 				goto out;
1187 		}
1188 
1189 		mode = ip->i_mode;
1190 		error = __posix_acl_create(&acl, GFP_KERNEL, &mode);
1191 		if (error >= 0) {
1192 			ip->i_mode = ITOZ(ip)->z_mode = mode;
1193 			zfs_mark_inode_dirty(ip);
1194 			if (error > 0) {
1195 				error = zpl_set_acl_impl(ip, acl,
1196 				    ACL_TYPE_ACCESS);
1197 			}
1198 		}
1199 	}
1200 out:
1201 	zpl_posix_acl_release(acl);
1202 
1203 	return (error);
1204 }
1205 
1206 int
1207 zpl_chmod_acl(struct inode *ip)
1208 {
1209 	struct posix_acl *acl;
1210 	int error;
1211 
1212 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1213 		return (0);
1214 
1215 	if (S_ISLNK(ip->i_mode))
1216 		return (-EOPNOTSUPP);
1217 
1218 	acl = zpl_get_acl_impl(ip, ACL_TYPE_ACCESS);
1219 	if (IS_ERR(acl) || !acl)
1220 		return (PTR_ERR(acl));
1221 
1222 	error = __posix_acl_chmod(&acl, GFP_KERNEL, ip->i_mode);
1223 	if (!error)
1224 		error = zpl_set_acl_impl(ip, acl, ACL_TYPE_ACCESS);
1225 
1226 	zpl_posix_acl_release(acl);
1227 
1228 	return (error);
1229 }
1230 
1231 static int
1232 __zpl_xattr_acl_list_access(struct inode *ip, char *list, size_t list_size,
1233     const char *name, size_t name_len)
1234 {
1235 	char *xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
1236 	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_ACCESS);
1237 
1238 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1239 		return (0);
1240 
1241 	if (list && xattr_size <= list_size)
1242 		memcpy(list, xattr_name, xattr_size);
1243 
1244 	return (xattr_size);
1245 }
1246 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_access);
1247 
1248 static int
1249 __zpl_xattr_acl_list_default(struct inode *ip, char *list, size_t list_size,
1250     const char *name, size_t name_len)
1251 {
1252 	char *xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
1253 	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_DEFAULT);
1254 
1255 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1256 		return (0);
1257 
1258 	if (list && xattr_size <= list_size)
1259 		memcpy(list, xattr_name, xattr_size);
1260 
1261 	return (xattr_size);
1262 }
1263 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_default);
1264 
1265 static int
1266 __zpl_xattr_acl_get_access(struct inode *ip, const char *name,
1267     void *buffer, size_t size)
1268 {
1269 	struct posix_acl *acl;
1270 	int type = ACL_TYPE_ACCESS;
1271 	int error;
1272 	/* xattr_resolve_name will do this for us if this is defined */
1273 #ifndef HAVE_XATTR_HANDLER_NAME
1274 	if (strcmp(name, "") != 0)
1275 		return (-EINVAL);
1276 #endif
1277 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1278 		return (-EOPNOTSUPP);
1279 
1280 	acl = zpl_get_acl_impl(ip, type);
1281 	if (IS_ERR(acl))
1282 		return (PTR_ERR(acl));
1283 	if (acl == NULL)
1284 		return (-ENODATA);
1285 
1286 	error = zpl_acl_to_xattr(acl, buffer, size);
1287 	zpl_posix_acl_release(acl);
1288 
1289 	return (error);
1290 }
1291 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_access);
1292 
1293 static int
1294 __zpl_xattr_acl_get_default(struct inode *ip, const char *name,
1295     void *buffer, size_t size)
1296 {
1297 	struct posix_acl *acl;
1298 	int type = ACL_TYPE_DEFAULT;
1299 	int error;
1300 	/* xattr_resolve_name will do this for us if this is defined */
1301 #ifndef HAVE_XATTR_HANDLER_NAME
1302 	if (strcmp(name, "") != 0)
1303 		return (-EINVAL);
1304 #endif
1305 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1306 		return (-EOPNOTSUPP);
1307 
1308 	acl = zpl_get_acl_impl(ip, type);
1309 	if (IS_ERR(acl))
1310 		return (PTR_ERR(acl));
1311 	if (acl == NULL)
1312 		return (-ENODATA);
1313 
1314 	error = zpl_acl_to_xattr(acl, buffer, size);
1315 	zpl_posix_acl_release(acl);
1316 
1317 	return (error);
1318 }
1319 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
1320 
1321 static int
1322 __zpl_xattr_acl_set_access(zidmap_t *mnt_ns,
1323     struct inode *ip, const char *name,
1324     const void *value, size_t size, int flags)
1325 {
1326 	struct posix_acl *acl;
1327 	int type = ACL_TYPE_ACCESS;
1328 	int error = 0;
1329 	/* xattr_resolve_name will do this for us if this is defined */
1330 #ifndef HAVE_XATTR_HANDLER_NAME
1331 	if (strcmp(name, "") != 0)
1332 		return (-EINVAL);
1333 #endif
1334 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1335 		return (-EOPNOTSUPP);
1336 
1337 #if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
1338 	if (!zpl_inode_owner_or_capable(mnt_ns, ip))
1339 		return (-EPERM);
1340 #else
1341 	(void) mnt_ns;
1342 	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
1343 		return (-EPERM);
1344 #endif
1345 
1346 	if (value) {
1347 		acl = zpl_acl_from_xattr(value, size);
1348 		if (IS_ERR(acl))
1349 			return (PTR_ERR(acl));
1350 		else if (acl) {
1351 			error = zpl_posix_acl_valid(ip, acl);
1352 			if (error) {
1353 				zpl_posix_acl_release(acl);
1354 				return (error);
1355 			}
1356 		}
1357 	} else {
1358 		acl = NULL;
1359 	}
1360 	error = zpl_set_acl_impl(ip, acl, type);
1361 	zpl_posix_acl_release(acl);
1362 
1363 	return (error);
1364 }
1365 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
1366 
1367 static int
1368 __zpl_xattr_acl_set_default(zidmap_t *mnt_ns,
1369     struct inode *ip, const char *name,
1370     const void *value, size_t size, int flags)
1371 {
1372 	struct posix_acl *acl;
1373 	int type = ACL_TYPE_DEFAULT;
1374 	int error = 0;
1375 	/* xattr_resolve_name will do this for us if this is defined */
1376 #ifndef HAVE_XATTR_HANDLER_NAME
1377 	if (strcmp(name, "") != 0)
1378 		return (-EINVAL);
1379 #endif
1380 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
1381 		return (-EOPNOTSUPP);
1382 
1383 #if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
1384 	if (!zpl_inode_owner_or_capable(mnt_ns, ip))
1385 		return (-EPERM);
1386 #else
1387 	(void) mnt_ns;
1388 	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
1389 		return (-EPERM);
1390 #endif
1391 
1392 	if (value) {
1393 		acl = zpl_acl_from_xattr(value, size);
1394 		if (IS_ERR(acl))
1395 			return (PTR_ERR(acl));
1396 		else if (acl) {
1397 			error = zpl_posix_acl_valid(ip, acl);
1398 			if (error) {
1399 				zpl_posix_acl_release(acl);
1400 				return (error);
1401 			}
1402 		}
1403 	} else {
1404 		acl = NULL;
1405 	}
1406 
1407 	error = zpl_set_acl_impl(ip, acl, type);
1408 	zpl_posix_acl_release(acl);
1409 
1410 	return (error);
1411 }
1412 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default);
1413 
/*
 * xattr handler for the POSIX access ACL (XATTR_NAME_POSIX_ACL_ACCESS).
 *
 * Use .name instead of .prefix when available: xattr_resolve_name will match
 * the whole name and reject anything that merely has .name as a prefix.
 *
 * .flags records the ACL type and is only initialized when one of the
 * HAVE_XATTR_LIST_SIMPLE / _DENTRY / _HANDLER variants is defined.
 */
static xattr_handler_t zpl_xattr_acl_access_handler = {
#ifdef HAVE_XATTR_HANDLER_NAME
	.name	= XATTR_NAME_POSIX_ACL_ACCESS,
#else
	.prefix	= XATTR_NAME_POSIX_ACL_ACCESS,
#endif
	.list	= zpl_xattr_acl_list_access,
	.get	= zpl_xattr_acl_get_access,
	.set	= zpl_xattr_acl_set_access,
#if defined(HAVE_XATTR_LIST_SIMPLE) || \
    defined(HAVE_XATTR_LIST_DENTRY) || \
    defined(HAVE_XATTR_LIST_HANDLER)
	.flags	= ACL_TYPE_ACCESS,
#endif
};
1435 
/*
 * xattr handler for the POSIX default ACL (XATTR_NAME_POSIX_ACL_DEFAULT).
 *
 * Use .name instead of .prefix when available: xattr_resolve_name will match
 * the whole name and reject anything that merely has .name as a prefix.
 *
 * .flags records the ACL type and is only initialized when one of the
 * HAVE_XATTR_LIST_SIMPLE / _DENTRY / _HANDLER variants is defined.
 */
static xattr_handler_t zpl_xattr_acl_default_handler = {
#ifdef HAVE_XATTR_HANDLER_NAME
	.name	= XATTR_NAME_POSIX_ACL_DEFAULT,
#else
	.prefix	= XATTR_NAME_POSIX_ACL_DEFAULT,
#endif
	.list	= zpl_xattr_acl_list_default,
	.get	= zpl_xattr_acl_get_default,
	.set	= zpl_xattr_acl_set_default,
#if defined(HAVE_XATTR_LIST_SIMPLE) || \
    defined(HAVE_XATTR_LIST_DENTRY) || \
    defined(HAVE_XATTR_LIST_HANDLER)
	.flags	= ACL_TYPE_DEFAULT,
#endif
};
1457 
1458 #endif /* CONFIG_FS_POSIX_ACL */
1459 
/*
 * NULL-terminated table of every xattr handler implemented in this file.
 * The two POSIX ACL handlers are only present when the kernel is built
 * with CONFIG_FS_POSIX_ACL.
 * NOTE(review): presumably installed as the superblock's handler list --
 * confirm against the consumer of this symbol.
 */
xattr_handler_t *zpl_xattr_handlers[] = {
	&zpl_xattr_security_handler,
	&zpl_xattr_trusted_handler,
	&zpl_xattr_user_handler,
#ifdef CONFIG_FS_POSIX_ACL
	&zpl_xattr_acl_access_handler,
	&zpl_xattr_acl_default_handler,
#endif /* CONFIG_FS_POSIX_ACL */
	NULL
};
1470 
1471 static const struct xattr_handler *
1472 zpl_xattr_handler(const char *name)
1473 {
1474 	if (strncmp(name, XATTR_USER_PREFIX,
1475 	    XATTR_USER_PREFIX_LEN) == 0)
1476 		return (&zpl_xattr_user_handler);
1477 
1478 	if (strncmp(name, XATTR_TRUSTED_PREFIX,
1479 	    XATTR_TRUSTED_PREFIX_LEN) == 0)
1480 		return (&zpl_xattr_trusted_handler);
1481 
1482 	if (strncmp(name, XATTR_SECURITY_PREFIX,
1483 	    XATTR_SECURITY_PREFIX_LEN) == 0)
1484 		return (&zpl_xattr_security_handler);
1485 
1486 #ifdef CONFIG_FS_POSIX_ACL
1487 	if (strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
1488 	    sizeof (XATTR_NAME_POSIX_ACL_ACCESS)) == 0)
1489 		return (&zpl_xattr_acl_access_handler);
1490 
1491 	if (strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
1492 	    sizeof (XATTR_NAME_POSIX_ACL_DEFAULT)) == 0)
1493 		return (&zpl_xattr_acl_default_handler);
1494 #endif /* CONFIG_FS_POSIX_ACL */
1495 
1496 	return (NULL);
1497 }
1498 
1499 static enum xattr_permission
1500 zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
1501 {
1502 	const struct xattr_handler *handler;
1503 	struct dentry *d __maybe_unused = xf->dentry;
1504 	enum xattr_permission perm = XAPERM_ALLOW;
1505 
1506 	handler = zpl_xattr_handler(name);
1507 	if (handler == NULL) {
1508 		/* Do not expose FreeBSD system namespace xattrs. */
1509 		if (ZFS_XA_NS_PREFIX_MATCH(FREEBSD, name))
1510 			return (XAPERM_DENY);
1511 		/*
1512 		 * Anything that doesn't match a known namespace gets put in the
1513 		 * user namespace for compatibility with other platforms.
1514 		 */
1515 		perm = XAPERM_COMPAT;
1516 		handler = &zpl_xattr_user_handler;
1517 	}
1518 
1519 	if (handler->list) {
1520 #if defined(HAVE_XATTR_LIST_SIMPLE)
1521 		if (!handler->list(d))
1522 			return (XAPERM_DENY);
1523 #elif defined(HAVE_XATTR_LIST_DENTRY)
1524 		if (!handler->list(d, NULL, 0, name, name_len, 0))
1525 			return (XAPERM_DENY);
1526 #elif defined(HAVE_XATTR_LIST_HANDLER)
1527 		if (!handler->list(handler, d, NULL, 0, name, name_len))
1528 			return (XAPERM_DENY);
1529 #endif
1530 	}
1531 
1532 	return (perm);
1533 }
1534 
1535 #if defined(CONFIG_FS_POSIX_ACL) && \
1536 	(!defined(HAVE_POSIX_ACL_RELEASE) || \
1537 		defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY))
/*
 * One entry in the deferred ACL release list: a posix_acl waiting to be
 * freed once its grace period has passed.
 */
struct acl_rel_struct {
	struct acl_rel_struct *next;	/* next entry, NULL while last */
	struct posix_acl *acl;		/* ACL to kfree() when released */
	clock_t time;			/* ddi_get_lbolt() at enqueue time */
};
1543 
/*
 * Timing of the deferred ACL release, in lbolt ticks:
 *   ACL_REL_GRACE   minimum age of an entry before it may be freed
 *   ACL_REL_WINDOW  extra slack added on top of the grace period
 *   ACL_REL_SCHED   delay after an entry's enqueue time at which the free
 *                   task is scheduled to run
 */
#define	ACL_REL_GRACE	(60*HZ)
#define	ACL_REL_WINDOW	(1*HZ)
#define	ACL_REL_SCHED	(ACL_REL_GRACE+ACL_REL_WINDOW)
1547 
/*
 * Lockless multi-producer single-consumer fifo list.
 * Nodes are added at the tail and removed from the head.  The tail pointer
 * is the synchronization point: it always points to the next pointer of the
 * last node, or to the head when the list is empty.
 *
 * Producers (zpl_posix_acl_release_impl) swap themselves into acl_rel_tail;
 * the single consumer (zpl_posix_acl_free) is the only one advancing
 * acl_rel_head.
 */
static struct acl_rel_struct *acl_rel_head = NULL;
static struct acl_rel_struct **acl_rel_tail = &acl_rel_head;
1556 
/*
 * Taskq callback and single consumer of the deferred ACL release list.
 *
 * Walks the list from the head and unlinks every entry that has been queued
 * for at least ACL_REL_GRACE ticks, collecting them on a private freelist.
 * The walk stops at the first entry still inside its grace period; in that
 * case the task re-dispatches itself for that entry's enqueue time plus
 * ACL_REL_SCHED.  Finally each collected ACL and its list node are freed.
 *
 * The arg parameter is not used.
 */
static void
zpl_posix_acl_free(void *arg)
{
	struct acl_rel_struct *freelist = NULL;
	struct acl_rel_struct *a;
	clock_t new_time;	/* only meaningful when refire is set */
	boolean_t refire = B_FALSE;

	ASSERT3P(acl_rel_head, !=, NULL);
	while (acl_rel_head) {
		a = acl_rel_head;
		if (ddi_get_lbolt() - a->time >= ACL_REL_GRACE) {
			/*
			 * If a is the last node we need to reset tail, but we
			 * need to use cmpxchg to make sure it is still the
			 * last node.
			 */
			if (acl_rel_tail == &a->next) {
				acl_rel_head = NULL;
				if (cmpxchg(&acl_rel_tail, &a->next,
				    &acl_rel_head) == &a->next) {
					/* List is now empty; a is ours. */
					ASSERT3P(a->next, ==, NULL);
					a->next = freelist;
					freelist = a;
					break;
				}
			}
			/*
			 * a is not last node, make sure next pointer is set
			 * by the adder and advance the head.
			 */
			while (READ_ONCE(a->next) == NULL)
				cpu_relax();
			acl_rel_head = a->next;
			/* Push a onto the private freelist. */
			a->next = freelist;
			freelist = a;
		} else {
			/*
			 * a is still in grace period. We are responsible to
			 * reschedule the free task, since adder will only do
			 * so if list is empty.
			 */
			new_time = a->time + ACL_REL_SCHED;
			refire = B_TRUE;
			break;
		}
	}

	if (refire)
		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
		    NULL, TQ_SLEEP, new_time);

	/* Release everything that was unlinked above. */
	while (freelist) {
		a = freelist;
		freelist = a->next;
		kfree(a->acl);
		kmem_free(a, sizeof (struct acl_rel_struct));
	}
}
1616 
/*
 * Queue an ACL for deferred freeing (producer side of the lockless list).
 *
 * A new list node recording the ACL and the current lbolt is appended at
 * the tail.  If the list was empty beforehand, the free task is dispatched
 * to run ACL_REL_SCHED ticks from now; otherwise scheduling is left to
 * zpl_posix_acl_free(), which re-dispatches itself while entries remain.
 */
void
zpl_posix_acl_release_impl(struct posix_acl *acl)
{
	struct acl_rel_struct *a, **prev;

	a = kmem_alloc(sizeof (struct acl_rel_struct), KM_SLEEP);
	a->next = NULL;
	a->acl = acl;
	a->time = ddi_get_lbolt();
	/* atomically points tail to us and get the previous tail */
	prev = xchg(&acl_rel_tail, &a->next);
	ASSERT3P(*prev, ==, NULL);
	/* link the previous last node (or the head) to the new node */
	*prev = a;
	/* if it was empty before, schedule the free task */
	if (prev == &acl_rel_head)
		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
		    NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
}
1635 #endif
1636 
/*
 * Expose the xattr_compat tunable (an INT, declared ZMOD_RW) as a module
 * parameter under the zfs scope with the zfs_ name prefix.
 */
ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,
	"Use legacy ZFS xattr naming for writing new user namespace xattrs");
1639