xref: /linux/fs/xattr.c (revision 9cc8d0ecdd2aad42e377e971e3bb114339df609e)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3   File: fs/xattr.c
4 
5   Extended attribute handling.
6 
7   Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
8   Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
9   Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/slab.h>
14 #include <linux/file.h>
15 #include <linux/xattr.h>
16 #include <linux/mount.h>
17 #include <linux/namei.h>
18 #include <linux/security.h>
19 #include <linux/syscalls.h>
20 #include <linux/export.h>
21 #include <linux/fsnotify.h>
22 #include <linux/audit.h>
23 #include <linux/vmalloc.h>
24 #include <linux/posix_acl_xattr.h>
25 
26 #include <linux/uaccess.h>
27 
28 #include "internal.h"
29 
/*
 * Check whether @a begins with @a_prefix.
 *
 * Returns a pointer to the first character of @a that follows the prefix
 * (which is @a itself when @a_prefix is empty), or NULL when @a does not
 * start with @a_prefix.
 */
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
	for (; *a_prefix != '\0'; a++, a_prefix++) {
		if (*a != *a_prefix)
			return NULL;
	}
	return a;
}
39 
/*
 * A filesystem that wants a different set of xattr operations for each xattr
 * prefix builds a NULL-terminated array of struct xattr_handler pointers
 * (one entry per prefix) and points the superblock's s_xattr field at it.
 *
 * for_each_xattr_handler() walks such an array.  It advances @handlers
 * itself while iterating, so callers hand it a local copy of the array
 * pointer.  A NULL @handlers results in zero iterations.
 */
#define for_each_xattr_handler(handlers, handler)			\
	if (handlers)							\
		for ((handler) = *(handlers)++; (handler) != NULL;	\
		     (handler) = *(handlers)++)
51 
52 /*
53  * Find the xattr_handler with the matching prefix.
54  */
55 static const struct xattr_handler *
56 xattr_resolve_name(struct inode *inode, const char **name)
57 {
58 	const struct xattr_handler * const *handlers = inode->i_sb->s_xattr;
59 	const struct xattr_handler *handler;
60 
61 	if (!(inode->i_opflags & IOP_XATTR)) {
62 		if (unlikely(is_bad_inode(inode)))
63 			return ERR_PTR(-EIO);
64 		return ERR_PTR(-EOPNOTSUPP);
65 	}
66 	for_each_xattr_handler(handlers, handler) {
67 		const char *n;
68 
69 		n = strcmp_prefix(*name, xattr_prefix(handler));
70 		if (n) {
71 			if (!handler->prefix ^ !*n) {
72 				if (*n)
73 					continue;
74 				return ERR_PTR(-EINVAL);
75 			}
76 			*name = n;
77 			return handler;
78 		}
79 	}
80 	return ERR_PTR(-EOPNOTSUPP);
81 }
82 
83 /**
84  * may_write_xattr - check whether inode allows writing xattr
85  * @idmap: idmap of the mount the inode was found from
86  * @inode: the inode on which to set an xattr
87  *
88  * Check whether the inode allows writing xattrs. Specifically, we can never
89  * set or remove an extended attribute on a read-only filesystem  or on an
90  * immutable / append-only inode.
91  *
92  * We also need to ensure that the inode has a mapping in the mount to
93  * not risk writing back invalid i_{g,u}id values.
94  *
95  * Return: On success zero is returned. On error a negative errno is returned.
96  */
97 int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode)
98 {
99 	if (IS_IMMUTABLE(inode))
100 		return -EPERM;
101 	if (IS_APPEND(inode))
102 		return -EPERM;
103 	if (HAS_UNMAPPED_ID(idmap, inode))
104 		return -EPERM;
105 	return 0;
106 }
107 
108 /*
109  * Check permissions for extended attribute access.  This is a bit complicated
110  * because different namespaces have very different rules.
111  */
112 static int
113 xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
114 		 const char *name, int mask)
115 {
116 	if (mask & MAY_WRITE) {
117 		int ret;
118 
119 		ret = may_write_xattr(idmap, inode);
120 		if (ret)
121 			return ret;
122 	}
123 
124 	/*
125 	 * No restriction for security.* and system.* from the VFS.  Decision
126 	 * on these is left to the underlying filesystem / security module.
127 	 */
128 	if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
129 	    !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
130 		return 0;
131 
132 	/*
133 	 * The trusted.* namespace can only be accessed by privileged users.
134 	 */
135 	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
136 		if (!capable(CAP_SYS_ADMIN))
137 			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
138 		return 0;
139 	}
140 
141 	/*
142 	 * In the user.* namespace, only regular files and directories can have
143 	 * extended attributes. For sticky directories, only the owner and
144 	 * privileged users can write attributes.
145 	 */
146 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
147 		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
148 			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
149 		if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
150 		    (mask & MAY_WRITE) &&
151 		    !inode_owner_or_capable(idmap, inode))
152 			return -EPERM;
153 	}
154 
155 	return inode_permission(idmap, inode, mask);
156 }
157 
158 /*
159  * Look for any handler that deals with the specified namespace.
160  */
161 int
162 xattr_supports_user_prefix(struct inode *inode)
163 {
164 	const struct xattr_handler * const *handlers = inode->i_sb->s_xattr;
165 	const struct xattr_handler *handler;
166 
167 	if (!(inode->i_opflags & IOP_XATTR)) {
168 		if (unlikely(is_bad_inode(inode)))
169 			return -EIO;
170 		return -EOPNOTSUPP;
171 	}
172 
173 	for_each_xattr_handler(handlers, handler) {
174 		if (!strncmp(xattr_prefix(handler), XATTR_USER_PREFIX,
175 			     XATTR_USER_PREFIX_LEN))
176 			return 0;
177 	}
178 
179 	return -EOPNOTSUPP;
180 }
181 EXPORT_SYMBOL(xattr_supports_user_prefix);
182 
183 int
184 __vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
185 	       struct inode *inode, const char *name, const void *value,
186 	       size_t size, int flags)
187 {
188 	const struct xattr_handler *handler;
189 
190 	if (is_posix_acl_xattr(name))
191 		return -EOPNOTSUPP;
192 
193 	handler = xattr_resolve_name(inode, &name);
194 	if (IS_ERR(handler))
195 		return PTR_ERR(handler);
196 	if (!handler->set)
197 		return -EOPNOTSUPP;
198 	if (size == 0)
199 		value = "";  /* empty EA, do not remove */
200 	return handler->set(handler, idmap, dentry, inode, name, value,
201 			    size, flags);
202 }
203 EXPORT_SYMBOL(__vfs_setxattr);
204 
205 /**
206  *  __vfs_setxattr_noperm - perform setxattr operation without performing
207  *  permission checks.
208  *
209  *  @idmap: idmap of the mount the inode was found from
210  *  @dentry: object to perform setxattr on
211  *  @name: xattr name to set
212  *  @value: value to set @name to
213  *  @size: size of @value
214  *  @flags: flags to pass into filesystem operations
215  *
216  *  returns the result of the internal setxattr or setsecurity operations.
217  *
218  *  This function requires the caller to lock the inode's i_mutex before it
219  *  is executed. It also assumes that the caller will make the appropriate
220  *  permission checks.
221  */
222 int __vfs_setxattr_noperm(struct mnt_idmap *idmap,
223 			  struct dentry *dentry, const char *name,
224 			  const void *value, size_t size, int flags)
225 {
226 	struct inode *inode = dentry->d_inode;
227 	int error = -EAGAIN;
228 	int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
229 				   XATTR_SECURITY_PREFIX_LEN);
230 
231 	if (issec)
232 		inode->i_flags &= ~S_NOSEC;
233 	if (inode->i_opflags & IOP_XATTR) {
234 		error = __vfs_setxattr(idmap, dentry, inode, name, value,
235 				       size, flags);
236 		if (!error) {
237 			fsnotify_xattr(dentry);
238 			security_inode_post_setxattr(dentry, name, value,
239 						     size, flags);
240 		}
241 	} else {
242 		if (unlikely(is_bad_inode(inode)))
243 			return -EIO;
244 	}
245 	if (error == -EAGAIN) {
246 		error = -EOPNOTSUPP;
247 
248 		if (issec) {
249 			const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
250 
251 			error = security_inode_setsecurity(inode, suffix, value,
252 							   size, flags);
253 			if (!error)
254 				fsnotify_xattr(dentry);
255 		}
256 	}
257 
258 	return error;
259 }
260 
261 /**
262  * __vfs_setxattr_locked - set an extended attribute while holding the inode
263  * lock
264  *
265  *  @idmap: idmap of the mount of the target inode
266  *  @dentry: object to perform setxattr on
267  *  @name: xattr name to set
268  *  @value: value to set @name to
269  *  @size: size of @value
270  *  @flags: flags to pass into filesystem operations
271  *  @delegated_inode: on return, will contain an inode pointer that
272  *  a delegation was broken on, NULL if none.
273  */
274 int
275 __vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry,
276 		      const char *name, const void *value, size_t size,
277 		      int flags, struct inode **delegated_inode)
278 {
279 	struct inode *inode = dentry->d_inode;
280 	int error;
281 
282 	error = xattr_permission(idmap, inode, name, MAY_WRITE);
283 	if (error)
284 		return error;
285 
286 	error = security_inode_setxattr(idmap, dentry, name, value, size,
287 					flags);
288 	if (error)
289 		goto out;
290 
291 	error = try_break_deleg(inode, delegated_inode);
292 	if (error)
293 		goto out;
294 
295 	error = __vfs_setxattr_noperm(idmap, dentry, name, value,
296 				      size, flags);
297 
298 out:
299 	return error;
300 }
301 EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
302 
303 int
304 vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
305 	     const char *name, const void *value, size_t size, int flags)
306 {
307 	struct inode *inode = dentry->d_inode;
308 	struct inode *delegated_inode = NULL;
309 	const void  *orig_value = value;
310 	int error;
311 
312 	if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
313 		error = cap_convert_nscap(idmap, dentry, &value, size);
314 		if (error < 0)
315 			return error;
316 		size = error;
317 	}
318 
319 retry_deleg:
320 	inode_lock(inode);
321 	error = __vfs_setxattr_locked(idmap, dentry, name, value, size,
322 				      flags, &delegated_inode);
323 	inode_unlock(inode);
324 
325 	if (delegated_inode) {
326 		error = break_deleg_wait(&delegated_inode);
327 		if (!error)
328 			goto retry_deleg;
329 	}
330 	if (value != orig_value)
331 		kfree(value);
332 
333 	return error;
334 }
335 EXPORT_SYMBOL_GPL(vfs_setxattr);
336 
337 static ssize_t
338 xattr_getsecurity(struct mnt_idmap *idmap, struct inode *inode,
339 		  const char *name, void *value, size_t size)
340 {
341 	void *buffer = NULL;
342 	ssize_t len;
343 
344 	if (!value || !size) {
345 		len = security_inode_getsecurity(idmap, inode, name,
346 						 &buffer, false);
347 		goto out_noalloc;
348 	}
349 
350 	len = security_inode_getsecurity(idmap, inode, name, &buffer,
351 					 true);
352 	if (len < 0)
353 		return len;
354 	if (size < len) {
355 		len = -ERANGE;
356 		goto out;
357 	}
358 	memcpy(value, buffer, len);
359 out:
360 	kfree(buffer);
361 out_noalloc:
362 	return len;
363 }
364 
365 /*
366  * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr
367  *
368  * Allocate memory, if not already allocated, or re-allocate correct size,
369  * before retrieving the extended attribute.  The xattr value buffer should
370  * always be freed by the caller, even on error.
371  *
372  * Returns the result of alloc, if failed, or the getxattr operation.
373  */
374 int
375 vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry,
376 		   const char *name, char **xattr_value, size_t xattr_size,
377 		   gfp_t flags)
378 {
379 	const struct xattr_handler *handler;
380 	struct inode *inode = dentry->d_inode;
381 	char *value = *xattr_value;
382 	int error;
383 
384 	error = xattr_permission(idmap, inode, name, MAY_READ);
385 	if (error)
386 		return error;
387 
388 	handler = xattr_resolve_name(inode, &name);
389 	if (IS_ERR(handler))
390 		return PTR_ERR(handler);
391 	if (!handler->get)
392 		return -EOPNOTSUPP;
393 	error = handler->get(handler, dentry, inode, name, NULL, 0);
394 	if (error < 0)
395 		return error;
396 
397 	if (!value || (error > xattr_size)) {
398 		value = krealloc(*xattr_value, error + 1, flags);
399 		if (!value)
400 			return -ENOMEM;
401 		memset(value, 0, error + 1);
402 	}
403 
404 	error = handler->get(handler, dentry, inode, name, value, error);
405 	*xattr_value = value;
406 	return error;
407 }
408 
409 ssize_t
410 __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
411 	       void *value, size_t size)
412 {
413 	const struct xattr_handler *handler;
414 
415 	if (is_posix_acl_xattr(name))
416 		return -EOPNOTSUPP;
417 
418 	handler = xattr_resolve_name(inode, &name);
419 	if (IS_ERR(handler))
420 		return PTR_ERR(handler);
421 	if (!handler->get)
422 		return -EOPNOTSUPP;
423 	return handler->get(handler, dentry, inode, name, value, size);
424 }
425 EXPORT_SYMBOL(__vfs_getxattr);
426 
427 ssize_t
428 vfs_getxattr(struct mnt_idmap *idmap, struct dentry *dentry,
429 	     const char *name, void *value, size_t size)
430 {
431 	struct inode *inode = dentry->d_inode;
432 	int error;
433 
434 	error = xattr_permission(idmap, inode, name, MAY_READ);
435 	if (error)
436 		return error;
437 
438 	error = security_inode_getxattr(dentry, name);
439 	if (error)
440 		return error;
441 
442 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
443 				XATTR_SECURITY_PREFIX_LEN)) {
444 		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
445 		int ret = xattr_getsecurity(idmap, inode, suffix, value,
446 					    size);
447 		/*
448 		 * Only overwrite the return value if a security module
449 		 * is actually active.
450 		 */
451 		if (ret == -EOPNOTSUPP)
452 			goto nolsm;
453 		return ret;
454 	}
455 nolsm:
456 	return __vfs_getxattr(dentry, inode, name, value, size);
457 }
458 EXPORT_SYMBOL_GPL(vfs_getxattr);
459 
460 /**
461  * vfs_listxattr - retrieve \0 separated list of xattr names
462  * @dentry: the dentry from whose inode the xattr names are retrieved
463  * @list: buffer to store xattr names into
464  * @size: size of the buffer
465  *
466  * This function returns the names of all xattrs associated with the
467  * inode of @dentry.
468  *
469  * Note, for legacy reasons the vfs_listxattr() function lists POSIX
470  * ACLs as well. Since POSIX ACLs are decoupled from IOP_XATTR the
471  * vfs_listxattr() function doesn't check for this flag since a
472  * filesystem could implement POSIX ACLs without implementing any other
473  * xattrs.
474  *
475  * However, since all codepaths that remove IOP_XATTR also assign of
476  * inode operations that either don't implement or implement a stub
477  * ->listxattr() operation.
478  *
479  * Return: On success, the size of the buffer that was used. On error a
480  *         negative error code.
481  */
482 ssize_t
483 vfs_listxattr(struct dentry *dentry, char *list, size_t size)
484 {
485 	struct inode *inode = d_inode(dentry);
486 	ssize_t error;
487 
488 	error = security_inode_listxattr(dentry);
489 	if (error)
490 		return error;
491 
492 	if (inode->i_op->listxattr) {
493 		error = inode->i_op->listxattr(dentry, list, size);
494 	} else {
495 		error = security_inode_listsecurity(inode, list, size);
496 		if (size && error > size)
497 			error = -ERANGE;
498 	}
499 	return error;
500 }
501 EXPORT_SYMBOL_GPL(vfs_listxattr);
502 
503 int
504 __vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
505 		  const char *name)
506 {
507 	struct inode *inode = d_inode(dentry);
508 	const struct xattr_handler *handler;
509 
510 	if (is_posix_acl_xattr(name))
511 		return -EOPNOTSUPP;
512 
513 	handler = xattr_resolve_name(inode, &name);
514 	if (IS_ERR(handler))
515 		return PTR_ERR(handler);
516 	if (!handler->set)
517 		return -EOPNOTSUPP;
518 	return handler->set(handler, idmap, dentry, inode, name, NULL, 0,
519 			    XATTR_REPLACE);
520 }
521 EXPORT_SYMBOL(__vfs_removexattr);
522 
523 /**
524  * __vfs_removexattr_locked - set an extended attribute while holding the inode
525  * lock
526  *
527  *  @idmap: idmap of the mount of the target inode
528  *  @dentry: object to perform setxattr on
529  *  @name: name of xattr to remove
530  *  @delegated_inode: on return, will contain an inode pointer that
531  *  a delegation was broken on, NULL if none.
532  */
533 int
534 __vfs_removexattr_locked(struct mnt_idmap *idmap,
535 			 struct dentry *dentry, const char *name,
536 			 struct inode **delegated_inode)
537 {
538 	struct inode *inode = dentry->d_inode;
539 	int error;
540 
541 	error = xattr_permission(idmap, inode, name, MAY_WRITE);
542 	if (error)
543 		return error;
544 
545 	error = security_inode_removexattr(idmap, dentry, name);
546 	if (error)
547 		goto out;
548 
549 	error = try_break_deleg(inode, delegated_inode);
550 	if (error)
551 		goto out;
552 
553 	error = __vfs_removexattr(idmap, dentry, name);
554 	if (error)
555 		return error;
556 
557 	fsnotify_xattr(dentry);
558 	security_inode_post_removexattr(dentry, name);
559 
560 out:
561 	return error;
562 }
563 EXPORT_SYMBOL_GPL(__vfs_removexattr_locked);
564 
565 int
566 vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
567 		const char *name)
568 {
569 	struct inode *inode = dentry->d_inode;
570 	struct inode *delegated_inode = NULL;
571 	int error;
572 
573 retry_deleg:
574 	inode_lock(inode);
575 	error = __vfs_removexattr_locked(idmap, dentry,
576 					 name, &delegated_inode);
577 	inode_unlock(inode);
578 
579 	if (delegated_inode) {
580 		error = break_deleg_wait(&delegated_inode);
581 		if (!error)
582 			goto retry_deleg;
583 	}
584 
585 	return error;
586 }
587 EXPORT_SYMBOL_GPL(vfs_removexattr);
588 
589 int import_xattr_name(struct xattr_name *kname, const char __user *name)
590 {
591 	int error = strncpy_from_user(kname->name, name,
592 					sizeof(kname->name));
593 	if (error == 0 || error == sizeof(kname->name))
594 		return -ERANGE;
595 	if (error < 0)
596 		return error;
597 	return 0;
598 }
599 
600 /*
601  * Extended attribute SET operations
602  */
603 
604 int setxattr_copy(const char __user *name, struct kernel_xattr_ctx *ctx)
605 {
606 	int error;
607 
608 	if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
609 		return -EINVAL;
610 
611 	error = import_xattr_name(ctx->kname, name);
612 	if (error)
613 		return error;
614 
615 	if (ctx->size) {
616 		if (ctx->size > XATTR_SIZE_MAX)
617 			return -E2BIG;
618 
619 		ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
620 		if (IS_ERR(ctx->kvalue)) {
621 			error = PTR_ERR(ctx->kvalue);
622 			ctx->kvalue = NULL;
623 		}
624 	}
625 
626 	return error;
627 }
628 
629 static int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
630 		struct kernel_xattr_ctx *ctx)
631 {
632 	if (is_posix_acl_xattr(ctx->kname->name))
633 		return do_set_acl(idmap, dentry, ctx->kname->name,
634 				  ctx->kvalue, ctx->size);
635 
636 	return vfs_setxattr(idmap, dentry, ctx->kname->name,
637 			ctx->kvalue, ctx->size, ctx->flags);
638 }
639 
640 int file_setxattr(struct file *f, struct kernel_xattr_ctx *ctx)
641 {
642 	int error = mnt_want_write_file(f);
643 
644 	if (!error) {
645 		audit_file(f);
646 		error = do_setxattr(file_mnt_idmap(f), f->f_path.dentry, ctx);
647 		mnt_drop_write_file(f);
648 	}
649 	return error;
650 }
651 
652 /* unconditionally consumes filename */
653 int filename_setxattr(int dfd, struct filename *filename,
654 		      unsigned int lookup_flags, struct kernel_xattr_ctx *ctx)
655 {
656 	struct path path;
657 	int error;
658 
659 retry:
660 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
661 	if (error)
662 		goto out;
663 	error = mnt_want_write(path.mnt);
664 	if (!error) {
665 		error = do_setxattr(mnt_idmap(path.mnt), path.dentry, ctx);
666 		mnt_drop_write(path.mnt);
667 	}
668 	path_put(&path);
669 	if (retry_estale(error, lookup_flags)) {
670 		lookup_flags |= LOOKUP_REVAL;
671 		goto retry;
672 	}
673 
674 out:
675 	putname(filename);
676 	return error;
677 }
678 
679 static int path_setxattrat(int dfd, const char __user *pathname,
680 			   unsigned int at_flags, const char __user *name,
681 			   const void __user *value, size_t size, int flags)
682 {
683 	struct xattr_name kname;
684 	struct kernel_xattr_ctx ctx = {
685 		.cvalue	= value,
686 		.kvalue	= NULL,
687 		.size	= size,
688 		.kname	= &kname,
689 		.flags	= flags,
690 	};
691 	struct filename *filename;
692 	unsigned int lookup_flags = 0;
693 	int error;
694 
695 	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
696 		return -EINVAL;
697 
698 	if (!(at_flags & AT_SYMLINK_NOFOLLOW))
699 		lookup_flags = LOOKUP_FOLLOW;
700 
701 	error = setxattr_copy(name, &ctx);
702 	if (error)
703 		return error;
704 
705 	filename = getname_maybe_null(pathname, at_flags);
706 	if (!filename) {
707 		CLASS(fd, f)(dfd);
708 		if (fd_empty(f))
709 			error = -EBADF;
710 		else
711 			error = file_setxattr(fd_file(f), &ctx);
712 	} else {
713 		error = filename_setxattr(dfd, filename, lookup_flags, &ctx);
714 	}
715 	kvfree(ctx.kvalue);
716 	return error;
717 }
718 
719 SYSCALL_DEFINE6(setxattrat, int, dfd, const char __user *, pathname, unsigned int, at_flags,
720 		const char __user *, name, const struct xattr_args __user *, uargs,
721 		size_t, usize)
722 {
723 	struct xattr_args args = {};
724 	int error;
725 
726 	BUILD_BUG_ON(sizeof(struct xattr_args) < XATTR_ARGS_SIZE_VER0);
727 	BUILD_BUG_ON(sizeof(struct xattr_args) != XATTR_ARGS_SIZE_LATEST);
728 
729 	if (unlikely(usize < XATTR_ARGS_SIZE_VER0))
730 		return -EINVAL;
731 	if (usize > PAGE_SIZE)
732 		return -E2BIG;
733 
734 	error = copy_struct_from_user(&args, sizeof(args), uargs, usize);
735 	if (error)
736 		return error;
737 
738 	return path_setxattrat(dfd, pathname, at_flags, name,
739 			       u64_to_user_ptr(args.value), args.size,
740 			       args.flags);
741 }
742 
743 SYSCALL_DEFINE5(setxattr, const char __user *, pathname,
744 		const char __user *, name, const void __user *, value,
745 		size_t, size, int, flags)
746 {
747 	return path_setxattrat(AT_FDCWD, pathname, 0, name, value, size, flags);
748 }
749 
750 SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
751 		const char __user *, name, const void __user *, value,
752 		size_t, size, int, flags)
753 {
754 	return path_setxattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, name,
755 			       value, size, flags);
756 }
757 
758 SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
759 		const void __user *,value, size_t, size, int, flags)
760 {
761 	return path_setxattrat(fd, NULL, AT_EMPTY_PATH, name,
762 			       value, size, flags);
763 }
764 
765 /*
766  * Extended attribute GET operations
767  */
768 static ssize_t
769 do_getxattr(struct mnt_idmap *idmap, struct dentry *d,
770 	struct kernel_xattr_ctx *ctx)
771 {
772 	ssize_t error;
773 	char *kname = ctx->kname->name;
774 	void *kvalue = NULL;
775 
776 	if (ctx->size) {
777 		if (ctx->size > XATTR_SIZE_MAX)
778 			ctx->size = XATTR_SIZE_MAX;
779 		kvalue = kvzalloc(ctx->size, GFP_KERNEL);
780 		if (!kvalue)
781 			return -ENOMEM;
782 	}
783 
784 	if (is_posix_acl_xattr(kname))
785 		error = do_get_acl(idmap, d, kname, kvalue, ctx->size);
786 	else
787 		error = vfs_getxattr(idmap, d, kname, kvalue, ctx->size);
788 	if (error > 0) {
789 		if (ctx->size && copy_to_user(ctx->value, kvalue, error))
790 			error = -EFAULT;
791 	} else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
792 		/* The file system tried to returned a value bigger
793 		   than XATTR_SIZE_MAX bytes. Not possible. */
794 		error = -E2BIG;
795 	}
796 
797 	kvfree(kvalue);
798 	return error;
799 }
800 
801 ssize_t file_getxattr(struct file *f, struct kernel_xattr_ctx *ctx)
802 {
803 	audit_file(f);
804 	return do_getxattr(file_mnt_idmap(f), f->f_path.dentry, ctx);
805 }
806 
807 /* unconditionally consumes filename */
808 ssize_t filename_getxattr(int dfd, struct filename *filename,
809 			  unsigned int lookup_flags, struct kernel_xattr_ctx *ctx)
810 {
811 	struct path path;
812 	ssize_t error;
813 retry:
814 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
815 	if (error)
816 		goto out;
817 	error = do_getxattr(mnt_idmap(path.mnt), path.dentry, ctx);
818 	path_put(&path);
819 	if (retry_estale(error, lookup_flags)) {
820 		lookup_flags |= LOOKUP_REVAL;
821 		goto retry;
822 	}
823 out:
824 	putname(filename);
825 	return error;
826 }
827 
828 static ssize_t path_getxattrat(int dfd, const char __user *pathname,
829 			       unsigned int at_flags, const char __user *name,
830 			       void __user *value, size_t size)
831 {
832 	struct xattr_name kname;
833 	struct kernel_xattr_ctx ctx = {
834 		.value    = value,
835 		.size     = size,
836 		.kname    = &kname,
837 		.flags    = 0,
838 	};
839 	struct filename *filename;
840 	ssize_t error;
841 
842 	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
843 		return -EINVAL;
844 
845 	error = import_xattr_name(&kname, name);
846 	if (error)
847 		return error;
848 
849 	filename = getname_maybe_null(pathname, at_flags);
850 	if (!filename) {
851 		CLASS(fd, f)(dfd);
852 		if (fd_empty(f))
853 			return -EBADF;
854 		return file_getxattr(fd_file(f), &ctx);
855 	} else {
856 		int lookup_flags = 0;
857 		if (!(at_flags & AT_SYMLINK_NOFOLLOW))
858 			lookup_flags = LOOKUP_FOLLOW;
859 		return filename_getxattr(dfd, filename, lookup_flags, &ctx);
860 	}
861 }
862 
863 SYSCALL_DEFINE6(getxattrat, int, dfd, const char __user *, pathname, unsigned int, at_flags,
864 		const char __user *, name, struct xattr_args __user *, uargs, size_t, usize)
865 {
866 	struct xattr_args args = {};
867 	int error;
868 
869 	BUILD_BUG_ON(sizeof(struct xattr_args) < XATTR_ARGS_SIZE_VER0);
870 	BUILD_BUG_ON(sizeof(struct xattr_args) != XATTR_ARGS_SIZE_LATEST);
871 
872 	if (unlikely(usize < XATTR_ARGS_SIZE_VER0))
873 		return -EINVAL;
874 	if (usize > PAGE_SIZE)
875 		return -E2BIG;
876 
877 	error = copy_struct_from_user(&args, sizeof(args), uargs, usize);
878 	if (error)
879 		return error;
880 
881 	if (args.flags != 0)
882 		return -EINVAL;
883 
884 	return path_getxattrat(dfd, pathname, at_flags, name,
885 			       u64_to_user_ptr(args.value), args.size);
886 }
887 
888 SYSCALL_DEFINE4(getxattr, const char __user *, pathname,
889 		const char __user *, name, void __user *, value, size_t, size)
890 {
891 	return path_getxattrat(AT_FDCWD, pathname, 0, name, value, size);
892 }
893 
894 SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
895 		const char __user *, name, void __user *, value, size_t, size)
896 {
897 	return path_getxattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, name,
898 			       value, size);
899 }
900 
901 SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
902 		void __user *, value, size_t, size)
903 {
904 	return path_getxattrat(fd, NULL, AT_EMPTY_PATH, name, value, size);
905 }
906 
907 /*
908  * Extended attribute LIST operations
909  */
910 static ssize_t
911 listxattr(struct dentry *d, char __user *list, size_t size)
912 {
913 	ssize_t error;
914 	char *klist = NULL;
915 
916 	if (size) {
917 		if (size > XATTR_LIST_MAX)
918 			size = XATTR_LIST_MAX;
919 		klist = kvmalloc(size, GFP_KERNEL);
920 		if (!klist)
921 			return -ENOMEM;
922 	}
923 
924 	error = vfs_listxattr(d, klist, size);
925 	if (error > 0) {
926 		if (size && copy_to_user(list, klist, error))
927 			error = -EFAULT;
928 	} else if (error == -ERANGE && size >= XATTR_LIST_MAX) {
929 		/* The file system tried to returned a list bigger
930 		   than XATTR_LIST_MAX bytes. Not possible. */
931 		error = -E2BIG;
932 	}
933 
934 	kvfree(klist);
935 
936 	return error;
937 }
938 
939 static
940 ssize_t file_listxattr(struct file *f, char __user *list, size_t size)
941 {
942 	audit_file(f);
943 	return listxattr(f->f_path.dentry, list, size);
944 }
945 
946 /* unconditionally consumes filename */
947 static
948 ssize_t filename_listxattr(int dfd, struct filename *filename,
949 			   unsigned int lookup_flags,
950 			   char __user *list, size_t size)
951 {
952 	struct path path;
953 	ssize_t error;
954 retry:
955 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
956 	if (error)
957 		goto out;
958 	error = listxattr(path.dentry, list, size);
959 	path_put(&path);
960 	if (retry_estale(error, lookup_flags)) {
961 		lookup_flags |= LOOKUP_REVAL;
962 		goto retry;
963 	}
964 out:
965 	putname(filename);
966 	return error;
967 }
968 
969 static ssize_t path_listxattrat(int dfd, const char __user *pathname,
970 				unsigned int at_flags, char __user *list,
971 				size_t size)
972 {
973 	struct filename *filename;
974 	int lookup_flags;
975 
976 	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
977 		return -EINVAL;
978 
979 	filename = getname_maybe_null(pathname, at_flags);
980 	if (!filename) {
981 		CLASS(fd, f)(dfd);
982 		if (fd_empty(f))
983 			return -EBADF;
984 		return file_listxattr(fd_file(f), list, size);
985 	}
986 
987 	lookup_flags = (at_flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
988 	return filename_listxattr(dfd, filename, lookup_flags, list, size);
989 }
990 
991 SYSCALL_DEFINE5(listxattrat, int, dfd, const char __user *, pathname,
992 		unsigned int, at_flags,
993 		char __user *, list, size_t, size)
994 {
995 	return path_listxattrat(dfd, pathname, at_flags, list, size);
996 }
997 
998 SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list,
999 		size_t, size)
1000 {
1001 	return path_listxattrat(AT_FDCWD, pathname, 0, list, size);
1002 }
1003 
1004 SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
1005 		size_t, size)
1006 {
1007 	return path_listxattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, list, size);
1008 }
1009 
1010 SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
1011 {
1012 	return path_listxattrat(fd, NULL, AT_EMPTY_PATH, list, size);
1013 }
1014 
1015 /*
1016  * Extended attribute REMOVE operations
1017  */
/*
 * Remove the attribute @name from @d.  POSIX ACL names are routed to
 * vfs_remove_acl(), everything else to vfs_removexattr().
 */
static long
removexattr(struct mnt_idmap *idmap, struct dentry *d, const char *name)
{
	if (!is_posix_acl_xattr(name))
		return vfs_removexattr(idmap, d, name);

	return vfs_remove_acl(idmap, d, name);
}
1025 
1026 static int file_removexattr(struct file *f, struct xattr_name *kname)
1027 {
1028 	int error = mnt_want_write_file(f);
1029 
1030 	if (!error) {
1031 		audit_file(f);
1032 		error = removexattr(file_mnt_idmap(f),
1033 				    f->f_path.dentry, kname->name);
1034 		mnt_drop_write_file(f);
1035 	}
1036 	return error;
1037 }
1038 
1039 /* unconditionally consumes filename */
1040 static int filename_removexattr(int dfd, struct filename *filename,
1041 				unsigned int lookup_flags, struct xattr_name *kname)
1042 {
1043 	struct path path;
1044 	int error;
1045 
1046 retry:
1047 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
1048 	if (error)
1049 		goto out;
1050 	error = mnt_want_write(path.mnt);
1051 	if (!error) {
1052 		error = removexattr(mnt_idmap(path.mnt), path.dentry, kname->name);
1053 		mnt_drop_write(path.mnt);
1054 	}
1055 	path_put(&path);
1056 	if (retry_estale(error, lookup_flags)) {
1057 		lookup_flags |= LOOKUP_REVAL;
1058 		goto retry;
1059 	}
1060 out:
1061 	putname(filename);
1062 	return error;
1063 }
1064 
1065 static int path_removexattrat(int dfd, const char __user *pathname,
1066 			      unsigned int at_flags, const char __user *name)
1067 {
1068 	struct xattr_name kname;
1069 	struct filename *filename;
1070 	unsigned int lookup_flags;
1071 	int error;
1072 
1073 	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
1074 		return -EINVAL;
1075 
1076 	error = import_xattr_name(&kname, name);
1077 	if (error)
1078 		return error;
1079 
1080 	filename = getname_maybe_null(pathname, at_flags);
1081 	if (!filename) {
1082 		CLASS(fd, f)(dfd);
1083 		if (fd_empty(f))
1084 			return -EBADF;
1085 		return file_removexattr(fd_file(f), &kname);
1086 	}
1087 	lookup_flags = (at_flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
1088 	return filename_removexattr(dfd, filename, lookup_flags, &kname);
1089 }
1090 
1091 SYSCALL_DEFINE4(removexattrat, int, dfd, const char __user *, pathname,
1092 		unsigned int, at_flags, const char __user *, name)
1093 {
1094 	return path_removexattrat(dfd, pathname, at_flags, name);
1095 }
1096 
1097 SYSCALL_DEFINE2(removexattr, const char __user *, pathname,
1098 		const char __user *, name)
1099 {
1100 	return path_removexattrat(AT_FDCWD, pathname, 0, name);
1101 }
1102 
1103 SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
1104 		const char __user *, name)
1105 {
1106 	return path_removexattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, name);
1107 }
1108 
1109 SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
1110 {
1111 	return path_removexattrat(fd, NULL, AT_EMPTY_PATH, name);
1112 }
1113 
/**
 * xattr_list_one - append one xattr name to a listxattr-style buffer
 * @buffer: in/out cursor into the output buffer; *@buffer may be NULL
 * @remaining_size: in/out number of bytes still available at *@buffer
 * @name: NUL-terminated attribute name to append
 *
 * The name is stored including its terminating NUL and the cursor is moved
 * past it. When *@buffer is NULL nothing is copied and no bounds check is
 * made; *@remaining_size is still reduced (and may become negative) so that
 * the caller can work out how much space would have been needed.
 *
 * Return: 0 on success, -ERANGE if *@buffer is set and the name does not fit.
 * On -ERANGE neither *@buffer nor *@remaining_size is modified.
 */
int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name)
{
	size_t len = strlen(name) + 1;

	if (*buffer) {
		/*
		 * Reject a negative budget explicitly: comparing it with the
		 * unsigned @len directly would convert it to a huge positive
		 * value and let the copy run past the end of the buffer.
		 */
		if (*remaining_size < 0 || (size_t)*remaining_size < len)
			return -ERANGE;
		memcpy(*buffer, name, len);
		*buffer += len;
	}
	*remaining_size -= len;
	return 0;
}
1128 
1129 /**
1130  * generic_listxattr - run through a dentry's xattr list() operations
1131  * @dentry: dentry to list the xattrs
1132  * @buffer: result buffer
1133  * @buffer_size: size of @buffer
1134  *
1135  * Combine the results of the list() operation from every xattr_handler in the
1136  * xattr_handler stack.
1137  *
1138  * Note that this will not include the entries for POSIX ACLs.
1139  */
1140 ssize_t
1141 generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
1142 {
1143 	const struct xattr_handler *handler, * const *handlers = dentry->d_sb->s_xattr;
1144 	ssize_t remaining_size = buffer_size;
1145 
1146 	for_each_xattr_handler(handlers, handler) {
1147 		int err;
1148 
1149 		if (!handler->name || (handler->list && !handler->list(dentry)))
1150 			continue;
1151 		err = xattr_list_one(&buffer, &remaining_size, handler->name);
1152 		if (err)
1153 			return err;
1154 	}
1155 
1156 	return buffer_size - remaining_size;
1157 }
1158 EXPORT_SYMBOL(generic_listxattr);
1159 
/**
 * xattr_full_name  -  Compute full attribute name from suffix
 *
 * @handler:	handler of the xattr_handler operation
 * @name:	name passed to the xattr_handler operation
 *
 * The get and set xattr handler operations receive only the part of the
 * attribute name that follows the handler's prefix: a handler with prefix
 * "user." is passed "foo" when asked for "user.foo".  The prefix is still
 * stored directly in front of @name, so stepping the pointer back by the
 * prefix length yields the full name.
 *
 * Note: the list xattr handler operation when called from the vfs is passed a
 * NULL name; some file systems use this operation internally, with varying
 * semantics.
 */
const char *xattr_full_name(const struct xattr_handler *handler,
			    const char *name)
{
	return name - strlen(xattr_prefix(handler));
}
EXPORT_SYMBOL(xattr_full_name);
1183 
/**
 * simple_xattr_space - estimate the memory used by a simple xattr
 * @name: the full name of the xattr
 * @size: the size of its value
 *
 * The estimate deliberately ignores how much larger the two slab objects
 * really are: that depends on the slab implementation, whereas what is needed
 * is a deterministic number that grows with name length, value size and
 * quantity.
 *
 * Return: The approximate number of bytes of memory used by such an xattr.
 */
size_t simple_xattr_space(const char *name, size_t size)
{
	/*
	 * A fixed "40" stands in for sizeof(struct simple_xattr) so that the
	 * result is the same on 32-bit and 64-bit, and stays the same even if
	 * simple_xattr grows.
	 */
	const size_t fixed_overhead = 40;
	size_t name_len = strlen(name);

	return fixed_overhead + size + name_len;
}
1203 
1204 /**
1205  * simple_xattr_free - free an xattr object
1206  * @xattr: the xattr object
1207  *
1208  * Free the xattr object. Can handle @xattr being NULL.
1209  */
1210 void simple_xattr_free(struct simple_xattr *xattr)
1211 {
1212 	if (xattr)
1213 		kfree(xattr->name);
1214 	kvfree(xattr);
1215 }
1216 
1217 /**
1218  * simple_xattr_alloc - allocate new xattr object
1219  * @value: value of the xattr object
1220  * @size: size of @value
1221  *
1222  * Allocate a new xattr object and initialize respective members. The caller is
1223  * responsible for handling the name of the xattr.
1224  *
1225  * Return: On success a new xattr object is returned. On failure NULL is
1226  * returned.
1227  */
1228 struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
1229 {
1230 	struct simple_xattr *new_xattr;
1231 	size_t len;
1232 
1233 	/* wrap around? */
1234 	len = sizeof(*new_xattr) + size;
1235 	if (len < sizeof(*new_xattr))
1236 		return NULL;
1237 
1238 	new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT);
1239 	if (!new_xattr)
1240 		return NULL;
1241 
1242 	new_xattr->size = size;
1243 	memcpy(new_xattr->value, value, size);
1244 	return new_xattr;
1245 }
1246 
1247 /**
1248  * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry
1249  * @key: xattr name
1250  * @node: current node
1251  *
1252  * Compare the xattr name with the xattr name attached to @node in the rbtree.
1253  *
1254  * Return: Negative value if continuing left, positive if continuing right, 0
1255  * if the xattr attached to @node matches @key.
1256  */
1257 static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node)
1258 {
1259 	const char *xattr_name = key;
1260 	const struct simple_xattr *xattr;
1261 
1262 	xattr = rb_entry(node, struct simple_xattr, rb_node);
1263 	return strcmp(xattr->name, xattr_name);
1264 }
1265 
1266 /**
1267  * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes
1268  * @new_node: new node
1269  * @node: current node
1270  *
1271  * Compare the xattr attached to @new_node with the xattr attached to @node.
1272  *
1273  * Return: Negative value if continuing left, positive if continuing right, 0
1274  * if the xattr attached to @new_node matches the xattr attached to @node.
1275  */
1276 static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
1277 					const struct rb_node *node)
1278 {
1279 	struct simple_xattr *xattr;
1280 	xattr = rb_entry(new_node, struct simple_xattr, rb_node);
1281 	return rbtree_simple_xattr_cmp(xattr->name, node);
1282 }
1283 
1284 /**
1285  * simple_xattr_get - get an xattr object
1286  * @xattrs: the header of the xattr object
1287  * @name: the name of the xattr to retrieve
1288  * @buffer: the buffer to store the value into
1289  * @size: the size of @buffer
1290  *
1291  * Try to find and retrieve the xattr object associated with @name.
1292  * If @buffer is provided store the value of @xattr in @buffer
1293  * otherwise just return the length. The size of @buffer is limited
1294  * to XATTR_SIZE_MAX which currently is 65536.
1295  *
1296  * Return: On success the length of the xattr value is returned. On error a
1297  * negative error code is returned.
1298  */
1299 int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
1300 		     void *buffer, size_t size)
1301 {
1302 	struct simple_xattr *xattr = NULL;
1303 	struct rb_node *rbp;
1304 	int ret = -ENODATA;
1305 
1306 	read_lock(&xattrs->lock);
1307 	rbp = rb_find(name, &xattrs->rb_root, rbtree_simple_xattr_cmp);
1308 	if (rbp) {
1309 		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
1310 		ret = xattr->size;
1311 		if (buffer) {
1312 			if (size < xattr->size)
1313 				ret = -ERANGE;
1314 			else
1315 				memcpy(buffer, xattr->value, xattr->size);
1316 		}
1317 	}
1318 	read_unlock(&xattrs->lock);
1319 	return ret;
1320 }
1321 
1322 /**
1323  * simple_xattr_set - set an xattr object
1324  * @xattrs: the header of the xattr object
1325  * @name: the name of the xattr to retrieve
1326  * @value: the value to store along the xattr
1327  * @size: the size of @value
1328  * @flags: the flags determining how to set the xattr
1329  *
1330  * Set a new xattr object.
1331  * If @value is passed a new xattr object will be allocated. If XATTR_REPLACE
1332  * is specified in @flags a matching xattr object for @name must already exist.
1333  * If it does it will be replaced with the new xattr object. If it doesn't we
1334  * fail. If XATTR_CREATE is specified and a matching xattr does already exist
1335  * we fail. If it doesn't we create a new xattr. If @flags is zero we simply
1336  * insert the new xattr replacing any existing one.
1337  *
1338  * If @value is empty and a matching xattr object is found we delete it if
1339  * XATTR_REPLACE is specified in @flags or @flags is zero.
1340  *
1341  * If @value is empty and no matching xattr object for @name is found we do
1342  * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
1343  * XATTR_REPLACE we fail as mentioned above.
1344  *
1345  * Return: On success, the removed or replaced xattr is returned, to be freed
1346  * by the caller; or NULL if none. On failure a negative error code is returned.
1347  */
1348 struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
1349 				      const char *name, const void *value,
1350 				      size_t size, int flags)
1351 {
1352 	struct simple_xattr *old_xattr = NULL, *new_xattr = NULL;
1353 	struct rb_node *parent = NULL, **rbp;
1354 	int err = 0, ret;
1355 
1356 	/* value == NULL means remove */
1357 	if (value) {
1358 		new_xattr = simple_xattr_alloc(value, size);
1359 		if (!new_xattr)
1360 			return ERR_PTR(-ENOMEM);
1361 
1362 		new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
1363 		if (!new_xattr->name) {
1364 			simple_xattr_free(new_xattr);
1365 			return ERR_PTR(-ENOMEM);
1366 		}
1367 	}
1368 
1369 	write_lock(&xattrs->lock);
1370 	rbp = &xattrs->rb_root.rb_node;
1371 	while (*rbp) {
1372 		parent = *rbp;
1373 		ret = rbtree_simple_xattr_cmp(name, *rbp);
1374 		if (ret < 0)
1375 			rbp = &(*rbp)->rb_left;
1376 		else if (ret > 0)
1377 			rbp = &(*rbp)->rb_right;
1378 		else
1379 			old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
1380 		if (old_xattr)
1381 			break;
1382 	}
1383 
1384 	if (old_xattr) {
1385 		/* Fail if XATTR_CREATE is requested and the xattr exists. */
1386 		if (flags & XATTR_CREATE) {
1387 			err = -EEXIST;
1388 			goto out_unlock;
1389 		}
1390 
1391 		if (new_xattr)
1392 			rb_replace_node(&old_xattr->rb_node,
1393 					&new_xattr->rb_node, &xattrs->rb_root);
1394 		else
1395 			rb_erase(&old_xattr->rb_node, &xattrs->rb_root);
1396 	} else {
1397 		/* Fail if XATTR_REPLACE is requested but no xattr is found. */
1398 		if (flags & XATTR_REPLACE) {
1399 			err = -ENODATA;
1400 			goto out_unlock;
1401 		}
1402 
1403 		/*
1404 		 * If XATTR_CREATE or no flags are specified together with a
1405 		 * new value simply insert it.
1406 		 */
1407 		if (new_xattr) {
1408 			rb_link_node(&new_xattr->rb_node, parent, rbp);
1409 			rb_insert_color(&new_xattr->rb_node, &xattrs->rb_root);
1410 		}
1411 
1412 		/*
1413 		 * If XATTR_CREATE or no flags are specified and neither an
1414 		 * old or new xattr exist then we don't need to do anything.
1415 		 */
1416 	}
1417 
1418 out_unlock:
1419 	write_unlock(&xattrs->lock);
1420 	if (!err)
1421 		return old_xattr;
1422 	simple_xattr_free(new_xattr);
1423 	return ERR_PTR(err);
1424 }
1425 
1426 static bool xattr_is_trusted(const char *name)
1427 {
1428 	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
1429 }
1430 
1431 /**
1432  * simple_xattr_list - list all xattr objects
1433  * @inode: inode from which to get the xattrs
1434  * @xattrs: the header of the xattr object
1435  * @buffer: the buffer to store all xattrs into
1436  * @size: the size of @buffer
1437  *
1438  * List all xattrs associated with @inode. If @buffer is NULL we returned
1439  * the required size of the buffer. If @buffer is provided we store the
1440  * xattrs value into it provided it is big enough.
1441  *
1442  * Note, the number of xattr names that can be listed with listxattr(2) is
1443  * limited to XATTR_LIST_MAX aka 65536 bytes. If a larger buffer is passed
1444  * then vfs_listxattr() caps it to XATTR_LIST_MAX and if more xattr names
1445  * are found it will return -E2BIG.
1446  *
1447  * Return: On success the required size or the size of the copied xattrs is
1448  * returned. On error a negative error code is returned.
1449  */
1450 ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
1451 			  char *buffer, size_t size)
1452 {
1453 	bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
1454 	struct simple_xattr *xattr;
1455 	struct rb_node *rbp;
1456 	ssize_t remaining_size = size;
1457 	int err = 0;
1458 
1459 	err = posix_acl_listxattr(inode, &buffer, &remaining_size);
1460 	if (err)
1461 		return err;
1462 
1463 	read_lock(&xattrs->lock);
1464 	for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) {
1465 		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
1466 
1467 		/* skip "trusted." attributes for unprivileged callers */
1468 		if (!trusted && xattr_is_trusted(xattr->name))
1469 			continue;
1470 
1471 		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
1472 		if (err)
1473 			break;
1474 	}
1475 	read_unlock(&xattrs->lock);
1476 
1477 	return err ? err : size - remaining_size;
1478 }
1479 
/**
 * rbtree_simple_xattr_less - ordering predicate for inserting xattr nodes
 * @new_node: new node
 * @node: current node
 *
 * Compare the xattr attached to @new_node with the xattr attached to @node.
 * Note that this function technically tolerates duplicate entries.
 *
 * Return: True if rbtree_simple_xattr_node_cmp() yields a negative value for
 * the pair, false otherwise.
 */
static bool rbtree_simple_xattr_less(struct rb_node *new_node,
				     const struct rb_node *node)
{
	int cmp = rbtree_simple_xattr_node_cmp(new_node, node);

	return cmp < 0;
}
1495 
1496 /**
1497  * simple_xattr_add - add xattr objects
1498  * @xattrs: the header of the xattr object
1499  * @new_xattr: the xattr object to add
1500  *
1501  * Add an xattr object to @xattrs. This assumes no replacement or removal
1502  * of matching xattrs is wanted. Should only be called during inode
1503  * initialization when a few distinct initial xattrs are supposed to be set.
1504  */
1505 void simple_xattr_add(struct simple_xattrs *xattrs,
1506 		      struct simple_xattr *new_xattr)
1507 {
1508 	write_lock(&xattrs->lock);
1509 	rb_add(&new_xattr->rb_node, &xattrs->rb_root, rbtree_simple_xattr_less);
1510 	write_unlock(&xattrs->lock);
1511 }
1512 
1513 /**
1514  * simple_xattrs_init - initialize new xattr header
1515  * @xattrs: header to initialize
1516  *
1517  * Initialize relevant fields of a an xattr header.
1518  */
1519 void simple_xattrs_init(struct simple_xattrs *xattrs)
1520 {
1521 	xattrs->rb_root = RB_ROOT;
1522 	rwlock_init(&xattrs->lock);
1523 }
1524 
1525 /**
1526  * simple_xattrs_free - free xattrs
1527  * @xattrs: xattr header whose xattrs to destroy
1528  * @freed_space: approximate number of bytes of memory freed from @xattrs
1529  *
1530  * Destroy all xattrs in @xattr. When this is called no one can hold a
1531  * reference to any of the xattrs anymore.
1532  */
1533 void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
1534 {
1535 	struct rb_node *rbp;
1536 
1537 	if (freed_space)
1538 		*freed_space = 0;
1539 	rbp = rb_first(&xattrs->rb_root);
1540 	while (rbp) {
1541 		struct simple_xattr *xattr;
1542 		struct rb_node *rbp_next;
1543 
1544 		rbp_next = rb_next(rbp);
1545 		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
1546 		rb_erase(&xattr->rb_node, &xattrs->rb_root);
1547 		if (freed_space)
1548 			*freed_space += simple_xattr_space(xattr->name,
1549 							   xattr->size);
1550 		simple_xattr_free(xattr);
1551 		rbp = rbp_next;
1552 	}
1553 }
1554