xref: /linux/fs/xattr.c (revision 056a5087d87ead77dedbe9cf5bde53b7cd4b4651)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3   File: fs/xattr.c
4 
5   Extended attribute handling.
6 
7   Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
8   Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
9   Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/slab.h>
14 #include <linux/file.h>
15 #include <linux/xattr.h>
16 #include <linux/mount.h>
17 #include <linux/namei.h>
18 #include <linux/security.h>
19 #include <linux/syscalls.h>
20 #include <linux/export.h>
21 #include <linux/fsnotify.h>
22 #include <linux/audit.h>
23 #include <linux/vmalloc.h>
24 #include <linux/posix_acl_xattr.h>
25 #include <linux/rhashtable.h>
26 
27 #include <linux/uaccess.h>
28 
29 #include "internal.h"
30 
/*
 * Two-part key: a list head pointer (@parent) paired with a name string.
 *
 * NOTE(review): no user of this struct is visible in this part of the file;
 * presumably it is a hash-table lookup key (<linux/rhashtable.h> is included
 * above) -- confirm against the code that uses it.
 */
struct sx_key {
	const struct list_head *parent;
	const char *name;
};
35 
/*
 * Check whether @a starts with @a_prefix.
 *
 * Returns a pointer to the first character of @a after the prefix when the
 * whole of @a_prefix matches (for an empty prefix this is @a itself), or
 * NULL when the strings differ before @a_prefix ends.
 */
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
	for (; *a_prefix; a++, a_prefix++) {
		if (*a != *a_prefix)
			return NULL;
	}
	return a;
}
45 
/*
 * In order to implement different sets of xattr operations for each xattr
 * prefix, a filesystem should create a null-terminated array of struct
 * xattr_handler (one for each prefix) and hang a pointer to it off of the
 * s_xattr field of the superblock.
 *
 * for_each_xattr_handler() walks such an array: @handler is set to each
 * entry in turn until the NULL terminator is read.  @handlers is used as the
 * cursor and is advanced past every entry it reads (including the
 * terminator), so pass a local copy of the table pointer.  A NULL @handlers
 * yields no iterations.
 *
 * The "if (!(...)) {} else for" form keeps an "else" that a caller writes
 * after the loop from binding to the macro's internal NULL check.
 */
#define for_each_xattr_handler(handlers, handler)		\
	if (!(handlers)) {} else				\
		for ((handler) = *(handlers)++;			\
			(handler) != NULL;			\
			(handler) = *(handlers)++)
57 
/*
 * Find the xattr_handler with the matching prefix.
 *
 * @inode: inode whose superblock handler table (i_sb->s_xattr) is searched
 * @name:  in/out; on success *name is advanced past the matched prefix
 *
 * Returns the matching handler, or an ERR_PTR():
 *   -EIO        the inode lacks IOP_XATTR and is a bad inode
 *   -EOPNOTSUPP the inode lacks IOP_XATTR, or no handler matched
 *   -EINVAL     a handler with ->prefix matched but nothing follows the prefix
 */
static const struct xattr_handler *
xattr_resolve_name(struct inode *inode, const char **name)
{
	const struct xattr_handler * const *handlers = inode->i_sb->s_xattr;
	const struct xattr_handler *handler;

	if (!(inode->i_opflags & IOP_XATTR)) {
		if (unlikely(is_bad_inode(inode)))
			return ERR_PTR(-EIO);
		return ERR_PTR(-EOPNOTSUPP);
	}
	for_each_xattr_handler(handlers, handler) {
		const char *n;

		/* n points just past the handler's string if *name starts with it. */
		n = strcmp_prefix(*name, xattr_prefix(handler));
		if (n) {
			/*
			 * A handler without ->prefix must consume the whole
			 * name; a handler with ->prefix must leave a non-empty
			 * remainder.  The XOR is true when that does not hold.
			 */
			if (!handler->prefix ^ !*n) {
				/* No ->prefix but text is left over: try the next handler. */
				if (*n)
					continue;
				/* ->prefix matched but the name ends right after it. */
				return ERR_PTR(-EINVAL);
			}
			*name = n;
			return handler;
		}
	}
	return ERR_PTR(-EOPNOTSUPP);
}
88 
89 /**
90  * may_write_xattr - check whether inode allows writing xattr
91  * @idmap: idmap of the mount the inode was found from
92  * @inode: the inode on which to set an xattr
93  *
94  * Check whether the inode allows writing xattrs. Specifically, we can never
95  * set or remove an extended attribute on a read-only filesystem  or on an
96  * immutable / append-only inode.
97  *
98  * We also need to ensure that the inode has a mapping in the mount to
99  * not risk writing back invalid i_{g,u}id values.
100  *
101  * Return: On success zero is returned. On error a negative errno is returned.
102  */
103 int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode)
104 {
105 	if (IS_IMMUTABLE(inode))
106 		return -EPERM;
107 	if (IS_APPEND(inode))
108 		return -EPERM;
109 	if (HAS_UNMAPPED_ID(idmap, inode))
110 		return -EPERM;
111 	return 0;
112 }
113 
114 static inline int xattr_permission_error(int mask)
115 {
116 	if (mask & MAY_WRITE)
117 		return -EPERM;
118 	return -ENODATA;
119 }
120 
121 /*
122  * Check permissions for extended attribute access.  This is a bit complicated
123  * because different namespaces have very different rules.
124  */
125 static int
126 xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
127 		 const char *name, int mask)
128 {
129 	if (mask & MAY_WRITE) {
130 		int ret;
131 
132 		ret = may_write_xattr(idmap, inode);
133 		if (ret)
134 			return ret;
135 	}
136 
137 	/*
138 	 * No restriction for security.* and system.* from the VFS.  Decision
139 	 * on these is left to the underlying filesystem / security module.
140 	 */
141 	if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
142 	    !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
143 		return 0;
144 
145 	/*
146 	 * The trusted.* namespace can only be accessed by privileged users.
147 	 */
148 	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
149 		if (!capable(CAP_SYS_ADMIN))
150 			return xattr_permission_error(mask);
151 		return 0;
152 	}
153 
154 	/*
155 	 * In the user.* namespace, only regular files and directories can have
156 	 * extended attributes. For sticky directories, only the owner and
157 	 * privileged users can write attributes.
158 	 */
159 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
160 		switch (inode->i_mode & S_IFMT) {
161 		case S_IFREG:
162 			break;
163 		case S_IFDIR:
164 			if (!(inode->i_mode & S_ISVTX))
165 				break;
166 			if (!(mask & MAY_WRITE))
167 				break;
168 			if (inode_owner_or_capable(idmap, inode))
169 				break;
170 			return -EPERM;
171 		case S_IFSOCK:
172 			break;
173 		default:
174 			return xattr_permission_error(mask);
175 		}
176 	}
177 
178 	return inode_permission(idmap, inode, mask);
179 }
180 
181 /*
182  * Look for any handler that deals with the specified namespace.
183  */
184 int
185 xattr_supports_user_prefix(struct inode *inode)
186 {
187 	const struct xattr_handler * const *handlers = inode->i_sb->s_xattr;
188 	const struct xattr_handler *handler;
189 
190 	if (!(inode->i_opflags & IOP_XATTR)) {
191 		if (unlikely(is_bad_inode(inode)))
192 			return -EIO;
193 		return -EOPNOTSUPP;
194 	}
195 
196 	for_each_xattr_handler(handlers, handler) {
197 		if (!strncmp(xattr_prefix(handler), XATTR_USER_PREFIX,
198 			     XATTR_USER_PREFIX_LEN))
199 			return 0;
200 	}
201 
202 	return -EOPNOTSUPP;
203 }
204 EXPORT_SYMBOL(xattr_supports_user_prefix);
205 
206 int
207 __vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
208 	       struct inode *inode, const char *name, const void *value,
209 	       size_t size, int flags)
210 {
211 	const struct xattr_handler *handler;
212 
213 	if (is_posix_acl_xattr(name))
214 		return -EOPNOTSUPP;
215 
216 	handler = xattr_resolve_name(inode, &name);
217 	if (IS_ERR(handler))
218 		return PTR_ERR(handler);
219 	if (!handler->set)
220 		return -EOPNOTSUPP;
221 	if (size == 0)
222 		value = "";  /* empty EA, do not remove */
223 	return handler->set(handler, idmap, dentry, inode, name, value,
224 			    size, flags);
225 }
226 EXPORT_SYMBOL(__vfs_setxattr);
227 
/**
 *  __vfs_setxattr_noperm - perform setxattr operation without performing
 *  permission checks.
 *
 *  @idmap: idmap of the mount the inode was found from
 *  @dentry: object to perform setxattr on
 *  @name: xattr name to set
 *  @value: value to set @name to
 *  @size: size of @value
 *  @flags: flags to pass into filesystem operations
 *
 *  returns the result of the internal setxattr or setsecurity operations.
 *
 *  This function requires the caller to lock the inode's i_rwsem before it
 *  is executed. It also assumes that the caller will make the appropriate
 *  permission checks.
 */
int __vfs_setxattr_noperm(struct mnt_idmap *idmap,
			  struct dentry *dentry, const char *name,
			  const void *value, size_t size, int flags)
{
	struct inode *inode = dentry->d_inode;
	/* -EAGAIN is the "not handled yet" marker that selects the fallback below. */
	int error = -EAGAIN;
	int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
				   XATTR_SECURITY_PREFIX_LEN);

	/* Any security.* write clears the inode's S_NOSEC flag. */
	if (issec)
		inode->i_flags &= ~S_NOSEC;
	if (inode->i_opflags & IOP_XATTR) {
		error = __vfs_setxattr(idmap, dentry, inode, name, value,
				       size, flags);
		if (!error) {
			fsnotify_xattr(dentry);
			security_inode_post_setxattr(dentry, name, value,
						     size, flags);
		}
	} else {
		if (unlikely(is_bad_inode(inode)))
			return -EIO;
	}
	/*
	 * Reached with -EAGAIN when the inode has no IOP_XATTR (and is not
	 * bad), or when __vfs_setxattr() itself returned -EAGAIN.  Only
	 * security.* names get a second chance, through the security module;
	 * everything else ends up as -EOPNOTSUPP.
	 */
	if (error == -EAGAIN) {
		error = -EOPNOTSUPP;

		if (issec) {
			const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;

			error = security_inode_setsecurity(inode, suffix, value,
							   size, flags);
			if (!error)
				fsnotify_xattr(dentry);
		}
	}

	return error;
}
283 
284 /**
285  * __vfs_setxattr_locked - set an extended attribute while holding the inode
286  * lock
287  *
288  *  @idmap: idmap of the mount of the target inode
289  *  @dentry: object to perform setxattr on
290  *  @name: xattr name to set
291  *  @value: value to set @name to
292  *  @size: size of @value
293  *  @flags: flags to pass into filesystem operations
294  *  @delegated_inode: on return, will contain an inode pointer that
295  *  a delegation was broken on, NULL if none.
296  */
297 int
298 __vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry,
299 		      const char *name, const void *value, size_t size,
300 		      int flags, struct delegated_inode *delegated_inode)
301 {
302 	struct inode *inode = dentry->d_inode;
303 	int error;
304 
305 	error = xattr_permission(idmap, inode, name, MAY_WRITE);
306 	if (error)
307 		return error;
308 
309 	error = security_inode_setxattr(idmap, dentry, name, value, size,
310 					flags);
311 	if (error)
312 		goto out;
313 
314 	error = try_break_deleg(inode, 0, delegated_inode);
315 	if (error)
316 		goto out;
317 
318 	error = __vfs_setxattr_noperm(idmap, dentry, name, value,
319 				      size, flags);
320 
321 out:
322 	return error;
323 }
324 EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
325 
326 int
327 vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
328 	     const char *name, const void *value, size_t size, int flags)
329 {
330 	struct inode *inode = dentry->d_inode;
331 	struct delegated_inode delegated_inode = { };
332 	const void  *orig_value = value;
333 	int error;
334 
335 	if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
336 		error = cap_convert_nscap(idmap, dentry, &value, size);
337 		if (error < 0)
338 			return error;
339 		size = error;
340 	}
341 
342 retry_deleg:
343 	inode_lock(inode);
344 	error = __vfs_setxattr_locked(idmap, dentry, name, value, size,
345 				      flags, &delegated_inode);
346 	inode_unlock(inode);
347 
348 	if (is_delegated(&delegated_inode)) {
349 		error = break_deleg_wait(&delegated_inode);
350 		if (!error)
351 			goto retry_deleg;
352 	}
353 	if (value != orig_value)
354 		kfree(value);
355 
356 	return error;
357 }
358 EXPORT_SYMBOL_GPL(vfs_setxattr);
359 
360 static ssize_t
361 xattr_getsecurity(struct mnt_idmap *idmap, struct inode *inode,
362 		  const char *name, void *value, size_t size)
363 {
364 	void *buffer = NULL;
365 	ssize_t len;
366 
367 	if (!value || !size) {
368 		len = security_inode_getsecurity(idmap, inode, name,
369 						 &buffer, false);
370 		goto out_noalloc;
371 	}
372 
373 	len = security_inode_getsecurity(idmap, inode, name, &buffer,
374 					 true);
375 	if (len < 0)
376 		return len;
377 	if (size < len) {
378 		len = -ERANGE;
379 		goto out;
380 	}
381 	memcpy(value, buffer, len);
382 out:
383 	kfree(buffer);
384 out_noalloc:
385 	return len;
386 }
387 
/*
 * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr
 *
 * @idmap:       idmap passed to the read-permission check
 * @dentry:      object to read the xattr from
 * @name:        full xattr name
 * @xattr_value: in/out buffer pointer; may be NULL on entry and is updated
 *               to the buffer that was used once the value has been fetched
 * @xattr_size:  size of the buffer passed in through @xattr_value
 * @flags:       gfp flags for the (re)allocation
 *
 * Allocate memory, if not already allocated, or re-allocate correct size,
 * before retrieving the extended attribute.  The xattr value buffer should
 * always be freed by the caller, even on error.
 *
 * Returns the result of alloc, if failed, or the getxattr operation.
 */
int
vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry,
		   const char *name, char **xattr_value, size_t xattr_size,
		   gfp_t flags)
{
	const struct xattr_handler *handler;
	struct inode *inode = dentry->d_inode;
	char *value = *xattr_value;
	int error;

	error = xattr_permission(idmap, inode, name, MAY_READ);
	if (error)
		return error;

	handler = xattr_resolve_name(inode, &name);
	if (IS_ERR(handler))
		return PTR_ERR(handler);
	if (!handler->get)
		return -EOPNOTSUPP;
	/* First call with no buffer: a non-negative result is used as the value size. */
	error = handler->get(handler, dentry, inode, name, NULL, 0);
	if (error < 0)
		return error;

	/* Grow (or create) the buffer when there is none or it is too small. */
	if (!value || (error > xattr_size)) {
		/* One extra byte is allocated and the whole buffer is zeroed. */
		value = krealloc(*xattr_value, error + 1, flags);
		if (!value)
			return -ENOMEM;
		memset(value, 0, error + 1);
	}

	/* Second call fetches the value itself into the buffer. */
	error = handler->get(handler, dentry, inode, name, value, error);
	*xattr_value = value;
	return error;
}
431 
432 ssize_t
433 __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
434 	       void *value, size_t size)
435 {
436 	const struct xattr_handler *handler;
437 
438 	if (is_posix_acl_xattr(name))
439 		return -EOPNOTSUPP;
440 
441 	handler = xattr_resolve_name(inode, &name);
442 	if (IS_ERR(handler))
443 		return PTR_ERR(handler);
444 	if (!handler->get)
445 		return -EOPNOTSUPP;
446 	return handler->get(handler, dentry, inode, name, value, size);
447 }
448 EXPORT_SYMBOL(__vfs_getxattr);
449 
450 ssize_t
451 vfs_getxattr(struct mnt_idmap *idmap, struct dentry *dentry,
452 	     const char *name, void *value, size_t size)
453 {
454 	struct inode *inode = dentry->d_inode;
455 	int error;
456 
457 	error = xattr_permission(idmap, inode, name, MAY_READ);
458 	if (error)
459 		return error;
460 
461 	error = security_inode_getxattr(dentry, name);
462 	if (error)
463 		return error;
464 
465 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
466 				XATTR_SECURITY_PREFIX_LEN)) {
467 		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
468 		int ret = xattr_getsecurity(idmap, inode, suffix, value,
469 					    size);
470 		/*
471 		 * Only overwrite the return value if a security module
472 		 * is actually active.
473 		 */
474 		if (ret == -EOPNOTSUPP)
475 			goto nolsm;
476 		return ret;
477 	}
478 nolsm:
479 	return __vfs_getxattr(dentry, inode, name, value, size);
480 }
481 EXPORT_SYMBOL_GPL(vfs_getxattr);
482 
483 /**
484  * vfs_listxattr - retrieve \0 separated list of xattr names
485  * @dentry: the dentry from whose inode the xattr names are retrieved
486  * @list: buffer to store xattr names into
487  * @size: size of the buffer
488  *
489  * This function returns the names of all xattrs associated with the
490  * inode of @dentry.
491  *
492  * Note, for legacy reasons the vfs_listxattr() function lists POSIX
493  * ACLs as well. Since POSIX ACLs are decoupled from IOP_XATTR the
494  * vfs_listxattr() function doesn't check for this flag since a
495  * filesystem could implement POSIX ACLs without implementing any other
496  * xattrs.
497  *
498  * However, since all codepaths that remove IOP_XATTR also assign of
499  * inode operations that either don't implement or implement a stub
500  * ->listxattr() operation.
501  *
502  * Return: On success, the size of the buffer that was used. On error a
503  *         negative error code.
504  */
505 ssize_t
506 vfs_listxattr(struct dentry *dentry, char *list, size_t size)
507 {
508 	struct inode *inode = d_inode(dentry);
509 	ssize_t error;
510 
511 	error = security_inode_listxattr(dentry);
512 	if (error)
513 		return error;
514 
515 	if (inode->i_op->listxattr) {
516 		error = inode->i_op->listxattr(dentry, list, size);
517 	} else {
518 		error = security_inode_listsecurity(inode, list, size);
519 		if (size && error > size)
520 			error = -ERANGE;
521 	}
522 	return error;
523 }
524 EXPORT_SYMBOL_GPL(vfs_listxattr);
525 
526 int
527 __vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
528 		  const char *name)
529 {
530 	struct inode *inode = d_inode(dentry);
531 	const struct xattr_handler *handler;
532 
533 	if (is_posix_acl_xattr(name))
534 		return -EOPNOTSUPP;
535 
536 	handler = xattr_resolve_name(inode, &name);
537 	if (IS_ERR(handler))
538 		return PTR_ERR(handler);
539 	if (!handler->set)
540 		return -EOPNOTSUPP;
541 	return handler->set(handler, idmap, dentry, inode, name, NULL, 0,
542 			    XATTR_REPLACE);
543 }
544 EXPORT_SYMBOL(__vfs_removexattr);
545 
546 /**
547  * __vfs_removexattr_locked - set an extended attribute while holding the inode
548  * lock
549  *
550  *  @idmap: idmap of the mount of the target inode
551  *  @dentry: object to perform setxattr on
552  *  @name: name of xattr to remove
553  *  @delegated_inode: on return, will contain an inode pointer that
554  *  a delegation was broken on, NULL if none.
555  */
556 int
557 __vfs_removexattr_locked(struct mnt_idmap *idmap,
558 			 struct dentry *dentry, const char *name,
559 			 struct delegated_inode *delegated_inode)
560 {
561 	struct inode *inode = dentry->d_inode;
562 	int error;
563 
564 	error = xattr_permission(idmap, inode, name, MAY_WRITE);
565 	if (error)
566 		return error;
567 
568 	error = security_inode_removexattr(idmap, dentry, name);
569 	if (error)
570 		goto out;
571 
572 	error = try_break_deleg(inode, 0, delegated_inode);
573 	if (error)
574 		goto out;
575 
576 	error = __vfs_removexattr(idmap, dentry, name);
577 	if (error)
578 		return error;
579 
580 	fsnotify_xattr(dentry);
581 	security_inode_post_removexattr(dentry, name);
582 
583 out:
584 	return error;
585 }
586 EXPORT_SYMBOL_GPL(__vfs_removexattr_locked);
587 
588 int
589 vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
590 		const char *name)
591 {
592 	struct inode *inode = dentry->d_inode;
593 	struct delegated_inode delegated_inode = { };
594 	int error;
595 
596 retry_deleg:
597 	inode_lock(inode);
598 	error = __vfs_removexattr_locked(idmap, dentry,
599 					 name, &delegated_inode);
600 	inode_unlock(inode);
601 
602 	if (is_delegated(&delegated_inode)) {
603 		error = break_deleg_wait(&delegated_inode);
604 		if (!error)
605 			goto retry_deleg;
606 	}
607 
608 	return error;
609 }
610 EXPORT_SYMBOL_GPL(vfs_removexattr);
611 
612 int import_xattr_name(struct xattr_name *kname, const char __user *name)
613 {
614 	int error = strncpy_from_user(kname->name, name,
615 					sizeof(kname->name));
616 	if (error == 0 || error == sizeof(kname->name))
617 		return -ERANGE;
618 	if (error < 0)
619 		return error;
620 	return 0;
621 }
622 
623 /*
624  * Extended attribute SET operations
625  */
626 
627 int setxattr_copy(const char __user *name, struct kernel_xattr_ctx *ctx)
628 {
629 	int error;
630 
631 	if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
632 		return -EINVAL;
633 
634 	error = import_xattr_name(ctx->kname, name);
635 	if (error)
636 		return error;
637 
638 	if (ctx->size) {
639 		if (ctx->size > XATTR_SIZE_MAX)
640 			return -E2BIG;
641 
642 		ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
643 		if (IS_ERR(ctx->kvalue)) {
644 			error = PTR_ERR(ctx->kvalue);
645 			ctx->kvalue = NULL;
646 		}
647 	}
648 
649 	return error;
650 }
651 
652 static int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
653 		struct kernel_xattr_ctx *ctx)
654 {
655 	if (is_posix_acl_xattr(ctx->kname->name))
656 		return do_set_acl(idmap, dentry, ctx->kname->name,
657 				  ctx->kvalue, ctx->size);
658 
659 	return vfs_setxattr(idmap, dentry, ctx->kname->name,
660 			ctx->kvalue, ctx->size, ctx->flags);
661 }
662 
663 int file_setxattr(struct file *f, struct kernel_xattr_ctx *ctx)
664 {
665 	int error = mnt_want_write_file(f);
666 
667 	if (!error) {
668 		audit_file(f);
669 		error = do_setxattr(file_mnt_idmap(f), f->f_path.dentry, ctx);
670 		mnt_drop_write_file(f);
671 	}
672 	return error;
673 }
674 
675 int filename_setxattr(int dfd, struct filename *filename,
676 		      unsigned int lookup_flags, struct kernel_xattr_ctx *ctx)
677 {
678 	struct path path;
679 	int error;
680 
681 retry:
682 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
683 	if (error)
684 		return error;
685 	error = mnt_want_write(path.mnt);
686 	if (!error) {
687 		error = do_setxattr(mnt_idmap(path.mnt), path.dentry, ctx);
688 		mnt_drop_write(path.mnt);
689 	}
690 	path_put(&path);
691 	if (retry_estale(error, lookup_flags)) {
692 		lookup_flags |= LOOKUP_REVAL;
693 		goto retry;
694 	}
695 	return error;
696 }
697 
/*
 * Common implementation behind the setxattr family of system calls.
 *
 * @dfd:      directory file descriptor for the lookup, or the target fd
 * @pathname: user pointer to the path; may yield a NULL filename (see below)
 * @at_flags: only AT_SYMLINK_NOFOLLOW and AT_EMPTY_PATH are accepted
 * @name:     user pointer to the xattr name
 * @value:    user pointer to the value
 * @size:     size of the value
 * @flags:    XATTR_CREATE / XATTR_REPLACE flags, validated by setxattr_copy()
 *
 * Returns 0 on success or a negative errno.
 */
static int path_setxattrat(int dfd, const char __user *pathname,
			   unsigned int at_flags, const char __user *name,
			   const void __user *value, size_t size, int flags)
{
	struct xattr_name kname;
	struct kernel_xattr_ctx ctx = {
		.cvalue	= value,
		.kvalue	= NULL,
		.size	= size,
		.kname	= &kname,
		.flags	= flags,
	};
	unsigned int lookup_flags = 0;
	int error;

	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
		return -EINVAL;

	/* Symlinks are followed unless the caller asked otherwise. */
	if (!(at_flags & AT_SYMLINK_NOFOLLOW))
		lookup_flags = LOOKUP_FOLLOW;

	/* Copies name and value from user space; ctx.kvalue is freed below. */
	error = setxattr_copy(name, &ctx);
	if (error)
		return error;

	CLASS(filename_maybe_null, filename)(pathname, at_flags);
	/* No filename and a non-negative dfd: operate on the open file itself. */
	if (!filename && dfd >= 0) {
		CLASS(fd, f)(dfd);
		if (fd_empty(f))
			error = -EBADF;
		else
			error = file_setxattr(fd_file(f), &ctx);
	} else {
		error = filename_setxattr(dfd, filename, lookup_flags, &ctx);
	}
	kvfree(ctx.kvalue);
	return error;
}
736 
/*
 * setxattrat(2): set an extended attribute, taking the value pointer, size
 * and flags from a user-supplied struct xattr_args of @usize bytes.
 *
 * @usize smaller than XATTR_ARGS_SIZE_VER0 is rejected with -EINVAL and
 * @usize larger than PAGE_SIZE with -E2BIG before the struct is copied in.
 */
SYSCALL_DEFINE6(setxattrat, int, dfd, const char __user *, pathname, unsigned int, at_flags,
		const char __user *, name, const struct xattr_args __user *, uargs,
		size_t, usize)
{
	struct xattr_args args = {};
	int error;

	/* Compile-time checks on the struct size against the known versions. */
	BUILD_BUG_ON(sizeof(struct xattr_args) < XATTR_ARGS_SIZE_VER0);
	BUILD_BUG_ON(sizeof(struct xattr_args) != XATTR_ARGS_SIZE_LATEST);

	if (unlikely(usize < XATTR_ARGS_SIZE_VER0))
		return -EINVAL;
	if (usize > PAGE_SIZE)
		return -E2BIG;

	error = copy_struct_from_user(&args, sizeof(args), uargs, usize);
	if (error)
		return error;

	return path_setxattrat(dfd, pathname, at_flags, name,
			       u64_to_user_ptr(args.value), args.size,
			       args.flags);
}
760 
/*
 * setxattr(2): set an extended attribute on @pathname, resolved relative to
 * the current working directory with no at_flags (so symlinks are followed).
 */
SYSCALL_DEFINE5(setxattr, const char __user *, pathname,
		const char __user *, name, const void __user *, value,
		size_t, size, int, flags)
{
	return path_setxattrat(AT_FDCWD, pathname, 0, name, value, size, flags);
}
767 
/*
 * lsetxattr(2): like setxattr(2), but passes AT_SYMLINK_NOFOLLOW so the
 * lookup does not follow a trailing symlink.
 */
SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
		const char __user *, name, const void __user *, value,
		size_t, size, int, flags)
{
	return path_setxattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, name,
			       value, size, flags);
}
775 
/*
 * fsetxattr(2): set an extended attribute on the object referred to by
 * @fd.  Implemented as the common helper with a NULL pathname and
 * AT_EMPTY_PATH.
 */
SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
		const void __user *,value, size_t, size, int, flags)
{
	return path_setxattrat(fd, NULL, AT_EMPTY_PATH, name,
			       value, size, flags);
}
782 
783 /*
784  * Extended attribute GET operations
785  */
786 static ssize_t
787 do_getxattr(struct mnt_idmap *idmap, struct dentry *d,
788 	struct kernel_xattr_ctx *ctx)
789 {
790 	ssize_t error;
791 	char *kname = ctx->kname->name;
792 	void *kvalue = NULL;
793 
794 	if (ctx->size) {
795 		if (ctx->size > XATTR_SIZE_MAX)
796 			ctx->size = XATTR_SIZE_MAX;
797 		kvalue = kvzalloc(ctx->size, GFP_KERNEL);
798 		if (!kvalue)
799 			return -ENOMEM;
800 	}
801 
802 	if (is_posix_acl_xattr(kname))
803 		error = do_get_acl(idmap, d, kname, kvalue, ctx->size);
804 	else
805 		error = vfs_getxattr(idmap, d, kname, kvalue, ctx->size);
806 	if (error > 0) {
807 		if (ctx->size && copy_to_user(ctx->value, kvalue, error))
808 			error = -EFAULT;
809 	} else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
810 		/* The file system tried to returned a value bigger
811 		   than XATTR_SIZE_MAX bytes. Not possible. */
812 		error = -E2BIG;
813 	}
814 
815 	kvfree(kvalue);
816 	return error;
817 }
818 
819 ssize_t file_getxattr(struct file *f, struct kernel_xattr_ctx *ctx)
820 {
821 	audit_file(f);
822 	return do_getxattr(file_mnt_idmap(f), f->f_path.dentry, ctx);
823 }
824 
825 ssize_t filename_getxattr(int dfd, struct filename *filename,
826 			  unsigned int lookup_flags, struct kernel_xattr_ctx *ctx)
827 {
828 	struct path path;
829 	ssize_t error;
830 retry:
831 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
832 	if (error)
833 		return error;
834 	error = do_getxattr(mnt_idmap(path.mnt), path.dentry, ctx);
835 	path_put(&path);
836 	if (retry_estale(error, lookup_flags)) {
837 		lookup_flags |= LOOKUP_REVAL;
838 		goto retry;
839 	}
840 	return error;
841 }
842 
/*
 * Common implementation behind the getxattr family of system calls.
 *
 * @dfd:      directory file descriptor for the lookup, or the target fd
 * @pathname: user pointer to the path; may yield a NULL filename (see below)
 * @at_flags: only AT_SYMLINK_NOFOLLOW and AT_EMPTY_PATH are accepted
 * @name:     user pointer to the xattr name
 * @value:    user buffer receiving the value
 * @size:     size of @value
 *
 * Returns the result of the getxattr operation or a negative errno.
 */
static ssize_t path_getxattrat(int dfd, const char __user *pathname,
			       unsigned int at_flags, const char __user *name,
			       void __user *value, size_t size)
{
	struct xattr_name kname;
	struct kernel_xattr_ctx ctx = {
		.value    = value,
		.size     = size,
		.kname    = &kname,
		.flags    = 0,
	};
	ssize_t error;

	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
		return -EINVAL;

	error = import_xattr_name(&kname, name);
	if (error)
		return error;

	CLASS(filename_maybe_null, filename)(pathname, at_flags);
	/* No filename and a non-negative dfd: operate on the open file itself. */
	if (!filename && dfd >= 0) {
		CLASS(fd, f)(dfd);
		if (fd_empty(f))
			return -EBADF;
		return file_getxattr(fd_file(f), &ctx);
	} else {
		int lookup_flags = 0;
		/* Symlinks are followed unless the caller asked otherwise. */
		if (!(at_flags & AT_SYMLINK_NOFOLLOW))
			lookup_flags = LOOKUP_FOLLOW;
		return filename_getxattr(dfd, filename, lookup_flags, &ctx);
	}
}
876 
/*
 * getxattrat(2): read an extended attribute, taking the value pointer and
 * size from a user-supplied struct xattr_args of @usize bytes.
 *
 * @usize smaller than XATTR_ARGS_SIZE_VER0 is rejected with -EINVAL and
 * @usize larger than PAGE_SIZE with -E2BIG before the struct is copied in.
 * A non-zero args.flags is rejected with -EINVAL.
 */
SYSCALL_DEFINE6(getxattrat, int, dfd, const char __user *, pathname, unsigned int, at_flags,
		const char __user *, name, struct xattr_args __user *, uargs, size_t, usize)
{
	struct xattr_args args = {};
	int error;

	/* Compile-time checks on the struct size against the known versions. */
	BUILD_BUG_ON(sizeof(struct xattr_args) < XATTR_ARGS_SIZE_VER0);
	BUILD_BUG_ON(sizeof(struct xattr_args) != XATTR_ARGS_SIZE_LATEST);

	if (unlikely(usize < XATTR_ARGS_SIZE_VER0))
		return -EINVAL;
	if (usize > PAGE_SIZE)
		return -E2BIG;

	error = copy_struct_from_user(&args, sizeof(args), uargs, usize);
	if (error)
		return error;

	if (args.flags != 0)
		return -EINVAL;

	return path_getxattrat(dfd, pathname, at_flags, name,
			       u64_to_user_ptr(args.value), args.size);
}
901 
/*
 * getxattr(2): read an extended attribute of @pathname, resolved relative
 * to the current working directory with no at_flags (so symlinks are
 * followed).
 */
SYSCALL_DEFINE4(getxattr, const char __user *, pathname,
		const char __user *, name, void __user *, value, size_t, size)
{
	return path_getxattrat(AT_FDCWD, pathname, 0, name, value, size);
}
907 
/*
 * lgetxattr(2): like getxattr(2), but passes AT_SYMLINK_NOFOLLOW so the
 * lookup does not follow a trailing symlink.
 */
SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
		const char __user *, name, void __user *, value, size_t, size)
{
	return path_getxattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, name,
			       value, size);
}
914 
/*
 * fgetxattr(2): read an extended attribute of the object referred to by
 * @fd.  Implemented as the common helper with a NULL pathname and
 * AT_EMPTY_PATH.
 */
SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
		void __user *, value, size_t, size)
{
	return path_getxattrat(fd, NULL, AT_EMPTY_PATH, name, value, size);
}
920 
921 /*
922  * Extended attribute LIST operations
923  */
924 static ssize_t
925 listxattr(struct dentry *d, char __user *list, size_t size)
926 {
927 	ssize_t error;
928 	char *klist = NULL;
929 
930 	if (size) {
931 		if (size > XATTR_LIST_MAX)
932 			size = XATTR_LIST_MAX;
933 		klist = kvmalloc(size, GFP_KERNEL);
934 		if (!klist)
935 			return -ENOMEM;
936 	}
937 
938 	error = vfs_listxattr(d, klist, size);
939 	if (error > 0) {
940 		if (size && copy_to_user(list, klist, error))
941 			error = -EFAULT;
942 	} else if (error == -ERANGE && size >= XATTR_LIST_MAX) {
943 		/* The file system tried to returned a list bigger
944 		   than XATTR_LIST_MAX bytes. Not possible. */
945 		error = -E2BIG;
946 	}
947 
948 	kvfree(klist);
949 
950 	return error;
951 }
952 
953 static
954 ssize_t file_listxattr(struct file *f, char __user *list, size_t size)
955 {
956 	audit_file(f);
957 	return listxattr(f->f_path.dentry, list, size);
958 }
959 
960 static
961 ssize_t filename_listxattr(int dfd, struct filename *filename,
962 			   unsigned int lookup_flags,
963 			   char __user *list, size_t size)
964 {
965 	struct path path;
966 	ssize_t error;
967 retry:
968 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
969 	if (error)
970 		return error;
971 	error = listxattr(path.dentry, list, size);
972 	path_put(&path);
973 	if (retry_estale(error, lookup_flags)) {
974 		lookup_flags |= LOOKUP_REVAL;
975 		goto retry;
976 	}
977 	return error;
978 }
979 
/*
 * Common implementation behind the listxattr family of system calls.
 *
 * @dfd:      directory file descriptor for the lookup, or the target fd
 * @pathname: user pointer to the path; may yield a NULL filename (see below)
 * @at_flags: only AT_SYMLINK_NOFOLLOW and AT_EMPTY_PATH are accepted
 * @list:     user buffer receiving the names
 * @size:     size of @list
 *
 * Returns the result of the list operation or a negative errno.
 */
static ssize_t path_listxattrat(int dfd, const char __user *pathname,
				unsigned int at_flags, char __user *list,
				size_t size)
{
	int lookup_flags;

	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
		return -EINVAL;

	CLASS(filename_maybe_null, filename)(pathname, at_flags);
	/* No filename: operate on the file that @dfd refers to. */
	if (!filename) {
		CLASS(fd, f)(dfd);
		if (fd_empty(f))
			return -EBADF;
		return file_listxattr(fd_file(f), list, size);
	}

	/* Symlinks are followed unless the caller asked otherwise. */
	lookup_flags = (at_flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
	return filename_listxattr(dfd, filename, lookup_flags, list, size);
}
1000 
/*
 * listxattrat(2): list extended attribute names; all arguments are passed
 * straight through to the common helper.
 */
SYSCALL_DEFINE5(listxattrat, int, dfd, const char __user *, pathname,
		unsigned int, at_flags,
		char __user *, list, size_t, size)
{
	return path_listxattrat(dfd, pathname, at_flags, list, size);
}
1007 
/*
 * listxattr(2): list extended attribute names of @pathname, resolved
 * relative to the current working directory with no at_flags (so symlinks
 * are followed).
 */
SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list,
		size_t, size)
{
	return path_listxattrat(AT_FDCWD, pathname, 0, list, size);
}
1013 
/*
 * llistxattr(2): like listxattr(2), but passes AT_SYMLINK_NOFOLLOW so the
 * lookup does not follow a trailing symlink.
 */
SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
		size_t, size)
{
	return path_listxattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, list, size);
}
1019 
/*
 * flistxattr(2): list extended attribute names of the object referred to
 * by @fd.  Implemented as the common helper with a NULL pathname and
 * AT_EMPTY_PATH.
 */
SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
{
	return path_listxattrat(fd, NULL, AT_EMPTY_PATH, list, size);
}
1024 
1025 /*
1026  * Extended attribute REMOVE operations
1027  */
/*
 * Dispatch a removal: POSIX ACL names go to vfs_remove_acl(), every other
 * name goes to vfs_removexattr().
 */
static long
removexattr(struct mnt_idmap *idmap, struct dentry *d, const char *name)
{
	if (!is_posix_acl_xattr(name))
		return vfs_removexattr(idmap, d, name);

	return vfs_remove_acl(idmap, d, name);
}
1035 
1036 static int file_removexattr(struct file *f, struct xattr_name *kname)
1037 {
1038 	int error = mnt_want_write_file(f);
1039 
1040 	if (!error) {
1041 		audit_file(f);
1042 		error = removexattr(file_mnt_idmap(f),
1043 				    f->f_path.dentry, kname->name);
1044 		mnt_drop_write_file(f);
1045 	}
1046 	return error;
1047 }
1048 
1049 static int filename_removexattr(int dfd, struct filename *filename,
1050 				unsigned int lookup_flags, struct xattr_name *kname)
1051 {
1052 	struct path path;
1053 	int error;
1054 
1055 retry:
1056 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
1057 	if (error)
1058 		return error;
1059 	error = mnt_want_write(path.mnt);
1060 	if (!error) {
1061 		error = removexattr(mnt_idmap(path.mnt), path.dentry, kname->name);
1062 		mnt_drop_write(path.mnt);
1063 	}
1064 	path_put(&path);
1065 	if (retry_estale(error, lookup_flags)) {
1066 		lookup_flags |= LOOKUP_REVAL;
1067 		goto retry;
1068 	}
1069 	return error;
1070 }
1071 
/*
 * Common implementation behind the removexattr family of system calls.
 *
 * @dfd:      directory file descriptor for the lookup, or the target fd
 * @pathname: user pointer to the path; may yield a NULL filename (see below)
 * @at_flags: only AT_SYMLINK_NOFOLLOW and AT_EMPTY_PATH are accepted
 * @name:     user pointer to the xattr name
 *
 * Returns 0 on success or a negative errno.
 */
static int path_removexattrat(int dfd, const char __user *pathname,
			      unsigned int at_flags, const char __user *name)
{
	struct xattr_name kname;
	unsigned int lookup_flags;
	int error;

	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
		return -EINVAL;

	error = import_xattr_name(&kname, name);
	if (error)
		return error;

	CLASS(filename_maybe_null, filename)(pathname, at_flags);
	/* No filename: operate on the file that @dfd refers to. */
	if (!filename) {
		CLASS(fd, f)(dfd);
		if (fd_empty(f))
			return -EBADF;
		return file_removexattr(fd_file(f), &kname);
	}
	/* Symlinks are followed unless the caller asked otherwise. */
	lookup_flags = (at_flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
	return filename_removexattr(dfd, filename, lookup_flags, &kname);
}
1096 
/*
 * removexattrat(2): remove an extended attribute; all arguments are passed
 * straight through to the common helper.
 */
SYSCALL_DEFINE4(removexattrat, int, dfd, const char __user *, pathname,
		unsigned int, at_flags, const char __user *, name)
{
	return path_removexattrat(dfd, pathname, at_flags, name);
}
1102 
/*
 * removexattr(2): remove the xattr @name from @pathname, resolved relative
 * to the current working directory (AT_FDCWD). No AT_* flags are passed, so
 * path_removexattrat() performs the lookup with LOOKUP_FOLLOW.
 */
SYSCALL_DEFINE2(removexattr, const char __user *, pathname,
		const char __user *, name)
{
	return path_removexattrat(AT_FDCWD, pathname, 0, name);
}
1108 
/*
 * lremovexattr(2): like removexattr(2), but AT_SYMLINK_NOFOLLOW is passed so
 * that path_removexattrat() does not set LOOKUP_FOLLOW for the lookup.
 */
SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
		const char __user *, name)
{
	return path_removexattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, name);
}
1114 
/*
 * fremovexattr(2): remove the xattr @name from the open file @fd. This is
 * expressed as a NULL pathname together with AT_EMPTY_PATH, with @fd taking
 * the place of the directory file descriptor.
 */
SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
{
	return path_removexattrat(fd, NULL, AT_EMPTY_PATH, name);
}
1119 
/**
 * xattr_list_one - append one xattr name to a listxattr result
 * @buffer: cursor into the output buffer; *@buffer may be NULL
 * @remaining_size: number of bytes still available at *@buffer
 * @name: NUL-terminated xattr name to append
 *
 * If *@buffer is non-NULL, @name (including its terminating NUL) is copied
 * to *@buffer and the cursor is advanced past it. If *@buffer is NULL
 * nothing is copied. In both cases *@remaining_size is reduced by the length
 * of the name plus one, so that callers can compute the required buffer size
 * from how far it dropped.
 *
 * Return: 0 on success, -ERANGE if a buffer was supplied but does not have
 * room for @name. On -ERANGE neither the cursor nor the remaining size is
 * modified.
 */
int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name)
{
	size_t len = strlen(name) + 1;

	if (*buffer) {
		/*
		 * *remaining_size is signed while len is not: comparing them
		 * directly would convert a negative remaining size into a
		 * huge unsigned value and let the copy below run past the
		 * end of the buffer. Reject negative values explicitly.
		 */
		if (*remaining_size < 0 || (size_t)*remaining_size < len)
			return -ERANGE;
		memcpy(*buffer, name, len);
		*buffer += len;
	}
	*remaining_size -= len;
	return 0;
}
1134 
1135 /**
1136  * generic_listxattr - run through a dentry's xattr list() operations
1137  * @dentry: dentry to list the xattrs
1138  * @buffer: result buffer
1139  * @buffer_size: size of @buffer
1140  *
1141  * Combine the results of the list() operation from every xattr_handler in the
1142  * xattr_handler stack.
1143  *
1144  * Note that this will not include the entries for POSIX ACLs.
1145  */
1146 ssize_t
1147 generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
1148 {
1149 	const struct xattr_handler *handler, * const *handlers = dentry->d_sb->s_xattr;
1150 	ssize_t remaining_size = buffer_size;
1151 
1152 	for_each_xattr_handler(handlers, handler) {
1153 		int err;
1154 
1155 		if (!handler->name || (handler->list && !handler->list(dentry)))
1156 			continue;
1157 		err = xattr_list_one(&buffer, &remaining_size, handler->name);
1158 		if (err)
1159 			return err;
1160 	}
1161 
1162 	return buffer_size - remaining_size;
1163 }
1164 EXPORT_SYMBOL(generic_listxattr);
1165 
/**
 * xattr_full_name  -  Compute full attribute name from suffix
 *
 * @handler:	handler of the xattr_handler operation
 * @name:	name passed to the xattr_handler operation
 *
 * The get and set xattr handler operations receive only the part of the
 * attribute name that follows the handler's prefix: a handler with prefix
 * "user." is passed "foo" when "user.foo" is accessed. That suffix still
 * points into the full name, so stepping back by the length of the prefix
 * recovers it.
 *
 * Note: the list xattr handler operation when called from the vfs is passed a
 * NULL name; some file systems use this operation internally, with varying
 * semantics.
 *
 * Return: pointer to the start of the full attribute name.
 */
const char *xattr_full_name(const struct xattr_handler *handler,
			    const char *name)
{
	const char *prefix = xattr_prefix(handler);

	return name - strlen(prefix);
}
EXPORT_SYMBOL(xattr_full_name);
1189 
/**
 * simple_xattr_space - estimate the memory used by a simple xattr
 * @name: the full name of the xattr
 * @size: the size of its value
 *
 * The estimate deliberately ignores how much larger the backing slab objects
 * really are: that depends on the slab implementation, whereas what is
 * wanted here is a deterministic figure that grows with the name length,
 * the value size and the number of xattrs.
 *
 * Return: The approximate number of bytes of memory used by such an xattr.
 */
size_t simple_xattr_space(const char *name, size_t size)
{
	/*
	 * A fixed "40" stands in for sizeof(struct simple_xattr) so that the
	 * result is the same on 32-bit and 64-bit, and stays the same even
	 * if simple_xattr grows.
	 */
	const size_t fixed_overhead = 40;
	size_t name_len = strlen(name);

	return fixed_overhead + size + name_len;
}
1209 
1210 /**
1211  * simple_xattr_free - free an xattr object
1212  * @xattr: the xattr object
1213  *
1214  * Free the xattr object. Can handle @xattr being NULL.
1215  */
1216 void simple_xattr_free(struct simple_xattr *xattr)
1217 {
1218 	if (xattr)
1219 		kfree(xattr->name);
1220 	kvfree(xattr);
1221 }
1222 
1223 static void simple_xattr_rcu_free(struct rcu_head *head)
1224 {
1225 	struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu);
1226 
1227 	simple_xattr_free(xattr);
1228 }
1229 
1230 /**
1231  * simple_xattr_free_rcu - free an xattr object with RCU delay
1232  * @xattr: the xattr object
1233  *
1234  * Free the xattr object after an RCU grace period. This must be used when
1235  * the xattr was removed from a data structure that concurrent RCU readers
1236  * may still be traversing. Can handle @xattr being NULL.
1237  */
1238 void simple_xattr_free_rcu(struct simple_xattr *xattr)
1239 {
1240 	if (xattr)
1241 		call_rcu(&xattr->rcu, simple_xattr_rcu_free);
1242 }
1243 
1244 /**
1245  * simple_xattr_alloc - allocate new xattr object
1246  * @value: value of the xattr object
1247  * @size: size of @value
1248  *
1249  * Allocate a new xattr object and initialize respective members. The caller is
1250  * responsible for handling the name of the xattr.
1251  *
1252  * Return: New xattr object on success, NULL if @value is NULL, ERR_PTR on
1253  * failure.
1254  */
1255 struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
1256 {
1257 	struct simple_xattr *new_xattr;
1258 	size_t len;
1259 
1260 	if (!value)
1261 		return NULL;
1262 
1263 	/* wrap around? */
1264 	len = sizeof(*new_xattr) + size;
1265 	if (len < sizeof(*new_xattr))
1266 		return ERR_PTR(-ENOMEM);
1267 
1268 	new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT);
1269 	if (!new_xattr)
1270 		return ERR_PTR(-ENOMEM);
1271 
1272 	new_xattr->size = size;
1273 	memcpy(new_xattr->value, value, size);
1274 	return new_xattr;
1275 }
1276 
1277 static u32 sx_hashfn(const char *name, const struct list_head *parent, u32 seed)
1278 {
1279 	return jhash(name, strlen(name), jhash(&parent, sizeof(parent), seed));
1280 }
1281 
1282 static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed)
1283 {
1284 	const struct sx_key *key = data;
1285 
1286 	return sx_hashfn(key->name, key->parent, seed);
1287 }
1288 
1289 static u32 simple_xattr_obj_hashfn(const void *obj, u32 len, u32 seed)
1290 {
1291 	const struct simple_xattr *xattr = obj;
1292 
1293 	return sx_hashfn(xattr->name, xattr->parent, seed);
1294 }
1295 
1296 static int simple_xattr_obj_cmpfn(struct rhashtable_compare_arg *arg,
1297 				   const void *obj)
1298 {
1299 	const struct simple_xattr *xattr = obj;
1300 	const struct sx_key *key = arg->key;
1301 
1302 	return xattr->parent != key->parent || strcmp(xattr->name, key->name);
1303 }
1304 
/*
 * rhashtable configuration for simple xattrs.
 *
 * Entries are struct simple_xattr objects linked through ->hash_node.
 * Lookup keys are struct sx_key (parent list head + name) and are hashed by
 * simple_xattr_hashfn(); stored objects are hashed by
 * simple_xattr_obj_hashfn() and matched against a key by
 * simple_xattr_obj_cmpfn(). Automatic shrinking of the table is enabled.
 */
static const struct rhashtable_params simple_xattr_params = {
	.head_offset    = offsetof(struct simple_xattr, hash_node),
	.hashfn         = simple_xattr_hashfn,
	.obj_hashfn     = simple_xattr_obj_hashfn,
	.obj_cmpfn      = simple_xattr_obj_cmpfn,
	.automatic_shrinking = true,
};
1312 
1313 /**
1314  * simple_xattr_get - get an xattr object
1315  * @cache: anchor for the hash table
1316  * @xattrs: the header of the xattr object
1317  * @name: the name of the xattr to retrieve
1318  * @buffer: the buffer to store the value into
1319  * @size: the size of @buffer
1320  *
1321  * Try to find and retrieve the xattr object associated with @name.
1322  * If @buffer is provided store the value of @xattr in @buffer
1323  * otherwise just return the length. The size of @buffer is limited
1324  * to XATTR_SIZE_MAX which currently is 65536.
1325  *
1326  * Return: On success the length of the xattr value is returned. On error a
1327  * negative error code is returned.
1328  */
1329 int simple_xattr_get(struct simple_xattr_cache *cache, struct list_head *xattrs,
1330 		     const char *name, void *buffer, size_t size)
1331 {
1332 	struct simple_xattr *xattr;
1333 	struct sx_key key = { .parent = xattrs, .name = name };
1334 	struct rhashtable *ht = READ_ONCE(cache->ht);
1335 	int ret = -ENODATA;
1336 
1337 	if (!ht)
1338 		return ret;
1339 
1340 	guard(rcu)();
1341 	xattr = rhashtable_lookup(ht, &key, simple_xattr_params);
1342 	if (xattr) {
1343 		ret = xattr->size;
1344 		if (buffer) {
1345 			if (size < xattr->size)
1346 				ret = -ERANGE;
1347 			else
1348 				memcpy(buffer, xattr->value, xattr->size);
1349 		}
1350 	}
1351 	return ret;
1352 }
1353 
1354 static struct rhashtable *simple_xattrs_lazy_alloc(struct simple_xattr_cache *cache,
1355 						   const void *value, int flags)
1356 {
1357 	struct rhashtable *oldht, *ht = READ_ONCE(cache->ht);
1358 	int err;
1359 
1360 	if (unlikely(!ht)) {
1361 		if (!value)
1362 			return (flags & XATTR_REPLACE) ? ERR_PTR(-ENODATA) : NULL;
1363 
1364 		ht = kzalloc_obj(*ht);
1365 		if (!ht)
1366 			return ERR_PTR(-ENOMEM);
1367 
1368 		err = rhashtable_init(ht, &simple_xattr_params);
1369 		if (err) {
1370 			kfree(ht);
1371 			return ERR_PTR(err);
1372 		}
1373 
1374 		/*
1375 		 * Provides release semantics on success, so that use of a
1376 		 * non-NULL READ_ONCE(cache->ht) will be ordered relative to the
1377 		 * above initialization, due to implicit address dependency.
1378 		 */
1379 		oldht = cmpxchg_release(&cache->ht, NULL, ht);
1380 		if (oldht) {
1381 			/* Race lost */
1382 			rhashtable_destroy(ht);
1383 			kfree(ht);
1384 			ht = oldht;
1385 		}
1386 	}
1387 	return ht;
1388 }
1389 
/**
 * simple_xattr_set - set an xattr object
 * @cache: anchor for the hash table
 * @xattrs: the header of the xattr object
 * @name: the name of the xattr to set or remove
 * @value: the value to store along the xattr
 * @size: the size of @value
 * @flags: the flags determining how to set the xattr
 *
 * Set a new xattr object.
 * If @value is passed a new xattr object will be allocated. If XATTR_REPLACE
 * is specified in @flags a matching xattr object for @name must already exist.
 * If it does it will be replaced with the new xattr object. If it doesn't we
 * fail. If XATTR_CREATE is specified and a matching xattr does already exist
 * we fail. If it doesn't we create a new xattr. If @flags is zero we simply
 * insert the new xattr replacing any existing one.
 *
 * If @value is empty and a matching xattr object is found we delete it if
 * XATTR_REPLACE is specified in @flags or @flags is zero.
 *
 * If @value is empty and no matching xattr object for @name is found we do
 * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
 * XATTR_REPLACE we fail as mentioned above.
 *
 * Note: Callers must externally serialize writes. All current callers hold
 * the inode lock for write operations. The lookup->replace/remove sequence
 * is not atomic with respect to the rhashtable's per-bucket locking, but
 * is safe because writes are serialized by the caller.
 *
 * Return: On success, the removed or replaced xattr is returned, to be freed
 * by the caller; or NULL if none. On failure a negative error code is returned.
 */
struct simple_xattr *simple_xattr_set(struct simple_xattr_cache *cache, struct list_head *xattrs,
				      const char *name, const void *value,
				      size_t size, int flags)
{
	struct sx_key key = { .parent = xattrs, .name = name };
	struct simple_xattr *old_xattr = NULL;
	struct rhashtable *ht;
	int err;

	/*
	 * A NULL table here means no xattr was ever stored and there is no
	 * value to store now; ERR_CAST() passes that NULL (or the error)
	 * straight through to the caller.
	 */
	ht = simple_xattrs_lazy_alloc(cache, value, flags);
	if (IS_ERR_OR_NULL(ht))
		return ERR_CAST(ht);

	/*
	 * new_xattr is NULL when @value is NULL (a removal request).
	 * NOTE(review): the simple_xattr class is defined outside this file;
	 * presumably it frees new_xattr on scope exit unless ownership is
	 * given up below - confirm against its DEFINE_CLASS().
	 */
	CLASS(simple_xattr, new_xattr)(value, size);
	if (IS_ERR(new_xattr))
		return new_xattr;

	if (new_xattr) {
		new_xattr->parent = xattrs;
		new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
		if (!new_xattr->name)
			return ERR_PTR(-ENOMEM);
	}

	/*
	 * Hash table lookup/replace/remove will grab RCU read lock themselves.
	 * This makes sure that hash table lookup is safe against concurrent
	 * modification on another inode.
	 */
	old_xattr = rhashtable_lookup_fast(ht, &key, simple_xattr_params);
	if (old_xattr) {
		/* Fail if XATTR_CREATE is requested and the xattr exists. */
		if (flags & XATTR_CREATE)
			return ERR_PTR(-EEXIST);

		if (new_xattr) {
			/* Swap the hash table entry first, then the list entry. */
			err = rhashtable_replace_fast(ht,
						      &old_xattr->hash_node,
						      &new_xattr->hash_node,
						      simple_xattr_params);
			if (err)
				return ERR_PTR(err);

			list_replace_rcu(&old_xattr->node, &new_xattr->node);
		} else {
			/* No new value: unlink the old xattr from table and list. */
			err = rhashtable_remove_fast(ht,
						     &old_xattr->hash_node,
						     simple_xattr_params);
			if (err)
				return ERR_PTR(err);

			list_del_rcu(&old_xattr->node);
		}
	} else {
		/* Fail if XATTR_REPLACE is requested but no xattr is found. */
		if (flags & XATTR_REPLACE)
			return ERR_PTR(-ENODATA);

		/*
		 * If XATTR_CREATE or no flags are specified together with a
		 * new value simply insert it.
		 */
		if (new_xattr) {
			err = rhashtable_insert_fast(ht,
						     &new_xattr->hash_node,
						     simple_xattr_params);
			if (err)
				return ERR_PTR(err);

			list_add_tail_rcu(&new_xattr->node, xattrs);
		}

		/*
		 * If XATTR_CREATE or no flags are specified and neither an
		 * old or new xattr exist then we don't need to do anything.
		 */
	}

	/*
	 * Success: new_xattr (if any) is now linked into the table and list,
	 * so give up local ownership; the old xattr, if any, goes back to
	 * the caller to be freed.
	 */
	retain_and_null_ptr(new_xattr);
	return old_xattr;
}
1503 
/*
 * Uncharge one xattr with a value of @size bytes from @limits: subtract
 * @size from the accumulated value size and drop the xattr count by one.
 */
static inline void simple_xattr_limits_dec(struct simple_xattr_limits *limits,
					   size_t size)
{
	atomic_sub(size, &limits->xattr_size);
	atomic_dec(&limits->nr_xattrs);
}
1510 
1511 static inline int simple_xattr_limits_inc(struct simple_xattr_limits *limits,
1512 					  size_t size)
1513 {
1514 	if (atomic_inc_return(&limits->nr_xattrs) > SIMPLE_XATTR_MAX_NR) {
1515 		atomic_dec(&limits->nr_xattrs);
1516 		return -ENOSPC;
1517 	}
1518 
1519 	if (atomic_add_return(size, &limits->xattr_size) <= SIMPLE_XATTR_MAX_SIZE)
1520 		return 0;
1521 
1522 	simple_xattr_limits_dec(limits, size);
1523 	return -ENOSPC;
1524 }
1525 
1526 /**
1527  * simple_xattr_set_limited - set an xattr with per-inode user.* limits
1528  * @cache: anchor for the hash table
1529  * @xattrs: the header of the xattr object
1530  * @limits: per-inode limit counters for user.* xattrs
1531  * @name: the name of the xattr to set or remove
1532  * @value: the value to store (NULL to remove)
1533  * @size: the size of @value
1534  * @flags: XATTR_CREATE, XATTR_REPLACE, or 0
1535  *
1536  * Like simple_xattr_set(), but enforces per-inode count and total value size
1537  * limits for user.* xattrs. Uses speculative pre-increment of the atomic
1538  * counters to avoid races without requiring external locks.
1539  *
1540  * Return: On success zero is returned. On failure a negative error code is
1541  * returned.
1542  */
1543 int simple_xattr_set_limited(struct simple_xattr_cache *cache, struct list_head *xattrs,
1544 			     struct simple_xattr_limits *limits,
1545 			     const char *name, const void *value,
1546 			     size_t size, int flags)
1547 {
1548 	struct simple_xattr *old_xattr;
1549 	int ret;
1550 
1551 	if (value) {
1552 		ret = simple_xattr_limits_inc(limits, size);
1553 		if (ret)
1554 			return ret;
1555 	}
1556 
1557 	old_xattr = simple_xattr_set(cache, xattrs, name, value, size, flags);
1558 	if (IS_ERR(old_xattr)) {
1559 		if (value)
1560 			simple_xattr_limits_dec(limits, size);
1561 		return PTR_ERR(old_xattr);
1562 	}
1563 	if (old_xattr) {
1564 		simple_xattr_limits_dec(limits, old_xattr->size);
1565 		simple_xattr_free_rcu(old_xattr);
1566 	}
1567 	return 0;
1568 }
1569 
1570 static bool xattr_is_trusted(const char *name)
1571 {
1572 	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
1573 }
1574 
1575 static bool xattr_is_maclabel(const char *name)
1576 {
1577 	const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
1578 
1579 	return !strncmp(name, XATTR_SECURITY_PREFIX,
1580 			XATTR_SECURITY_PREFIX_LEN) &&
1581 		security_ismaclabel(suffix);
1582 }
1583 
1584 /**
1585  * simple_xattr_list - list all xattr objects
1586  * @inode: inode from which to get the xattrs
1587  * @xattrs: the header of the xattr object
1588  * @buffer: the buffer to store all xattrs into
1589  * @size: the size of @buffer
1590  *
1591  * List all xattrs associated with @inode. If @buffer is NULL we returned
1592  * the required size of the buffer. If @buffer is provided we store the
1593  * xattrs value into it provided it is big enough.
1594  *
1595  * Note, the number of xattr names that can be listed with listxattr(2) is
1596  * limited to XATTR_LIST_MAX aka 65536 bytes. If a larger buffer is passed
1597  * then vfs_listxattr() caps it to XATTR_LIST_MAX and if more xattr names
1598  * are found it will return -E2BIG.
1599  *
1600  * Return: On success the required size or the size of the copied xattrs is
1601  * returned. On error a negative error code is returned.
1602  */
1603 ssize_t simple_xattr_list(struct inode *inode, struct list_head *xattrs,
1604 			  char *buffer, size_t size)
1605 {
1606 	bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
1607 	struct simple_xattr *xattr;
1608 	ssize_t remaining_size = size;
1609 	int err = 0;
1610 
1611 	err = posix_acl_listxattr(inode, &buffer, &remaining_size);
1612 	if (err)
1613 		return err;
1614 
1615 	err = security_inode_listsecurity(inode, buffer, remaining_size);
1616 	if (err < 0)
1617 		return err;
1618 
1619 	if (buffer) {
1620 		if (remaining_size < err)
1621 			return -ERANGE;
1622 		buffer += err;
1623 	}
1624 	remaining_size -= err;
1625 	err = 0;
1626 
1627 	if (!xattrs)
1628 		return size - remaining_size;
1629 
1630 	rcu_read_lock();
1631 	list_for_each_entry_rcu(xattr, xattrs, node) {
1632 		/* skip "trusted." attributes for unprivileged callers */
1633 		if (!trusted && xattr_is_trusted(xattr->name))
1634 			continue;
1635 
1636 		/* skip MAC labels; these are provided by LSM above */
1637 		if (xattr_is_maclabel(xattr->name))
1638 			continue;
1639 
1640 		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
1641 		if (err)
1642 			break;
1643 	}
1644 	rcu_read_unlock();
1645 
1646 	return err ? err : size - remaining_size;
1647 }
1648 
1649 /**
1650  * simple_xattr_add - add xattr objects
1651  * @cache: anchor for the hash table
1652  * @xattrs: the header of the xattr object
1653  * @new_xattr: the xattr object to add
1654  *
1655  * Add an xattr object to @xattrs. This assumes no replacement or removal
1656  * of matching xattrs is wanted. Should only be called during inode
1657  * initialization when a few distinct initial xattrs are supposed to be set.
1658  *
1659  * Return: On success zero is returned. On failure a negative error code is
1660  * returned.
1661  */
1662 int simple_xattr_add(struct simple_xattr_cache *cache, struct list_head *xattrs,
1663 		     struct simple_xattr *new_xattr)
1664 {
1665 	struct rhashtable *ht;
1666 	int err;
1667 
1668 	ht = simple_xattrs_lazy_alloc(cache, new_xattr->value, 0);
1669 	if (IS_ERR(ht))
1670 		return PTR_ERR(ht);
1671 
1672 	new_xattr->parent = xattrs;
1673 	err = rhashtable_insert_fast(ht, &new_xattr->hash_node, simple_xattr_params);
1674 	if (err)
1675 		return err;
1676 
1677 	list_add_tail_rcu(&new_xattr->node, xattrs);
1678 	return 0;
1679 }
1680 
1681 /**
1682  * simple_xattr_add_limited - add an xattr object, charging per-inode limits
1683  * @cache: anchor for the hash table
1684  * @xattrs: the header of the xattr object
1685  * @limits: per-inode limit counters
1686  * @new_xattr: the xattr object to add
1687  *
1688  * Like simple_xattr_add(), but also accounts @new_xattr against @limits so
1689  * that a later removal or replacement of it through simple_xattr_set_limited()
1690  * decrements counters that were actually incremented, rather than underflowing
1691  * them. Use this instead of simple_xattr_add() when seeding initial xattrs
1692  * that share a namespace with the limited set/remove path.
1693  *
1694  * Return: On success zero is returned. On failure a negative error code is
1695  * returned.
1696  */
1697 int simple_xattr_add_limited(struct simple_xattr_cache *cache,
1698 			     struct list_head *xattrs,
1699 			     struct simple_xattr_limits *limits,
1700 			     struct simple_xattr *new_xattr)
1701 {
1702 	int err;
1703 
1704 	err = simple_xattr_limits_inc(limits, new_xattr->size);
1705 	if (err)
1706 		return err;
1707 
1708 	err = simple_xattr_add(cache, xattrs, new_xattr);
1709 	if (err)
1710 		simple_xattr_limits_dec(limits, new_xattr->size);
1711 	return err;
1712 }
1713 
1714 /**
1715  * simple_xattrs_free - free xattrs
1716  * @cache: anchor for the hash table
1717  * @xattrs: xattr header whose xattrs to destroy
1718  * @freed_space: approximate number of bytes of memory freed from @xattrs
1719  *
1720  * Destroy all xattrs in @xattrs. When this is called no one can hold a
1721  * reference to any of the xattrs anymore.
1722  */
1723 void simple_xattrs_free(struct simple_xattr_cache *cache, struct list_head *xattrs,
1724 			size_t *freed_space)
1725 {
1726 	if (freed_space)
1727 		*freed_space = 0;
1728 
1729 	while (!list_empty(xattrs)) {
1730 		struct simple_xattr *xattr = list_first_entry(xattrs, typeof(*xattr), node);
1731 
1732 		rhashtable_remove_fast(cache->ht, &xattr->hash_node, simple_xattr_params);
1733 		list_del(&xattr->node);
1734 		if (freed_space)
1735 			*freed_space += simple_xattr_space(xattr->name, xattr->size);
1736 		/*
1737 		 * Free with RCU, since the xattr might still get accessed by
1738 		 * the hash compare function
1739 		 */
1740 		simple_xattr_free_rcu(xattr);
1741 	}
1742 }
1743 
1744 /**
1745  * simple_xattr_cache_cleanup - free the cache
1746  * @cache: anchor for the hash table
1747  *
1748  * Destroy the cache table, which was lazily allocated on adding the first xattr.
1749  */
1750 void simple_xattr_cache_cleanup(struct simple_xattr_cache *cache)
1751 {
1752 	if (cache->ht) {
1753 		WARN_ON(atomic_read(&cache->ht->nelems));
1754 		rhashtable_destroy(cache->ht);
1755 		kfree(cache->ht);
1756 		cache->ht = NULL;
1757 	}
1758 }
1759