xref: /linux/fs/xattr.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3   File: fs/xattr.c
4 
5   Extended attribute handling.
6 
7   Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
8   Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
9   Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/slab.h>
14 #include <linux/file.h>
15 #include <linux/xattr.h>
16 #include <linux/mount.h>
17 #include <linux/namei.h>
18 #include <linux/security.h>
19 #include <linux/syscalls.h>
20 #include <linux/export.h>
21 #include <linux/fsnotify.h>
22 #include <linux/audit.h>
23 #include <linux/vmalloc.h>
24 #include <linux/posix_acl_xattr.h>
25 #include <linux/rhashtable.h>
26 
27 #include <linux/uaccess.h>
28 
29 #include "internal.h"
30 
/*
 * Key made of a list head pointer and a name string.
 *
 * NOTE(review): no user of this type is visible in this part of the file.
 * Presumably it is a lookup key (the file includes <linux/rhashtable.h>) -
 * confirm against the code that uses it.
 */
31 struct sx_key {
32 	const struct list_head *parent;
33 	const char *name;
34 };
35 
/*
 * Test whether @a starts with @a_prefix.
 *
 * Returns a pointer to the first character of @a following the prefix when
 * @a begins with @a_prefix (this points at the terminating NUL when the two
 * strings are equal), or NULL when @a does not begin with @a_prefix.
 */
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
	for (; *a_prefix != '\0'; a++, a_prefix++) {
		/* Also catches @a ending early: its NUL differs from *a_prefix. */
		if (*a != *a_prefix)
			return NULL;
	}
	return a;
}
45 
/*
 * In order to implement different sets of xattr operations for each xattr
 * prefix, a filesystem should create a null-terminated array of struct
 * xattr_handler (one for each prefix) and hang a pointer to it off of the
 * s_xattr field of the superblock.
 *
 * for_each_xattr_handler() walks such an array:
 *  - @handlers is used as the cursor and is advanced by the walk; it is
 *    evaluated several times, so pass a plain pointer variable.
 *  - @handler receives each non-NULL entry in turn and is NULL once the walk
 *    has finished.
 *  - A NULL @handlers yields no iterations (and leaves @handler NULL).
 *
 * The macro expands to a single for statement, not "if (...) for (...)", so
 * it can safely be used as the body of an if that has an else branch without
 * the else binding to a hidden if inside the macro.
 */
#define for_each_xattr_handler(handlers, handler)			\
	for ((handler) = (handlers) ? *(handlers)++ : NULL;		\
	     (handler) != NULL;						\
	     (handler) = *(handlers)++)
57 
58 /*
59  * Find the xattr_handler with the matching prefix.
60  */
61 static const struct xattr_handler *
62 xattr_resolve_name(struct inode *inode, const char **name)
63 {
64 	const struct xattr_handler * const *handlers = inode->i_sb->s_xattr;
65 	const struct xattr_handler *handler;
66 
67 	if (!(inode->i_opflags & IOP_XATTR)) {
68 		if (unlikely(is_bad_inode(inode)))
69 			return ERR_PTR(-EIO);
70 		return ERR_PTR(-EOPNOTSUPP);
71 	}
72 	for_each_xattr_handler(handlers, handler) {
73 		const char *n;
74 
75 		n = strcmp_prefix(*name, xattr_prefix(handler));
76 		if (n) {
77 			if (!handler->prefix ^ !*n) {
78 				if (*n)
79 					continue;
80 				return ERR_PTR(-EINVAL);
81 			}
82 			*name = n;
83 			return handler;
84 		}
85 	}
86 	return ERR_PTR(-EOPNOTSUPP);
87 }
88 
89 /**
90  * may_write_xattr - check whether inode allows writing xattr
91  * @idmap: idmap of the mount the inode was found from
92  * @inode: the inode on which to set an xattr
93  *
94  * Check whether the inode allows writing xattrs. Specifically, we can never
95  * set or remove an extended attribute on a read-only filesystem  or on an
96  * immutable / append-only inode.
97  *
98  * We also need to ensure that the inode has a mapping in the mount to
99  * not risk writing back invalid i_{g,u}id values.
100  *
101  * Return: On success zero is returned. On error a negative errno is returned.
102  */
103 int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode)
104 {
105 	if (IS_IMMUTABLE(inode))
106 		return -EPERM;
107 	if (IS_APPEND(inode))
108 		return -EPERM;
109 	if (HAS_UNMAPPED_ID(idmap, inode))
110 		return -EPERM;
111 	return 0;
112 }
113 
114 static inline int xattr_permission_error(int mask)
115 {
116 	if (mask & MAY_WRITE)
117 		return -EPERM;
118 	return -ENODATA;
119 }
120 
121 /*
122  * Check permissions for extended attribute access.  This is a bit complicated
123  * because different namespaces have very different rules.
124  */
125 static int
126 xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
127 		 const char *name, int mask)
128 {
129 	if (mask & MAY_WRITE) {
130 		int ret;
131 
132 		ret = may_write_xattr(idmap, inode);
133 		if (ret)
134 			return ret;
135 	}
136 
137 	/*
138 	 * No restriction for security.* and system.* from the VFS.  Decision
139 	 * on these is left to the underlying filesystem / security module.
140 	 */
141 	if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
142 	    !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
143 		return 0;
144 
145 	/*
146 	 * The trusted.* namespace can only be accessed by privileged users.
147 	 */
148 	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
149 		if (!capable(CAP_SYS_ADMIN))
150 			return xattr_permission_error(mask);
151 		return 0;
152 	}
153 
154 	/*
155 	 * In the user.* namespace, only regular files and directories can have
156 	 * extended attributes. For sticky directories, only the owner and
157 	 * privileged users can write attributes.
158 	 */
159 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
160 		switch (inode->i_mode & S_IFMT) {
161 		case S_IFREG:
162 			break;
163 		case S_IFDIR:
164 			if (!(inode->i_mode & S_ISVTX))
165 				break;
166 			if (!(mask & MAY_WRITE))
167 				break;
168 			if (inode_owner_or_capable(idmap, inode))
169 				break;
170 			return -EPERM;
171 		case S_IFSOCK:
172 			break;
173 		default:
174 			return xattr_permission_error(mask);
175 		}
176 	}
177 
178 	return inode_permission(idmap, inode, mask);
179 }
180 
181 /*
182  * Look for any handler that deals with the specified namespace.
183  */
184 int
185 xattr_supports_user_prefix(struct inode *inode)
186 {
187 	const struct xattr_handler * const *handlers = inode->i_sb->s_xattr;
188 	const struct xattr_handler *handler;
189 
190 	if (!(inode->i_opflags & IOP_XATTR)) {
191 		if (unlikely(is_bad_inode(inode)))
192 			return -EIO;
193 		return -EOPNOTSUPP;
194 	}
195 
196 	for_each_xattr_handler(handlers, handler) {
197 		if (!strncmp(xattr_prefix(handler), XATTR_USER_PREFIX,
198 			     XATTR_USER_PREFIX_LEN))
199 			return 0;
200 	}
201 
202 	return -EOPNOTSUPP;
203 }
204 EXPORT_SYMBOL(xattr_supports_user_prefix);
205 
206 int
207 __vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
208 	       struct inode *inode, const char *name, const void *value,
209 	       size_t size, int flags)
210 {
211 	const struct xattr_handler *handler;
212 
213 	if (is_posix_acl_xattr(name))
214 		return -EOPNOTSUPP;
215 
216 	handler = xattr_resolve_name(inode, &name);
217 	if (IS_ERR(handler))
218 		return PTR_ERR(handler);
219 	if (!handler->set)
220 		return -EOPNOTSUPP;
221 	if (size == 0)
222 		value = "";  /* empty EA, do not remove */
223 	return handler->set(handler, idmap, dentry, inode, name, value,
224 			    size, flags);
225 }
226 EXPORT_SYMBOL(__vfs_setxattr);
227 
228 /**
229  *  __vfs_setxattr_noperm - perform setxattr operation without performing
230  *  permission checks.
231  *
232  *  @idmap: idmap of the mount the inode was found from
233  *  @dentry: object to perform setxattr on
234  *  @name: xattr name to set
235  *  @value: value to set @name to
236  *  @size: size of @value
237  *  @flags: flags to pass into filesystem operations
238  *
239  *  returns the result of the internal setxattr or setsecurity operations.
240  *
241  *  This function requires the caller to lock the inode's i_rwsem before it
242  *  is executed. It also assumes that the caller will make the appropriate
243  *  permission checks.
244  */
245 int __vfs_setxattr_noperm(struct mnt_idmap *idmap,
246 			  struct dentry *dentry, const char *name,
247 			  const void *value, size_t size, int flags)
248 {
249 	struct inode *inode = dentry->d_inode;
250 	int error = -EAGAIN;
251 	int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
252 				   XATTR_SECURITY_PREFIX_LEN);
253 
254 	if (issec)
255 		inode->i_flags &= ~S_NOSEC;
256 	if (inode->i_opflags & IOP_XATTR) {
257 		error = __vfs_setxattr(idmap, dentry, inode, name, value,
258 				       size, flags);
259 		if (!error) {
260 			fsnotify_xattr(dentry);
261 			security_inode_post_setxattr(dentry, name, value,
262 						     size, flags);
263 		}
264 	} else {
265 		if (unlikely(is_bad_inode(inode)))
266 			return -EIO;
267 	}
268 	if (error == -EAGAIN) {
269 		error = -EOPNOTSUPP;
270 
271 		if (issec) {
272 			const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
273 
274 			error = security_inode_setsecurity(inode, suffix, value,
275 							   size, flags);
276 			if (!error)
277 				fsnotify_xattr(dentry);
278 		}
279 	}
280 
281 	return error;
282 }
283 
284 /**
285  * __vfs_setxattr_locked - set an extended attribute while holding the inode
286  * lock
287  *
288  *  @idmap: idmap of the mount of the target inode
289  *  @dentry: object to perform setxattr on
290  *  @name: xattr name to set
291  *  @value: value to set @name to
292  *  @size: size of @value
293  *  @flags: flags to pass into filesystem operations
294  *  @delegated_inode: on return, will contain an inode pointer that
295  *  a delegation was broken on, NULL if none.
296  */
297 int
298 __vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry,
299 		      const char *name, const void *value, size_t size,
300 		      int flags, struct delegated_inode *delegated_inode)
301 {
302 	struct inode *inode = dentry->d_inode;
303 	int error;
304 
305 	error = xattr_permission(idmap, inode, name, MAY_WRITE);
306 	if (error)
307 		return error;
308 
309 	error = security_inode_setxattr(idmap, dentry, name, value, size,
310 					flags);
311 	if (error)
312 		goto out;
313 
314 	error = try_break_deleg(inode, 0, delegated_inode);
315 	if (error)
316 		goto out;
317 
318 	error = __vfs_setxattr_noperm(idmap, dentry, name, value,
319 				      size, flags);
320 
321 out:
322 	return error;
323 }
324 EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
325 
326 int
327 vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
328 	     const char *name, const void *value, size_t size, int flags)
329 {
330 	struct inode *inode = dentry->d_inode;
331 	struct delegated_inode delegated_inode = { };
332 	const void  *orig_value = value;
333 	int error;
334 
335 	if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
336 		error = cap_convert_nscap(idmap, dentry, &value, size);
337 		if (error < 0)
338 			return error;
339 		size = error;
340 	}
341 
342 retry_deleg:
343 	inode_lock(inode);
344 	error = __vfs_setxattr_locked(idmap, dentry, name, value, size,
345 				      flags, &delegated_inode);
346 	inode_unlock(inode);
347 
348 	if (is_delegated(&delegated_inode)) {
349 		error = break_deleg_wait(&delegated_inode);
350 		if (!error)
351 			goto retry_deleg;
352 	}
353 	if (value != orig_value)
354 		kfree(value);
355 
356 	return error;
357 }
358 EXPORT_SYMBOL_GPL(vfs_setxattr);
359 
360 static ssize_t
361 xattr_getsecurity(struct mnt_idmap *idmap, struct inode *inode,
362 		  const char *name, void *value, size_t size)
363 {
364 	void *buffer = NULL;
365 	ssize_t len;
366 
367 	if (!value || !size) {
368 		len = security_inode_getsecurity(idmap, inode, name,
369 						 &buffer, false);
370 		goto out_noalloc;
371 	}
372 
373 	len = security_inode_getsecurity(idmap, inode, name, &buffer,
374 					 true);
375 	if (len < 0)
376 		return len;
377 	if (size < len) {
378 		len = -ERANGE;
379 		goto out;
380 	}
381 	memcpy(value, buffer, len);
382 out:
383 	kfree(buffer);
384 out_noalloc:
385 	return len;
386 }
387 
388 /*
389  * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr
390  *
391  * Allocate memory, if not already allocated, or re-allocate correct size,
392  * before retrieving the extended attribute.  The xattr value buffer should
393  * always be freed by the caller, even on error.
394  *
395  * Returns the result of alloc, if failed, or the getxattr operation.
396  */
397 int
398 vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry,
399 		   const char *name, char **xattr_value, size_t xattr_size,
400 		   gfp_t flags)
401 {
402 	const struct xattr_handler *handler;
403 	struct inode *inode = dentry->d_inode;
404 	char *value = *xattr_value;
405 	int error;
406 
407 	error = xattr_permission(idmap, inode, name, MAY_READ);
408 	if (error)
409 		return error;
410 
411 	handler = xattr_resolve_name(inode, &name);
412 	if (IS_ERR(handler))
413 		return PTR_ERR(handler);
414 	if (!handler->get)
415 		return -EOPNOTSUPP;
416 	error = handler->get(handler, dentry, inode, name, NULL, 0);
417 	if (error < 0)
418 		return error;
419 
420 	if (!value || (error > xattr_size)) {
421 		value = krealloc(*xattr_value, error + 1, flags);
422 		if (!value)
423 			return -ENOMEM;
424 		memset(value, 0, error + 1);
425 	}
426 
427 	error = handler->get(handler, dentry, inode, name, value, error);
428 	*xattr_value = value;
429 	return error;
430 }
431 
432 ssize_t
433 __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
434 	       void *value, size_t size)
435 {
436 	const struct xattr_handler *handler;
437 
438 	if (is_posix_acl_xattr(name))
439 		return -EOPNOTSUPP;
440 
441 	handler = xattr_resolve_name(inode, &name);
442 	if (IS_ERR(handler))
443 		return PTR_ERR(handler);
444 	if (!handler->get)
445 		return -EOPNOTSUPP;
446 	return handler->get(handler, dentry, inode, name, value, size);
447 }
448 EXPORT_SYMBOL(__vfs_getxattr);
449 
450 ssize_t
451 vfs_getxattr(struct mnt_idmap *idmap, struct dentry *dentry,
452 	     const char *name, void *value, size_t size)
453 {
454 	struct inode *inode = dentry->d_inode;
455 	int error;
456 
457 	error = xattr_permission(idmap, inode, name, MAY_READ);
458 	if (error)
459 		return error;
460 
461 	error = security_inode_getxattr(dentry, name);
462 	if (error)
463 		return error;
464 
465 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
466 				XATTR_SECURITY_PREFIX_LEN)) {
467 		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
468 		int ret = xattr_getsecurity(idmap, inode, suffix, value,
469 					    size);
470 		/*
471 		 * Only overwrite the return value if a security module
472 		 * is actually active.
473 		 */
474 		if (ret == -EOPNOTSUPP)
475 			goto nolsm;
476 		return ret;
477 	}
478 nolsm:
479 	return __vfs_getxattr(dentry, inode, name, value, size);
480 }
481 EXPORT_SYMBOL_GPL(vfs_getxattr);
482 
483 /**
484  * vfs_listxattr - retrieve \0 separated list of xattr names
485  * @dentry: the dentry from whose inode the xattr names are retrieved
486  * @list: buffer to store xattr names into
487  * @size: size of the buffer
488  *
489  * This function returns the names of all xattrs associated with the
490  * inode of @dentry.
491  *
492  * Note, for legacy reasons the vfs_listxattr() function lists POSIX
493  * ACLs as well. Since POSIX ACLs are decoupled from IOP_XATTR the
494  * vfs_listxattr() function doesn't check for this flag since a
495  * filesystem could implement POSIX ACLs without implementing any other
496  * xattrs.
497  *
498  * However, since all codepaths that remove IOP_XATTR also assign of
499  * inode operations that either don't implement or implement a stub
500  * ->listxattr() operation.
501  *
502  * Return: On success, the size of the buffer that was used. On error a
503  *         negative error code.
504  */
505 ssize_t
506 vfs_listxattr(struct dentry *dentry, char *list, size_t size)
507 {
508 	struct inode *inode = d_inode(dentry);
509 	ssize_t error;
510 
511 	error = security_inode_listxattr(dentry);
512 	if (error)
513 		return error;
514 
515 	if (inode->i_op->listxattr) {
516 		error = inode->i_op->listxattr(dentry, list, size);
517 	} else {
518 		ssize_t remaining = size;
519 
520 		error = security_inode_listsecurity(inode, &list, &remaining);
521 		if (error)
522 			return error;
523 		error = size - remaining;
524 	}
525 	return error;
526 }
527 EXPORT_SYMBOL_GPL(vfs_listxattr);
528 
529 int
530 __vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
531 		  const char *name)
532 {
533 	struct inode *inode = d_inode(dentry);
534 	const struct xattr_handler *handler;
535 
536 	if (is_posix_acl_xattr(name))
537 		return -EOPNOTSUPP;
538 
539 	handler = xattr_resolve_name(inode, &name);
540 	if (IS_ERR(handler))
541 		return PTR_ERR(handler);
542 	if (!handler->set)
543 		return -EOPNOTSUPP;
544 	return handler->set(handler, idmap, dentry, inode, name, NULL, 0,
545 			    XATTR_REPLACE);
546 }
547 EXPORT_SYMBOL(__vfs_removexattr);
548 
549 /**
550  * __vfs_removexattr_locked - set an extended attribute while holding the inode
551  * lock
552  *
553  *  @idmap: idmap of the mount of the target inode
554  *  @dentry: object to perform setxattr on
555  *  @name: name of xattr to remove
556  *  @delegated_inode: on return, will contain an inode pointer that
557  *  a delegation was broken on, NULL if none.
558  */
559 int
560 __vfs_removexattr_locked(struct mnt_idmap *idmap,
561 			 struct dentry *dentry, const char *name,
562 			 struct delegated_inode *delegated_inode)
563 {
564 	struct inode *inode = dentry->d_inode;
565 	int error;
566 
567 	error = xattr_permission(idmap, inode, name, MAY_WRITE);
568 	if (error)
569 		return error;
570 
571 	error = security_inode_removexattr(idmap, dentry, name);
572 	if (error)
573 		goto out;
574 
575 	error = try_break_deleg(inode, 0, delegated_inode);
576 	if (error)
577 		goto out;
578 
579 	error = __vfs_removexattr(idmap, dentry, name);
580 	if (error)
581 		return error;
582 
583 	fsnotify_xattr(dentry);
584 	security_inode_post_removexattr(dentry, name);
585 
586 out:
587 	return error;
588 }
589 EXPORT_SYMBOL_GPL(__vfs_removexattr_locked);
590 
591 int
592 vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
593 		const char *name)
594 {
595 	struct inode *inode = dentry->d_inode;
596 	struct delegated_inode delegated_inode = { };
597 	int error;
598 
599 retry_deleg:
600 	inode_lock(inode);
601 	error = __vfs_removexattr_locked(idmap, dentry,
602 					 name, &delegated_inode);
603 	inode_unlock(inode);
604 
605 	if (is_delegated(&delegated_inode)) {
606 		error = break_deleg_wait(&delegated_inode);
607 		if (!error)
608 			goto retry_deleg;
609 	}
610 
611 	return error;
612 }
613 EXPORT_SYMBOL_GPL(vfs_removexattr);
614 
615 int import_xattr_name(struct xattr_name *kname, const char __user *name)
616 {
617 	int error = strncpy_from_user(kname->name, name,
618 					sizeof(kname->name));
619 	if (error == 0 || error == sizeof(kname->name))
620 		return -ERANGE;
621 	if (error < 0)
622 		return error;
623 	return 0;
624 }
625 
626 /*
627  * Extended attribute SET operations
628  */
629 
630 int setxattr_copy(const char __user *name, struct kernel_xattr_ctx *ctx)
631 {
632 	int error;
633 
634 	if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
635 		return -EINVAL;
636 
637 	error = import_xattr_name(ctx->kname, name);
638 	if (error)
639 		return error;
640 
641 	if (ctx->size) {
642 		if (ctx->size > XATTR_SIZE_MAX)
643 			return -E2BIG;
644 
645 		ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
646 		if (IS_ERR(ctx->kvalue)) {
647 			error = PTR_ERR(ctx->kvalue);
648 			ctx->kvalue = NULL;
649 		}
650 	}
651 
652 	return error;
653 }
654 
655 static int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
656 		struct kernel_xattr_ctx *ctx)
657 {
658 	if (is_posix_acl_xattr(ctx->kname->name))
659 		return do_set_acl(idmap, dentry, ctx->kname->name,
660 				  ctx->kvalue, ctx->size);
661 
662 	return vfs_setxattr(idmap, dentry, ctx->kname->name,
663 			ctx->kvalue, ctx->size, ctx->flags);
664 }
665 
666 int file_setxattr(struct file *f, struct kernel_xattr_ctx *ctx)
667 {
668 	int error = mnt_want_write_file(f);
669 
670 	if (!error) {
671 		audit_file(f);
672 		error = do_setxattr(file_mnt_idmap(f), f->f_path.dentry, ctx);
673 		mnt_drop_write_file(f);
674 	}
675 	return error;
676 }
677 
678 int filename_setxattr(int dfd, struct filename *filename,
679 		      unsigned int lookup_flags, struct kernel_xattr_ctx *ctx)
680 {
681 	struct path path;
682 	int error;
683 
684 retry:
685 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
686 	if (error)
687 		return error;
688 	error = mnt_want_write(path.mnt);
689 	if (!error) {
690 		error = do_setxattr(mnt_idmap(path.mnt), path.dentry, ctx);
691 		mnt_drop_write(path.mnt);
692 	}
693 	path_put(&path);
694 	if (retry_estale(error, lookup_flags)) {
695 		lookup_flags |= LOOKUP_REVAL;
696 		goto retry;
697 	}
698 	return error;
699 }
700 
701 static int path_setxattrat(int dfd, const char __user *pathname,
702 			   unsigned int at_flags, const char __user *name,
703 			   const void __user *value, size_t size, int flags)
704 {
705 	struct xattr_name kname;
706 	struct kernel_xattr_ctx ctx = {
707 		.cvalue	= value,
708 		.kvalue	= NULL,
709 		.size	= size,
710 		.kname	= &kname,
711 		.flags	= flags,
712 	};
713 	unsigned int lookup_flags = 0;
714 	int error;
715 
716 	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
717 		return -EINVAL;
718 
719 	if (!(at_flags & AT_SYMLINK_NOFOLLOW))
720 		lookup_flags = LOOKUP_FOLLOW;
721 
722 	error = setxattr_copy(name, &ctx);
723 	if (error)
724 		return error;
725 
726 	CLASS(filename_maybe_null, filename)(pathname, at_flags);
727 	if (!filename && dfd >= 0) {
728 		CLASS(fd, f)(dfd);
729 		if (fd_empty(f))
730 			error = -EBADF;
731 		else
732 			error = file_setxattr(fd_file(f), &ctx);
733 	} else {
734 		error = filename_setxattr(dfd, filename, lookup_flags, &ctx);
735 	}
736 	kvfree(ctx.kvalue);
737 	return error;
738 }
739 
740 SYSCALL_DEFINE6(setxattrat, int, dfd, const char __user *, pathname, unsigned int, at_flags,
741 		const char __user *, name, const struct xattr_args __user *, uargs,
742 		size_t, usize)
743 {
744 	struct xattr_args args = {};
745 	int error;
746 
747 	BUILD_BUG_ON(sizeof(struct xattr_args) < XATTR_ARGS_SIZE_VER0);
748 	BUILD_BUG_ON(sizeof(struct xattr_args) != XATTR_ARGS_SIZE_LATEST);
749 
750 	if (unlikely(usize < XATTR_ARGS_SIZE_VER0))
751 		return -EINVAL;
752 	if (usize > PAGE_SIZE)
753 		return -E2BIG;
754 
755 	error = copy_struct_from_user(&args, sizeof(args), uargs, usize);
756 	if (error)
757 		return error;
758 
759 	return path_setxattrat(dfd, pathname, at_flags, name,
760 			       u64_to_user_ptr(args.value), args.size,
761 			       args.flags);
762 }
763 
764 SYSCALL_DEFINE5(setxattr, const char __user *, pathname,
765 		const char __user *, name, const void __user *, value,
766 		size_t, size, int, flags)
767 {
768 	return path_setxattrat(AT_FDCWD, pathname, 0, name, value, size, flags);
769 }
770 
771 SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
772 		const char __user *, name, const void __user *, value,
773 		size_t, size, int, flags)
774 {
775 	return path_setxattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, name,
776 			       value, size, flags);
777 }
778 
779 SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
780 		const void __user *,value, size_t, size, int, flags)
781 {
782 	return path_setxattrat(fd, NULL, AT_EMPTY_PATH, name,
783 			       value, size, flags);
784 }
785 
786 /*
787  * Extended attribute GET operations
788  */
789 static ssize_t
790 do_getxattr(struct mnt_idmap *idmap, struct dentry *d,
791 	struct kernel_xattr_ctx *ctx)
792 {
793 	ssize_t error;
794 	char *kname = ctx->kname->name;
795 	void *kvalue = NULL;
796 
797 	if (ctx->size) {
798 		if (ctx->size > XATTR_SIZE_MAX)
799 			ctx->size = XATTR_SIZE_MAX;
800 		kvalue = kvzalloc(ctx->size, GFP_KERNEL);
801 		if (!kvalue)
802 			return -ENOMEM;
803 	}
804 
805 	if (is_posix_acl_xattr(kname))
806 		error = do_get_acl(idmap, d, kname, kvalue, ctx->size);
807 	else
808 		error = vfs_getxattr(idmap, d, kname, kvalue, ctx->size);
809 	if (error > 0) {
810 		if (ctx->size && copy_to_user(ctx->value, kvalue, error))
811 			error = -EFAULT;
812 	} else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
813 		/* The file system tried to returned a value bigger
814 		   than XATTR_SIZE_MAX bytes. Not possible. */
815 		error = -E2BIG;
816 	}
817 
818 	kvfree(kvalue);
819 	return error;
820 }
821 
822 ssize_t file_getxattr(struct file *f, struct kernel_xattr_ctx *ctx)
823 {
824 	audit_file(f);
825 	return do_getxattr(file_mnt_idmap(f), f->f_path.dentry, ctx);
826 }
827 
828 ssize_t filename_getxattr(int dfd, struct filename *filename,
829 			  unsigned int lookup_flags, struct kernel_xattr_ctx *ctx)
830 {
831 	struct path path;
832 	ssize_t error;
833 retry:
834 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
835 	if (error)
836 		return error;
837 	error = do_getxattr(mnt_idmap(path.mnt), path.dentry, ctx);
838 	path_put(&path);
839 	if (retry_estale(error, lookup_flags)) {
840 		lookup_flags |= LOOKUP_REVAL;
841 		goto retry;
842 	}
843 	return error;
844 }
845 
846 static ssize_t path_getxattrat(int dfd, const char __user *pathname,
847 			       unsigned int at_flags, const char __user *name,
848 			       void __user *value, size_t size)
849 {
850 	struct xattr_name kname;
851 	struct kernel_xattr_ctx ctx = {
852 		.value    = value,
853 		.size     = size,
854 		.kname    = &kname,
855 		.flags    = 0,
856 	};
857 	ssize_t error;
858 
859 	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
860 		return -EINVAL;
861 
862 	error = import_xattr_name(&kname, name);
863 	if (error)
864 		return error;
865 
866 	CLASS(filename_maybe_null, filename)(pathname, at_flags);
867 	if (!filename && dfd >= 0) {
868 		CLASS(fd, f)(dfd);
869 		if (fd_empty(f))
870 			return -EBADF;
871 		return file_getxattr(fd_file(f), &ctx);
872 	} else {
873 		int lookup_flags = 0;
874 		if (!(at_flags & AT_SYMLINK_NOFOLLOW))
875 			lookup_flags = LOOKUP_FOLLOW;
876 		return filename_getxattr(dfd, filename, lookup_flags, &ctx);
877 	}
878 }
879 
880 SYSCALL_DEFINE6(getxattrat, int, dfd, const char __user *, pathname, unsigned int, at_flags,
881 		const char __user *, name, struct xattr_args __user *, uargs, size_t, usize)
882 {
883 	struct xattr_args args = {};
884 	int error;
885 
886 	BUILD_BUG_ON(sizeof(struct xattr_args) < XATTR_ARGS_SIZE_VER0);
887 	BUILD_BUG_ON(sizeof(struct xattr_args) != XATTR_ARGS_SIZE_LATEST);
888 
889 	if (unlikely(usize < XATTR_ARGS_SIZE_VER0))
890 		return -EINVAL;
891 	if (usize > PAGE_SIZE)
892 		return -E2BIG;
893 
894 	error = copy_struct_from_user(&args, sizeof(args), uargs, usize);
895 	if (error)
896 		return error;
897 
898 	if (args.flags != 0)
899 		return -EINVAL;
900 
901 	return path_getxattrat(dfd, pathname, at_flags, name,
902 			       u64_to_user_ptr(args.value), args.size);
903 }
904 
905 SYSCALL_DEFINE4(getxattr, const char __user *, pathname,
906 		const char __user *, name, void __user *, value, size_t, size)
907 {
908 	return path_getxattrat(AT_FDCWD, pathname, 0, name, value, size);
909 }
910 
911 SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
912 		const char __user *, name, void __user *, value, size_t, size)
913 {
914 	return path_getxattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, name,
915 			       value, size);
916 }
917 
918 SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
919 		void __user *, value, size_t, size)
920 {
921 	return path_getxattrat(fd, NULL, AT_EMPTY_PATH, name, value, size);
922 }
923 
924 /*
925  * Extended attribute LIST operations
926  */
927 static ssize_t
928 listxattr(struct dentry *d, char __user *list, size_t size)
929 {
930 	ssize_t error;
931 	char *klist = NULL;
932 
933 	if (size) {
934 		if (size > XATTR_LIST_MAX)
935 			size = XATTR_LIST_MAX;
936 		klist = kvmalloc(size, GFP_KERNEL);
937 		if (!klist)
938 			return -ENOMEM;
939 	}
940 
941 	error = vfs_listxattr(d, klist, size);
942 	if (error > 0) {
943 		if (size && copy_to_user(list, klist, error))
944 			error = -EFAULT;
945 	} else if (error == -ERANGE && size >= XATTR_LIST_MAX) {
946 		/* The file system tried to returned a list bigger
947 		   than XATTR_LIST_MAX bytes. Not possible. */
948 		error = -E2BIG;
949 	}
950 
951 	kvfree(klist);
952 
953 	return error;
954 }
955 
956 static
957 ssize_t file_listxattr(struct file *f, char __user *list, size_t size)
958 {
959 	audit_file(f);
960 	return listxattr(f->f_path.dentry, list, size);
961 }
962 
963 static
964 ssize_t filename_listxattr(int dfd, struct filename *filename,
965 			   unsigned int lookup_flags,
966 			   char __user *list, size_t size)
967 {
968 	struct path path;
969 	ssize_t error;
970 retry:
971 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
972 	if (error)
973 		return error;
974 	error = listxattr(path.dentry, list, size);
975 	path_put(&path);
976 	if (retry_estale(error, lookup_flags)) {
977 		lookup_flags |= LOOKUP_REVAL;
978 		goto retry;
979 	}
980 	return error;
981 }
982 
983 static ssize_t path_listxattrat(int dfd, const char __user *pathname,
984 				unsigned int at_flags, char __user *list,
985 				size_t size)
986 {
987 	int lookup_flags;
988 
989 	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
990 		return -EINVAL;
991 
992 	CLASS(filename_maybe_null, filename)(pathname, at_flags);
993 	if (!filename) {
994 		CLASS(fd, f)(dfd);
995 		if (fd_empty(f))
996 			return -EBADF;
997 		return file_listxattr(fd_file(f), list, size);
998 	}
999 
1000 	lookup_flags = (at_flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
1001 	return filename_listxattr(dfd, filename, lookup_flags, list, size);
1002 }
1003 
1004 SYSCALL_DEFINE5(listxattrat, int, dfd, const char __user *, pathname,
1005 		unsigned int, at_flags,
1006 		char __user *, list, size_t, size)
1007 {
1008 	return path_listxattrat(dfd, pathname, at_flags, list, size);
1009 }
1010 
1011 SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list,
1012 		size_t, size)
1013 {
1014 	return path_listxattrat(AT_FDCWD, pathname, 0, list, size);
1015 }
1016 
1017 SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
1018 		size_t, size)
1019 {
1020 	return path_listxattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, list, size);
1021 }
1022 
1023 SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
1024 {
1025 	return path_listxattrat(fd, NULL, AT_EMPTY_PATH, list, size);
1026 }
1027 
1028 /*
1029  * Extended attribute REMOVE operations
1030  */
/*
 * Remove attribute @name from @d: POSIX ACL names go to vfs_remove_acl(),
 * everything else to vfs_removexattr().
 */
static long
removexattr(struct mnt_idmap *idmap, struct dentry *d, const char *name)
{
	long ret;

	if (is_posix_acl_xattr(name))
		ret = vfs_remove_acl(idmap, d, name);
	else
		ret = vfs_removexattr(idmap, d, name);

	return ret;
}
1038 
1039 static int file_removexattr(struct file *f, struct xattr_name *kname)
1040 {
1041 	int error = mnt_want_write_file(f);
1042 
1043 	if (!error) {
1044 		audit_file(f);
1045 		error = removexattr(file_mnt_idmap(f),
1046 				    f->f_path.dentry, kname->name);
1047 		mnt_drop_write_file(f);
1048 	}
1049 	return error;
1050 }
1051 
1052 static int filename_removexattr(int dfd, struct filename *filename,
1053 				unsigned int lookup_flags, struct xattr_name *kname)
1054 {
1055 	struct path path;
1056 	int error;
1057 
1058 retry:
1059 	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
1060 	if (error)
1061 		return error;
1062 	error = mnt_want_write(path.mnt);
1063 	if (!error) {
1064 		error = removexattr(mnt_idmap(path.mnt), path.dentry, kname->name);
1065 		mnt_drop_write(path.mnt);
1066 	}
1067 	path_put(&path);
1068 	if (retry_estale(error, lookup_flags)) {
1069 		lookup_flags |= LOOKUP_REVAL;
1070 		goto retry;
1071 	}
1072 	return error;
1073 }
1074 
1075 static int path_removexattrat(int dfd, const char __user *pathname,
1076 			      unsigned int at_flags, const char __user *name)
1077 {
1078 	struct xattr_name kname;
1079 	unsigned int lookup_flags;
1080 	int error;
1081 
1082 	if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
1083 		return -EINVAL;
1084 
1085 	error = import_xattr_name(&kname, name);
1086 	if (error)
1087 		return error;
1088 
1089 	CLASS(filename_maybe_null, filename)(pathname, at_flags);
1090 	if (!filename) {
1091 		CLASS(fd, f)(dfd);
1092 		if (fd_empty(f))
1093 			return -EBADF;
1094 		return file_removexattr(fd_file(f), &kname);
1095 	}
1096 	lookup_flags = (at_flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
1097 	return filename_removexattr(dfd, filename, lookup_flags, &kname);
1098 }
1099 
/*
 * removexattrat(2): remove the extended attribute @name from the object
 * identified by @dfd and @pathname.  Flag validation, the import of @name
 * and the choice between path-based and fd-based removal are all done by
 * path_removexattrat().
 */
SYSCALL_DEFINE4(removexattrat, int, dfd, const char __user *, pathname,
		unsigned int, at_flags, const char __user *, name)
{
	return path_removexattrat(dfd, pathname, at_flags, name);
}
1105 
/*
 * removexattr(2): remove the extended attribute @name from @pathname,
 * resolved relative to AT_FDCWD.  No AT_* flags are passed, so
 * path_removexattrat() performs the lookup with LOOKUP_FOLLOW.
 */
SYSCALL_DEFINE2(removexattr, const char __user *, pathname,
		const char __user *, name)
{
	return path_removexattrat(AT_FDCWD, pathname, 0, name);
}
1111 
/*
 * lremovexattr(2): like removexattr(2), but AT_SYMLINK_NOFOLLOW is passed
 * so that path_removexattrat() performs the lookup without LOOKUP_FOLLOW.
 */
SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
		const char __user *, name)
{
	return path_removexattrat(AT_FDCWD, pathname, AT_SYMLINK_NOFOLLOW, name);
}
1117 
/*
 * fremovexattr(2): remove the extended attribute @name from the file
 * referred to by @fd.  A NULL pathname is passed together with
 * AT_EMPTY_PATH; presumably this makes the filename lookup in
 * path_removexattrat() yield no filename so that the fd-based branch is
 * taken -- confirm against the filename_maybe_null class.
 */
SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
{
	return path_removexattrat(fd, NULL, AT_EMPTY_PATH, name);
}
1122 
/**
 * xattr_list_one - append one attribute name to a listxattr buffer
 * @buffer: cursor into the result buffer; *@buffer may be NULL for a
 *          size-only query
 * @remaining_size: bytes still available behind *@buffer
 * @name: NUL-terminated attribute name to append
 *
 * When *@buffer is non-NULL, @name is copied including its terminating NUL
 * and the cursor is advanced past it.  In either mode *@remaining_size is
 * reduced by the space the name occupies, so a caller can derive the number
 * of bytes used (or required) from the difference to its starting value.
 *
 * Return: 0 on success, -ERANGE if a buffer was supplied but does not have
 * room for @name.  On -ERANGE neither the cursor nor the remaining size is
 * modified.
 */
int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name)
{
	size_t len = strlen(name) + 1;

	if (*buffer) {
		/*
		 * @remaining_size is signed while @len is not.  Reject a
		 * negative remainder explicitly: a plain comparison would
		 * convert it to a huge unsigned value and let the copy run
		 * past the end of the buffer.
		 */
		if (*remaining_size < 0 || (size_t)*remaining_size < len)
			return -ERANGE;
		memcpy(*buffer, name, len);
		*buffer += len;
	}
	*remaining_size -= len;
	return 0;
}
1137 
1138 /**
1139  * generic_listxattr - run through a dentry's xattr list() operations
1140  * @dentry: dentry to list the xattrs
1141  * @buffer: result buffer
1142  * @buffer_size: size of @buffer
1143  *
1144  * Combine the results of the list() operation from every xattr_handler in the
1145  * xattr_handler stack.
1146  *
1147  * Note that this will not include the entries for POSIX ACLs.
1148  */
1149 ssize_t
1150 generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
1151 {
1152 	const struct xattr_handler *handler, * const *handlers = dentry->d_sb->s_xattr;
1153 	ssize_t remaining_size = buffer_size;
1154 
1155 	for_each_xattr_handler(handlers, handler) {
1156 		int err;
1157 
1158 		if (!handler->name || (handler->list && !handler->list(dentry)))
1159 			continue;
1160 		err = xattr_list_one(&buffer, &remaining_size, handler->name);
1161 		if (err)
1162 			return err;
1163 	}
1164 
1165 	return buffer_size - remaining_size;
1166 }
1167 EXPORT_SYMBOL(generic_listxattr);
1168 
/**
 * xattr_full_name  -  Recover the full attribute name from its suffix
 *
 * @handler:	handler of the xattr_handler operation
 * @name:	name passed to the xattr_handler operation
 *
 * The get and set operations of a handler receive only the part of the
 * attribute name that follows the handler's prefix: a handler with prefix
 * "user." sees "foo" when "user.foo" is accessed.  The suffix still points
 * into the full name, so the full name starts prefix-length bytes earlier.
 *
 * Note: the list xattr handler operation when called from the vfs is passed a
 * NULL name; some file systems use this operation internally, with varying
 * semantics.
 */
const char *xattr_full_name(const struct xattr_handler *handler,
			    const char *name)
{
	/* Step back over the prefix that was skipped before the call. */
	return name - strlen(xattr_prefix(handler));
}
EXPORT_SYMBOL(xattr_full_name);
1192 
/**
 * simple_xattr_space - estimate the memory used by a simple xattr
 * @name: the full name of the xattr
 * @size: the size of its value
 *
 * The estimate deliberately ignores how much larger the backing slab
 * objects really are; what callers need is a deterministic figure that
 * grows with the name length, the value size and the number of xattrs.
 *
 * Return: The approximate number of bytes of memory used by such an xattr.
 */
size_t simple_xattr_space(const char *name, size_t size)
{
	/*
	 * A fixed "40" stands in for sizeof(struct simple_xattr) so that the
	 * result is the same on 32-bit and 64-bit, and stays the same even
	 * if simple_xattr grows.
	 */
	const size_t fixed_overhead = 40;
	size_t name_len = strlen(name);

	return fixed_overhead + size + name_len;
}
1212 
1213 /**
1214  * simple_xattr_free - free an xattr object
1215  * @xattr: the xattr object
1216  *
1217  * Free the xattr object. Can handle @xattr being NULL.
1218  */
1219 void simple_xattr_free(struct simple_xattr *xattr)
1220 {
1221 	if (xattr)
1222 		kfree(xattr->name);
1223 	kvfree(xattr);
1224 }
1225 
1226 static void simple_xattr_rcu_free(struct rcu_head *head)
1227 {
1228 	struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu);
1229 
1230 	simple_xattr_free(xattr);
1231 }
1232 
1233 /**
1234  * simple_xattr_free_rcu - free an xattr object with RCU delay
1235  * @xattr: the xattr object
1236  *
1237  * Free the xattr object after an RCU grace period. This must be used when
1238  * the xattr was removed from a data structure that concurrent RCU readers
1239  * may still be traversing. Can handle @xattr being NULL.
1240  */
1241 void simple_xattr_free_rcu(struct simple_xattr *xattr)
1242 {
1243 	if (xattr)
1244 		call_rcu(&xattr->rcu, simple_xattr_rcu_free);
1245 }
1246 
1247 /**
1248  * simple_xattr_alloc - allocate new xattr object
1249  * @value: value of the xattr object
1250  * @size: size of @value
1251  *
1252  * Allocate a new xattr object and initialize respective members. The caller is
1253  * responsible for handling the name of the xattr.
1254  *
1255  * Return: New xattr object on success, NULL if @value is NULL, ERR_PTR on
1256  * failure.
1257  */
1258 struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
1259 {
1260 	struct simple_xattr *new_xattr;
1261 	size_t len;
1262 
1263 	if (!value)
1264 		return NULL;
1265 
1266 	/* wrap around? */
1267 	len = sizeof(*new_xattr) + size;
1268 	if (len < sizeof(*new_xattr))
1269 		return ERR_PTR(-ENOMEM);
1270 
1271 	new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT);
1272 	if (!new_xattr)
1273 		return ERR_PTR(-ENOMEM);
1274 
1275 	new_xattr->size = size;
1276 	memcpy(new_xattr->value, value, size);
1277 	return new_xattr;
1278 }
1279 
1280 static u32 sx_hashfn(const char *name, const struct list_head *parent, u32 seed)
1281 {
1282 	return jhash(name, strlen(name), jhash(&parent, sizeof(parent), seed));
1283 }
1284 
1285 static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed)
1286 {
1287 	const struct sx_key *key = data;
1288 
1289 	return sx_hashfn(key->name, key->parent, seed);
1290 }
1291 
1292 static u32 simple_xattr_obj_hashfn(const void *obj, u32 len, u32 seed)
1293 {
1294 	const struct simple_xattr *xattr = obj;
1295 
1296 	return sx_hashfn(xattr->name, xattr->parent, seed);
1297 }
1298 
1299 static int simple_xattr_obj_cmpfn(struct rhashtable_compare_arg *arg,
1300 				   const void *obj)
1301 {
1302 	const struct simple_xattr *xattr = obj;
1303 	const struct sx_key *key = arg->key;
1304 
1305 	return xattr->parent != key->parent || strcmp(xattr->name, key->name);
1306 }
1307 
/*
 * rhashtable parameters for the simple xattr cache.
 *
 * Entries are struct simple_xattr objects linked through ->hash_node.
 * Lookups use a struct sx_key (parent list head + name): it is hashed by
 * simple_xattr_hashfn() and compared against stored objects by
 * simple_xattr_obj_cmpfn(), while stored objects themselves are hashed by
 * simple_xattr_obj_hashfn().
 */
static const struct rhashtable_params simple_xattr_params = {
	.head_offset    = offsetof(struct simple_xattr, hash_node),
	.hashfn         = simple_xattr_hashfn,
	.obj_hashfn     = simple_xattr_obj_hashfn,
	.obj_cmpfn      = simple_xattr_obj_cmpfn,
	.automatic_shrinking = true,
};
1315 
1316 /**
1317  * simple_xattr_get - get an xattr object
1318  * @cache: anchor for the hash table
1319  * @xattrs: the header of the xattr object
1320  * @name: the name of the xattr to retrieve
1321  * @buffer: the buffer to store the value into
1322  * @size: the size of @buffer
1323  *
1324  * Try to find and retrieve the xattr object associated with @name.
1325  * If @buffer is provided store the value of @xattr in @buffer
1326  * otherwise just return the length. The size of @buffer is limited
1327  * to XATTR_SIZE_MAX which currently is 65536.
1328  *
1329  * Return: On success the length of the xattr value is returned. On error a
1330  * negative error code is returned.
1331  */
1332 int simple_xattr_get(struct simple_xattr_cache *cache, struct list_head *xattrs,
1333 		     const char *name, void *buffer, size_t size)
1334 {
1335 	struct simple_xattr *xattr;
1336 	struct sx_key key = { .parent = xattrs, .name = name };
1337 	struct rhashtable *ht = READ_ONCE(cache->ht);
1338 	int ret = -ENODATA;
1339 
1340 	if (!ht)
1341 		return ret;
1342 
1343 	guard(rcu)();
1344 	xattr = rhashtable_lookup(ht, &key, simple_xattr_params);
1345 	if (xattr) {
1346 		ret = xattr->size;
1347 		if (buffer) {
1348 			if (size < xattr->size)
1349 				ret = -ERANGE;
1350 			else
1351 				memcpy(buffer, xattr->value, xattr->size);
1352 		}
1353 	}
1354 	return ret;
1355 }
1356 
1357 static struct rhashtable *simple_xattrs_lazy_alloc(struct simple_xattr_cache *cache,
1358 						   const void *value, int flags)
1359 {
1360 	struct rhashtable *oldht, *ht = READ_ONCE(cache->ht);
1361 	int err;
1362 
1363 	if (unlikely(!ht)) {
1364 		if (!value)
1365 			return (flags & XATTR_REPLACE) ? ERR_PTR(-ENODATA) : NULL;
1366 
1367 		ht = kzalloc_obj(*ht);
1368 		if (!ht)
1369 			return ERR_PTR(-ENOMEM);
1370 
1371 		err = rhashtable_init(ht, &simple_xattr_params);
1372 		if (err) {
1373 			kfree(ht);
1374 			return ERR_PTR(err);
1375 		}
1376 
1377 		/*
1378 		 * Provides release semantics on success, so that use of a
1379 		 * non-NULL READ_ONCE(cache->ht) will be ordered relative to the
1380 		 * above initialization, due to implicit address dependency.
1381 		 */
1382 		oldht = cmpxchg_release(&cache->ht, NULL, ht);
1383 		if (oldht) {
1384 			/* Race lost */
1385 			rhashtable_destroy(ht);
1386 			kfree(ht);
1387 			ht = oldht;
1388 		}
1389 	}
1390 	return ht;
1391 }
1392 
1393 /**
1394  * simple_xattr_set - set an xattr object
1395  * @cache: anchor for the hash table
1396  * @xattrs: the header of the xattr object
1397  * @name: the name of the xattr to retrieve
1398  * @value: the value to store along the xattr
1399  * @size: the size of @value
1400  * @flags: the flags determining how to set the xattr
1401  *
1402  * Set a new xattr object.
1403  * If @value is passed a new xattr object will be allocated. If XATTR_REPLACE
1404  * is specified in @flags a matching xattr object for @name must already exist.
1405  * If it does it will be replaced with the new xattr object. If it doesn't we
1406  * fail. If XATTR_CREATE is specified and a matching xattr does already exist
1407  * we fail. If it doesn't we create a new xattr. If @flags is zero we simply
1408  * insert the new xattr replacing any existing one.
1409  *
1410  * If @value is empty and a matching xattr object is found we delete it if
1411  * XATTR_REPLACE is specified in @flags or @flags is zero.
1412  *
1413  * If @value is empty and no matching xattr object for @name is found we do
1414  * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
1415  * XATTR_REPLACE we fail as mentioned above.
1416  *
1417  * Note: Callers must externally serialize writes. All current callers hold
1418  * the inode lock for write operations. The lookup->replace/remove sequence
1419  * is not atomic with respect to the rhashtable's per-bucket locking, but
1420  * is safe because writes are serialized by the caller.
1421  *
1422  * Return: On success, the removed or replaced xattr is returned, to be freed
1423  * by the caller; or NULL if none. On failure a negative error code is returned.
1424  */
1425 struct simple_xattr *simple_xattr_set(struct simple_xattr_cache *cache, struct list_head *xattrs,
1426 				      const char *name, const void *value,
1427 				      size_t size, int flags)
1428 {
1429 	struct sx_key key = { .parent = xattrs, .name = name };
1430 	struct simple_xattr *old_xattr = NULL;
1431 	struct rhashtable *ht;
1432 	int err;
1433 
1434 	ht = simple_xattrs_lazy_alloc(cache, value, flags);
1435 	if (IS_ERR_OR_NULL(ht))
1436 		return ERR_CAST(ht);
1437 
1438 	CLASS(simple_xattr, new_xattr)(value, size);
1439 	if (IS_ERR(new_xattr))
1440 		return new_xattr;
1441 
1442 	if (new_xattr) {
1443 		new_xattr->parent = xattrs;
1444 		new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
1445 		if (!new_xattr->name)
1446 			return ERR_PTR(-ENOMEM);
1447 	}
1448 
1449 	/*
1450 	 * Hash table lookup/replace/remove will grab RCU read lock themselves.
1451 	 * This makes sure that hash table lookup is safe against concurrent
1452 	 * modification on another inode.
1453 	 */
1454 	old_xattr = rhashtable_lookup_fast(ht, &key, simple_xattr_params);
1455 	if (old_xattr) {
1456 		/* Fail if XATTR_CREATE is requested and the xattr exists. */
1457 		if (flags & XATTR_CREATE)
1458 			return ERR_PTR(-EEXIST);
1459 
1460 		if (new_xattr) {
1461 			err = rhashtable_replace_fast(ht,
1462 						      &old_xattr->hash_node,
1463 						      &new_xattr->hash_node,
1464 						      simple_xattr_params);
1465 			if (err)
1466 				return ERR_PTR(err);
1467 
1468 			list_replace_rcu(&old_xattr->node, &new_xattr->node);
1469 		} else {
1470 			err = rhashtable_remove_fast(ht,
1471 						     &old_xattr->hash_node,
1472 						     simple_xattr_params);
1473 			if (err)
1474 				return ERR_PTR(err);
1475 
1476 			list_del_rcu(&old_xattr->node);
1477 		}
1478 	} else {
1479 		/* Fail if XATTR_REPLACE is requested but no xattr is found. */
1480 		if (flags & XATTR_REPLACE)
1481 			return ERR_PTR(-ENODATA);
1482 
1483 		/*
1484 		 * If XATTR_CREATE or no flags are specified together with a
1485 		 * new value simply insert it.
1486 		 */
1487 		if (new_xattr) {
1488 			err = rhashtable_insert_fast(ht,
1489 						     &new_xattr->hash_node,
1490 						     simple_xattr_params);
1491 			if (err)
1492 				return ERR_PTR(err);
1493 
1494 			list_add_tail_rcu(&new_xattr->node, xattrs);
1495 		}
1496 
1497 		/*
1498 		 * If XATTR_CREATE or no flags are specified and neither an
1499 		 * old or new xattr exist then we don't need to do anything.
1500 		 */
1501 	}
1502 
1503 	retain_and_null_ptr(new_xattr);
1504 	return old_xattr;
1505 }
1506 
/*
 * Undo the accounting of one xattr in @limits: subtract @size from the
 * accumulated value size and drop the xattr count by one.
 */
static inline void simple_xattr_limits_dec(struct simple_xattr_limits *limits,
					   size_t size)
{
	atomic_sub(size, &limits->xattr_size);
	atomic_dec(&limits->nr_xattrs);
}
1513 
1514 static inline int simple_xattr_limits_inc(struct simple_xattr_limits *limits,
1515 					  size_t size)
1516 {
1517 	if (atomic_inc_return(&limits->nr_xattrs) > SIMPLE_XATTR_MAX_NR) {
1518 		atomic_dec(&limits->nr_xattrs);
1519 		return -ENOSPC;
1520 	}
1521 
1522 	if (atomic_add_return(size, &limits->xattr_size) <= SIMPLE_XATTR_MAX_SIZE)
1523 		return 0;
1524 
1525 	simple_xattr_limits_dec(limits, size);
1526 	return -ENOSPC;
1527 }
1528 
1529 /**
1530  * simple_xattr_set_limited - set an xattr with per-inode user.* limits
1531  * @cache: anchor for the hash table
1532  * @xattrs: the header of the xattr object
1533  * @limits: per-inode limit counters for user.* xattrs
1534  * @name: the name of the xattr to set or remove
1535  * @value: the value to store (NULL to remove)
1536  * @size: the size of @value
1537  * @flags: XATTR_CREATE, XATTR_REPLACE, or 0
1538  *
1539  * Like simple_xattr_set(), but enforces per-inode count and total value size
1540  * limits for user.* xattrs. Uses speculative pre-increment of the atomic
1541  * counters to avoid races without requiring external locks.
1542  *
1543  * Return: On success zero is returned. On failure a negative error code is
1544  * returned.
1545  */
1546 int simple_xattr_set_limited(struct simple_xattr_cache *cache, struct list_head *xattrs,
1547 			     struct simple_xattr_limits *limits,
1548 			     const char *name, const void *value,
1549 			     size_t size, int flags)
1550 {
1551 	struct simple_xattr *old_xattr;
1552 	int ret;
1553 
1554 	if (value) {
1555 		ret = simple_xattr_limits_inc(limits, size);
1556 		if (ret)
1557 			return ret;
1558 	}
1559 
1560 	old_xattr = simple_xattr_set(cache, xattrs, name, value, size, flags);
1561 	if (IS_ERR(old_xattr)) {
1562 		if (value)
1563 			simple_xattr_limits_dec(limits, size);
1564 		return PTR_ERR(old_xattr);
1565 	}
1566 	if (old_xattr) {
1567 		simple_xattr_limits_dec(limits, old_xattr->size);
1568 		simple_xattr_free_rcu(old_xattr);
1569 	}
1570 	return 0;
1571 }
1572 
1573 static bool xattr_is_trusted(const char *name)
1574 {
1575 	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
1576 }
1577 
1578 static bool xattr_is_maclabel(const char *name)
1579 {
1580 	const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
1581 
1582 	return !strncmp(name, XATTR_SECURITY_PREFIX,
1583 			XATTR_SECURITY_PREFIX_LEN) &&
1584 		security_ismaclabel(suffix);
1585 }
1586 
1587 /**
1588  * simple_xattr_list - list all xattr objects
1589  * @inode: inode from which to get the xattrs
1590  * @xattrs: the header of the xattr object
1591  * @buffer: the buffer to store all xattrs into
1592  * @size: the size of @buffer
1593  *
1594  * List all xattrs associated with @inode. If @buffer is NULL we returned
1595  * the required size of the buffer. If @buffer is provided we store the
1596  * xattrs value into it provided it is big enough.
1597  *
1598  * Note, the number of xattr names that can be listed with listxattr(2) is
1599  * limited to XATTR_LIST_MAX aka 65536 bytes. If a larger buffer is passed
1600  * then vfs_listxattr() caps it to XATTR_LIST_MAX and if more xattr names
1601  * are found it will return -E2BIG.
1602  *
1603  * Return: On success the required size or the size of the copied xattrs is
1604  * returned. On error a negative error code is returned.
1605  */
1606 ssize_t simple_xattr_list(struct inode *inode, struct list_head *xattrs,
1607 			  char *buffer, size_t size)
1608 {
1609 	bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
1610 	struct simple_xattr *xattr;
1611 	ssize_t remaining_size = size;
1612 	int err = 0;
1613 
1614 	err = posix_acl_listxattr(inode, &buffer, &remaining_size);
1615 	if (err)
1616 		return err;
1617 
1618 	err = security_inode_listsecurity(inode, &buffer, &remaining_size);
1619 	if (err < 0)
1620 		return err;
1621 
1622 	if (buffer) {
1623 		if (remaining_size < err)
1624 			return -ERANGE;
1625 		buffer += err;
1626 	}
1627 	remaining_size -= err;
1628 	err = 0;
1629 
1630 	if (!xattrs)
1631 		return size - remaining_size;
1632 
1633 	rcu_read_lock();
1634 	list_for_each_entry_rcu(xattr, xattrs, node) {
1635 		/* skip "trusted." attributes for unprivileged callers */
1636 		if (!trusted && xattr_is_trusted(xattr->name))
1637 			continue;
1638 
1639 		/* skip MAC labels; these are provided by LSM above */
1640 		if (xattr_is_maclabel(xattr->name))
1641 			continue;
1642 
1643 		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
1644 		if (err)
1645 			break;
1646 	}
1647 	rcu_read_unlock();
1648 
1649 	return err ? err : size - remaining_size;
1650 }
1651 
1652 /**
1653  * simple_xattr_add - add xattr objects
1654  * @cache: anchor for the hash table
1655  * @xattrs: the header of the xattr object
1656  * @new_xattr: the xattr object to add
1657  *
1658  * Add an xattr object to @xattrs. This assumes no replacement or removal
1659  * of matching xattrs is wanted. Should only be called during inode
1660  * initialization when a few distinct initial xattrs are supposed to be set.
1661  *
1662  * Return: On success zero is returned. On failure a negative error code is
1663  * returned.
1664  */
1665 int simple_xattr_add(struct simple_xattr_cache *cache, struct list_head *xattrs,
1666 		     struct simple_xattr *new_xattr)
1667 {
1668 	struct rhashtable *ht;
1669 	int err;
1670 
1671 	ht = simple_xattrs_lazy_alloc(cache, new_xattr->value, 0);
1672 	if (IS_ERR(ht))
1673 		return PTR_ERR(ht);
1674 
1675 	new_xattr->parent = xattrs;
1676 	err = rhashtable_insert_fast(ht, &new_xattr->hash_node, simple_xattr_params);
1677 	if (err)
1678 		return err;
1679 
1680 	list_add_tail_rcu(&new_xattr->node, xattrs);
1681 	return 0;
1682 }
1683 
1684 /**
1685  * simple_xattr_add_limited - add an xattr object, charging per-inode limits
1686  * @cache: anchor for the hash table
1687  * @xattrs: the header of the xattr object
1688  * @limits: per-inode limit counters
1689  * @new_xattr: the xattr object to add
1690  *
1691  * Like simple_xattr_add(), but also accounts @new_xattr against @limits so
1692  * that a later removal or replacement of it through simple_xattr_set_limited()
1693  * decrements counters that were actually incremented, rather than underflowing
1694  * them. Use this instead of simple_xattr_add() when seeding initial xattrs
1695  * that share a namespace with the limited set/remove path.
1696  *
1697  * Return: On success zero is returned. On failure a negative error code is
1698  * returned.
1699  */
1700 int simple_xattr_add_limited(struct simple_xattr_cache *cache,
1701 			     struct list_head *xattrs,
1702 			     struct simple_xattr_limits *limits,
1703 			     struct simple_xattr *new_xattr)
1704 {
1705 	int err;
1706 
1707 	err = simple_xattr_limits_inc(limits, new_xattr->size);
1708 	if (err)
1709 		return err;
1710 
1711 	err = simple_xattr_add(cache, xattrs, new_xattr);
1712 	if (err)
1713 		simple_xattr_limits_dec(limits, new_xattr->size);
1714 	return err;
1715 }
1716 
1717 /**
1718  * simple_xattrs_free - free xattrs
1719  * @cache: anchor for the hash table
1720  * @xattrs: xattr header whose xattrs to destroy
1721  * @freed_space: approximate number of bytes of memory freed from @xattrs
1722  *
1723  * Destroy all xattrs in @xattrs. When this is called no one can hold a
1724  * reference to any of the xattrs anymore.
1725  */
1726 void simple_xattrs_free(struct simple_xattr_cache *cache, struct list_head *xattrs,
1727 			size_t *freed_space)
1728 {
1729 	if (freed_space)
1730 		*freed_space = 0;
1731 
1732 	while (!list_empty(xattrs)) {
1733 		struct simple_xattr *xattr = list_first_entry(xattrs, typeof(*xattr), node);
1734 
1735 		rhashtable_remove_fast(cache->ht, &xattr->hash_node, simple_xattr_params);
1736 		list_del(&xattr->node);
1737 		if (freed_space)
1738 			*freed_space += simple_xattr_space(xattr->name, xattr->size);
1739 		/*
1740 		 * Free with RCU, since the xattr might still get accessed by
1741 		 * the hash compare function
1742 		 */
1743 		simple_xattr_free_rcu(xattr);
1744 	}
1745 }
1746 
1747 /**
1748  * simple_xattr_cache_cleanup - free the cache
1749  * @cache: anchor for the hash table
1750  *
1751  * Destroy the cache table, which was lazily allocated on adding the first xattr.
1752  */
1753 void simple_xattr_cache_cleanup(struct simple_xattr_cache *cache)
1754 {
1755 	if (cache->ht) {
1756 		WARN_ON(atomic_read(&cache->ht->nelems));
1757 		rhashtable_destroy(cache->ht);
1758 		kfree(cache->ht);
1759 		cache->ht = NULL;
1760 	}
1761 }
1762