xref: /linux/fs/xattr.c (revision dec1c62e91ba268ab2a6e339d4d7a59287d5eba1)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3   File: fs/xattr.c
4 
5   Extended attribute handling.
6 
7   Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
8   Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
9   Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
10  */
11 #include <linux/fs.h>
12 #include <linux/slab.h>
13 #include <linux/file.h>
14 #include <linux/xattr.h>
15 #include <linux/mount.h>
16 #include <linux/namei.h>
17 #include <linux/security.h>
18 #include <linux/evm.h>
19 #include <linux/syscalls.h>
20 #include <linux/export.h>
21 #include <linux/fsnotify.h>
22 #include <linux/audit.h>
23 #include <linux/vmalloc.h>
24 #include <linux/posix_acl_xattr.h>
25 
26 #include <linux/uaccess.h>
27 
28 #include "internal.h"
29 
/*
 * Match @a against @a_prefix.
 *
 * Returns a pointer to the first character of @a that follows the prefix
 * when every character of @a_prefix matches, or NULL as soon as a character
 * differs (including @a ending before the prefix does).
 */
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
	for (; *a_prefix != '\0'; a++, a_prefix++) {
		if (*a != *a_prefix)
			return NULL;
	}
	return a;
}
39 
/*
 * In order to implement different sets of xattr operations for each xattr
 * prefix, a filesystem should create a null-terminated array of struct
 * xattr_handler (one for each prefix) and hang a pointer to it off of the
 * s_xattr field of the superblock.
 *
 * for_each_xattr_handler() walks such an array.  @handlers is used as the
 * cursor and is advanced past every entry it yields, so it must be a
 * modifiable lvalue; @handler receives the current entry.  The walk stops
 * at the NULL terminator and is skipped entirely when @handlers is NULL.
 *
 * The "if (!...) {} else for" shape keeps an "else" written after a use of
 * this macro from silently binding to the macro's own "if".
 */
#define for_each_xattr_handler(handlers, handler)		\
	if (!(handlers)) {					\
	} else							\
		for ((handler) = *(handlers)++;			\
			(handler) != NULL;			\
			(handler) = *(handlers)++)
51 
52 /*
53  * Find the xattr_handler with the matching prefix.
54  */
55 static const struct xattr_handler *
56 xattr_resolve_name(struct inode *inode, const char **name)
57 {
58 	const struct xattr_handler **handlers = inode->i_sb->s_xattr;
59 	const struct xattr_handler *handler;
60 
61 	if (!(inode->i_opflags & IOP_XATTR)) {
62 		if (unlikely(is_bad_inode(inode)))
63 			return ERR_PTR(-EIO);
64 		return ERR_PTR(-EOPNOTSUPP);
65 	}
66 	for_each_xattr_handler(handlers, handler) {
67 		const char *n;
68 
69 		n = strcmp_prefix(*name, xattr_prefix(handler));
70 		if (n) {
71 			if (!handler->prefix ^ !*n) {
72 				if (*n)
73 					continue;
74 				return ERR_PTR(-EINVAL);
75 			}
76 			*name = n;
77 			return handler;
78 		}
79 	}
80 	return ERR_PTR(-EOPNOTSUPP);
81 }
82 
83 /*
84  * Check permissions for extended attribute access.  This is a bit complicated
85  * because different namespaces have very different rules.
86  */
87 static int
88 xattr_permission(struct user_namespace *mnt_userns, struct inode *inode,
89 		 const char *name, int mask)
90 {
91 	/*
92 	 * We can never set or remove an extended attribute on a read-only
93 	 * filesystem  or on an immutable / append-only inode.
94 	 */
95 	if (mask & MAY_WRITE) {
96 		if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
97 			return -EPERM;
98 		/*
99 		 * Updating an xattr will likely cause i_uid and i_gid
100 		 * to be writen back improperly if their true value is
101 		 * unknown to the vfs.
102 		 */
103 		if (HAS_UNMAPPED_ID(mnt_userns, inode))
104 			return -EPERM;
105 	}
106 
107 	/*
108 	 * No restriction for security.* and system.* from the VFS.  Decision
109 	 * on these is left to the underlying filesystem / security module.
110 	 */
111 	if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
112 	    !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
113 		return 0;
114 
115 	/*
116 	 * The trusted.* namespace can only be accessed by privileged users.
117 	 */
118 	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
119 		if (!capable(CAP_SYS_ADMIN))
120 			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
121 		return 0;
122 	}
123 
124 	/*
125 	 * In the user.* namespace, only regular files and directories can have
126 	 * extended attributes. For sticky directories, only the owner and
127 	 * privileged users can write attributes.
128 	 */
129 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
130 		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
131 			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
132 		if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
133 		    (mask & MAY_WRITE) &&
134 		    !inode_owner_or_capable(mnt_userns, inode))
135 			return -EPERM;
136 	}
137 
138 	return inode_permission(mnt_userns, inode, mask);
139 }
140 
141 /*
142  * Look for any handler that deals with the specified namespace.
143  */
144 int
145 xattr_supported_namespace(struct inode *inode, const char *prefix)
146 {
147 	const struct xattr_handler **handlers = inode->i_sb->s_xattr;
148 	const struct xattr_handler *handler;
149 	size_t preflen;
150 
151 	if (!(inode->i_opflags & IOP_XATTR)) {
152 		if (unlikely(is_bad_inode(inode)))
153 			return -EIO;
154 		return -EOPNOTSUPP;
155 	}
156 
157 	preflen = strlen(prefix);
158 
159 	for_each_xattr_handler(handlers, handler) {
160 		if (!strncmp(xattr_prefix(handler), prefix, preflen))
161 			return 0;
162 	}
163 
164 	return -EOPNOTSUPP;
165 }
166 EXPORT_SYMBOL(xattr_supported_namespace);
167 
168 int
169 __vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
170 	       struct inode *inode, const char *name, const void *value,
171 	       size_t size, int flags)
172 {
173 	const struct xattr_handler *handler;
174 
175 	handler = xattr_resolve_name(inode, &name);
176 	if (IS_ERR(handler))
177 		return PTR_ERR(handler);
178 	if (!handler->set)
179 		return -EOPNOTSUPP;
180 	if (size == 0)
181 		value = "";  /* empty EA, do not remove */
182 	return handler->set(handler, mnt_userns, dentry, inode, name, value,
183 			    size, flags);
184 }
185 EXPORT_SYMBOL(__vfs_setxattr);
186 
187 /**
188  *  __vfs_setxattr_noperm - perform setxattr operation without performing
189  *  permission checks.
190  *
191  *  @mnt_userns: user namespace of the mount the inode was found from
192  *  @dentry: object to perform setxattr on
193  *  @name: xattr name to set
194  *  @value: value to set @name to
195  *  @size: size of @value
196  *  @flags: flags to pass into filesystem operations
197  *
198  *  returns the result of the internal setxattr or setsecurity operations.
199  *
200  *  This function requires the caller to lock the inode's i_mutex before it
201  *  is executed. It also assumes that the caller will make the appropriate
202  *  permission checks.
203  */
204 int __vfs_setxattr_noperm(struct user_namespace *mnt_userns,
205 			  struct dentry *dentry, const char *name,
206 			  const void *value, size_t size, int flags)
207 {
208 	struct inode *inode = dentry->d_inode;
209 	int error = -EAGAIN;
210 	int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
211 				   XATTR_SECURITY_PREFIX_LEN);
212 
213 	if (issec)
214 		inode->i_flags &= ~S_NOSEC;
215 	if (inode->i_opflags & IOP_XATTR) {
216 		error = __vfs_setxattr(mnt_userns, dentry, inode, name, value,
217 				       size, flags);
218 		if (!error) {
219 			fsnotify_xattr(dentry);
220 			security_inode_post_setxattr(dentry, name, value,
221 						     size, flags);
222 		}
223 	} else {
224 		if (unlikely(is_bad_inode(inode)))
225 			return -EIO;
226 	}
227 	if (error == -EAGAIN) {
228 		error = -EOPNOTSUPP;
229 
230 		if (issec) {
231 			const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
232 
233 			error = security_inode_setsecurity(inode, suffix, value,
234 							   size, flags);
235 			if (!error)
236 				fsnotify_xattr(dentry);
237 		}
238 	}
239 
240 	return error;
241 }
242 
243 /**
244  * __vfs_setxattr_locked - set an extended attribute while holding the inode
245  * lock
246  *
247  *  @mnt_userns: user namespace of the mount of the target inode
248  *  @dentry: object to perform setxattr on
249  *  @name: xattr name to set
250  *  @value: value to set @name to
251  *  @size: size of @value
252  *  @flags: flags to pass into filesystem operations
253  *  @delegated_inode: on return, will contain an inode pointer that
254  *  a delegation was broken on, NULL if none.
255  */
256 int
257 __vfs_setxattr_locked(struct user_namespace *mnt_userns, struct dentry *dentry,
258 		      const char *name, const void *value, size_t size,
259 		      int flags, struct inode **delegated_inode)
260 {
261 	struct inode *inode = dentry->d_inode;
262 	int error;
263 
264 	error = xattr_permission(mnt_userns, inode, name, MAY_WRITE);
265 	if (error)
266 		return error;
267 
268 	error = security_inode_setxattr(mnt_userns, dentry, name, value, size,
269 					flags);
270 	if (error)
271 		goto out;
272 
273 	error = try_break_deleg(inode, delegated_inode);
274 	if (error)
275 		goto out;
276 
277 	error = __vfs_setxattr_noperm(mnt_userns, dentry, name, value,
278 				      size, flags);
279 
280 out:
281 	return error;
282 }
283 EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
284 
285 int
286 vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
287 	     const char *name, const void *value, size_t size, int flags)
288 {
289 	struct inode *inode = dentry->d_inode;
290 	struct inode *delegated_inode = NULL;
291 	const void  *orig_value = value;
292 	int error;
293 
294 	if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
295 		error = cap_convert_nscap(mnt_userns, dentry, &value, size);
296 		if (error < 0)
297 			return error;
298 		size = error;
299 	}
300 
301 retry_deleg:
302 	inode_lock(inode);
303 	error = __vfs_setxattr_locked(mnt_userns, dentry, name, value, size,
304 				      flags, &delegated_inode);
305 	inode_unlock(inode);
306 
307 	if (delegated_inode) {
308 		error = break_deleg_wait(&delegated_inode);
309 		if (!error)
310 			goto retry_deleg;
311 	}
312 	if (value != orig_value)
313 		kfree(value);
314 
315 	return error;
316 }
317 EXPORT_SYMBOL_GPL(vfs_setxattr);
318 
319 static ssize_t
320 xattr_getsecurity(struct user_namespace *mnt_userns, struct inode *inode,
321 		  const char *name, void *value, size_t size)
322 {
323 	void *buffer = NULL;
324 	ssize_t len;
325 
326 	if (!value || !size) {
327 		len = security_inode_getsecurity(mnt_userns, inode, name,
328 						 &buffer, false);
329 		goto out_noalloc;
330 	}
331 
332 	len = security_inode_getsecurity(mnt_userns, inode, name, &buffer,
333 					 true);
334 	if (len < 0)
335 		return len;
336 	if (size < len) {
337 		len = -ERANGE;
338 		goto out;
339 	}
340 	memcpy(value, buffer, len);
341 out:
342 	kfree(buffer);
343 out_noalloc:
344 	return len;
345 }
346 
347 /*
348  * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr
349  *
350  * Allocate memory, if not already allocated, or re-allocate correct size,
351  * before retrieving the extended attribute.
352  *
353  * Returns the result of alloc, if failed, or the getxattr operation.
354  */
355 ssize_t
356 vfs_getxattr_alloc(struct user_namespace *mnt_userns, struct dentry *dentry,
357 		   const char *name, char **xattr_value, size_t xattr_size,
358 		   gfp_t flags)
359 {
360 	const struct xattr_handler *handler;
361 	struct inode *inode = dentry->d_inode;
362 	char *value = *xattr_value;
363 	int error;
364 
365 	error = xattr_permission(mnt_userns, inode, name, MAY_READ);
366 	if (error)
367 		return error;
368 
369 	handler = xattr_resolve_name(inode, &name);
370 	if (IS_ERR(handler))
371 		return PTR_ERR(handler);
372 	if (!handler->get)
373 		return -EOPNOTSUPP;
374 	error = handler->get(handler, dentry, inode, name, NULL, 0);
375 	if (error < 0)
376 		return error;
377 
378 	if (!value || (error > xattr_size)) {
379 		value = krealloc(*xattr_value, error + 1, flags);
380 		if (!value)
381 			return -ENOMEM;
382 		memset(value, 0, error + 1);
383 	}
384 
385 	error = handler->get(handler, dentry, inode, name, value, error);
386 	*xattr_value = value;
387 	return error;
388 }
389 
390 ssize_t
391 __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
392 	       void *value, size_t size)
393 {
394 	const struct xattr_handler *handler;
395 
396 	handler = xattr_resolve_name(inode, &name);
397 	if (IS_ERR(handler))
398 		return PTR_ERR(handler);
399 	if (!handler->get)
400 		return -EOPNOTSUPP;
401 	return handler->get(handler, dentry, inode, name, value, size);
402 }
403 EXPORT_SYMBOL(__vfs_getxattr);
404 
405 ssize_t
406 vfs_getxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
407 	     const char *name, void *value, size_t size)
408 {
409 	struct inode *inode = dentry->d_inode;
410 	int error;
411 
412 	error = xattr_permission(mnt_userns, inode, name, MAY_READ);
413 	if (error)
414 		return error;
415 
416 	error = security_inode_getxattr(dentry, name);
417 	if (error)
418 		return error;
419 
420 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
421 				XATTR_SECURITY_PREFIX_LEN)) {
422 		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
423 		int ret = xattr_getsecurity(mnt_userns, inode, suffix, value,
424 					    size);
425 		/*
426 		 * Only overwrite the return value if a security module
427 		 * is actually active.
428 		 */
429 		if (ret == -EOPNOTSUPP)
430 			goto nolsm;
431 		return ret;
432 	}
433 nolsm:
434 	return __vfs_getxattr(dentry, inode, name, value, size);
435 }
436 EXPORT_SYMBOL_GPL(vfs_getxattr);
437 
438 ssize_t
439 vfs_listxattr(struct dentry *dentry, char *list, size_t size)
440 {
441 	struct inode *inode = d_inode(dentry);
442 	ssize_t error;
443 
444 	error = security_inode_listxattr(dentry);
445 	if (error)
446 		return error;
447 	if (inode->i_op->listxattr && (inode->i_opflags & IOP_XATTR)) {
448 		error = inode->i_op->listxattr(dentry, list, size);
449 	} else {
450 		error = security_inode_listsecurity(inode, list, size);
451 		if (size && error > size)
452 			error = -ERANGE;
453 	}
454 	return error;
455 }
456 EXPORT_SYMBOL_GPL(vfs_listxattr);
457 
458 int
459 __vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry,
460 		  const char *name)
461 {
462 	struct inode *inode = d_inode(dentry);
463 	const struct xattr_handler *handler;
464 
465 	handler = xattr_resolve_name(inode, &name);
466 	if (IS_ERR(handler))
467 		return PTR_ERR(handler);
468 	if (!handler->set)
469 		return -EOPNOTSUPP;
470 	return handler->set(handler, mnt_userns, dentry, inode, name, NULL, 0,
471 			    XATTR_REPLACE);
472 }
473 EXPORT_SYMBOL(__vfs_removexattr);
474 
475 /**
476  * __vfs_removexattr_locked - set an extended attribute while holding the inode
477  * lock
478  *
479  *  @mnt_userns: user namespace of the mount of the target inode
480  *  @dentry: object to perform setxattr on
481  *  @name: name of xattr to remove
482  *  @delegated_inode: on return, will contain an inode pointer that
483  *  a delegation was broken on, NULL if none.
484  */
485 int
486 __vfs_removexattr_locked(struct user_namespace *mnt_userns,
487 			 struct dentry *dentry, const char *name,
488 			 struct inode **delegated_inode)
489 {
490 	struct inode *inode = dentry->d_inode;
491 	int error;
492 
493 	error = xattr_permission(mnt_userns, inode, name, MAY_WRITE);
494 	if (error)
495 		return error;
496 
497 	error = security_inode_removexattr(mnt_userns, dentry, name);
498 	if (error)
499 		goto out;
500 
501 	error = try_break_deleg(inode, delegated_inode);
502 	if (error)
503 		goto out;
504 
505 	error = __vfs_removexattr(mnt_userns, dentry, name);
506 
507 	if (!error) {
508 		fsnotify_xattr(dentry);
509 		evm_inode_post_removexattr(dentry, name);
510 	}
511 
512 out:
513 	return error;
514 }
515 EXPORT_SYMBOL_GPL(__vfs_removexattr_locked);
516 
517 int
518 vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry,
519 		const char *name)
520 {
521 	struct inode *inode = dentry->d_inode;
522 	struct inode *delegated_inode = NULL;
523 	int error;
524 
525 retry_deleg:
526 	inode_lock(inode);
527 	error = __vfs_removexattr_locked(mnt_userns, dentry,
528 					 name, &delegated_inode);
529 	inode_unlock(inode);
530 
531 	if (delegated_inode) {
532 		error = break_deleg_wait(&delegated_inode);
533 		if (!error)
534 			goto retry_deleg;
535 	}
536 
537 	return error;
538 }
539 EXPORT_SYMBOL_GPL(vfs_removexattr);
540 
541 /*
542  * Extended attribute SET operations
543  */
544 
545 int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
546 {
547 	int error;
548 
549 	if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
550 		return -EINVAL;
551 
552 	error = strncpy_from_user(ctx->kname->name, name,
553 				sizeof(ctx->kname->name));
554 	if (error == 0 || error == sizeof(ctx->kname->name))
555 		return  -ERANGE;
556 	if (error < 0)
557 		return error;
558 
559 	error = 0;
560 	if (ctx->size) {
561 		if (ctx->size > XATTR_SIZE_MAX)
562 			return -E2BIG;
563 
564 		ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
565 		if (IS_ERR(ctx->kvalue)) {
566 			error = PTR_ERR(ctx->kvalue);
567 			ctx->kvalue = NULL;
568 		}
569 	}
570 
571 	return error;
572 }
573 
574 static void setxattr_convert(struct user_namespace *mnt_userns,
575 			     struct dentry *d, struct xattr_ctx *ctx)
576 {
577 	if (ctx->size &&
578 		((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
579 		(strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
580 		posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
581 						ctx->kvalue, ctx->size);
582 }
583 
584 int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
585 		struct xattr_ctx *ctx)
586 {
587 	setxattr_convert(mnt_userns, dentry, ctx);
588 	return vfs_setxattr(mnt_userns, dentry, ctx->kname->name,
589 			ctx->kvalue, ctx->size, ctx->flags);
590 }
591 
592 static long
593 setxattr(struct user_namespace *mnt_userns, struct dentry *d,
594 	const char __user *name, const void __user *value, size_t size,
595 	int flags)
596 {
597 	struct xattr_name kname;
598 	struct xattr_ctx ctx = {
599 		.cvalue   = value,
600 		.kvalue   = NULL,
601 		.size     = size,
602 		.kname    = &kname,
603 		.flags    = flags,
604 	};
605 	int error;
606 
607 	error = setxattr_copy(name, &ctx);
608 	if (error)
609 		return error;
610 
611 	error = do_setxattr(mnt_userns, d, &ctx);
612 
613 	kvfree(ctx.kvalue);
614 	return error;
615 }
616 
617 static int path_setxattr(const char __user *pathname,
618 			 const char __user *name, const void __user *value,
619 			 size_t size, int flags, unsigned int lookup_flags)
620 {
621 	struct path path;
622 	int error;
623 
624 retry:
625 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
626 	if (error)
627 		return error;
628 	error = mnt_want_write(path.mnt);
629 	if (!error) {
630 		error = setxattr(mnt_user_ns(path.mnt), path.dentry, name,
631 				 value, size, flags);
632 		mnt_drop_write(path.mnt);
633 	}
634 	path_put(&path);
635 	if (retry_estale(error, lookup_flags)) {
636 		lookup_flags |= LOOKUP_REVAL;
637 		goto retry;
638 	}
639 	return error;
640 }
641 
642 SYSCALL_DEFINE5(setxattr, const char __user *, pathname,
643 		const char __user *, name, const void __user *, value,
644 		size_t, size, int, flags)
645 {
646 	return path_setxattr(pathname, name, value, size, flags, LOOKUP_FOLLOW);
647 }
648 
649 SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
650 		const char __user *, name, const void __user *, value,
651 		size_t, size, int, flags)
652 {
653 	return path_setxattr(pathname, name, value, size, flags, 0);
654 }
655 
656 SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
657 		const void __user *,value, size_t, size, int, flags)
658 {
659 	struct fd f = fdget(fd);
660 	int error = -EBADF;
661 
662 	if (!f.file)
663 		return error;
664 	audit_file(f.file);
665 	error = mnt_want_write_file(f.file);
666 	if (!error) {
667 		error = setxattr(file_mnt_user_ns(f.file),
668 				 f.file->f_path.dentry, name,
669 				 value, size, flags);
670 		mnt_drop_write_file(f.file);
671 	}
672 	fdput(f);
673 	return error;
674 }
675 
676 /*
677  * Extended attribute GET operations
678  */
679 ssize_t
680 do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
681 	struct xattr_ctx *ctx)
682 {
683 	ssize_t error;
684 	char *kname = ctx->kname->name;
685 
686 	if (ctx->size) {
687 		if (ctx->size > XATTR_SIZE_MAX)
688 			ctx->size = XATTR_SIZE_MAX;
689 		ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL);
690 		if (!ctx->kvalue)
691 			return -ENOMEM;
692 	}
693 
694 	error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size);
695 	if (error > 0) {
696 		if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
697 		    (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
698 			posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d),
699 							ctx->kvalue, error);
700 		if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
701 			error = -EFAULT;
702 	} else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
703 		/* The file system tried to returned a value bigger
704 		   than XATTR_SIZE_MAX bytes. Not possible. */
705 		error = -E2BIG;
706 	}
707 
708 	return error;
709 }
710 
711 static ssize_t
712 getxattr(struct user_namespace *mnt_userns, struct dentry *d,
713 	 const char __user *name, void __user *value, size_t size)
714 {
715 	ssize_t error;
716 	struct xattr_name kname;
717 	struct xattr_ctx ctx = {
718 		.value    = value,
719 		.kvalue   = NULL,
720 		.size     = size,
721 		.kname    = &kname,
722 		.flags    = 0,
723 	};
724 
725 	error = strncpy_from_user(kname.name, name, sizeof(kname.name));
726 	if (error == 0 || error == sizeof(kname.name))
727 		error = -ERANGE;
728 	if (error < 0)
729 		return error;
730 
731 	error =  do_getxattr(mnt_userns, d, &ctx);
732 
733 	kvfree(ctx.kvalue);
734 	return error;
735 }
736 
737 static ssize_t path_getxattr(const char __user *pathname,
738 			     const char __user *name, void __user *value,
739 			     size_t size, unsigned int lookup_flags)
740 {
741 	struct path path;
742 	ssize_t error;
743 retry:
744 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
745 	if (error)
746 		return error;
747 	error = getxattr(mnt_user_ns(path.mnt), path.dentry, name, value, size);
748 	path_put(&path);
749 	if (retry_estale(error, lookup_flags)) {
750 		lookup_flags |= LOOKUP_REVAL;
751 		goto retry;
752 	}
753 	return error;
754 }
755 
756 SYSCALL_DEFINE4(getxattr, const char __user *, pathname,
757 		const char __user *, name, void __user *, value, size_t, size)
758 {
759 	return path_getxattr(pathname, name, value, size, LOOKUP_FOLLOW);
760 }
761 
762 SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
763 		const char __user *, name, void __user *, value, size_t, size)
764 {
765 	return path_getxattr(pathname, name, value, size, 0);
766 }
767 
768 SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
769 		void __user *, value, size_t, size)
770 {
771 	struct fd f = fdget(fd);
772 	ssize_t error = -EBADF;
773 
774 	if (!f.file)
775 		return error;
776 	audit_file(f.file);
777 	error = getxattr(file_mnt_user_ns(f.file), f.file->f_path.dentry,
778 			 name, value, size);
779 	fdput(f);
780 	return error;
781 }
782 
783 /*
784  * Extended attribute LIST operations
785  */
786 static ssize_t
787 listxattr(struct dentry *d, char __user *list, size_t size)
788 {
789 	ssize_t error;
790 	char *klist = NULL;
791 
792 	if (size) {
793 		if (size > XATTR_LIST_MAX)
794 			size = XATTR_LIST_MAX;
795 		klist = kvmalloc(size, GFP_KERNEL);
796 		if (!klist)
797 			return -ENOMEM;
798 	}
799 
800 	error = vfs_listxattr(d, klist, size);
801 	if (error > 0) {
802 		if (size && copy_to_user(list, klist, error))
803 			error = -EFAULT;
804 	} else if (error == -ERANGE && size >= XATTR_LIST_MAX) {
805 		/* The file system tried to returned a list bigger
806 		   than XATTR_LIST_MAX bytes. Not possible. */
807 		error = -E2BIG;
808 	}
809 
810 	kvfree(klist);
811 
812 	return error;
813 }
814 
815 static ssize_t path_listxattr(const char __user *pathname, char __user *list,
816 			      size_t size, unsigned int lookup_flags)
817 {
818 	struct path path;
819 	ssize_t error;
820 retry:
821 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
822 	if (error)
823 		return error;
824 	error = listxattr(path.dentry, list, size);
825 	path_put(&path);
826 	if (retry_estale(error, lookup_flags)) {
827 		lookup_flags |= LOOKUP_REVAL;
828 		goto retry;
829 	}
830 	return error;
831 }
832 
833 SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list,
834 		size_t, size)
835 {
836 	return path_listxattr(pathname, list, size, LOOKUP_FOLLOW);
837 }
838 
839 SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
840 		size_t, size)
841 {
842 	return path_listxattr(pathname, list, size, 0);
843 }
844 
845 SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
846 {
847 	struct fd f = fdget(fd);
848 	ssize_t error = -EBADF;
849 
850 	if (!f.file)
851 		return error;
852 	audit_file(f.file);
853 	error = listxattr(f.file->f_path.dentry, list, size);
854 	fdput(f);
855 	return error;
856 }
857 
858 /*
859  * Extended attribute REMOVE operations
860  */
861 static long
862 removexattr(struct user_namespace *mnt_userns, struct dentry *d,
863 	    const char __user *name)
864 {
865 	int error;
866 	char kname[XATTR_NAME_MAX + 1];
867 
868 	error = strncpy_from_user(kname, name, sizeof(kname));
869 	if (error == 0 || error == sizeof(kname))
870 		error = -ERANGE;
871 	if (error < 0)
872 		return error;
873 
874 	return vfs_removexattr(mnt_userns, d, kname);
875 }
876 
877 static int path_removexattr(const char __user *pathname,
878 			    const char __user *name, unsigned int lookup_flags)
879 {
880 	struct path path;
881 	int error;
882 retry:
883 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
884 	if (error)
885 		return error;
886 	error = mnt_want_write(path.mnt);
887 	if (!error) {
888 		error = removexattr(mnt_user_ns(path.mnt), path.dentry, name);
889 		mnt_drop_write(path.mnt);
890 	}
891 	path_put(&path);
892 	if (retry_estale(error, lookup_flags)) {
893 		lookup_flags |= LOOKUP_REVAL;
894 		goto retry;
895 	}
896 	return error;
897 }
898 
899 SYSCALL_DEFINE2(removexattr, const char __user *, pathname,
900 		const char __user *, name)
901 {
902 	return path_removexattr(pathname, name, LOOKUP_FOLLOW);
903 }
904 
905 SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
906 		const char __user *, name)
907 {
908 	return path_removexattr(pathname, name, 0);
909 }
910 
911 SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
912 {
913 	struct fd f = fdget(fd);
914 	int error = -EBADF;
915 
916 	if (!f.file)
917 		return error;
918 	audit_file(f.file);
919 	error = mnt_want_write_file(f.file);
920 	if (!error) {
921 		error = removexattr(file_mnt_user_ns(f.file),
922 				    f.file->f_path.dentry, name);
923 		mnt_drop_write_file(f.file);
924 	}
925 	fdput(f);
926 	return error;
927 }
928 
929 /*
930  * Combine the results of the list() operation from every xattr_handler in the
931  * list.
932  */
933 ssize_t
934 generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
935 {
936 	const struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
937 	unsigned int size = 0;
938 
939 	if (!buffer) {
940 		for_each_xattr_handler(handlers, handler) {
941 			if (!handler->name ||
942 			    (handler->list && !handler->list(dentry)))
943 				continue;
944 			size += strlen(handler->name) + 1;
945 		}
946 	} else {
947 		char *buf = buffer;
948 		size_t len;
949 
950 		for_each_xattr_handler(handlers, handler) {
951 			if (!handler->name ||
952 			    (handler->list && !handler->list(dentry)))
953 				continue;
954 			len = strlen(handler->name);
955 			if (len + 1 > buffer_size)
956 				return -ERANGE;
957 			memcpy(buf, handler->name, len + 1);
958 			buf += len + 1;
959 			buffer_size -= len + 1;
960 		}
961 		size = buf - buffer;
962 	}
963 	return size;
964 }
965 EXPORT_SYMBOL(generic_listxattr);
966 
/**
 * xattr_full_name  -  Compute full attribute name from suffix
 *
 * @handler:	handler of the xattr_handler operation
 * @name:	name passed to the xattr_handler operation
 *
 * The get and set xattr handler operations are called with the remainder of
 * the attribute name after skipping the handler's prefix: for example, "foo"
 * is passed to the get operation of a handler with prefix "user." to get
 * attribute "user.foo".  The full name is still "there" in the name though,
 * so it is recovered by stepping @name back by the length of the prefix.
 *
 * Note: the list xattr handler operation when called from the vfs is passed a
 * NULL name; some file systems use this operation internally, with varying
 * semantics.
 */
const char *xattr_full_name(const struct xattr_handler *handler,
			    const char *name)
{
	return name - strlen(xattr_prefix(handler));
}
EXPORT_SYMBOL(xattr_full_name);
990 
991 /*
992  * Allocate new xattr and copy in the value; but leave the name to callers.
993  */
994 struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
995 {
996 	struct simple_xattr *new_xattr;
997 	size_t len;
998 
999 	/* wrap around? */
1000 	len = sizeof(*new_xattr) + size;
1001 	if (len < sizeof(*new_xattr))
1002 		return NULL;
1003 
1004 	new_xattr = kvmalloc(len, GFP_KERNEL);
1005 	if (!new_xattr)
1006 		return NULL;
1007 
1008 	new_xattr->size = size;
1009 	memcpy(new_xattr->value, value, size);
1010 	return new_xattr;
1011 }
1012 
1013 /*
1014  * xattr GET operation for in-memory/pseudo filesystems
1015  */
1016 int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
1017 		     void *buffer, size_t size)
1018 {
1019 	struct simple_xattr *xattr;
1020 	int ret = -ENODATA;
1021 
1022 	spin_lock(&xattrs->lock);
1023 	list_for_each_entry(xattr, &xattrs->head, list) {
1024 		if (strcmp(name, xattr->name))
1025 			continue;
1026 
1027 		ret = xattr->size;
1028 		if (buffer) {
1029 			if (size < xattr->size)
1030 				ret = -ERANGE;
1031 			else
1032 				memcpy(buffer, xattr->value, xattr->size);
1033 		}
1034 		break;
1035 	}
1036 	spin_unlock(&xattrs->lock);
1037 	return ret;
1038 }
1039 
1040 /**
1041  * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems
1042  * @xattrs: target simple_xattr list
1043  * @name: name of the extended attribute
1044  * @value: value of the xattr. If %NULL, will remove the attribute.
1045  * @size: size of the new xattr
1046  * @flags: %XATTR_{CREATE|REPLACE}
1047  * @removed_size: returns size of the removed xattr, -1 if none removed
1048  *
1049  * %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails
1050  * with -EEXIST.  If %XATTR_REPLACE is set, the xattr should exist;
1051  * otherwise, fails with -ENODATA.
1052  *
1053  * Returns 0 on success, -errno on failure.
1054  */
1055 int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
1056 		     const void *value, size_t size, int flags,
1057 		     ssize_t *removed_size)
1058 {
1059 	struct simple_xattr *xattr;
1060 	struct simple_xattr *new_xattr = NULL;
1061 	int err = 0;
1062 
1063 	if (removed_size)
1064 		*removed_size = -1;
1065 
1066 	/* value == NULL means remove */
1067 	if (value) {
1068 		new_xattr = simple_xattr_alloc(value, size);
1069 		if (!new_xattr)
1070 			return -ENOMEM;
1071 
1072 		new_xattr->name = kstrdup(name, GFP_KERNEL);
1073 		if (!new_xattr->name) {
1074 			kvfree(new_xattr);
1075 			return -ENOMEM;
1076 		}
1077 	}
1078 
1079 	spin_lock(&xattrs->lock);
1080 	list_for_each_entry(xattr, &xattrs->head, list) {
1081 		if (!strcmp(name, xattr->name)) {
1082 			if (flags & XATTR_CREATE) {
1083 				xattr = new_xattr;
1084 				err = -EEXIST;
1085 			} else if (new_xattr) {
1086 				list_replace(&xattr->list, &new_xattr->list);
1087 				if (removed_size)
1088 					*removed_size = xattr->size;
1089 			} else {
1090 				list_del(&xattr->list);
1091 				if (removed_size)
1092 					*removed_size = xattr->size;
1093 			}
1094 			goto out;
1095 		}
1096 	}
1097 	if (flags & XATTR_REPLACE) {
1098 		xattr = new_xattr;
1099 		err = -ENODATA;
1100 	} else {
1101 		list_add(&new_xattr->list, &xattrs->head);
1102 		xattr = NULL;
1103 	}
1104 out:
1105 	spin_unlock(&xattrs->lock);
1106 	if (xattr) {
1107 		kfree(xattr->name);
1108 		kvfree(xattr);
1109 	}
1110 	return err;
1111 
1112 }
1113 
1114 static bool xattr_is_trusted(const char *name)
1115 {
1116 	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
1117 }
1118 
/*
 * Account for one attribute name (with its terminating NUL) in a listing.
 *
 * When *@buffer is non-NULL the name is copied there and *@buffer is
 * advanced, or -ERANGE is returned, leaving everything untouched, if fewer
 * than the needed bytes remain.  *@remaining_size is reduced by the name's
 * length in either mode, so with a NULL buffer it simply accumulates the
 * space required.
 */
static int xattr_list_one(char **buffer, ssize_t *remaining_size,
			  const char *name)
{
	const size_t need = strlen(name) + 1;
	char *out = *buffer;

	if (out) {
		if (*remaining_size < need)
			return -ERANGE;
		memcpy(out, name, need);
		*buffer = out + need;
	}
	*remaining_size -= need;
	return 0;
}
1132 
1133 /*
1134  * xattr LIST operation for in-memory/pseudo filesystems
1135  */
1136 ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
1137 			  char *buffer, size_t size)
1138 {
1139 	bool trusted = capable(CAP_SYS_ADMIN);
1140 	struct simple_xattr *xattr;
1141 	ssize_t remaining_size = size;
1142 	int err = 0;
1143 
1144 #ifdef CONFIG_FS_POSIX_ACL
1145 	if (IS_POSIXACL(inode)) {
1146 		if (inode->i_acl) {
1147 			err = xattr_list_one(&buffer, &remaining_size,
1148 					     XATTR_NAME_POSIX_ACL_ACCESS);
1149 			if (err)
1150 				return err;
1151 		}
1152 		if (inode->i_default_acl) {
1153 			err = xattr_list_one(&buffer, &remaining_size,
1154 					     XATTR_NAME_POSIX_ACL_DEFAULT);
1155 			if (err)
1156 				return err;
1157 		}
1158 	}
1159 #endif
1160 
1161 	spin_lock(&xattrs->lock);
1162 	list_for_each_entry(xattr, &xattrs->head, list) {
1163 		/* skip "trusted." attributes for unprivileged callers */
1164 		if (!trusted && xattr_is_trusted(xattr->name))
1165 			continue;
1166 
1167 		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
1168 		if (err)
1169 			break;
1170 	}
1171 	spin_unlock(&xattrs->lock);
1172 
1173 	return err ? err : size - remaining_size;
1174 }
1175 
1176 /*
1177  * Adds an extended attribute to the list
1178  */
1179 void simple_xattr_list_add(struct simple_xattrs *xattrs,
1180 			   struct simple_xattr *new_xattr)
1181 {
1182 	spin_lock(&xattrs->lock);
1183 	list_add(&new_xattr->list, &xattrs->head);
1184 	spin_unlock(&xattrs->lock);
1185 }
1186