xref: /linux/fs/ceph/xattr.c (revision 38fe0e0156c037c060f81fe4e36549fae760322d)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/ceph/ceph_debug.h>
3 #include <linux/ceph/pagelist.h>
4 
5 #include "super.h"
6 #include "mds_client.h"
7 
8 #include <linux/ceph/decode.h>
9 
10 #include <linux/xattr.h>
11 #include <linux/security.h>
12 #include <linux/posix_acl_xattr.h>
13 #include <linux/slab.h>
14 
/* Name prefix carried by every Ceph-specific xattr handled in this file. */
#define XATTR_CEPH_PREFIX "ceph."
/* Length of the prefix in bytes, not counting the terminating NUL. */
#define XATTR_CEPH_PREFIX_LEN (sizeof(XATTR_CEPH_PREFIX) - 1)
17 
18 static int __remove_xattr(struct ceph_inode_info *ci,
19 			  struct ceph_inode_xattr *xattr);
20 
21 static bool ceph_is_valid_xattr(const char *name)
22 {
23 	return !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
24 	       !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
25 	       !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
26 	       !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
27 }
28 
/*
 * Descriptor for one virtual xattr.  Virtual xattrs are synthesized by the
 * client and expose the recursive directory statistics and layout metadata.
 *
 * Member order matters: the table terminators below use a positional
 * initializer after .name.
 */
struct ceph_vxattr {
	/* full attribute name, e.g. "ceph.dir.layout" */
	char *name;
	/* strlen(name) + 1 (for '\0') */
	size_t name_size;
	/* writes the value into @val and returns its length */
	ssize_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, size_t size);
	/* optional; when set and it returns false, getxattr yields -ENODATA */
	bool (*exists_cb)(struct ceph_inode_info *ci);
	/* VXATTR_FLAG_* bits */
	unsigned int flags;
};
41 
/* Bits for ceph_vxattr.flags. */
/* setxattr on the vxattr is refused with -EOPNOTSUPP */
#define VXATTR_FLAG_READONLY		(1 << 0)
/* NOTE(review): not consulted in the code visible here; presumably keeps the
 * vxattr out of listings -- confirm */
#define VXATTR_FLAG_HIDDEN		(1 << 1)
/* getxattr adds CEPH_STAT_RSTAT to the getattr mask before reading */
#define VXATTR_FLAG_RSTAT		(1 << 2)
/* getxattr adds CEPH_CAP_FILE_SHARED to the getattr mask before reading */
#define VXATTR_FLAG_DIRSTAT		(1 << 3)
46 
47 /* layouts */
48 
49 static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
50 {
51 	struct ceph_file_layout *fl = &ci->i_layout;
52 	return (fl->stripe_unit > 0 || fl->stripe_count > 0 ||
53 		fl->object_size > 0 || fl->pool_id >= 0 ||
54 		rcu_dereference_raw(fl->pool_ns) != NULL);
55 }
56 
57 static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
58 				    size_t size)
59 {
60 	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
61 	struct ceph_osd_client *osdc = &fsc->client->osdc;
62 	struct ceph_string *pool_ns;
63 	s64 pool = ci->i_layout.pool_id;
64 	const char *pool_name;
65 	const char *ns_field = " pool_namespace=";
66 	char buf[128];
67 	size_t len, total_len = 0;
68 	ssize_t ret;
69 
70 	pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
71 
72 	dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
73 	down_read(&osdc->lock);
74 	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
75 	if (pool_name) {
76 		len = snprintf(buf, sizeof(buf),
77 		"stripe_unit=%u stripe_count=%u object_size=%u pool=",
78 		ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
79 	        ci->i_layout.object_size);
80 		total_len = len + strlen(pool_name);
81 	} else {
82 		len = snprintf(buf, sizeof(buf),
83 		"stripe_unit=%u stripe_count=%u object_size=%u pool=%lld",
84 		ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
85 		ci->i_layout.object_size, pool);
86 		total_len = len;
87 	}
88 
89 	if (pool_ns)
90 		total_len += strlen(ns_field) + pool_ns->len;
91 
92 	ret = total_len;
93 	if (size >= total_len) {
94 		memcpy(val, buf, len);
95 		ret = len;
96 		if (pool_name) {
97 			len = strlen(pool_name);
98 			memcpy(val + ret, pool_name, len);
99 			ret += len;
100 		}
101 		if (pool_ns) {
102 			len = strlen(ns_field);
103 			memcpy(val + ret, ns_field, len);
104 			ret += len;
105 			memcpy(val + ret, pool_ns->str, pool_ns->len);
106 			ret += pool_ns->len;
107 		}
108 	}
109 	up_read(&osdc->lock);
110 	ceph_put_string(pool_ns);
111 	return ret;
112 }
113 
114 /*
115  * The convention with strings in xattrs is that they should not be NULL
116  * terminated, since we're returning the length with them. snprintf always
117  * NULL terminates however, so call it on a temporary buffer and then memcpy
118  * the result into place.
119  */
120 static __printf(3, 4)
121 int ceph_fmt_xattr(char *val, size_t size, const char *fmt, ...)
122 {
123 	int ret;
124 	va_list args;
125 	char buf[96]; /* NB: reevaluate size if new vxattrs are added */
126 
127 	va_start(args, fmt);
128 	ret = vsnprintf(buf, size ? sizeof(buf) : 0, fmt, args);
129 	va_end(args);
130 
131 	/* Sanity check */
132 	if (size && ret + 1 > sizeof(buf)) {
133 		WARN_ONCE(true, "Returned length too big (%d)", ret);
134 		return -E2BIG;
135 	}
136 
137 	if (ret <= size)
138 		memcpy(val, buf, ret);
139 	return ret;
140 }
141 
142 static ssize_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
143 						char *val, size_t size)
144 {
145 	return ceph_fmt_xattr(val, size, "%u", ci->i_layout.stripe_unit);
146 }
147 
148 static ssize_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
149 						 char *val, size_t size)
150 {
151 	return ceph_fmt_xattr(val, size, "%u", ci->i_layout.stripe_count);
152 }
153 
154 static ssize_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
155 						char *val, size_t size)
156 {
157 	return ceph_fmt_xattr(val, size, "%u", ci->i_layout.object_size);
158 }
159 
160 static ssize_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
161 					 char *val, size_t size)
162 {
163 	ssize_t ret;
164 	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
165 	struct ceph_osd_client *osdc = &fsc->client->osdc;
166 	s64 pool = ci->i_layout.pool_id;
167 	const char *pool_name;
168 
169 	down_read(&osdc->lock);
170 	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
171 	if (pool_name) {
172 		ret = strlen(pool_name);
173 		if (ret <= size)
174 			memcpy(val, pool_name, ret);
175 	} else {
176 		ret = ceph_fmt_xattr(val, size, "%lld", pool);
177 	}
178 	up_read(&osdc->lock);
179 	return ret;
180 }
181 
182 static ssize_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci,
183 						   char *val, size_t size)
184 {
185 	ssize_t ret = 0;
186 	struct ceph_string *ns = ceph_try_get_string(ci->i_layout.pool_ns);
187 
188 	if (ns) {
189 		ret = ns->len;
190 		if (ret <= size)
191 			memcpy(val, ns->str, ret);
192 		ceph_put_string(ns);
193 	}
194 	return ret;
195 }
196 
197 /* directories */
198 
199 static ssize_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
200 					 size_t size)
201 {
202 	return ceph_fmt_xattr(val, size, "%lld", ci->i_files + ci->i_subdirs);
203 }
204 
205 static ssize_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
206 				       size_t size)
207 {
208 	return ceph_fmt_xattr(val, size, "%lld", ci->i_files);
209 }
210 
211 static ssize_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
212 					 size_t size)
213 {
214 	return ceph_fmt_xattr(val, size, "%lld", ci->i_subdirs);
215 }
216 
217 static ssize_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
218 					  size_t size)
219 {
220 	return ceph_fmt_xattr(val, size, "%lld",
221 				ci->i_rfiles + ci->i_rsubdirs);
222 }
223 
224 static ssize_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
225 					size_t size)
226 {
227 	return ceph_fmt_xattr(val, size, "%lld", ci->i_rfiles);
228 }
229 
230 static ssize_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
231 					  size_t size)
232 {
233 	return ceph_fmt_xattr(val, size, "%lld", ci->i_rsubdirs);
234 }
235 
236 static ssize_t ceph_vxattrcb_dir_rsnaps(struct ceph_inode_info *ci, char *val,
237 					  size_t size)
238 {
239 	return ceph_fmt_xattr(val, size, "%lld", ci->i_rsnaps);
240 }
241 
242 static ssize_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
243 					size_t size)
244 {
245 	return ceph_fmt_xattr(val, size, "%lld", ci->i_rbytes);
246 }
247 
248 static ssize_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
249 					size_t size)
250 {
251 	return ceph_fmt_xattr(val, size, "%lld.%09ld", ci->i_rctime.tv_sec,
252 				ci->i_rctime.tv_nsec);
253 }
254 
255 /* dir pin */
256 static bool ceph_vxattrcb_dir_pin_exists(struct ceph_inode_info *ci)
257 {
258 	return ci->i_dir_pin != -ENODATA;
259 }
260 
261 static ssize_t ceph_vxattrcb_dir_pin(struct ceph_inode_info *ci, char *val,
262 				     size_t size)
263 {
264 	return ceph_fmt_xattr(val, size, "%d", (int)ci->i_dir_pin);
265 }
266 
267 /* quotas */
268 static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
269 {
270 	bool ret = false;
271 	spin_lock(&ci->i_ceph_lock);
272 	if ((ci->i_max_files || ci->i_max_bytes) &&
273 	    ci->i_vino.snap == CEPH_NOSNAP &&
274 	    ci->i_snap_realm &&
275 	    ci->i_snap_realm->ino == ci->i_vino.ino)
276 		ret = true;
277 	spin_unlock(&ci->i_ceph_lock);
278 	return ret;
279 }
280 
281 static ssize_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
282 				   size_t size)
283 {
284 	return ceph_fmt_xattr(val, size, "max_bytes=%llu max_files=%llu",
285 				ci->i_max_bytes, ci->i_max_files);
286 }
287 
288 static ssize_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci,
289 					     char *val, size_t size)
290 {
291 	return ceph_fmt_xattr(val, size, "%llu", ci->i_max_bytes);
292 }
293 
294 static ssize_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci,
295 					     char *val, size_t size)
296 {
297 	return ceph_fmt_xattr(val, size, "%llu", ci->i_max_files);
298 }
299 
300 /* snapshots */
301 static bool ceph_vxattrcb_snap_btime_exists(struct ceph_inode_info *ci)
302 {
303 	return (ci->i_snap_btime.tv_sec != 0 || ci->i_snap_btime.tv_nsec != 0);
304 }
305 
306 static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val,
307 					size_t size)
308 {
309 	return ceph_fmt_xattr(val, size, "%lld.%09ld", ci->i_snap_btime.tv_sec,
310 				ci->i_snap_btime.tv_nsec);
311 }
312 
313 static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci,
314 					  char *val, size_t size)
315 {
316 	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
317 
318 	return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid);
319 }
320 
321 static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci,
322 				       char *val, size_t size)
323 {
324 	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
325 
326 	return ceph_fmt_xattr(val, size, "client%lld",
327 			      ceph_client_gid(fsc->client));
328 }
329 
330 static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
331 					size_t size)
332 {
333 	int issued;
334 
335 	spin_lock(&ci->i_ceph_lock);
336 	issued = __ceph_caps_issued(ci, NULL);
337 	spin_unlock(&ci->i_ceph_lock);
338 
339 	return ceph_fmt_xattr(val, size, "%s/0x%x",
340 			      ceph_cap_string(issued), issued);
341 }
342 
/* "ceph.<type>.<name>", assembled at compile time from string literals. */
#define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name
/* Three-component form: "ceph.<type>.<name>.<name2>". */
#define CEPH_XATTR_NAME2(_type, _name, _name2)	\
	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2

/*
 * Table entry for a read-only vxattr "ceph.<type>.<name>" served by
 * ceph_vxattrcb_<type>_<name>(); always present (no exists_cb).
 */
#define XATTR_NAME_CEPH(_type, _name, _flags)				\
	{								\
		.name = CEPH_XATTR_NAME(_type, _name),			\
		.name_size = sizeof(CEPH_XATTR_NAME(_type, _name)),	\
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
		.exists_cb = NULL,					\
		.flags = (VXATTR_FLAG_READONLY | _flags),		\
	}

/* Read-only entry that additionally carries VXATTR_FLAG_RSTAT. */
#define XATTR_RSTAT_FIELD(_type, _name)					\
	XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT)

/*
 * Table entry for "ceph.<type>.<name>.<field>" served by
 * ceph_vxattrcb_<name>_<field>(); present only while a layout exists.
 */
#define XATTR_LAYOUT_FIELD(_type, _name, _field)			\
	{								\
		.name = CEPH_XATTR_NAME2(_type, _name, _field),		\
		.name_size = sizeof(CEPH_XATTR_NAME2(_type, _name, _field)), \
		.getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field,	\
		.exists_cb = ceph_vxattrcb_layout_exists,		\
		.flags = VXATTR_FLAG_HIDDEN,				\
	}

/*
 * Table entry for "ceph.<type>.<name>" served by
 * ceph_vxattrcb_<type>_<name>(); present only while a quota exists.
 */
#define XATTR_QUOTA_FIELD(_type, _name)					\
	{								\
		.name = CEPH_XATTR_NAME(_type, _name),			\
		.name_size = sizeof(CEPH_XATTR_NAME(_type, _name)),	\
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
		.exists_cb = ceph_vxattrcb_quota_exists,		\
		.flags = VXATTR_FLAG_HIDDEN,				\
	}
373 
374 static struct ceph_vxattr ceph_dir_vxattrs[] = {
375 	{
376 		.name = "ceph.dir.layout",
377 		.name_size = sizeof("ceph.dir.layout"),
378 		.getxattr_cb = ceph_vxattrcb_layout,
379 		.exists_cb = ceph_vxattrcb_layout_exists,
380 		.flags = VXATTR_FLAG_HIDDEN,
381 	},
382 	XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
383 	XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
384 	XATTR_LAYOUT_FIELD(dir, layout, object_size),
385 	XATTR_LAYOUT_FIELD(dir, layout, pool),
386 	XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
387 	XATTR_NAME_CEPH(dir, entries, VXATTR_FLAG_DIRSTAT),
388 	XATTR_NAME_CEPH(dir, files, VXATTR_FLAG_DIRSTAT),
389 	XATTR_NAME_CEPH(dir, subdirs, VXATTR_FLAG_DIRSTAT),
390 	XATTR_RSTAT_FIELD(dir, rentries),
391 	XATTR_RSTAT_FIELD(dir, rfiles),
392 	XATTR_RSTAT_FIELD(dir, rsubdirs),
393 	XATTR_RSTAT_FIELD(dir, rsnaps),
394 	XATTR_RSTAT_FIELD(dir, rbytes),
395 	XATTR_RSTAT_FIELD(dir, rctime),
396 	{
397 		.name = "ceph.dir.pin",
398 		.name_size = sizeof("ceph.dir.pin"),
399 		.getxattr_cb = ceph_vxattrcb_dir_pin,
400 		.exists_cb = ceph_vxattrcb_dir_pin_exists,
401 		.flags = VXATTR_FLAG_HIDDEN,
402 	},
403 	{
404 		.name = "ceph.quota",
405 		.name_size = sizeof("ceph.quota"),
406 		.getxattr_cb = ceph_vxattrcb_quota,
407 		.exists_cb = ceph_vxattrcb_quota_exists,
408 		.flags = VXATTR_FLAG_HIDDEN,
409 	},
410 	XATTR_QUOTA_FIELD(quota, max_bytes),
411 	XATTR_QUOTA_FIELD(quota, max_files),
412 	{
413 		.name = "ceph.snap.btime",
414 		.name_size = sizeof("ceph.snap.btime"),
415 		.getxattr_cb = ceph_vxattrcb_snap_btime,
416 		.exists_cb = ceph_vxattrcb_snap_btime_exists,
417 		.flags = VXATTR_FLAG_READONLY,
418 	},
419 	{
420 		.name = "ceph.caps",
421 		.name_size = sizeof("ceph.caps"),
422 		.getxattr_cb = ceph_vxattrcb_caps,
423 		.exists_cb = NULL,
424 		.flags = VXATTR_FLAG_HIDDEN,
425 	},
426 	{ .name = NULL, 0 }	/* Required table terminator */
427 };
428 
429 /* files */
430 
431 static struct ceph_vxattr ceph_file_vxattrs[] = {
432 	{
433 		.name = "ceph.file.layout",
434 		.name_size = sizeof("ceph.file.layout"),
435 		.getxattr_cb = ceph_vxattrcb_layout,
436 		.exists_cb = ceph_vxattrcb_layout_exists,
437 		.flags = VXATTR_FLAG_HIDDEN,
438 	},
439 	XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
440 	XATTR_LAYOUT_FIELD(file, layout, stripe_count),
441 	XATTR_LAYOUT_FIELD(file, layout, object_size),
442 	XATTR_LAYOUT_FIELD(file, layout, pool),
443 	XATTR_LAYOUT_FIELD(file, layout, pool_namespace),
444 	{
445 		.name = "ceph.snap.btime",
446 		.name_size = sizeof("ceph.snap.btime"),
447 		.getxattr_cb = ceph_vxattrcb_snap_btime,
448 		.exists_cb = ceph_vxattrcb_snap_btime_exists,
449 		.flags = VXATTR_FLAG_READONLY,
450 	},
451 	{
452 		.name = "ceph.caps",
453 		.name_size = sizeof("ceph.caps"),
454 		.getxattr_cb = ceph_vxattrcb_caps,
455 		.exists_cb = NULL,
456 		.flags = VXATTR_FLAG_HIDDEN,
457 	},
458 	{ .name = NULL, 0 }	/* Required table terminator */
459 };
460 
461 static struct ceph_vxattr ceph_common_vxattrs[] = {
462 	{
463 		.name = "ceph.cluster_fsid",
464 		.name_size = sizeof("ceph.cluster_fsid"),
465 		.getxattr_cb = ceph_vxattrcb_cluster_fsid,
466 		.exists_cb = NULL,
467 		.flags = VXATTR_FLAG_READONLY,
468 	},
469 	{
470 		.name = "ceph.client_id",
471 		.name_size = sizeof("ceph.client_id"),
472 		.getxattr_cb = ceph_vxattrcb_client_id,
473 		.exists_cb = NULL,
474 		.flags = VXATTR_FLAG_READONLY,
475 	},
476 	{ .name = NULL, 0 }	/* Required table terminator */
477 };
478 
479 static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
480 {
481 	if (S_ISDIR(inode->i_mode))
482 		return ceph_dir_vxattrs;
483 	else if (S_ISREG(inode->i_mode))
484 		return ceph_file_vxattrs;
485 	return NULL;
486 }
487 
488 static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
489 						const char *name)
490 {
491 	struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
492 
493 	if (vxattr) {
494 		while (vxattr->name) {
495 			if (!strcmp(vxattr->name, name))
496 				return vxattr;
497 			vxattr++;
498 		}
499 	}
500 
501 	vxattr = ceph_common_vxattrs;
502 	while (vxattr->name) {
503 		if (!strcmp(vxattr->name, name))
504 			return vxattr;
505 		vxattr++;
506 	}
507 
508 	return NULL;
509 }
510 
511 static int __set_xattr(struct ceph_inode_info *ci,
512 			   const char *name, int name_len,
513 			   const char *val, int val_len,
514 			   int flags, int update_xattr,
515 			   struct ceph_inode_xattr **newxattr)
516 {
517 	struct rb_node **p;
518 	struct rb_node *parent = NULL;
519 	struct ceph_inode_xattr *xattr = NULL;
520 	int c;
521 	int new = 0;
522 
523 	p = &ci->i_xattrs.index.rb_node;
524 	while (*p) {
525 		parent = *p;
526 		xattr = rb_entry(parent, struct ceph_inode_xattr, node);
527 		c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
528 		if (c < 0)
529 			p = &(*p)->rb_left;
530 		else if (c > 0)
531 			p = &(*p)->rb_right;
532 		else {
533 			if (name_len == xattr->name_len)
534 				break;
535 			else if (name_len < xattr->name_len)
536 				p = &(*p)->rb_left;
537 			else
538 				p = &(*p)->rb_right;
539 		}
540 		xattr = NULL;
541 	}
542 
543 	if (update_xattr) {
544 		int err = 0;
545 
546 		if (xattr && (flags & XATTR_CREATE))
547 			err = -EEXIST;
548 		else if (!xattr && (flags & XATTR_REPLACE))
549 			err = -ENODATA;
550 		if (err) {
551 			kfree(name);
552 			kfree(val);
553 			kfree(*newxattr);
554 			return err;
555 		}
556 		if (update_xattr < 0) {
557 			if (xattr)
558 				__remove_xattr(ci, xattr);
559 			kfree(name);
560 			kfree(*newxattr);
561 			return 0;
562 		}
563 	}
564 
565 	if (!xattr) {
566 		new = 1;
567 		xattr = *newxattr;
568 		xattr->name = name;
569 		xattr->name_len = name_len;
570 		xattr->should_free_name = update_xattr;
571 
572 		ci->i_xattrs.count++;
573 		dout("__set_xattr count=%d\n", ci->i_xattrs.count);
574 	} else {
575 		kfree(*newxattr);
576 		*newxattr = NULL;
577 		if (xattr->should_free_val)
578 			kfree(xattr->val);
579 
580 		if (update_xattr) {
581 			kfree(name);
582 			name = xattr->name;
583 		}
584 		ci->i_xattrs.names_size -= xattr->name_len;
585 		ci->i_xattrs.vals_size -= xattr->val_len;
586 	}
587 	ci->i_xattrs.names_size += name_len;
588 	ci->i_xattrs.vals_size += val_len;
589 	if (val)
590 		xattr->val = val;
591 	else
592 		xattr->val = "";
593 
594 	xattr->val_len = val_len;
595 	xattr->dirty = update_xattr;
596 	xattr->should_free_val = (val && update_xattr);
597 
598 	if (new) {
599 		rb_link_node(&xattr->node, parent, p);
600 		rb_insert_color(&xattr->node, &ci->i_xattrs.index);
601 		dout("__set_xattr_val p=%p\n", p);
602 	}
603 
604 	dout("__set_xattr_val added %llx.%llx xattr %p %.*s=%.*s\n",
605 	     ceph_vinop(&ci->vfs_inode), xattr, name_len, name, val_len, val);
606 
607 	return 0;
608 }
609 
610 static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
611 			   const char *name)
612 {
613 	struct rb_node **p;
614 	struct rb_node *parent = NULL;
615 	struct ceph_inode_xattr *xattr = NULL;
616 	int name_len = strlen(name);
617 	int c;
618 
619 	p = &ci->i_xattrs.index.rb_node;
620 	while (*p) {
621 		parent = *p;
622 		xattr = rb_entry(parent, struct ceph_inode_xattr, node);
623 		c = strncmp(name, xattr->name, xattr->name_len);
624 		if (c == 0 && name_len > xattr->name_len)
625 			c = 1;
626 		if (c < 0)
627 			p = &(*p)->rb_left;
628 		else if (c > 0)
629 			p = &(*p)->rb_right;
630 		else {
631 			dout("__get_xattr %s: found %.*s\n", name,
632 			     xattr->val_len, xattr->val);
633 			return xattr;
634 		}
635 	}
636 
637 	dout("__get_xattr %s: not found\n", name);
638 
639 	return NULL;
640 }
641 
642 static void __free_xattr(struct ceph_inode_xattr *xattr)
643 {
644 	BUG_ON(!xattr);
645 
646 	if (xattr->should_free_name)
647 		kfree(xattr->name);
648 	if (xattr->should_free_val)
649 		kfree(xattr->val);
650 
651 	kfree(xattr);
652 }
653 
654 static int __remove_xattr(struct ceph_inode_info *ci,
655 			  struct ceph_inode_xattr *xattr)
656 {
657 	if (!xattr)
658 		return -ENODATA;
659 
660 	rb_erase(&xattr->node, &ci->i_xattrs.index);
661 
662 	if (xattr->should_free_name)
663 		kfree(xattr->name);
664 	if (xattr->should_free_val)
665 		kfree(xattr->val);
666 
667 	ci->i_xattrs.names_size -= xattr->name_len;
668 	ci->i_xattrs.vals_size -= xattr->val_len;
669 	ci->i_xattrs.count--;
670 	kfree(xattr);
671 
672 	return 0;
673 }
674 
675 static char *__copy_xattr_names(struct ceph_inode_info *ci,
676 				char *dest)
677 {
678 	struct rb_node *p;
679 	struct ceph_inode_xattr *xattr = NULL;
680 
681 	p = rb_first(&ci->i_xattrs.index);
682 	dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
683 
684 	while (p) {
685 		xattr = rb_entry(p, struct ceph_inode_xattr, node);
686 		memcpy(dest, xattr->name, xattr->name_len);
687 		dest[xattr->name_len] = '\0';
688 
689 		dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
690 		     xattr->name_len, ci->i_xattrs.names_size);
691 
692 		dest += xattr->name_len + 1;
693 		p = rb_next(p);
694 	}
695 
696 	return dest;
697 }
698 
699 void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
700 {
701 	struct rb_node *p, *tmp;
702 	struct ceph_inode_xattr *xattr = NULL;
703 
704 	p = rb_first(&ci->i_xattrs.index);
705 
706 	dout("__ceph_destroy_xattrs p=%p\n", p);
707 
708 	while (p) {
709 		xattr = rb_entry(p, struct ceph_inode_xattr, node);
710 		tmp = p;
711 		p = rb_next(tmp);
712 		dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
713 		     xattr->name_len, xattr->name);
714 		rb_erase(tmp, &ci->i_xattrs.index);
715 
716 		__free_xattr(xattr);
717 	}
718 
719 	ci->i_xattrs.names_size = 0;
720 	ci->i_xattrs.vals_size = 0;
721 	ci->i_xattrs.index_version = 0;
722 	ci->i_xattrs.count = 0;
723 	ci->i_xattrs.index = RB_ROOT;
724 }
725 
726 static int __build_xattrs(struct inode *inode)
727 	__releases(ci->i_ceph_lock)
728 	__acquires(ci->i_ceph_lock)
729 {
730 	u32 namelen;
731 	u32 numattr = 0;
732 	void *p, *end;
733 	u32 len;
734 	const char *name, *val;
735 	struct ceph_inode_info *ci = ceph_inode(inode);
736 	u64 xattr_version;
737 	struct ceph_inode_xattr **xattrs = NULL;
738 	int err = 0;
739 	int i;
740 
741 	dout("__build_xattrs() len=%d\n",
742 	     ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
743 
744 	if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
745 		return 0; /* already built */
746 
747 	__ceph_destroy_xattrs(ci);
748 
749 start:
750 	/* updated internal xattr rb tree */
751 	if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
752 		p = ci->i_xattrs.blob->vec.iov_base;
753 		end = p + ci->i_xattrs.blob->vec.iov_len;
754 		ceph_decode_32_safe(&p, end, numattr, bad);
755 		xattr_version = ci->i_xattrs.version;
756 		spin_unlock(&ci->i_ceph_lock);
757 
758 		xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
759 				 GFP_NOFS);
760 		err = -ENOMEM;
761 		if (!xattrs)
762 			goto bad_lock;
763 
764 		for (i = 0; i < numattr; i++) {
765 			xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
766 					    GFP_NOFS);
767 			if (!xattrs[i])
768 				goto bad_lock;
769 		}
770 
771 		spin_lock(&ci->i_ceph_lock);
772 		if (ci->i_xattrs.version != xattr_version) {
773 			/* lost a race, retry */
774 			for (i = 0; i < numattr; i++)
775 				kfree(xattrs[i]);
776 			kfree(xattrs);
777 			xattrs = NULL;
778 			goto start;
779 		}
780 		err = -EIO;
781 		while (numattr--) {
782 			ceph_decode_32_safe(&p, end, len, bad);
783 			namelen = len;
784 			name = p;
785 			p += len;
786 			ceph_decode_32_safe(&p, end, len, bad);
787 			val = p;
788 			p += len;
789 
790 			err = __set_xattr(ci, name, namelen, val, len,
791 					  0, 0, &xattrs[numattr]);
792 
793 			if (err < 0)
794 				goto bad;
795 		}
796 		kfree(xattrs);
797 	}
798 	ci->i_xattrs.index_version = ci->i_xattrs.version;
799 	ci->i_xattrs.dirty = false;
800 
801 	return err;
802 bad_lock:
803 	spin_lock(&ci->i_ceph_lock);
804 bad:
805 	if (xattrs) {
806 		for (i = 0; i < numattr; i++)
807 			kfree(xattrs[i]);
808 		kfree(xattrs);
809 	}
810 	ci->i_xattrs.names_size = 0;
811 	return err;
812 }
813 
814 static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
815 				    int val_size)
816 {
817 	/*
818 	 * 4 bytes for the length, and additional 4 bytes per each xattr name,
819 	 * 4 bytes per each value
820 	 */
821 	int size = 4 + ci->i_xattrs.count*(4 + 4) +
822 			     ci->i_xattrs.names_size +
823 			     ci->i_xattrs.vals_size;
824 	dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
825 	     ci->i_xattrs.count, ci->i_xattrs.names_size,
826 	     ci->i_xattrs.vals_size);
827 
828 	if (name_size)
829 		size += 4 + 4 + name_size + val_size;
830 
831 	return size;
832 }
833 
834 /*
835  * If there are dirty xattrs, reencode xattrs into the prealloc_blob
836  * and swap into place.  It returns the old i_xattrs.blob (or NULL) so
837  * that it can be freed by the caller as the i_ceph_lock is likely to be
838  * held.
839  */
840 struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci)
841 {
842 	struct rb_node *p;
843 	struct ceph_inode_xattr *xattr = NULL;
844 	struct ceph_buffer *old_blob = NULL;
845 	void *dest;
846 
847 	dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
848 	if (ci->i_xattrs.dirty) {
849 		int need = __get_required_blob_size(ci, 0, 0);
850 
851 		BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
852 
853 		p = rb_first(&ci->i_xattrs.index);
854 		dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
855 
856 		ceph_encode_32(&dest, ci->i_xattrs.count);
857 		while (p) {
858 			xattr = rb_entry(p, struct ceph_inode_xattr, node);
859 
860 			ceph_encode_32(&dest, xattr->name_len);
861 			memcpy(dest, xattr->name, xattr->name_len);
862 			dest += xattr->name_len;
863 			ceph_encode_32(&dest, xattr->val_len);
864 			memcpy(dest, xattr->val, xattr->val_len);
865 			dest += xattr->val_len;
866 
867 			p = rb_next(p);
868 		}
869 
870 		/* adjust buffer len; it may be larger than we need */
871 		ci->i_xattrs.prealloc_blob->vec.iov_len =
872 			dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
873 
874 		if (ci->i_xattrs.blob)
875 			old_blob = ci->i_xattrs.blob;
876 		ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
877 		ci->i_xattrs.prealloc_blob = NULL;
878 		ci->i_xattrs.dirty = false;
879 		ci->i_xattrs.version++;
880 	}
881 
882 	return old_blob;
883 }
884 
885 static inline int __get_request_mask(struct inode *in) {
886 	struct ceph_mds_request *req = current->journal_info;
887 	int mask = 0;
888 	if (req && req->r_target_inode == in) {
889 		if (req->r_op == CEPH_MDS_OP_LOOKUP ||
890 		    req->r_op == CEPH_MDS_OP_LOOKUPINO ||
891 		    req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
892 		    req->r_op == CEPH_MDS_OP_GETATTR) {
893 			mask = le32_to_cpu(req->r_args.getattr.mask);
894 		} else if (req->r_op == CEPH_MDS_OP_OPEN ||
895 			   req->r_op == CEPH_MDS_OP_CREATE) {
896 			mask = le32_to_cpu(req->r_args.open.mask);
897 		}
898 	}
899 	return mask;
900 }
901 
902 ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
903 		      size_t size)
904 {
905 	struct ceph_inode_info *ci = ceph_inode(inode);
906 	struct ceph_inode_xattr *xattr;
907 	struct ceph_vxattr *vxattr = NULL;
908 	int req_mask;
909 	ssize_t err;
910 
911 	/* let's see if a virtual xattr was requested */
912 	vxattr = ceph_match_vxattr(inode, name);
913 	if (vxattr) {
914 		int mask = 0;
915 		if (vxattr->flags & VXATTR_FLAG_RSTAT)
916 			mask |= CEPH_STAT_RSTAT;
917 		if (vxattr->flags & VXATTR_FLAG_DIRSTAT)
918 			mask |= CEPH_CAP_FILE_SHARED;
919 		err = ceph_do_getattr(inode, mask, true);
920 		if (err)
921 			return err;
922 		err = -ENODATA;
923 		if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
924 			err = vxattr->getxattr_cb(ci, value, size);
925 			if (size && size < err)
926 				err = -ERANGE;
927 		}
928 		return err;
929 	}
930 
931 	req_mask = __get_request_mask(inode);
932 
933 	spin_lock(&ci->i_ceph_lock);
934 	dout("getxattr %p name '%s' ver=%lld index_ver=%lld\n", inode, name,
935 	     ci->i_xattrs.version, ci->i_xattrs.index_version);
936 
937 	if (ci->i_xattrs.version == 0 ||
938 	    !((req_mask & CEPH_CAP_XATTR_SHARED) ||
939 	      __ceph_caps_issued_mask_metric(ci, CEPH_CAP_XATTR_SHARED, 1))) {
940 		spin_unlock(&ci->i_ceph_lock);
941 
942 		/* security module gets xattr while filling trace */
943 		if (current->journal_info) {
944 			pr_warn_ratelimited("sync getxattr %p "
945 					    "during filling trace\n", inode);
946 			return -EBUSY;
947 		}
948 
949 		/* get xattrs from mds (if we don't already have them) */
950 		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
951 		if (err)
952 			return err;
953 		spin_lock(&ci->i_ceph_lock);
954 	}
955 
956 	err = __build_xattrs(inode);
957 	if (err < 0)
958 		goto out;
959 
960 	err = -ENODATA;  /* == ENOATTR */
961 	xattr = __get_xattr(ci, name);
962 	if (!xattr)
963 		goto out;
964 
965 	err = -ERANGE;
966 	if (size && size < xattr->val_len)
967 		goto out;
968 
969 	err = xattr->val_len;
970 	if (size == 0)
971 		goto out;
972 
973 	memcpy(value, xattr->val, xattr->val_len);
974 
975 	if (current->journal_info &&
976 	    !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
977 	    security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN))
978 		ci->i_ceph_flags |= CEPH_I_SEC_INITED;
979 out:
980 	spin_unlock(&ci->i_ceph_lock);
981 	return err;
982 }
983 
984 ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
985 {
986 	struct inode *inode = d_inode(dentry);
987 	struct ceph_inode_info *ci = ceph_inode(inode);
988 	bool len_only = (size == 0);
989 	u32 namelen;
990 	int err;
991 
992 	spin_lock(&ci->i_ceph_lock);
993 	dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
994 	     ci->i_xattrs.version, ci->i_xattrs.index_version);
995 
996 	if (ci->i_xattrs.version == 0 ||
997 	    !__ceph_caps_issued_mask_metric(ci, CEPH_CAP_XATTR_SHARED, 1)) {
998 		spin_unlock(&ci->i_ceph_lock);
999 		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
1000 		if (err)
1001 			return err;
1002 		spin_lock(&ci->i_ceph_lock);
1003 	}
1004 
1005 	err = __build_xattrs(inode);
1006 	if (err < 0)
1007 		goto out;
1008 
1009 	/* add 1 byte for each xattr due to the null termination */
1010 	namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
1011 	if (!len_only) {
1012 		if (namelen > size) {
1013 			err = -ERANGE;
1014 			goto out;
1015 		}
1016 		names = __copy_xattr_names(ci, names);
1017 		size -= namelen;
1018 	}
1019 	err = namelen;
1020 out:
1021 	spin_unlock(&ci->i_ceph_lock);
1022 	return err;
1023 }
1024 
1025 static int ceph_sync_setxattr(struct inode *inode, const char *name,
1026 			      const char *value, size_t size, int flags)
1027 {
1028 	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
1029 	struct ceph_inode_info *ci = ceph_inode(inode);
1030 	struct ceph_mds_request *req;
1031 	struct ceph_mds_client *mdsc = fsc->mdsc;
1032 	struct ceph_osd_client *osdc = &fsc->client->osdc;
1033 	struct ceph_pagelist *pagelist = NULL;
1034 	int op = CEPH_MDS_OP_SETXATTR;
1035 	int err;
1036 
1037 	if (size > 0) {
1038 		/* copy value into pagelist */
1039 		pagelist = ceph_pagelist_alloc(GFP_NOFS);
1040 		if (!pagelist)
1041 			return -ENOMEM;
1042 
1043 		err = ceph_pagelist_append(pagelist, value, size);
1044 		if (err)
1045 			goto out;
1046 	} else if (!value) {
1047 		if (flags & CEPH_XATTR_REPLACE)
1048 			op = CEPH_MDS_OP_RMXATTR;
1049 		else
1050 			flags |= CEPH_XATTR_REMOVE;
1051 	}
1052 
1053 	dout("setxattr value=%.*s\n", (int)size, value);
1054 
1055 	/* do request */
1056 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
1057 	if (IS_ERR(req)) {
1058 		err = PTR_ERR(req);
1059 		goto out;
1060 	}
1061 
1062 	req->r_path2 = kstrdup(name, GFP_NOFS);
1063 	if (!req->r_path2) {
1064 		ceph_mdsc_put_request(req);
1065 		err = -ENOMEM;
1066 		goto out;
1067 	}
1068 
1069 	if (op == CEPH_MDS_OP_SETXATTR) {
1070 		req->r_args.setxattr.flags = cpu_to_le32(flags);
1071 		req->r_args.setxattr.osdmap_epoch =
1072 			cpu_to_le32(osdc->osdmap->epoch);
1073 		req->r_pagelist = pagelist;
1074 		pagelist = NULL;
1075 	}
1076 
1077 	req->r_inode = inode;
1078 	ihold(inode);
1079 	req->r_num_caps = 1;
1080 	req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
1081 
1082 	dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
1083 	err = ceph_mdsc_do_request(mdsc, NULL, req);
1084 	ceph_mdsc_put_request(req);
1085 	dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
1086 
1087 out:
1088 	if (pagelist)
1089 		ceph_pagelist_release(pagelist);
1090 	return err;
1091 }
1092 
1093 int __ceph_setxattr(struct inode *inode, const char *name,
1094 			const void *value, size_t size, int flags)
1095 {
1096 	struct ceph_vxattr *vxattr;
1097 	struct ceph_inode_info *ci = ceph_inode(inode);
1098 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
1099 	struct ceph_cap_flush *prealloc_cf = NULL;
1100 	struct ceph_buffer *old_blob = NULL;
1101 	int issued;
1102 	int err;
1103 	int dirty = 0;
1104 	int name_len = strlen(name);
1105 	int val_len = size;
1106 	char *newname = NULL;
1107 	char *newval = NULL;
1108 	struct ceph_inode_xattr *xattr = NULL;
1109 	int required_blob_size;
1110 	bool check_realm = false;
1111 	bool lock_snap_rwsem = false;
1112 
1113 	if (ceph_snap(inode) != CEPH_NOSNAP)
1114 		return -EROFS;
1115 
1116 	vxattr = ceph_match_vxattr(inode, name);
1117 	if (vxattr) {
1118 		if (vxattr->flags & VXATTR_FLAG_READONLY)
1119 			return -EOPNOTSUPP;
1120 		if (value && !strncmp(vxattr->name, "ceph.quota", 10))
1121 			check_realm = true;
1122 	}
1123 
1124 	/* pass any unhandled ceph.* xattrs through to the MDS */
1125 	if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1126 		goto do_sync_unlocked;
1127 
1128 	/* preallocate memory for xattr name, value, index node */
1129 	err = -ENOMEM;
1130 	newname = kmemdup(name, name_len + 1, GFP_NOFS);
1131 	if (!newname)
1132 		goto out;
1133 
1134 	if (val_len) {
1135 		newval = kmemdup(value, val_len, GFP_NOFS);
1136 		if (!newval)
1137 			goto out;
1138 	}
1139 
1140 	xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
1141 	if (!xattr)
1142 		goto out;
1143 
1144 	prealloc_cf = ceph_alloc_cap_flush();
1145 	if (!prealloc_cf)
1146 		goto out;
1147 
1148 	spin_lock(&ci->i_ceph_lock);
1149 retry:
1150 	issued = __ceph_caps_issued(ci, NULL);
1151 	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
1152 		goto do_sync;
1153 
1154 	if (!lock_snap_rwsem && !ci->i_head_snapc) {
1155 		lock_snap_rwsem = true;
1156 		if (!down_read_trylock(&mdsc->snap_rwsem)) {
1157 			spin_unlock(&ci->i_ceph_lock);
1158 			down_read(&mdsc->snap_rwsem);
1159 			spin_lock(&ci->i_ceph_lock);
1160 			goto retry;
1161 		}
1162 	}
1163 
1164 	dout("setxattr %p name '%s' issued %s\n", inode, name,
1165 	     ceph_cap_string(issued));
1166 	__build_xattrs(inode);
1167 
1168 	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
1169 
1170 	if (!ci->i_xattrs.prealloc_blob ||
1171 	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
1172 		struct ceph_buffer *blob;
1173 
1174 		spin_unlock(&ci->i_ceph_lock);
1175 		ceph_buffer_put(old_blob); /* Shouldn't be required */
1176 		dout(" pre-allocating new blob size=%d\n", required_blob_size);
1177 		blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
1178 		if (!blob)
1179 			goto do_sync_unlocked;
1180 		spin_lock(&ci->i_ceph_lock);
1181 		/* prealloc_blob can't be released while holding i_ceph_lock */
1182 		if (ci->i_xattrs.prealloc_blob)
1183 			old_blob = ci->i_xattrs.prealloc_blob;
1184 		ci->i_xattrs.prealloc_blob = blob;
1185 		goto retry;
1186 	}
1187 
1188 	err = __set_xattr(ci, newname, name_len, newval, val_len,
1189 			  flags, value ? 1 : -1, &xattr);
1190 
1191 	if (!err) {
1192 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
1193 					       &prealloc_cf);
1194 		ci->i_xattrs.dirty = true;
1195 		inode->i_ctime = current_time(inode);
1196 	}
1197 
1198 	spin_unlock(&ci->i_ceph_lock);
1199 	ceph_buffer_put(old_blob);
1200 	if (lock_snap_rwsem)
1201 		up_read(&mdsc->snap_rwsem);
1202 	if (dirty)
1203 		__mark_inode_dirty(inode, dirty);
1204 	ceph_free_cap_flush(prealloc_cf);
1205 	return err;
1206 
1207 do_sync:
1208 	spin_unlock(&ci->i_ceph_lock);
1209 do_sync_unlocked:
1210 	if (lock_snap_rwsem)
1211 		up_read(&mdsc->snap_rwsem);
1212 
1213 	/* security module set xattr while filling trace */
1214 	if (current->journal_info) {
1215 		pr_warn_ratelimited("sync setxattr %p "
1216 				    "during filling trace\n", inode);
1217 		err = -EBUSY;
1218 	} else {
1219 		err = ceph_sync_setxattr(inode, name, value, size, flags);
1220 		if (err >= 0 && check_realm) {
1221 			/* check if snaprealm was created for quota inode */
1222 			spin_lock(&ci->i_ceph_lock);
1223 			if ((ci->i_max_files || ci->i_max_bytes) &&
1224 			    !(ci->i_snap_realm &&
1225 			      ci->i_snap_realm->ino == ci->i_vino.ino))
1226 				err = -EOPNOTSUPP;
1227 			spin_unlock(&ci->i_ceph_lock);
1228 		}
1229 	}
1230 out:
1231 	ceph_free_cap_flush(prealloc_cf);
1232 	kfree(newname);
1233 	kfree(newval);
1234 	kfree(xattr);
1235 	return err;
1236 }
1237 
1238 static int ceph_get_xattr_handler(const struct xattr_handler *handler,
1239 				  struct dentry *dentry, struct inode *inode,
1240 				  const char *name, void *value, size_t size)
1241 {
1242 	if (!ceph_is_valid_xattr(name))
1243 		return -EOPNOTSUPP;
1244 	return __ceph_getxattr(inode, name, value, size);
1245 }
1246 
1247 static int ceph_set_xattr_handler(const struct xattr_handler *handler,
1248 				  struct user_namespace *mnt_userns,
1249 				  struct dentry *unused, struct inode *inode,
1250 				  const char *name, const void *value,
1251 				  size_t size, int flags)
1252 {
1253 	if (!ceph_is_valid_xattr(name))
1254 		return -EOPNOTSUPP;
1255 	return __ceph_setxattr(inode, name, value, size, flags);
1256 }
1257 
1258 static const struct xattr_handler ceph_other_xattr_handler = {
1259 	.prefix = "",  /* match any name => handlers called with full name */
1260 	.get = ceph_get_xattr_handler,
1261 	.set = ceph_set_xattr_handler,
1262 };
1263 
1264 #ifdef CONFIG_SECURITY
1265 bool ceph_security_xattr_wanted(struct inode *in)
1266 {
1267 	return in->i_security != NULL;
1268 }
1269 
1270 bool ceph_security_xattr_deadlock(struct inode *in)
1271 {
1272 	struct ceph_inode_info *ci;
1273 	bool ret;
1274 	if (!in->i_security)
1275 		return false;
1276 	ci = ceph_inode(in);
1277 	spin_lock(&ci->i_ceph_lock);
1278 	ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
1279 	      !(ci->i_xattrs.version > 0 &&
1280 		__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
1281 	spin_unlock(&ci->i_ceph_lock);
1282 	return ret;
1283 }
1284 
1285 #ifdef CONFIG_CEPH_FS_SECURITY_LABEL
1286 int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
1287 			   struct ceph_acl_sec_ctx *as_ctx)
1288 {
1289 	struct ceph_pagelist *pagelist = as_ctx->pagelist;
1290 	const char *name;
1291 	size_t name_len;
1292 	int err;
1293 
1294 	err = security_dentry_init_security(dentry, mode, &dentry->d_name,
1295 					    &as_ctx->sec_ctx,
1296 					    &as_ctx->sec_ctxlen);
1297 	if (err < 0) {
1298 		WARN_ON_ONCE(err != -EOPNOTSUPP);
1299 		err = 0; /* do nothing */
1300 		goto out;
1301 	}
1302 
1303 	err = -ENOMEM;
1304 	if (!pagelist) {
1305 		pagelist = ceph_pagelist_alloc(GFP_KERNEL);
1306 		if (!pagelist)
1307 			goto out;
1308 		err = ceph_pagelist_reserve(pagelist, PAGE_SIZE);
1309 		if (err)
1310 			goto out;
1311 		ceph_pagelist_encode_32(pagelist, 1);
1312 	}
1313 
1314 	/*
1315 	 * FIXME: Make security_dentry_init_security() generic. Currently
1316 	 * It only supports single security module and only selinux has
1317 	 * dentry_init_security hook.
1318 	 */
1319 	name = XATTR_NAME_SELINUX;
1320 	name_len = strlen(name);
1321 	err = ceph_pagelist_reserve(pagelist,
1322 				    4 * 2 + name_len + as_ctx->sec_ctxlen);
1323 	if (err)
1324 		goto out;
1325 
1326 	if (as_ctx->pagelist) {
1327 		/* update count of KV pairs */
1328 		BUG_ON(pagelist->length <= sizeof(__le32));
1329 		if (list_is_singular(&pagelist->head)) {
1330 			le32_add_cpu((__le32*)pagelist->mapped_tail, 1);
1331 		} else {
1332 			struct page *page = list_first_entry(&pagelist->head,
1333 							     struct page, lru);
1334 			void *addr = kmap_atomic(page);
1335 			le32_add_cpu((__le32*)addr, 1);
1336 			kunmap_atomic(addr);
1337 		}
1338 	} else {
1339 		as_ctx->pagelist = pagelist;
1340 	}
1341 
1342 	ceph_pagelist_encode_32(pagelist, name_len);
1343 	ceph_pagelist_append(pagelist, name, name_len);
1344 
1345 	ceph_pagelist_encode_32(pagelist, as_ctx->sec_ctxlen);
1346 	ceph_pagelist_append(pagelist, as_ctx->sec_ctx, as_ctx->sec_ctxlen);
1347 
1348 	err = 0;
1349 out:
1350 	if (pagelist && !as_ctx->pagelist)
1351 		ceph_pagelist_release(pagelist);
1352 	return err;
1353 }
1354 #endif /* CONFIG_CEPH_FS_SECURITY_LABEL */
1355 #endif /* CONFIG_SECURITY */
1356 
1357 void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx)
1358 {
1359 #ifdef CONFIG_CEPH_FS_POSIX_ACL
1360 	posix_acl_release(as_ctx->acl);
1361 	posix_acl_release(as_ctx->default_acl);
1362 #endif
1363 #ifdef CONFIG_CEPH_FS_SECURITY_LABEL
1364 	security_release_secctx(as_ctx->sec_ctx, as_ctx->sec_ctxlen);
1365 #endif
1366 	if (as_ctx->pagelist)
1367 		ceph_pagelist_release(as_ctx->pagelist);
1368 }
1369 
1370 /*
1371  * List of handlers for synthetic system.* attributes. Other
1372  * attributes are handled directly.
1373  */
1374 const struct xattr_handler *ceph_xattr_handlers[] = {
1375 #ifdef CONFIG_CEPH_FS_POSIX_ACL
1376 	&posix_acl_access_xattr_handler,
1377 	&posix_acl_default_xattr_handler,
1378 #endif
1379 	&ceph_other_xattr_handler,
1380 	NULL,
1381 };
1382