#include <linux/ceph/ceph_debug.h>
#include <linux/ceph/pagelist.h>

#include "super.h"
#include "mds_client.h"

#include <linux/ceph/decode.h>

#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/slab.h>

#define XATTR_CEPH_PREFIX "ceph."
#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)

static int __remove_xattr(struct ceph_inode_info *ci,
			  struct ceph_inode_xattr *xattr);

const struct xattr_handler ceph_other_xattr_handler;

/*
 * List of handlers for synthetic system.* attributes. Other
 * attributes are handled directly.
 */
const struct xattr_handler *ceph_xattr_handlers[] = {
#ifdef CONFIG_CEPH_FS_POSIX_ACL
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
#endif
	&ceph_other_xattr_handler,
	NULL,
};

static bool ceph_is_valid_xattr(const char *name)
{
	return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
	       !strncmp(name, XATTR_SECURITY_PREFIX,
			XATTR_SECURITY_PREFIX_LEN) ||
	       !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
	       !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
}

/*
 * These define virtual xattrs exposing the recursive directory
 * statistics and layout metadata.
 */
struct ceph_vxattr {
	char *name;
	size_t name_size;	/* strlen(name) + 1 (for '\0') */
	size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
			      size_t size);
	bool readonly, hidden;
	bool (*exists_cb)(struct ceph_inode_info *ci);
};

/* layouts */

static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
{
	size_t s;
	char *p = (char *)&ci->i_layout;

	for (s = 0; s < sizeof(ci->i_layout); s++, p++)
		if (*p)
			return true;
	return false;
}

static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
				   size_t size)
{
	int ret;
	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
	struct ceph_osd_client *osdc = &fsc->client->osdc;
	s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
	const char *pool_name;
	char buf[128];

	dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
	down_read(&osdc->lock);
	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
	if (pool_name) {
		size_t len = strlen(pool_name);
		ret = snprintf(buf, sizeof(buf),
		"stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
		(unsigned long long)ceph_file_layout_su(ci->i_layout),
		(unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
		(unsigned long long)ceph_file_layout_object_size(ci->i_layout));
		if (!size) {
			ret += len;
		} else if (ret + len > size) {
			ret = -ERANGE;
		} else {
			memcpy(val, buf, ret);
			memcpy(val + ret, pool_name, len);
			ret += len;
		}
	} else {
		ret = snprintf(buf, sizeof(buf),
		"stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
		(unsigned long long)ceph_file_layout_su(ci->i_layout),
		(unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
		(unsigned long long)ceph_file_layout_object_size(ci->i_layout),
		(unsigned long long)pool);
		if (size) {
			if (ret <= size)
				memcpy(val, buf, ret);
			else
				ret = -ERANGE;
		}
	}
	up_read(&osdc->lock);
	return ret;
}

static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
					       char *val, size_t size)
{
	return snprintf(val, size, "%lld",
			(unsigned long long)ceph_file_layout_su(ci->i_layout));
}
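/*
 * Note on the getxattr_cb callbacks above and below: ceph_vxattrcb_layout()
 * follows the usual getxattr(2) convention.  With size == 0 the caller is
 * only probing for the required buffer length, so the callback returns the
 * length it would have written; with a non-zero size that is too small it
 * returns -ERANGE.  For illustration, from userspace (the path below is
 * hypothetical):
 *
 *	ssize_t len = getxattr("/mnt/cephfs/f", "ceph.file.layout", NULL, 0);
 *	char *buf = malloc(len + 1);
 *	getxattr("/mnt/cephfs/f", "ceph.file.layout", buf, len);
 */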
static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
						char *val, size_t size)
{
	return snprintf(val, size, "%lld",
	       (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
}

static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
					       char *val, size_t size)
{
	return snprintf(val, size, "%lld",
	       (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
}

static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
					char *val, size_t size)
{
	int ret;
	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
	struct ceph_osd_client *osdc = &fsc->client->osdc;
	s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
	const char *pool_name;

	down_read(&osdc->lock);
	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
	if (pool_name)
		ret = snprintf(val, size, "%s", pool_name);
	else
		ret = snprintf(val, size, "%lld", (unsigned long long)pool);
	up_read(&osdc->lock);
	return ret;
}

/* directories */

static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
					size_t size)
{
	return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
}

static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
				      size_t size)
{
	return snprintf(val, size, "%lld", ci->i_files);
}

static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
					size_t size)
{
	return snprintf(val, size, "%lld", ci->i_subdirs);
}

static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
					 size_t size)
{
	return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
}

static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
				       size_t size)
{
	return snprintf(val, size, "%lld", ci->i_rfiles);
}

static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
					 size_t size)
{
	return snprintf(val, size, "%lld", ci->i_rsubdirs);
}

static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
				       size_t size)
{
	return snprintf(val, size, "%lld", ci->i_rbytes);
}

static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
				       size_t size)
{
	return snprintf(val, size, "%ld.%09ld", (long)ci->i_rctime.tv_sec,
			(long)ci->i_rctime.tv_nsec);
}


#define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2)	\
	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
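/*
 * For reference, the macros above simply paste strings together, e.g.
 * CEPH_XATTR_NAME(dir, entries) expands to "ceph.dir.entries" and
 * CEPH_XATTR_NAME2(dir, layout, pool) expands to "ceph.dir.layout.pool".
 */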
#define XATTR_NAME_CEPH(_type, _name)					\
	{								\
		.name = CEPH_XATTR_NAME(_type, _name),			\
		.name_size = sizeof (CEPH_XATTR_NAME(_type, _name)),	\
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
		.readonly = true,					\
		.hidden = false,					\
		.exists_cb = NULL,					\
	}
#define XATTR_LAYOUT_FIELD(_type, _name, _field)			\
	{								\
		.name = CEPH_XATTR_NAME2(_type, _name, _field),		\
		.name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
		.getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field,	\
		.readonly = false,					\
		.hidden = true,						\
		.exists_cb = ceph_vxattrcb_layout_exists,		\
	}

static struct ceph_vxattr ceph_dir_vxattrs[] = {
	{
		.name = "ceph.dir.layout",
		.name_size = sizeof("ceph.dir.layout"),
		.getxattr_cb = ceph_vxattrcb_layout,
		.readonly = false,
		.hidden = true,
		.exists_cb = ceph_vxattrcb_layout_exists,
	},
	XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
	XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
	XATTR_LAYOUT_FIELD(dir, layout, object_size),
	XATTR_LAYOUT_FIELD(dir, layout, pool),
	XATTR_NAME_CEPH(dir, entries),
	XATTR_NAME_CEPH(dir, files),
	XATTR_NAME_CEPH(dir, subdirs),
	XATTR_NAME_CEPH(dir, rentries),
	XATTR_NAME_CEPH(dir, rfiles),
	XATTR_NAME_CEPH(dir, rsubdirs),
	XATTR_NAME_CEPH(dir, rbytes),
	XATTR_NAME_CEPH(dir, rctime),
	{ .name = NULL, 0 }	/* Required table terminator */
};
static size_t ceph_dir_vxattrs_name_size;	/* total size of all names */

/* files */

static struct ceph_vxattr ceph_file_vxattrs[] = {
	{
		.name = "ceph.file.layout",
		.name_size = sizeof("ceph.file.layout"),
		.getxattr_cb = ceph_vxattrcb_layout,
		.readonly = false,
		.hidden = true,
		.exists_cb = ceph_vxattrcb_layout_exists,
	},
	XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
	XATTR_LAYOUT_FIELD(file, layout, stripe_count),
	XATTR_LAYOUT_FIELD(file, layout, object_size),
	XATTR_LAYOUT_FIELD(file, layout, pool),
	{ .name = NULL, 0 }	/* Required table terminator */
};
static size_t ceph_file_vxattrs_name_size;	/* total size of all names */

static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
{
	if (S_ISDIR(inode->i_mode))
		return ceph_dir_vxattrs;
	else if (S_ISREG(inode->i_mode))
		return ceph_file_vxattrs;
	return NULL;
}

static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
{
	if (vxattrs == ceph_dir_vxattrs)
		return ceph_dir_vxattrs_name_size;
	if (vxattrs == ceph_file_vxattrs)
		return ceph_file_vxattrs_name_size;
	BUG_ON(vxattrs);
	return 0;
}
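/*
 * For illustration, the virtual xattrs defined above can be queried from
 * userspace like any other extended attribute (the mount point below is
 * hypothetical):
 *
 *	$ getfattr -n ceph.dir.rbytes /mnt/cephfs/some/dir
 *	$ getfattr -n ceph.file.layout.pool /mnt/cephfs/some/file
 *
 * Entries flagged as hidden (e.g. the layout vxattrs) can be read by name
 * but are not reported by listxattr(2); see ceph_listxattr() below.
 */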
/*
 * Compute the aggregate size (including terminating '\0') of all
 * virtual extended attribute names in the given vxattr table.
 */
static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
{
	struct ceph_vxattr *vxattr;
	size_t size = 0;

	for (vxattr = vxattrs; vxattr->name; vxattr++)
		if (!vxattr->hidden)
			size += vxattr->name_size;

	return size;
}

/* Routines called at initialization and exit time */

void __init ceph_xattr_init(void)
{
	ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
	ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
}

void ceph_xattr_exit(void)
{
	ceph_dir_vxattrs_name_size = 0;
	ceph_file_vxattrs_name_size = 0;
}

static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
					     const char *name)
{
	struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);

	if (vxattr) {
		while (vxattr->name) {
			if (!strcmp(vxattr->name, name))
				return vxattr;
			vxattr++;
		}
	}

	return NULL;
}

static int __set_xattr(struct ceph_inode_info *ci,
		       const char *name, int name_len,
		       const char *val, int val_len,
		       int flags, int update_xattr,
		       struct ceph_inode_xattr **newxattr)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct ceph_inode_xattr *xattr = NULL;
	int c;
	int new = 0;

	p = &ci->i_xattrs.index.rb_node;
	while (*p) {
		parent = *p;
		xattr = rb_entry(parent, struct ceph_inode_xattr, node);
		c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
		if (c < 0)
			p = &(*p)->rb_left;
		else if (c > 0)
			p = &(*p)->rb_right;
		else {
			if (name_len == xattr->name_len)
				break;
			else if (name_len < xattr->name_len)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
		}
		xattr = NULL;
	}

	if (update_xattr) {
		int err = 0;
		if (xattr && (flags & XATTR_CREATE))
			err = -EEXIST;
		else if (!xattr && (flags & XATTR_REPLACE))
			err = -ENODATA;
		if (err) {
			kfree(name);
			kfree(val);
			return err;
		}
		if (update_xattr < 0) {
			if (xattr)
				__remove_xattr(ci, xattr);
			kfree(name);
			return 0;
		}
	}

	if (!xattr) {
		new = 1;
		xattr = *newxattr;
		xattr->name = name;
		xattr->name_len = name_len;
		xattr->should_free_name = update_xattr;

		ci->i_xattrs.count++;
		dout("__set_xattr count=%d\n", ci->i_xattrs.count);
	} else {
		kfree(*newxattr);
		*newxattr = NULL;
		if (xattr->should_free_val)
			kfree((void *)xattr->val);

		if (update_xattr) {
			kfree((void *)name);
			name = xattr->name;
		}
		ci->i_xattrs.names_size -= xattr->name_len;
		ci->i_xattrs.vals_size -= xattr->val_len;
	}
	ci->i_xattrs.names_size += name_len;
	ci->i_xattrs.vals_size += val_len;
	if (val)
		xattr->val = val;
	else
		xattr->val = "";

	xattr->val_len = val_len;
	xattr->dirty = update_xattr;
	xattr->should_free_val = (val && update_xattr);

	if (new) {
		rb_link_node(&xattr->node, parent, p);
		rb_insert_color(&xattr->node, &ci->i_xattrs.index);
		dout("__set_xattr_val p=%p\n", p);
	}

	dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
	     ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);

	return 0;
}
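/*
 * Note on the rbtree used above and below: xattr names in the tree are not
 * necessarily NUL-terminated (they may point straight into the xattr blob
 * received from the MDS), so comparisons use strncmp() over the shorter of
 * the two lengths and break ties by length.  __get_xattr() below applies
 * the same ordering when searching.
 */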
static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
					    const char *name)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct ceph_inode_xattr *xattr = NULL;
	int name_len = strlen(name);
	int c;

	p = &ci->i_xattrs.index.rb_node;
	while (*p) {
		parent = *p;
		xattr = rb_entry(parent, struct ceph_inode_xattr, node);
		c = strncmp(name, xattr->name, xattr->name_len);
		if (c == 0 && name_len > xattr->name_len)
			c = 1;
		if (c < 0)
			p = &(*p)->rb_left;
		else if (c > 0)
			p = &(*p)->rb_right;
		else {
			dout("__get_xattr %s: found %.*s\n", name,
			     xattr->val_len, xattr->val);
			return xattr;
		}
	}

	dout("__get_xattr %s: not found\n", name);

	return NULL;
}

static void __free_xattr(struct ceph_inode_xattr *xattr)
{
	BUG_ON(!xattr);

	if (xattr->should_free_name)
		kfree((void *)xattr->name);
	if (xattr->should_free_val)
		kfree((void *)xattr->val);

	kfree(xattr);
}

static int __remove_xattr(struct ceph_inode_info *ci,
			  struct ceph_inode_xattr *xattr)
{
	if (!xattr)
		return -ENODATA;

	rb_erase(&xattr->node, &ci->i_xattrs.index);

	if (xattr->should_free_name)
		kfree((void *)xattr->name);
	if (xattr->should_free_val)
		kfree((void *)xattr->val);

	ci->i_xattrs.names_size -= xattr->name_len;
	ci->i_xattrs.vals_size -= xattr->val_len;
	ci->i_xattrs.count--;
	kfree(xattr);

	return 0;
}

static char *__copy_xattr_names(struct ceph_inode_info *ci,
				char *dest)
{
	struct rb_node *p;
	struct ceph_inode_xattr *xattr = NULL;

	p = rb_first(&ci->i_xattrs.index);
	dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);

	while (p) {
		xattr = rb_entry(p, struct ceph_inode_xattr, node);
		memcpy(dest, xattr->name, xattr->name_len);
		dest[xattr->name_len] = '\0';

		dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
		     xattr->name_len, ci->i_xattrs.names_size);

		dest += xattr->name_len + 1;
		p = rb_next(p);
	}

	return dest;
}

void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
{
	struct rb_node *p, *tmp;
	struct ceph_inode_xattr *xattr = NULL;

	p = rb_first(&ci->i_xattrs.index);

	dout("__ceph_destroy_xattrs p=%p\n", p);

	while (p) {
		xattr = rb_entry(p, struct ceph_inode_xattr, node);
		tmp = p;
		p = rb_next(tmp);
		dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
		     xattr->name_len, xattr->name);
		rb_erase(tmp, &ci->i_xattrs.index);

		__free_xattr(xattr);
	}

	ci->i_xattrs.names_size = 0;
	ci->i_xattrs.vals_size = 0;
	ci->i_xattrs.index_version = 0;
	ci->i_xattrs.count = 0;
	ci->i_xattrs.index = RB_ROOT;
}
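/*
 * __build_xattrs() below has to allocate memory while decoding the xattr
 * blob, which it cannot do under i_ceph_lock.  It therefore snapshots the
 * blob version, drops the spinlock for the allocations, re-takes it and
 * retries from "start:" if the version changed in the meantime (i.e. a new
 * blob arrived from the MDS while the lock was released).
 */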
static int __build_xattrs(struct inode *inode)
	__releases(ci->i_ceph_lock)
	__acquires(ci->i_ceph_lock)
{
	u32 namelen;
	u32 numattr = 0;
	void *p, *end;
	u32 len;
	const char *name, *val;
	struct ceph_inode_info *ci = ceph_inode(inode);
	int xattr_version;
	struct ceph_inode_xattr **xattrs = NULL;
	int err = 0;
	int i;

	dout("__build_xattrs() len=%d\n",
	     ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);

	if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
		return 0; /* already built */

	__ceph_destroy_xattrs(ci);

start:
	/* update the internal xattr rb tree */
	if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
		p = ci->i_xattrs.blob->vec.iov_base;
		end = p + ci->i_xattrs.blob->vec.iov_len;
		ceph_decode_32_safe(&p, end, numattr, bad);
		xattr_version = ci->i_xattrs.version;
		spin_unlock(&ci->i_ceph_lock);

		xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
				 GFP_NOFS);
		err = -ENOMEM;
		if (!xattrs)
			goto bad_lock;

		for (i = 0; i < numattr; i++) {
			xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
					    GFP_NOFS);
			if (!xattrs[i])
				goto bad_lock;
		}

		spin_lock(&ci->i_ceph_lock);
		if (ci->i_xattrs.version != xattr_version) {
			/* lost a race, retry */
			for (i = 0; i < numattr; i++)
				kfree(xattrs[i]);
			kfree(xattrs);
			xattrs = NULL;
			goto start;
		}
		err = -EIO;
		while (numattr--) {
			ceph_decode_32_safe(&p, end, len, bad);
			namelen = len;
			name = p;
			p += len;
			ceph_decode_32_safe(&p, end, len, bad);
			val = p;
			p += len;

			err = __set_xattr(ci, name, namelen, val, len,
					  0, 0, &xattrs[numattr]);

			if (err < 0)
				goto bad;
		}
		kfree(xattrs);
	}
	ci->i_xattrs.index_version = ci->i_xattrs.version;
	ci->i_xattrs.dirty = false;

	return err;
bad_lock:
	spin_lock(&ci->i_ceph_lock);
bad:
	if (xattrs) {
		for (i = 0; i < numattr; i++)
			kfree(xattrs[i]);
		kfree(xattrs);
	}
	ci->i_xattrs.names_size = 0;
	return err;
}

static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
				    int val_size)
{
	/*
	 * 4 bytes for the entry count, plus a 4-byte length for each
	 * xattr name and a 4-byte length for each value
	 */
	int size = 4 + ci->i_xattrs.count*(4 + 4) +
		     ci->i_xattrs.names_size +
		     ci->i_xattrs.vals_size;
	dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
	     ci->i_xattrs.count, ci->i_xattrs.names_size,
	     ci->i_xattrs.vals_size);

	if (name_size)
		size += 4 + 4 + name_size + val_size;

	return size;
}
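/*
 * For reference, the encoded xattr blob produced below (and decoded by
 * __build_xattrs() above) is laid out as follows, matching the accounting
 * in __get_required_blob_size().  All lengths are 32-bit little-endian:
 *
 *	u32 count
 *	count * {
 *		u32 name_len;  char name[name_len];   (no trailing NUL)
 *		u32 val_len;   char val[val_len];
 *	}
 */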
/*
 * If there are dirty xattrs, reencode xattrs into the prealloc_blob
 * and swap into place.
 */
void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
{
	struct rb_node *p;
	struct ceph_inode_xattr *xattr = NULL;
	void *dest;

	dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
	if (ci->i_xattrs.dirty) {
		int need = __get_required_blob_size(ci, 0, 0);

		BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);

		p = rb_first(&ci->i_xattrs.index);
		dest = ci->i_xattrs.prealloc_blob->vec.iov_base;

		ceph_encode_32(&dest, ci->i_xattrs.count);
		while (p) {
			xattr = rb_entry(p, struct ceph_inode_xattr, node);

			ceph_encode_32(&dest, xattr->name_len);
			memcpy(dest, xattr->name, xattr->name_len);
			dest += xattr->name_len;
			ceph_encode_32(&dest, xattr->val_len);
			memcpy(dest, xattr->val, xattr->val_len);
			dest += xattr->val_len;

			p = rb_next(p);
		}

		/* adjust buffer len; it may be larger than we need */
		ci->i_xattrs.prealloc_blob->vec.iov_len =
			dest - ci->i_xattrs.prealloc_blob->vec.iov_base;

		if (ci->i_xattrs.blob)
			ceph_buffer_put(ci->i_xattrs.blob);
		ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
		ci->i_xattrs.prealloc_blob = NULL;
		ci->i_xattrs.dirty = false;
		ci->i_xattrs.version++;
	}
}

static inline int __get_request_mask(struct inode *in) {
	struct ceph_mds_request *req = current->journal_info;
	int mask = 0;
	if (req && req->r_target_inode == in) {
		if (req->r_op == CEPH_MDS_OP_LOOKUP ||
		    req->r_op == CEPH_MDS_OP_LOOKUPINO ||
		    req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
		    req->r_op == CEPH_MDS_OP_GETATTR) {
			mask = le32_to_cpu(req->r_args.getattr.mask);
		} else if (req->r_op == CEPH_MDS_OP_OPEN ||
			   req->r_op == CEPH_MDS_OP_CREATE) {
			mask = le32_to_cpu(req->r_args.open.mask);
		}
	}
	return mask;
}
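/*
 * __get_request_mask() above peeks at current->journal_info, where the MDS
 * client stashes the request whose reply trace is currently being filled.
 * If the running task is in the middle of filling that trace for this inode
 * (e.g. the security module fetching an xattr from within lookup/open
 * handling), the mask of the original request tells __ceph_getxattr() below
 * whether xattrs were already requested, so it can avoid issuing a nested,
 * deadlock-prone getattr.
 */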
ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
			size_t size)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_inode_xattr *xattr;
	struct ceph_vxattr *vxattr = NULL;
	int req_mask;
	int err;

	/* let's see if a virtual xattr was requested */
	vxattr = ceph_match_vxattr(inode, name);
	if (vxattr) {
		err = -ENODATA;
		if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
			err = vxattr->getxattr_cb(ci, value, size);
		return err;
	}

	req_mask = __get_request_mask(inode);

	spin_lock(&ci->i_ceph_lock);
	dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
	     ci->i_xattrs.version, ci->i_xattrs.index_version);

	if (ci->i_xattrs.version == 0 ||
	    !((req_mask & CEPH_CAP_XATTR_SHARED) ||
	      __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
		spin_unlock(&ci->i_ceph_lock);

		/* security module gets xattr while filling trace */
		if (current->journal_info != NULL) {
			pr_warn_ratelimited("sync getxattr %p "
					    "during filling trace\n", inode);
			return -EBUSY;
		}

		/* get xattrs from mds (if we don't already have them) */
		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
		if (err)
			return err;
		spin_lock(&ci->i_ceph_lock);
	}

	err = __build_xattrs(inode);
	if (err < 0)
		goto out;

	err = -ENODATA;  /* == ENOATTR */
	xattr = __get_xattr(ci, name);
	if (!xattr)
		goto out;

	err = -ERANGE;
	if (size && size < xattr->val_len)
		goto out;

	err = xattr->val_len;
	if (size == 0)
		goto out;

	memcpy(value, xattr->val, xattr->val_len);

	if (current->journal_info != NULL &&
	    !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
		ci->i_ceph_flags |= CEPH_I_SEC_INITED;
out:
	spin_unlock(&ci->i_ceph_lock);
	return err;
}

ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
{
	struct inode *inode = d_inode(dentry);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
	u32 vir_namelen = 0;
	u32 namelen;
	int err;
	u32 len;
	int i;

	spin_lock(&ci->i_ceph_lock);
	dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
	     ci->i_xattrs.version, ci->i_xattrs.index_version);

	if (ci->i_xattrs.version == 0 ||
	    !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
		spin_unlock(&ci->i_ceph_lock);
		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
		if (err)
			return err;
		spin_lock(&ci->i_ceph_lock);
	}

	err = __build_xattrs(inode);
	if (err < 0)
		goto out;
	/*
	 * Start with the virtual xattr names (if any), including the
	 * terminating '\0' for each.
	 */
	vir_namelen = ceph_vxattrs_name_size(vxattrs);

	/* count 1 extra byte per name for the trailing '\0' */
	namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
	err = -ERANGE;
	if (size && vir_namelen + namelen > size)
		goto out;

	err = namelen + vir_namelen;
	if (size == 0)
		goto out;

	names = __copy_xattr_names(ci, names);

	/* virtual xattr names, too */
	err = namelen;
	if (vxattrs) {
		for (i = 0; vxattrs[i].name; i++) {
			if (!vxattrs[i].hidden &&
			    !(vxattrs[i].exists_cb &&
			      !vxattrs[i].exists_cb(ci))) {
				len = sprintf(names, "%s", vxattrs[i].name);
				names += len + 1;
				err += len + 1;
			}
		}
	}

out:
	spin_unlock(&ci->i_ceph_lock);
	return err;
}
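/*
 * ceph_sync_setxattr() below sends the operation straight to the MDS.  The
 * value, if any, travels in a pagelist attached to the request; a NULL
 * value either becomes an RMXATTR op (when CEPH_XATTR_REPLACE is set) or a
 * SETXATTR carrying the CEPH_XATTR_REMOVE flag.
 */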
static int ceph_sync_setxattr(struct inode *inode, const char *name,
			      const char *value, size_t size, int flags)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_mds_request *req;
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_pagelist *pagelist = NULL;
	int op = CEPH_MDS_OP_SETXATTR;
	int err;

	if (size > 0) {
		/* copy value into pagelist */
		pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
		if (!pagelist)
			return -ENOMEM;

		ceph_pagelist_init(pagelist);
		err = ceph_pagelist_append(pagelist, value, size);
		if (err)
			goto out;
	} else if (!value) {
		if (flags & CEPH_XATTR_REPLACE)
			op = CEPH_MDS_OP_RMXATTR;
		else
			flags |= CEPH_XATTR_REMOVE;
	}

	dout("setxattr value=%.*s\n", (int)size, value);

	/* do request */
	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}

	req->r_path2 = kstrdup(name, GFP_NOFS);
	if (!req->r_path2) {
		ceph_mdsc_put_request(req);
		err = -ENOMEM;
		goto out;
	}

	if (op == CEPH_MDS_OP_SETXATTR) {
		req->r_args.setxattr.flags = cpu_to_le32(flags);
		req->r_pagelist = pagelist;
		pagelist = NULL;
	}

	req->r_inode = inode;
	ihold(inode);
	req->r_num_caps = 1;
	req->r_inode_drop = CEPH_CAP_XATTR_SHARED;

	dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
	err = ceph_mdsc_do_request(mdsc, NULL, req);
	ceph_mdsc_put_request(req);
	dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);

out:
	if (pagelist)
		ceph_pagelist_release(pagelist);
	return err;
}
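/*
 * __ceph_setxattr() below tries to apply the change locally first: if we
 * hold the XATTR_EXCL cap it updates the in-memory rbtree, marks the xattrs
 * dirty and lets cap writeback push the new blob to the MDS.  Without that
 * cap (or for ceph.* names not handled here) it falls back to a synchronous
 * MDS request via ceph_sync_setxattr().
 */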
int __ceph_setxattr(struct inode *inode, const char *name,
		    const void *value, size_t size, int flags)
{
	struct ceph_vxattr *vxattr;
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
	struct ceph_cap_flush *prealloc_cf = NULL;
	int issued;
	int err;
	int dirty = 0;
	int name_len = strlen(name);
	int val_len = size;
	char *newname = NULL;
	char *newval = NULL;
	struct ceph_inode_xattr *xattr = NULL;
	int required_blob_size;
	bool lock_snap_rwsem = false;

	if (ceph_snap(inode) != CEPH_NOSNAP)
		return -EROFS;

	vxattr = ceph_match_vxattr(inode, name);
	if (vxattr && vxattr->readonly)
		return -EOPNOTSUPP;

	/* pass any unhandled ceph.* xattrs through to the MDS */
	if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
		goto do_sync_unlocked;

	/* preallocate memory for xattr name, value, index node */
	err = -ENOMEM;
	newname = kmemdup(name, name_len + 1, GFP_NOFS);
	if (!newname)
		goto out;

	if (val_len) {
		newval = kmemdup(value, val_len, GFP_NOFS);
		if (!newval)
			goto out;
	}

	xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
	if (!xattr)
		goto out;

	prealloc_cf = ceph_alloc_cap_flush();
	if (!prealloc_cf)
		goto out;

	spin_lock(&ci->i_ceph_lock);
retry:
	issued = __ceph_caps_issued(ci, NULL);
	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
		goto do_sync;

	if (!lock_snap_rwsem && !ci->i_head_snapc) {
		lock_snap_rwsem = true;
		if (!down_read_trylock(&mdsc->snap_rwsem)) {
			spin_unlock(&ci->i_ceph_lock);
			down_read(&mdsc->snap_rwsem);
			spin_lock(&ci->i_ceph_lock);
			goto retry;
		}
	}

	dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
	__build_xattrs(inode);

	required_blob_size = __get_required_blob_size(ci, name_len, val_len);

	if (!ci->i_xattrs.prealloc_blob ||
	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
		struct ceph_buffer *blob;

		spin_unlock(&ci->i_ceph_lock);
		dout(" preallocating new blob size=%d\n", required_blob_size);
		blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
		if (!blob)
			goto do_sync_unlocked;
		spin_lock(&ci->i_ceph_lock);
		if (ci->i_xattrs.prealloc_blob)
			ceph_buffer_put(ci->i_xattrs.prealloc_blob);
		ci->i_xattrs.prealloc_blob = blob;
		goto retry;
	}

	err = __set_xattr(ci, newname, name_len, newval, val_len,
			  flags, value ? 1 : -1, &xattr);

	if (!err) {
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
					       &prealloc_cf);
		ci->i_xattrs.dirty = true;
		inode->i_ctime = current_fs_time(inode->i_sb);
	}

	spin_unlock(&ci->i_ceph_lock);
	if (lock_snap_rwsem)
		up_read(&mdsc->snap_rwsem);
	if (dirty)
		__mark_inode_dirty(inode, dirty);
	ceph_free_cap_flush(prealloc_cf);
	return err;

do_sync:
	spin_unlock(&ci->i_ceph_lock);
do_sync_unlocked:
	if (lock_snap_rwsem)
		up_read(&mdsc->snap_rwsem);

	/* security module sets xattr while filling trace */
	if (current->journal_info != NULL) {
		pr_warn_ratelimited("sync setxattr %p "
				    "during filling trace\n", inode);
		err = -EBUSY;
	} else {
		err = ceph_sync_setxattr(inode, name, value, size, flags);
	}
out:
	ceph_free_cap_flush(prealloc_cf);
	kfree(newname);
	kfree(newval);
	kfree(xattr);
	return err;
}

static int ceph_get_xattr_handler(const struct xattr_handler *handler,
				  struct dentry *dentry, struct inode *inode,
				  const char *name, void *value, size_t size)
{
	if (!ceph_is_valid_xattr(name))
		return -EOPNOTSUPP;
	return __ceph_getxattr(inode, name, value, size);
}

static int ceph_set_xattr_handler(const struct xattr_handler *handler,
				  struct dentry *unused, struct inode *inode,
				  const char *name, const void *value,
				  size_t size, int flags)
{
	if (!ceph_is_valid_xattr(name))
		return -EOPNOTSUPP;
	return __ceph_setxattr(inode, name, value, size, flags);
}

const struct xattr_handler ceph_other_xattr_handler = {
	.prefix = "",  /* match any name => handlers called with full name */
	.get = ceph_get_xattr_handler,
	.set = ceph_set_xattr_handler,
};

#ifdef CONFIG_SECURITY
bool ceph_security_xattr_wanted(struct inode *in)
{
	return in->i_security != NULL;
}

bool ceph_security_xattr_deadlock(struct inode *in)
{
	struct ceph_inode_info *ci;
	bool ret;
	if (in->i_security == NULL)
		return false;
	ci = ceph_inode(in);
	spin_lock(&ci->i_ceph_lock);
	ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
	      !(ci->i_xattrs.version > 0 &&
		__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
	spin_unlock(&ci->i_ceph_lock);
	return ret;
}
#endif