1 #include <linux/ceph/ceph_debug.h> 2 #include <linux/ceph/pagelist.h> 3 4 #include "super.h" 5 #include "mds_client.h" 6 7 #include <linux/ceph/decode.h> 8 9 #include <linux/xattr.h> 10 #include <linux/posix_acl_xattr.h> 11 #include <linux/slab.h> 12 13 #define XATTR_CEPH_PREFIX "ceph." 14 #define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1) 15 16 static int __remove_xattr(struct ceph_inode_info *ci, 17 struct ceph_inode_xattr *xattr); 18 19 /* 20 * List of handlers for synthetic system.* attributes. Other 21 * attributes are handled directly. 22 */ 23 const struct xattr_handler *ceph_xattr_handlers[] = { 24 #ifdef CONFIG_CEPH_FS_POSIX_ACL 25 &posix_acl_access_xattr_handler, 26 &posix_acl_default_xattr_handler, 27 #endif 28 NULL, 29 }; 30 31 static bool ceph_is_valid_xattr(const char *name) 32 { 33 return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) || 34 !strncmp(name, XATTR_SECURITY_PREFIX, 35 XATTR_SECURITY_PREFIX_LEN) || 36 !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || 37 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || 38 !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); 39 } 40 41 /* 42 * These define virtual xattrs exposing the recursive directory 43 * statistics and layout metadata. 44 */ 45 struct ceph_vxattr { 46 char *name; 47 size_t name_size; /* strlen(name) + 1 (for '\0') */ 48 size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, 49 size_t size); 50 bool readonly, hidden; 51 bool (*exists_cb)(struct ceph_inode_info *ci); 52 }; 53 54 /* layouts */ 55 56 static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) 57 { 58 size_t s; 59 char *p = (char *)&ci->i_layout; 60 61 for (s = 0; s < sizeof(ci->i_layout); s++, p++) 62 if (*p) 63 return true; 64 return false; 65 } 66 67 static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, 68 size_t size) 69 { 70 int ret; 71 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); 72 struct ceph_osd_client *osdc = &fsc->client->osdc; 73 s64 pool = ceph_file_layout_pg_pool(ci->i_layout); 74 const char *pool_name; 75 char buf[128]; 76 77 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); 78 down_read(&osdc->map_sem); 79 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); 80 if (pool_name) { 81 size_t len = strlen(pool_name); 82 ret = snprintf(buf, sizeof(buf), 83 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=", 84 (unsigned long long)ceph_file_layout_su(ci->i_layout), 85 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), 86 (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); 87 if (!size) { 88 ret += len; 89 } else if (ret + len > size) { 90 ret = -ERANGE; 91 } else { 92 memcpy(val, buf, ret); 93 memcpy(val + ret, pool_name, len); 94 ret += len; 95 } 96 } else { 97 ret = snprintf(buf, sizeof(buf), 98 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", 99 (unsigned long long)ceph_file_layout_su(ci->i_layout), 100 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), 101 (unsigned long long)ceph_file_layout_object_size(ci->i_layout), 102 (unsigned long long)pool); 103 if (size) { 104 if (ret <= size) 105 memcpy(val, buf, ret); 106 else 107 ret = -ERANGE; 108 } 109 } 110 up_read(&osdc->map_sem); 111 return ret; 112 } 113 114 static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci, 115 char *val, size_t size) 116 { 117 return snprintf(val, size, "%lld", 118 (unsigned long long)ceph_file_layout_su(ci->i_layout)); 119 } 120 121 static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci, 122 char *val, size_t size) 123 { 124 return snprintf(val, size, "%lld", 125 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout)); 126 } 127 128 static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci, 129 char *val, size_t size) 130 { 131 return snprintf(val, size, "%lld", 132 (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); 133 } 134 135 static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, 136 char *val, size_t size) 137 { 138 int ret; 139 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); 140 struct ceph_osd_client *osdc = &fsc->client->osdc; 141 s64 pool = ceph_file_layout_pg_pool(ci->i_layout); 142 const char *pool_name; 143 144 down_read(&osdc->map_sem); 145 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); 146 if (pool_name) 147 ret = snprintf(val, size, "%s", pool_name); 148 else 149 ret = snprintf(val, size, "%lld", (unsigned long long)pool); 150 up_read(&osdc->map_sem); 151 return ret; 152 } 153 154 /* directories */ 155 156 static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, 157 size_t size) 158 { 159 return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs); 160 } 161 162 static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val, 163 size_t size) 164 { 165 return snprintf(val, size, "%lld", ci->i_files); 166 } 167 168 static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val, 169 size_t size) 170 { 171 return snprintf(val, size, "%lld", ci->i_subdirs); 172 } 173 174 static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val, 175 size_t size) 176 { 177 return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs); 178 } 179 180 static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val, 181 size_t size) 182 { 183 return snprintf(val, size, "%lld", ci->i_rfiles); 184 } 185 186 static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val, 187 size_t size) 188 { 189 return snprintf(val, size, "%lld", ci->i_rsubdirs); 190 } 191 192 static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val, 193 size_t size) 194 { 195 return snprintf(val, size, "%lld", ci->i_rbytes); 196 } 197 198 static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, 199 size_t size) 200 { 201 return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec, 202 (long)ci->i_rctime.tv_nsec); 203 } 204 205 206 #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name 207 #define CEPH_XATTR_NAME2(_type, _name, _name2) \ 208 XATTR_CEPH_PREFIX #_type "." #_name "." #_name2 209 210 #define XATTR_NAME_CEPH(_type, _name) \ 211 { \ 212 .name = CEPH_XATTR_NAME(_type, _name), \ 213 .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ 214 .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ 215 .readonly = true, \ 216 .hidden = false, \ 217 .exists_cb = NULL, \ 218 } 219 #define XATTR_LAYOUT_FIELD(_type, _name, _field) \ 220 { \ 221 .name = CEPH_XATTR_NAME2(_type, _name, _field), \ 222 .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \ 223 .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \ 224 .readonly = false, \ 225 .hidden = true, \ 226 .exists_cb = ceph_vxattrcb_layout_exists, \ 227 } 228 229 static struct ceph_vxattr ceph_dir_vxattrs[] = { 230 { 231 .name = "ceph.dir.layout", 232 .name_size = sizeof("ceph.dir.layout"), 233 .getxattr_cb = ceph_vxattrcb_layout, 234 .readonly = false, 235 .hidden = true, 236 .exists_cb = ceph_vxattrcb_layout_exists, 237 }, 238 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), 239 XATTR_LAYOUT_FIELD(dir, layout, stripe_count), 240 XATTR_LAYOUT_FIELD(dir, layout, object_size), 241 XATTR_LAYOUT_FIELD(dir, layout, pool), 242 XATTR_NAME_CEPH(dir, entries), 243 XATTR_NAME_CEPH(dir, files), 244 XATTR_NAME_CEPH(dir, subdirs), 245 XATTR_NAME_CEPH(dir, rentries), 246 XATTR_NAME_CEPH(dir, rfiles), 247 XATTR_NAME_CEPH(dir, rsubdirs), 248 XATTR_NAME_CEPH(dir, rbytes), 249 XATTR_NAME_CEPH(dir, rctime), 250 { .name = NULL, 0 } /* Required table terminator */ 251 }; 252 static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ 253 254 /* files */ 255 256 static struct ceph_vxattr ceph_file_vxattrs[] = { 257 { 258 .name = "ceph.file.layout", 259 .name_size = sizeof("ceph.file.layout"), 260 .getxattr_cb = ceph_vxattrcb_layout, 261 .readonly = false, 262 .hidden = true, 263 .exists_cb = ceph_vxattrcb_layout_exists, 264 }, 265 XATTR_LAYOUT_FIELD(file, layout, stripe_unit), 266 XATTR_LAYOUT_FIELD(file, layout, stripe_count), 267 XATTR_LAYOUT_FIELD(file, layout, object_size), 268 XATTR_LAYOUT_FIELD(file, layout, pool), 269 { .name = NULL, 0 } /* Required table terminator */ 270 }; 271 static size_t ceph_file_vxattrs_name_size; /* total size of all names */ 272 273 static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode) 274 { 275 if (S_ISDIR(inode->i_mode)) 276 return ceph_dir_vxattrs; 277 else if (S_ISREG(inode->i_mode)) 278 return ceph_file_vxattrs; 279 return NULL; 280 } 281 282 static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs) 283 { 284 if (vxattrs == ceph_dir_vxattrs) 285 return ceph_dir_vxattrs_name_size; 286 if (vxattrs == ceph_file_vxattrs) 287 return ceph_file_vxattrs_name_size; 288 BUG_ON(vxattrs); 289 return 0; 290 } 291 292 /* 293 * Compute the aggregate size (including terminating '\0') of all 294 * virtual extended attribute names in the given vxattr table. 295 */ 296 static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs) 297 { 298 struct ceph_vxattr *vxattr; 299 size_t size = 0; 300 301 for (vxattr = vxattrs; vxattr->name; vxattr++) 302 if (!vxattr->hidden) 303 size += vxattr->name_size; 304 305 return size; 306 } 307 308 /* Routines called at initialization and exit time */ 309 310 void __init ceph_xattr_init(void) 311 { 312 ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs); 313 ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs); 314 } 315 316 void ceph_xattr_exit(void) 317 { 318 ceph_dir_vxattrs_name_size = 0; 319 ceph_file_vxattrs_name_size = 0; 320 } 321 322 static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode, 323 const char *name) 324 { 325 struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode); 326 327 if (vxattr) { 328 while (vxattr->name) { 329 if (!strcmp(vxattr->name, name)) 330 return vxattr; 331 vxattr++; 332 } 333 } 334 335 return NULL; 336 } 337 338 static int __set_xattr(struct ceph_inode_info *ci, 339 const char *name, int name_len, 340 const char *val, int val_len, 341 int flags, int update_xattr, 342 struct ceph_inode_xattr **newxattr) 343 { 344 struct rb_node **p; 345 struct rb_node *parent = NULL; 346 struct ceph_inode_xattr *xattr = NULL; 347 int c; 348 int new = 0; 349 350 p = &ci->i_xattrs.index.rb_node; 351 while (*p) { 352 parent = *p; 353 xattr = rb_entry(parent, struct ceph_inode_xattr, node); 354 c = strncmp(name, xattr->name, min(name_len, xattr->name_len)); 355 if (c < 0) 356 p = &(*p)->rb_left; 357 else if (c > 0) 358 p = &(*p)->rb_right; 359 else { 360 if (name_len == xattr->name_len) 361 break; 362 else if (name_len < xattr->name_len) 363 p = &(*p)->rb_left; 364 else 365 p = &(*p)->rb_right; 366 } 367 xattr = NULL; 368 } 369 370 if (update_xattr) { 371 int err = 0; 372 if (xattr && (flags & XATTR_CREATE)) 373 err = -EEXIST; 374 else if (!xattr && (flags & XATTR_REPLACE)) 375 err = -ENODATA; 376 if (err) { 377 kfree(name); 378 kfree(val); 379 return err; 380 } 381 if (update_xattr < 0) { 382 if (xattr) 383 __remove_xattr(ci, xattr); 384 kfree(name); 385 return 0; 386 } 387 } 388 389 if (!xattr) { 390 new = 1; 391 xattr = *newxattr; 392 xattr->name = name; 393 xattr->name_len = name_len; 394 xattr->should_free_name = update_xattr; 395 396 ci->i_xattrs.count++; 397 dout("__set_xattr count=%d\n", ci->i_xattrs.count); 398 } else { 399 kfree(*newxattr); 400 *newxattr = NULL; 401 if (xattr->should_free_val) 402 kfree((void *)xattr->val); 403 404 if (update_xattr) { 405 kfree((void *)name); 406 name = xattr->name; 407 } 408 ci->i_xattrs.names_size -= xattr->name_len; 409 ci->i_xattrs.vals_size -= xattr->val_len; 410 } 411 ci->i_xattrs.names_size += name_len; 412 ci->i_xattrs.vals_size += val_len; 413 if (val) 414 xattr->val = val; 415 else 416 xattr->val = ""; 417 418 xattr->val_len = val_len; 419 xattr->dirty = update_xattr; 420 xattr->should_free_val = (val && update_xattr); 421 422 if (new) { 423 rb_link_node(&xattr->node, parent, p); 424 rb_insert_color(&xattr->node, &ci->i_xattrs.index); 425 dout("__set_xattr_val p=%p\n", p); 426 } 427 428 dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n", 429 ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val); 430 431 return 0; 432 } 433 434 static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, 435 const char *name) 436 { 437 struct rb_node **p; 438 struct rb_node *parent = NULL; 439 struct ceph_inode_xattr *xattr = NULL; 440 int name_len = strlen(name); 441 int c; 442 443 p = &ci->i_xattrs.index.rb_node; 444 while (*p) { 445 parent = *p; 446 xattr = rb_entry(parent, struct ceph_inode_xattr, node); 447 c = strncmp(name, xattr->name, xattr->name_len); 448 if (c == 0 && name_len > xattr->name_len) 449 c = 1; 450 if (c < 0) 451 p = &(*p)->rb_left; 452 else if (c > 0) 453 p = &(*p)->rb_right; 454 else { 455 dout("__get_xattr %s: found %.*s\n", name, 456 xattr->val_len, xattr->val); 457 return xattr; 458 } 459 } 460 461 dout("__get_xattr %s: not found\n", name); 462 463 return NULL; 464 } 465 466 static void __free_xattr(struct ceph_inode_xattr *xattr) 467 { 468 BUG_ON(!xattr); 469 470 if (xattr->should_free_name) 471 kfree((void *)xattr->name); 472 if (xattr->should_free_val) 473 kfree((void *)xattr->val); 474 475 kfree(xattr); 476 } 477 478 static int __remove_xattr(struct ceph_inode_info *ci, 479 struct ceph_inode_xattr *xattr) 480 { 481 if (!xattr) 482 return -ENODATA; 483 484 rb_erase(&xattr->node, &ci->i_xattrs.index); 485 486 if (xattr->should_free_name) 487 kfree((void *)xattr->name); 488 if (xattr->should_free_val) 489 kfree((void *)xattr->val); 490 491 ci->i_xattrs.names_size -= xattr->name_len; 492 ci->i_xattrs.vals_size -= xattr->val_len; 493 ci->i_xattrs.count--; 494 kfree(xattr); 495 496 return 0; 497 } 498 499 static int __remove_xattr_by_name(struct ceph_inode_info *ci, 500 const char *name) 501 { 502 struct rb_node **p; 503 struct ceph_inode_xattr *xattr; 504 int err; 505 506 p = &ci->i_xattrs.index.rb_node; 507 xattr = __get_xattr(ci, name); 508 err = __remove_xattr(ci, xattr); 509 return err; 510 } 511 512 static char *__copy_xattr_names(struct ceph_inode_info *ci, 513 char *dest) 514 { 515 struct rb_node *p; 516 struct ceph_inode_xattr *xattr = NULL; 517 518 p = rb_first(&ci->i_xattrs.index); 519 dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count); 520 521 while (p) { 522 xattr = rb_entry(p, struct ceph_inode_xattr, node); 523 memcpy(dest, xattr->name, xattr->name_len); 524 dest[xattr->name_len] = '\0'; 525 526 dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name, 527 xattr->name_len, ci->i_xattrs.names_size); 528 529 dest += xattr->name_len + 1; 530 p = rb_next(p); 531 } 532 533 return dest; 534 } 535 536 void __ceph_destroy_xattrs(struct ceph_inode_info *ci) 537 { 538 struct rb_node *p, *tmp; 539 struct ceph_inode_xattr *xattr = NULL; 540 541 p = rb_first(&ci->i_xattrs.index); 542 543 dout("__ceph_destroy_xattrs p=%p\n", p); 544 545 while (p) { 546 xattr = rb_entry(p, struct ceph_inode_xattr, node); 547 tmp = p; 548 p = rb_next(tmp); 549 dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p, 550 xattr->name_len, xattr->name); 551 rb_erase(tmp, &ci->i_xattrs.index); 552 553 __free_xattr(xattr); 554 } 555 556 ci->i_xattrs.names_size = 0; 557 ci->i_xattrs.vals_size = 0; 558 ci->i_xattrs.index_version = 0; 559 ci->i_xattrs.count = 0; 560 ci->i_xattrs.index = RB_ROOT; 561 } 562 563 static int __build_xattrs(struct inode *inode) 564 __releases(ci->i_ceph_lock) 565 __acquires(ci->i_ceph_lock) 566 { 567 u32 namelen; 568 u32 numattr = 0; 569 void *p, *end; 570 u32 len; 571 const char *name, *val; 572 struct ceph_inode_info *ci = ceph_inode(inode); 573 int xattr_version; 574 struct ceph_inode_xattr **xattrs = NULL; 575 int err = 0; 576 int i; 577 578 dout("__build_xattrs() len=%d\n", 579 ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0); 580 581 if (ci->i_xattrs.index_version >= ci->i_xattrs.version) 582 return 0; /* already built */ 583 584 __ceph_destroy_xattrs(ci); 585 586 start: 587 /* updated internal xattr rb tree */ 588 if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) { 589 p = ci->i_xattrs.blob->vec.iov_base; 590 end = p + ci->i_xattrs.blob->vec.iov_len; 591 ceph_decode_32_safe(&p, end, numattr, bad); 592 xattr_version = ci->i_xattrs.version; 593 spin_unlock(&ci->i_ceph_lock); 594 595 xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *), 596 GFP_NOFS); 597 err = -ENOMEM; 598 if (!xattrs) 599 goto bad_lock; 600 601 for (i = 0; i < numattr; i++) { 602 xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr), 603 GFP_NOFS); 604 if (!xattrs[i]) 605 goto bad_lock; 606 } 607 608 spin_lock(&ci->i_ceph_lock); 609 if (ci->i_xattrs.version != xattr_version) { 610 /* lost a race, retry */ 611 for (i = 0; i < numattr; i++) 612 kfree(xattrs[i]); 613 kfree(xattrs); 614 xattrs = NULL; 615 goto start; 616 } 617 err = -EIO; 618 while (numattr--) { 619 ceph_decode_32_safe(&p, end, len, bad); 620 namelen = len; 621 name = p; 622 p += len; 623 ceph_decode_32_safe(&p, end, len, bad); 624 val = p; 625 p += len; 626 627 err = __set_xattr(ci, name, namelen, val, len, 628 0, 0, &xattrs[numattr]); 629 630 if (err < 0) 631 goto bad; 632 } 633 kfree(xattrs); 634 } 635 ci->i_xattrs.index_version = ci->i_xattrs.version; 636 ci->i_xattrs.dirty = false; 637 638 return err; 639 bad_lock: 640 spin_lock(&ci->i_ceph_lock); 641 bad: 642 if (xattrs) { 643 for (i = 0; i < numattr; i++) 644 kfree(xattrs[i]); 645 kfree(xattrs); 646 } 647 ci->i_xattrs.names_size = 0; 648 return err; 649 } 650 651 static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, 652 int val_size) 653 { 654 /* 655 * 4 bytes for the length, and additional 4 bytes per each xattr name, 656 * 4 bytes per each value 657 */ 658 int size = 4 + ci->i_xattrs.count*(4 + 4) + 659 ci->i_xattrs.names_size + 660 ci->i_xattrs.vals_size; 661 dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n", 662 ci->i_xattrs.count, ci->i_xattrs.names_size, 663 ci->i_xattrs.vals_size); 664 665 if (name_size) 666 size += 4 + 4 + name_size + val_size; 667 668 return size; 669 } 670 671 /* 672 * If there are dirty xattrs, reencode xattrs into the prealloc_blob 673 * and swap into place. 674 */ 675 void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) 676 { 677 struct rb_node *p; 678 struct ceph_inode_xattr *xattr = NULL; 679 void *dest; 680 681 dout("__build_xattrs_blob %p\n", &ci->vfs_inode); 682 if (ci->i_xattrs.dirty) { 683 int need = __get_required_blob_size(ci, 0, 0); 684 685 BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len); 686 687 p = rb_first(&ci->i_xattrs.index); 688 dest = ci->i_xattrs.prealloc_blob->vec.iov_base; 689 690 ceph_encode_32(&dest, ci->i_xattrs.count); 691 while (p) { 692 xattr = rb_entry(p, struct ceph_inode_xattr, node); 693 694 ceph_encode_32(&dest, xattr->name_len); 695 memcpy(dest, xattr->name, xattr->name_len); 696 dest += xattr->name_len; 697 ceph_encode_32(&dest, xattr->val_len); 698 memcpy(dest, xattr->val, xattr->val_len); 699 dest += xattr->val_len; 700 701 p = rb_next(p); 702 } 703 704 /* adjust buffer len; it may be larger than we need */ 705 ci->i_xattrs.prealloc_blob->vec.iov_len = 706 dest - ci->i_xattrs.prealloc_blob->vec.iov_base; 707 708 if (ci->i_xattrs.blob) 709 ceph_buffer_put(ci->i_xattrs.blob); 710 ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; 711 ci->i_xattrs.prealloc_blob = NULL; 712 ci->i_xattrs.dirty = false; 713 ci->i_xattrs.version++; 714 } 715 } 716 717 static inline int __get_request_mask(struct inode *in) { 718 struct ceph_mds_request *req = current->journal_info; 719 int mask = 0; 720 if (req && req->r_target_inode == in) { 721 if (req->r_op == CEPH_MDS_OP_LOOKUP || 722 req->r_op == CEPH_MDS_OP_LOOKUPINO || 723 req->r_op == CEPH_MDS_OP_LOOKUPPARENT || 724 req->r_op == CEPH_MDS_OP_GETATTR) { 725 mask = le32_to_cpu(req->r_args.getattr.mask); 726 } else if (req->r_op == CEPH_MDS_OP_OPEN || 727 req->r_op == CEPH_MDS_OP_CREATE) { 728 mask = le32_to_cpu(req->r_args.open.mask); 729 } 730 } 731 return mask; 732 } 733 734 ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, 735 size_t size) 736 { 737 struct ceph_inode_info *ci = ceph_inode(inode); 738 struct ceph_inode_xattr *xattr; 739 struct ceph_vxattr *vxattr = NULL; 740 int req_mask; 741 int err; 742 743 if (!ceph_is_valid_xattr(name)) 744 return -ENODATA; 745 746 /* let's see if a virtual xattr was requested */ 747 vxattr = ceph_match_vxattr(inode, name); 748 if (vxattr) { 749 err = -ENODATA; 750 if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) 751 err = vxattr->getxattr_cb(ci, value, size); 752 return err; 753 } 754 755 req_mask = __get_request_mask(inode); 756 757 spin_lock(&ci->i_ceph_lock); 758 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 759 ci->i_xattrs.version, ci->i_xattrs.index_version); 760 761 if (ci->i_xattrs.version == 0 || 762 !((req_mask & CEPH_CAP_XATTR_SHARED) || 763 __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) { 764 spin_unlock(&ci->i_ceph_lock); 765 766 /* security module gets xattr while filling trace */ 767 if (current->journal_info != NULL) { 768 pr_warn_ratelimited("sync getxattr %p " 769 "during filling trace\n", inode); 770 return -EBUSY; 771 } 772 773 /* get xattrs from mds (if we don't already have them) */ 774 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); 775 if (err) 776 return err; 777 spin_lock(&ci->i_ceph_lock); 778 } 779 780 err = __build_xattrs(inode); 781 if (err < 0) 782 goto out; 783 784 err = -ENODATA; /* == ENOATTR */ 785 xattr = __get_xattr(ci, name); 786 if (!xattr) 787 goto out; 788 789 err = -ERANGE; 790 if (size && size < xattr->val_len) 791 goto out; 792 793 err = xattr->val_len; 794 if (size == 0) 795 goto out; 796 797 memcpy(value, xattr->val, xattr->val_len); 798 799 if (current->journal_info != NULL && 800 !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) 801 ci->i_ceph_flags |= CEPH_I_SEC_INITED; 802 out: 803 spin_unlock(&ci->i_ceph_lock); 804 return err; 805 } 806 807 ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, 808 size_t size) 809 { 810 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 811 return generic_getxattr(dentry, name, value, size); 812 813 return __ceph_getxattr(d_inode(dentry), name, value, size); 814 } 815 816 ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) 817 { 818 struct inode *inode = d_inode(dentry); 819 struct ceph_inode_info *ci = ceph_inode(inode); 820 struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode); 821 u32 vir_namelen = 0; 822 u32 namelen; 823 int err; 824 u32 len; 825 int i; 826 827 spin_lock(&ci->i_ceph_lock); 828 dout("listxattr %p ver=%lld index_ver=%lld\n", inode, 829 ci->i_xattrs.version, ci->i_xattrs.index_version); 830 831 if (ci->i_xattrs.version == 0 || 832 !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) { 833 spin_unlock(&ci->i_ceph_lock); 834 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); 835 if (err) 836 return err; 837 spin_lock(&ci->i_ceph_lock); 838 } 839 840 err = __build_xattrs(inode); 841 if (err < 0) 842 goto out; 843 /* 844 * Start with virtual dir xattr names (if any) (including 845 * terminating '\0' characters for each). 846 */ 847 vir_namelen = ceph_vxattrs_name_size(vxattrs); 848 849 /* adding 1 byte per each variable due to the null termination */ 850 namelen = ci->i_xattrs.names_size + ci->i_xattrs.count; 851 err = -ERANGE; 852 if (size && vir_namelen + namelen > size) 853 goto out; 854 855 err = namelen + vir_namelen; 856 if (size == 0) 857 goto out; 858 859 names = __copy_xattr_names(ci, names); 860 861 /* virtual xattr names, too */ 862 err = namelen; 863 if (vxattrs) { 864 for (i = 0; vxattrs[i].name; i++) { 865 if (!vxattrs[i].hidden && 866 !(vxattrs[i].exists_cb && 867 !vxattrs[i].exists_cb(ci))) { 868 len = sprintf(names, "%s", vxattrs[i].name); 869 names += len + 1; 870 err += len + 1; 871 } 872 } 873 } 874 875 out: 876 spin_unlock(&ci->i_ceph_lock); 877 return err; 878 } 879 880 static int ceph_sync_setxattr(struct dentry *dentry, const char *name, 881 const char *value, size_t size, int flags) 882 { 883 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 884 struct inode *inode = d_inode(dentry); 885 struct ceph_inode_info *ci = ceph_inode(inode); 886 struct ceph_mds_request *req; 887 struct ceph_mds_client *mdsc = fsc->mdsc; 888 struct ceph_pagelist *pagelist = NULL; 889 int err; 890 891 if (size > 0) { 892 /* copy value into pagelist */ 893 pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); 894 if (!pagelist) 895 return -ENOMEM; 896 897 ceph_pagelist_init(pagelist); 898 err = ceph_pagelist_append(pagelist, value, size); 899 if (err) 900 goto out; 901 } else if (!value) { 902 flags |= CEPH_XATTR_REMOVE; 903 } 904 905 dout("setxattr value=%.*s\n", (int)size, value); 906 907 /* do request */ 908 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR, 909 USE_AUTH_MDS); 910 if (IS_ERR(req)) { 911 err = PTR_ERR(req); 912 goto out; 913 } 914 915 req->r_args.setxattr.flags = cpu_to_le32(flags); 916 req->r_path2 = kstrdup(name, GFP_NOFS); 917 if (!req->r_path2) { 918 ceph_mdsc_put_request(req); 919 err = -ENOMEM; 920 goto out; 921 } 922 923 req->r_pagelist = pagelist; 924 pagelist = NULL; 925 926 req->r_inode = inode; 927 ihold(inode); 928 req->r_num_caps = 1; 929 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 930 931 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); 932 err = ceph_mdsc_do_request(mdsc, NULL, req); 933 ceph_mdsc_put_request(req); 934 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); 935 936 out: 937 if (pagelist) 938 ceph_pagelist_release(pagelist); 939 return err; 940 } 941 942 int __ceph_setxattr(struct dentry *dentry, const char *name, 943 const void *value, size_t size, int flags) 944 { 945 struct inode *inode = d_inode(dentry); 946 struct ceph_vxattr *vxattr; 947 struct ceph_inode_info *ci = ceph_inode(inode); 948 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; 949 struct ceph_cap_flush *prealloc_cf = NULL; 950 int issued; 951 int err; 952 int dirty = 0; 953 int name_len = strlen(name); 954 int val_len = size; 955 char *newname = NULL; 956 char *newval = NULL; 957 struct ceph_inode_xattr *xattr = NULL; 958 int required_blob_size; 959 bool lock_snap_rwsem = false; 960 961 if (!ceph_is_valid_xattr(name)) 962 return -EOPNOTSUPP; 963 964 vxattr = ceph_match_vxattr(inode, name); 965 if (vxattr && vxattr->readonly) 966 return -EOPNOTSUPP; 967 968 /* pass any unhandled ceph.* xattrs through to the MDS */ 969 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) 970 goto do_sync_unlocked; 971 972 /* preallocate memory for xattr name, value, index node */ 973 err = -ENOMEM; 974 newname = kmemdup(name, name_len + 1, GFP_NOFS); 975 if (!newname) 976 goto out; 977 978 if (val_len) { 979 newval = kmemdup(value, val_len, GFP_NOFS); 980 if (!newval) 981 goto out; 982 } 983 984 xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS); 985 if (!xattr) 986 goto out; 987 988 prealloc_cf = ceph_alloc_cap_flush(); 989 if (!prealloc_cf) 990 goto out; 991 992 spin_lock(&ci->i_ceph_lock); 993 retry: 994 issued = __ceph_caps_issued(ci, NULL); 995 if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) 996 goto do_sync; 997 998 if (!lock_snap_rwsem && !ci->i_head_snapc) { 999 lock_snap_rwsem = true; 1000 if (!down_read_trylock(&mdsc->snap_rwsem)) { 1001 spin_unlock(&ci->i_ceph_lock); 1002 down_read(&mdsc->snap_rwsem); 1003 spin_lock(&ci->i_ceph_lock); 1004 goto retry; 1005 } 1006 } 1007 1008 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); 1009 __build_xattrs(inode); 1010 1011 required_blob_size = __get_required_blob_size(ci, name_len, val_len); 1012 1013 if (!ci->i_xattrs.prealloc_blob || 1014 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { 1015 struct ceph_buffer *blob; 1016 1017 spin_unlock(&ci->i_ceph_lock); 1018 dout(" preaallocating new blob size=%d\n", required_blob_size); 1019 blob = ceph_buffer_new(required_blob_size, GFP_NOFS); 1020 if (!blob) 1021 goto do_sync_unlocked; 1022 spin_lock(&ci->i_ceph_lock); 1023 if (ci->i_xattrs.prealloc_blob) 1024 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 1025 ci->i_xattrs.prealloc_blob = blob; 1026 goto retry; 1027 } 1028 1029 err = __set_xattr(ci, newname, name_len, newval, val_len, 1030 flags, value ? 1 : -1, &xattr); 1031 1032 if (!err) { 1033 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, 1034 &prealloc_cf); 1035 ci->i_xattrs.dirty = true; 1036 inode->i_ctime = current_fs_time(inode->i_sb); 1037 } 1038 1039 spin_unlock(&ci->i_ceph_lock); 1040 if (lock_snap_rwsem) 1041 up_read(&mdsc->snap_rwsem); 1042 if (dirty) 1043 __mark_inode_dirty(inode, dirty); 1044 ceph_free_cap_flush(prealloc_cf); 1045 return err; 1046 1047 do_sync: 1048 spin_unlock(&ci->i_ceph_lock); 1049 do_sync_unlocked: 1050 if (lock_snap_rwsem) 1051 up_read(&mdsc->snap_rwsem); 1052 1053 /* security module set xattr while filling trace */ 1054 if (current->journal_info != NULL) { 1055 pr_warn_ratelimited("sync setxattr %p " 1056 "during filling trace\n", inode); 1057 err = -EBUSY; 1058 } else { 1059 err = ceph_sync_setxattr(dentry, name, value, size, flags); 1060 } 1061 out: 1062 ceph_free_cap_flush(prealloc_cf); 1063 kfree(newname); 1064 kfree(newval); 1065 kfree(xattr); 1066 return err; 1067 } 1068 1069 int ceph_setxattr(struct dentry *dentry, const char *name, 1070 const void *value, size_t size, int flags) 1071 { 1072 if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP) 1073 return -EROFS; 1074 1075 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 1076 return generic_setxattr(dentry, name, value, size, flags); 1077 1078 if (size == 0) 1079 value = ""; /* empty EA, do not remove */ 1080 1081 return __ceph_setxattr(dentry, name, value, size, flags); 1082 } 1083 1084 static int ceph_send_removexattr(struct dentry *dentry, const char *name) 1085 { 1086 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 1087 struct ceph_mds_client *mdsc = fsc->mdsc; 1088 struct inode *inode = d_inode(dentry); 1089 struct ceph_mds_request *req; 1090 int err; 1091 1092 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR, 1093 USE_AUTH_MDS); 1094 if (IS_ERR(req)) 1095 return PTR_ERR(req); 1096 req->r_path2 = kstrdup(name, GFP_NOFS); 1097 if (!req->r_path2) 1098 return -ENOMEM; 1099 1100 req->r_inode = inode; 1101 ihold(inode); 1102 req->r_num_caps = 1; 1103 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 1104 err = ceph_mdsc_do_request(mdsc, NULL, req); 1105 ceph_mdsc_put_request(req); 1106 return err; 1107 } 1108 1109 int __ceph_removexattr(struct dentry *dentry, const char *name) 1110 { 1111 struct inode *inode = d_inode(dentry); 1112 struct ceph_vxattr *vxattr; 1113 struct ceph_inode_info *ci = ceph_inode(inode); 1114 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; 1115 struct ceph_cap_flush *prealloc_cf = NULL; 1116 int issued; 1117 int err; 1118 int required_blob_size; 1119 int dirty; 1120 bool lock_snap_rwsem = false; 1121 1122 if (!ceph_is_valid_xattr(name)) 1123 return -EOPNOTSUPP; 1124 1125 vxattr = ceph_match_vxattr(inode, name); 1126 if (vxattr && vxattr->readonly) 1127 return -EOPNOTSUPP; 1128 1129 /* pass any unhandled ceph.* xattrs through to the MDS */ 1130 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) 1131 goto do_sync_unlocked; 1132 1133 prealloc_cf = ceph_alloc_cap_flush(); 1134 if (!prealloc_cf) 1135 return -ENOMEM; 1136 1137 err = -ENOMEM; 1138 spin_lock(&ci->i_ceph_lock); 1139 retry: 1140 issued = __ceph_caps_issued(ci, NULL); 1141 if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) 1142 goto do_sync; 1143 1144 if (!lock_snap_rwsem && !ci->i_head_snapc) { 1145 lock_snap_rwsem = true; 1146 if (!down_read_trylock(&mdsc->snap_rwsem)) { 1147 spin_unlock(&ci->i_ceph_lock); 1148 down_read(&mdsc->snap_rwsem); 1149 spin_lock(&ci->i_ceph_lock); 1150 goto retry; 1151 } 1152 } 1153 1154 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); 1155 1156 __build_xattrs(inode); 1157 1158 required_blob_size = __get_required_blob_size(ci, 0, 0); 1159 1160 if (!ci->i_xattrs.prealloc_blob || 1161 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { 1162 struct ceph_buffer *blob; 1163 1164 spin_unlock(&ci->i_ceph_lock); 1165 dout(" preaallocating new blob size=%d\n", required_blob_size); 1166 blob = ceph_buffer_new(required_blob_size, GFP_NOFS); 1167 if (!blob) 1168 goto do_sync_unlocked; 1169 spin_lock(&ci->i_ceph_lock); 1170 if (ci->i_xattrs.prealloc_blob) 1171 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 1172 ci->i_xattrs.prealloc_blob = blob; 1173 goto retry; 1174 } 1175 1176 err = __remove_xattr_by_name(ceph_inode(inode), name); 1177 1178 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, 1179 &prealloc_cf); 1180 ci->i_xattrs.dirty = true; 1181 inode->i_ctime = current_fs_time(inode->i_sb); 1182 spin_unlock(&ci->i_ceph_lock); 1183 if (lock_snap_rwsem) 1184 up_read(&mdsc->snap_rwsem); 1185 if (dirty) 1186 __mark_inode_dirty(inode, dirty); 1187 ceph_free_cap_flush(prealloc_cf); 1188 return err; 1189 do_sync: 1190 spin_unlock(&ci->i_ceph_lock); 1191 do_sync_unlocked: 1192 if (lock_snap_rwsem) 1193 up_read(&mdsc->snap_rwsem); 1194 ceph_free_cap_flush(prealloc_cf); 1195 err = ceph_send_removexattr(dentry, name); 1196 return err; 1197 } 1198 1199 int ceph_removexattr(struct dentry *dentry, const char *name) 1200 { 1201 if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP) 1202 return -EROFS; 1203 1204 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 1205 return generic_removexattr(dentry, name); 1206 1207 return __ceph_removexattr(dentry, name); 1208 } 1209 1210 #ifdef CONFIG_SECURITY 1211 bool ceph_security_xattr_wanted(struct inode *in) 1212 { 1213 return in->i_security != NULL; 1214 } 1215 1216 bool ceph_security_xattr_deadlock(struct inode *in) 1217 { 1218 struct ceph_inode_info *ci; 1219 bool ret; 1220 if (in->i_security == NULL) 1221 return false; 1222 ci = ceph_inode(in); 1223 spin_lock(&ci->i_ceph_lock); 1224 ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) && 1225 !(ci->i_xattrs.version > 0 && 1226 __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0)); 1227 spin_unlock(&ci->i_ceph_lock); 1228 return ret; 1229 } 1230 #endif 1231