#include <linux/ceph/ceph_debug.h>

#include <linux/spinlock.h>
#include <linux/fs_struct.h>
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/sched.h>

#include "super.h"
#include "mds_client.h"

/*
 * Directory operations: readdir, lookup, create, link, unlink,
 * rename, etc.
 */

/*
 * Ceph MDS operations are specified in terms of a base ino and
 * relative path.  Thus, the client can specify an operation on a
 * specific inode (e.g., a getattr due to fstat(2)), or as a path
 * relative to, say, the root directory.
 *
 * Normally, we limit ourselves to strict inode ops (no path component)
 * or dentry operations (a single path component relative to an ino).  The
 * exception to this is open_root_dentry(), which will open the mount
 * point by name.
 */

const struct dentry_operations ceph_dentry_ops;

/*
 * Initialize ceph dentry state.
 */
int ceph_init_dentry(struct dentry *dentry)
{
	struct ceph_dentry_info *di;

	if (dentry->d_fsdata)
		return 0;

	di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
	if (!di)
		return -ENOMEM;          /* oh well */

	spin_lock(&dentry->d_lock);
	if (dentry->d_fsdata) {
		/* lost a race */
		kmem_cache_free(ceph_dentry_cachep, di);
		goto out_unlock;
	}

	if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP)
		d_set_d_op(dentry, &ceph_dentry_ops);
	else if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_SNAPDIR)
		d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
	else
		d_set_d_op(dentry, &ceph_snap_dentry_ops);

	di->dentry = dentry;
	di->lease_session = NULL;
	dentry->d_time = jiffies;
	/* avoid reordering d_fsdata setup so that the check above is safe */
	smp_mb();
	dentry->d_fsdata = di;
	ceph_dentry_lru_add(dentry);
out_unlock:
	spin_unlock(&dentry->d_lock);
	return 0;
}

struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
{
	struct inode *inode = NULL;

	if (!dentry)
		return NULL;

	spin_lock(&dentry->d_lock);
	if (!IS_ROOT(dentry)) {
		inode = d_inode(dentry->d_parent);
		ihold(inode);
	}
	spin_unlock(&dentry->d_lock);
	return inode;
}


/*
 * for readdir, we encode the directory frag and offset within that
 * frag into f_pos.
 */
static unsigned fpos_frag(loff_t p)
{
	return p >> 32;
}
static unsigned fpos_off(loff_t p)
{
	return p & 0xffffffff;
}

static int fpos_cmp(loff_t l, loff_t r)
{
	int v = ceph_frag_compare(fpos_frag(l), fpos_frag(r));
	if (v)
		return v;
	return (int)(fpos_off(l) - fpos_off(r));
}
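/*
 * Illustrative sketch (added commentary, not from the original source):
 * ceph_make_fpos(), used by ceph_readdir() below, is the inverse of
 * these helpers, packing the frag into the high 32 bits:
 *
 *	loff_t pos = ceph_make_fpos(0xdeadbeef, 7);
 *	fpos_frag(pos);		returns 0xdeadbeef
 *	fpos_off(pos);		returns 7
 */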
/*
 * When possible, we try to satisfy a readdir by peeking at the
 * dcache.  We make this work by carefully ordering dentries on
 * d_child when we initially get results back from the MDS, and
 * falling back to a "normal" sync readdir if any dentries in the dir
 * are dropped.
 *
 * Complete dir indicates that we have all dentries in the dir.  It is
 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
 * the MDS if/when the directory is modified).
 */
static int __dcache_readdir(struct file *file, struct dir_context *ctx,
			    u32 shared_gen)
{
	struct ceph_file_info *fi = file->private_data;
	struct dentry *parent = file->f_path.dentry;
	struct inode *dir = d_inode(parent);
	struct list_head *p;
	struct dentry *dentry, *last;
	struct ceph_dentry_info *di;
	int err = 0;

	/* claim ref on last dentry we returned */
	last = fi->dentry;
	fi->dentry = NULL;

	dout("__dcache_readdir %p v%u at %llu (last %p)\n",
	     dir, shared_gen, ctx->pos, last);

	spin_lock(&parent->d_lock);

	/* start at beginning? */
	if (ctx->pos == 2 || last == NULL ||
	    fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) {
		if (list_empty(&parent->d_subdirs))
			goto out_unlock;
		p = parent->d_subdirs.prev;
		dout(" initial p %p/%p\n", p->prev, p->next);
	} else {
		p = last->d_child.prev;
	}

more:
	dentry = list_entry(p, struct dentry, d_child);
	di = ceph_dentry(dentry);
	while (1) {
		dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
		     d_unhashed(dentry) ? "!hashed" : "hashed",
		     parent->d_subdirs.prev, parent->d_subdirs.next);
		if (p == &parent->d_subdirs) {
			fi->flags |= CEPH_F_ATEND;
			goto out_unlock;
		}
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		if (di->lease_shared_gen == shared_gen &&
		    !d_unhashed(dentry) && d_really_is_positive(dentry) &&
		    ceph_snap(d_inode(dentry)) != CEPH_SNAPDIR &&
		    ceph_ino(d_inode(dentry)) != CEPH_INO_CEPH &&
		    fpos_cmp(ctx->pos, di->offset) <= 0)
			break;
		dout(" skipping %p %pd at %llu (%llu)%s%s\n", dentry,
		     dentry, di->offset,
		     ctx->pos, d_unhashed(dentry) ? " unhashed" : "",
		     !d_inode(dentry) ? " null" : "");
		spin_unlock(&dentry->d_lock);
		p = p->prev;
		dentry = list_entry(p, struct dentry, d_child);
		di = ceph_dentry(dentry);
	}

	dget_dlock(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&parent->d_lock);

	/* make sure a dentry wasn't dropped while we didn't have parent lock */
	if (!ceph_dir_is_complete_ordered(dir)) {
		dout(" lost dir complete on %p; falling back to mds\n", dir);
		dput(dentry);
		err = -EAGAIN;
		goto out;
	}

	dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos,
	     dentry, dentry, d_inode(dentry));
	if (!dir_emit(ctx, dentry->d_name.name,
		      dentry->d_name.len,
		      ceph_translate_ino(dentry->d_sb, d_inode(dentry)->i_ino),
		      d_inode(dentry)->i_mode >> 12)) {
		if (last) {
			/* remember our position */
			fi->dentry = last;
			fi->next_offset = fpos_off(di->offset);
		}
		dput(dentry);
		return 0;
	}

	ctx->pos = di->offset + 1;

	if (last)
		dput(last);
	last = dentry;

	spin_lock(&parent->d_lock);
	p = p->prev;	/* advance to next dentry */
	goto more;

out_unlock:
	spin_unlock(&parent->d_lock);
out:
	if (last)
		dput(last);
	return err;
}

/*
 * make note of the last dentry we read, so we can
 * continue at the same lexicographical point,
 * regardless of what dir changes take place on the
 * server.
 */
static int note_last_dentry(struct ceph_file_info *fi, const char *name,
			    int len)
{
	kfree(fi->last_name);
	fi->last_name = kmalloc(len+1, GFP_NOFS);
	if (!fi->last_name)
		return -ENOMEM;
	memcpy(fi->last_name, name, len);
	fi->last_name[len] = 0;
	dout("note_last_dentry '%s'\n", fi->last_name);
	return 0;
}
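/*
 * Note (added commentary, not from the original source): the name saved
 * here becomes req->r_path2 of the next CEPH_MDS_OP_READDIR request that
 * ceph_readdir() builds below, so the MDS can resume the listing just
 * past this entry even if the directory changes in between.
 */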
static int ceph_readdir(struct file *file, struct dir_context *ctx)
{
	struct ceph_file_info *fi = file->private_data;
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	struct ceph_mds_client *mdsc = fsc->mdsc;
	unsigned frag = fpos_frag(ctx->pos);
	int off = fpos_off(ctx->pos);
	int err;
	u32 ftype;
	struct ceph_mds_reply_info_parsed *rinfo;

	dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off);
	if (fi->flags & CEPH_F_ATEND)
		return 0;

	/* always start with . and .. */
	if (ctx->pos == 0) {
		dout("readdir off 0 -> '.'\n");
		if (!dir_emit(ctx, ".", 1,
			    ceph_translate_ino(inode->i_sb, inode->i_ino),
			    inode->i_mode >> 12))
			return 0;
		ctx->pos = 1;
		off = 1;
	}
	if (ctx->pos == 1) {
		ino_t ino = parent_ino(file->f_path.dentry);
		dout("readdir off 1 -> '..'\n");
		if (!dir_emit(ctx, "..", 2,
			    ceph_translate_ino(inode->i_sb, ino),
			    inode->i_mode >> 12))
			return 0;
		ctx->pos = 2;
		off = 2;
	}

	/* can we use the dcache? */
	spin_lock(&ci->i_ceph_lock);
	if ((ctx->pos == 2 || fi->dentry) &&
	    ceph_test_mount_opt(fsc, DCACHE) &&
	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
	    ceph_snap(inode) != CEPH_SNAPDIR &&
	    __ceph_dir_is_complete_ordered(ci) &&
	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
		u32 shared_gen = ci->i_shared_gen;
		spin_unlock(&ci->i_ceph_lock);
		err = __dcache_readdir(file, ctx, shared_gen);
		if (err != -EAGAIN)
			return err;
		frag = fpos_frag(ctx->pos);
		off = fpos_off(ctx->pos);
	} else {
		spin_unlock(&ci->i_ceph_lock);
	}
	if (fi->dentry) {
		err = note_last_dentry(fi, fi->dentry->d_name.name,
				       fi->dentry->d_name.len);
		if (err)
			return err;
		dput(fi->dentry);
		fi->dentry = NULL;
	}

	/* proceed with a normal readdir */

	if (ctx->pos == 2) {
		/* note dir version at start of readdir so we can tell
		 * if any dentries get dropped */
		fi->dir_release_count = atomic_read(&ci->i_release_count);
		fi->dir_ordered_count = ci->i_ordered_count;
	}
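	/*
	 * Note (added commentary, not from the original source):
	 * fi->last_readdir buffers the parsed MDS reply for the chunk
	 * currently being emitted; the loop below refetches from the
	 * MDS whenever we cross into a different frag or the buffered
	 * chunk has been consumed.
	 */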
more:
	/* do we have the correct frag content buffered? */
	if (fi->frag != frag || fi->last_readdir == NULL) {
		struct ceph_mds_request *req;
		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;

		/* discard old result, if any */
		if (fi->last_readdir) {
			ceph_mdsc_put_request(fi->last_readdir);
			fi->last_readdir = NULL;
		}

		dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
		     ceph_vinop(inode), frag, fi->last_name);
		req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
		if (IS_ERR(req))
			return PTR_ERR(req);
		err = ceph_alloc_readdir_reply_buffer(req, inode);
		if (err) {
			ceph_mdsc_put_request(req);
			return err;
		}
		/* hints to request -> mds selection code */
		req->r_direct_mode = USE_AUTH_MDS;
		req->r_direct_hash = ceph_frag_value(frag);
		req->r_direct_is_hash = true;
		if (fi->last_name) {
			req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
			if (!req->r_path2) {
				ceph_mdsc_put_request(req);
				return -ENOMEM;
			}
		}
		req->r_readdir_offset = fi->next_offset;
		req->r_args.readdir.frag = cpu_to_le32(frag);

		req->r_inode = inode;
		ihold(inode);
		req->r_dentry = dget(file->f_path.dentry);
		err = ceph_mdsc_do_request(mdsc, NULL, req);
		if (err < 0) {
			ceph_mdsc_put_request(req);
			return err;
		}
		dout("readdir got and parsed readdir result=%d"
		     " on frag %x, end=%d, complete=%d\n", err, frag,
		     (int)req->r_reply_info.dir_end,
		     (int)req->r_reply_info.dir_complete);

		if (!req->r_did_prepopulate) {
			dout("readdir !did_prepopulate\n");
			/* preclude from marking dir complete */
			fi->dir_release_count--;
		}

		/* note next offset and last dentry name */
		rinfo = &req->r_reply_info;
		if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
			frag = le32_to_cpu(rinfo->dir_dir->frag);
			if (ceph_frag_is_leftmost(frag))
				fi->next_offset = 2;
			else
				fi->next_offset = 0;
			off = fi->next_offset;
		}
		fi->frag = frag;
		fi->offset = fi->next_offset;
		fi->last_readdir = req;

		if (req->r_reply_info.dir_end) {
			kfree(fi->last_name);
			fi->last_name = NULL;
			if (ceph_frag_is_rightmost(frag))
				fi->next_offset = 2;
			else
				fi->next_offset = 0;
		} else {
			err = note_last_dentry(fi,
				       rinfo->dir_dname[rinfo->dir_nr-1],
				       rinfo->dir_dname_len[rinfo->dir_nr-1]);
			if (err)
				return err;
			fi->next_offset += rinfo->dir_nr;
		}
	}

	rinfo = &fi->last_readdir->r_reply_info;
	dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
	     rinfo->dir_nr, off, fi->offset);

	ctx->pos = ceph_make_fpos(frag, off);
	while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
		struct ceph_mds_reply_inode *in =
			rinfo->dir_in[off - fi->offset].in;
		struct ceph_vino vino;
		ino_t ino;

		dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
		     off, off - fi->offset, rinfo->dir_nr, ctx->pos,
		     rinfo->dir_dname_len[off - fi->offset],
		     rinfo->dir_dname[off - fi->offset], in);
		BUG_ON(!in);
		ftype = le32_to_cpu(in->mode) >> 12;
		vino.ino = le64_to_cpu(in->ino);
		vino.snap = le64_to_cpu(in->snapid);
		ino = ceph_vino_to_ino(vino);
		if (!dir_emit(ctx,
			    rinfo->dir_dname[off - fi->offset],
			    rinfo->dir_dname_len[off - fi->offset],
			    ceph_translate_ino(inode->i_sb, ino), ftype)) {
			dout("filldir stopping us...\n");
			return 0;
		}
		off++;
		ctx->pos++;
	}

	if (fi->last_name) {
		ceph_mdsc_put_request(fi->last_readdir);
		fi->last_readdir = NULL;
		goto more;
	}
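	/*
	 * Illustrative sketch (added commentary, not from the original
	 * source): a fragmented directory is listed frag by frag, left
	 * to right; with two frags the stream of positions would look
	 * like
	 *
	 *	ceph_make_fpos(frag0, 2), ceph_make_fpos(frag0, 3), ...
	 *	ceph_make_fpos(frag1, 0), ceph_make_fpos(frag1, 1), ...
	 *
	 * (frag0/frag1 hypothetical; the leftmost frag starts at offset
	 * 2 to account for . and ..) until the rightmost frag ends.
	 */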
	/* more frags? */
	if (!ceph_frag_is_rightmost(frag)) {
		frag = ceph_frag_next(frag);
		off = 0;
		ctx->pos = ceph_make_fpos(frag, off);
		dout("readdir next frag is %x\n", frag);
		goto more;
	}
	fi->flags |= CEPH_F_ATEND;

	/*
	 * if dir_release_count still matches the dir, no dentries
	 * were released during the whole readdir, and we should have
	 * the complete dir contents in our cache.
	 */
	spin_lock(&ci->i_ceph_lock);
	if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
		if (ci->i_ordered_count == fi->dir_ordered_count)
			dout(" marking %p complete and ordered\n", inode);
		else
			dout(" marking %p complete\n", inode);
		__ceph_dir_set_complete(ci, fi->dir_release_count,
					fi->dir_ordered_count);
	}
	spin_unlock(&ci->i_ceph_lock);

	dout("readdir %p file %p done.\n", inode, file);
	return 0;
}

static void reset_readdir(struct ceph_file_info *fi, unsigned frag)
{
	if (fi->last_readdir) {
		ceph_mdsc_put_request(fi->last_readdir);
		fi->last_readdir = NULL;
	}
	kfree(fi->last_name);
	fi->last_name = NULL;
	if (ceph_frag_is_leftmost(frag))
		fi->next_offset = 2;  /* compensate for . and .. */
	else
		fi->next_offset = 0;
	if (fi->dentry) {
		dput(fi->dentry);
		fi->dentry = NULL;
	}
	fi->flags &= ~CEPH_F_ATEND;
}

static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct ceph_file_info *fi = file->private_data;
	struct inode *inode = file->f_mapping->host;
	loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
	loff_t retval;

	mutex_lock(&inode->i_mutex);
	retval = -EINVAL;
	switch (whence) {
	case SEEK_END:
		offset += inode->i_size + 2;   /* FIXME */
		break;
	case SEEK_CUR:
		offset += file->f_pos;
		/* fall through */
	case SEEK_SET:
		break;
	default:
		goto out;
	}

	if (offset >= 0) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
			fi->flags &= ~CEPH_F_ATEND;
		}
		retval = offset;

		/*
		 * discard buffered readdir content on seekdir(0), or
		 * seek to new frag, or seek prior to current chunk.
		 */
		if (offset == 0 ||
		    fpos_frag(offset) != fi->frag ||
		    fpos_off(offset) < fi->offset) {
			dout("dir_llseek dropping %p content\n", file);
			reset_readdir(fi, fpos_frag(offset));
		}

		/* bump dir_release_count if we did a forward seek */
		if (fpos_cmp(offset, old_offset) > 0)
			fi->dir_release_count--;
	}
out:
	mutex_unlock(&inode->i_mutex);
	return retval;
}

/*
 * Handle lookups for the hidden .snap directory.
 */
int ceph_handle_snapdir(struct ceph_mds_request *req,
			struct dentry *dentry, int err)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
	struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */

	/* .snap dir? */
	if (err == -ENOENT &&
	    ceph_snap(parent) == CEPH_NOSNAP &&
	    strcmp(dentry->d_name.name,
		   fsc->mount_options->snapdir_name) == 0) {
		struct inode *inode = ceph_get_snapdir(parent);
		dout("ENOENT on snapdir %p '%pd', linking to snapdir %p\n",
		     dentry, dentry, inode);
		BUG_ON(!d_unhashed(dentry));
		d_add(dentry, inode);
		err = 0;
	}
	return err;
}

/*
 * Figure out final result of a lookup/open request.
 *
 * Mainly, make sure we return the final req->r_dentry (if it already
 * existed) in place of the original VFS-provided dentry when they
 * differ.
 *
 * Gracefully handle the case where the MDS replies with -ENOENT and
 * no trace (which it may do, at its discretion, e.g., if it doesn't
 * care to issue a lease on the negative dentry).
 */
struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
				  struct dentry *dentry, int err)
{
	if (err == -ENOENT) {
		/* no trace? */
		err = 0;
		if (!req->r_reply_info.head->is_dentry) {
			dout("ENOENT and no trace, dentry %p inode %p\n",
			     dentry, d_inode(dentry));
			if (d_really_is_positive(dentry)) {
				d_drop(dentry);
				err = -ENOENT;
			} else {
				d_add(dentry, NULL);
			}
		}
	}
	if (err)
		dentry = ERR_PTR(err);
	else if (dentry != req->r_dentry)
		dentry = dget(req->r_dentry);	/* we got spliced */
	else
		dentry = NULL;
	return dentry;
}
static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
{
	return ceph_ino(inode) == CEPH_INO_ROOT &&
		strncmp(dentry->d_name.name, ".ceph", 5) == 0;
}

/*
 * Look up a single dir entry.  If there is a lookup intent, inform
 * the MDS so that it gets our 'caps wanted' value in a single op.
 */
static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
				  unsigned int flags)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req;
	int op;
	int err;

	dout("lookup %p dentry %p '%pd'\n",
	     dir, dentry, dentry);

	if (dentry->d_name.len > NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);

	err = ceph_init_dentry(dentry);
	if (err < 0)
		return ERR_PTR(err);

	/* can we conclude ENOENT locally? */
	if (d_really_is_negative(dentry)) {
		struct ceph_inode_info *ci = ceph_inode(dir);
		struct ceph_dentry_info *di = ceph_dentry(dentry);

		spin_lock(&ci->i_ceph_lock);
		dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
		if (strncmp(dentry->d_name.name,
			    fsc->mount_options->snapdir_name,
			    dentry->d_name.len) &&
		    !is_root_ceph_dentry(dir, dentry) &&
		    ceph_test_mount_opt(fsc, DCACHE) &&
		    __ceph_dir_is_complete(ci) &&
		    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
			spin_unlock(&ci->i_ceph_lock);
			dout(" dir %p complete, -ENOENT\n", dir);
			d_add(dentry, NULL);
			di->lease_shared_gen = ci->i_shared_gen;
			return NULL;
		}
		spin_unlock(&ci->i_ceph_lock);
	}

	op = ceph_snap(dir) == CEPH_SNAPDIR ?
		CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
	req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
	if (IS_ERR(req))
		return ERR_CAST(req);
	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;
	/* we only need inode linkage */
	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
	req->r_locked_dir = dir;
	err = ceph_mdsc_do_request(mdsc, NULL, req);
	err = ceph_handle_snapdir(req, dentry, err);
	dentry = ceph_finish_lookup(req, dentry, err);
	ceph_mdsc_put_request(req);  /* will dput(dentry) */
	dout("lookup result=%p\n", dentry);
	return dentry;
}

/*
 * If we do a create but get no trace back from the MDS, follow up with
 * a lookup (the VFS expects us to link up the provided dentry).
 */
int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
{
	struct dentry *result = ceph_lookup(dir, dentry, 0);

	if (result && !IS_ERR(result)) {
		/*
		 * We created the item, then did a lookup, and found
		 * it was already linked to another inode we already
		 * had in our cache (and thus got spliced).  To not
		 * confuse VFS (especially when inode is a directory),
		 * we don't link our dentry to that inode, return an
		 * error instead.
		 *
		 * This event should be rare and it happens only when
		 * we talk to old MDS. Recent MDS does not send traceless
		 * reply for request that creates new inode.
		 */
		d_drop(result);
		return -ESTALE;
	}
	return PTR_ERR(result);
}

static int ceph_mknod(struct inode *dir, struct dentry *dentry,
		      umode_t mode, dev_t rdev)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req;
	struct ceph_acls_info acls = {};
	int err;

	if (ceph_snap(dir) != CEPH_NOSNAP)
		return -EROFS;

	err = ceph_pre_init_acls(dir, &mode, &acls);
	if (err < 0)
		return err;

	dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
	     dir, dentry, mode, rdev);
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}
	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;
	req->r_locked_dir = dir;
	req->r_args.mknod.mode = cpu_to_le32(mode);
	req->r_args.mknod.rdev = cpu_to_le32(rdev);
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	if (acls.pagelist) {
		req->r_pagelist = acls.pagelist;
		acls.pagelist = NULL;
	}
	err = ceph_mdsc_do_request(mdsc, dir, req);
	if (!err && !req->r_reply_info.head->is_dentry)
		err = ceph_handle_notrace_create(dir, dentry);
	ceph_mdsc_put_request(req);
out:
	if (!err)
		ceph_init_inode_acls(d_inode(dentry), &acls);
	else
		d_drop(dentry);
	ceph_release_acls_info(&acls);
	return err;
}
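/*
 * Note (added commentary, not from the original source): the acls dance
 * above prepares any default-ACL-derived attributes before the MDS
 * round trip (ceph_pre_init_acls()), ships them with the request via
 * req->r_pagelist, and only attaches them to the new in-core inode
 * (ceph_init_inode_acls()) once the create succeeds.
 */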
static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		       bool excl)
{
	return ceph_mknod(dir, dentry, mode, 0);
}

static int ceph_symlink(struct inode *dir, struct dentry *dentry,
			const char *dest)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req;
	int err;

	if (ceph_snap(dir) != CEPH_NOSNAP)
		return -EROFS;

	dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}
	req->r_path2 = kstrdup(dest, GFP_NOFS);
	if (!req->r_path2) {
		err = -ENOMEM;
		ceph_mdsc_put_request(req);
		goto out;
	}
	req->r_locked_dir = dir;
	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	err = ceph_mdsc_do_request(mdsc, dir, req);
	if (!err && !req->r_reply_info.head->is_dentry)
		err = ceph_handle_notrace_create(dir, dentry);
	ceph_mdsc_put_request(req);
out:
	if (err)
		d_drop(dentry);
	return err;
}

static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req;
	struct ceph_acls_info acls = {};
	int err = -EROFS;
	int op;

	if (ceph_snap(dir) == CEPH_SNAPDIR) {
		/* mkdir .snap/foo is a MKSNAP */
		op = CEPH_MDS_OP_MKSNAP;
		dout("mksnap dir %p snap '%pd' dn %p\n", dir,
		     dentry, dentry);
	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
		dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
		op = CEPH_MDS_OP_MKDIR;
	} else {
		goto out;
	}

	mode |= S_IFDIR;
	err = ceph_pre_init_acls(dir, &mode, &acls);
	if (err < 0)
		goto out;

	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}

	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;
	req->r_locked_dir = dir;
	req->r_args.mkdir.mode = cpu_to_le32(mode);
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	if (acls.pagelist) {
		req->r_pagelist = acls.pagelist;
		acls.pagelist = NULL;
	}
	err = ceph_mdsc_do_request(mdsc, dir, req);
	if (!err &&
	    !req->r_reply_info.head->is_target &&
	    !req->r_reply_info.head->is_dentry)
		err = ceph_handle_notrace_create(dir, dentry);
	ceph_mdsc_put_request(req);
out:
	if (!err)
		ceph_init_inode_acls(d_inode(dentry), &acls);
	else
		d_drop(dentry);
	ceph_release_acls_info(&acls);
	return err;
}

static int ceph_link(struct dentry *old_dentry, struct inode *dir,
		     struct dentry *dentry)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req;
	int err;

	if (ceph_snap(dir) != CEPH_NOSNAP)
		return -EROFS;

	dout("link in dir %p old_dentry %p dentry %p\n", dir,
	     old_dentry, dentry);
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
	if (IS_ERR(req)) {
		d_drop(dentry);
		return PTR_ERR(req);
	}
	req->r_dentry = dget(dentry);
	req->r_num_caps = 2;
	req->r_old_dentry = dget(old_dentry);
	req->r_locked_dir = dir;
	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	/* release LINK_SHARED on source inode (mds will lock it) */
	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
	err = ceph_mdsc_do_request(mdsc, dir, req);
	if (err) {
		d_drop(dentry);
	} else if (!req->r_reply_info.head->is_dentry) {
		ihold(d_inode(old_dentry));
		d_instantiate(dentry, d_inode(old_dentry));
	}
	ceph_mdsc_put_request(req);
	return err;
}
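/*
 * Note (added commentary, not from the original source): the
 * r_dentry_drop/r_dentry_unless pairing set by each directory-modifying
 * op above makes the request carry a release of the dentry's
 * CEPH_CAP_FILE_SHARED lease, roughly "drop SHARED unless we hold
 * EXCL", saving the MDS a separate lease revocation round trip.
 */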
/*
 * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps.  If it
 * looks like the link count will hit 0, drop any other caps (other
 * than PIN) we don't specifically want (due to the file still being
 * open).
 */
static int drop_caps_for_unlink(struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;

	spin_lock(&ci->i_ceph_lock);
	if (inode->i_nlink == 1) {
		drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
		ci->i_ceph_flags |= CEPH_I_NODELAY;
	}
	spin_unlock(&ci->i_ceph_lock);
	return drop;
}
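/*
 * Illustrative sketch (added commentary, not from the original source):
 * with i_nlink == 1 and, say, __ceph_caps_wanted(ci) ==
 * CEPH_CAP_FILE_CACHE for a still-open file, the mask built above
 * works out to
 *
 *	(CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)
 *		| ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_PIN)
 *
 * i.e. offer to drop everything except the caps the open file still
 * wants and the PIN cap.
 */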
986 */ 987 988 d_move(old_dentry, new_dentry); 989 990 /* ensure target dentry is invalidated, despite 991 rehashing bug in vfs_rename_dir */ 992 ceph_invalidate_dentry_lease(new_dentry); 993 994 /* d_move screws up sibling dentries' offsets */ 995 ceph_dir_clear_complete(old_dir); 996 ceph_dir_clear_complete(new_dir); 997 998 } 999 ceph_mdsc_put_request(req); 1000 return err; 1001 } 1002 1003 /* 1004 * Ensure a dentry lease will no longer revalidate. 1005 */ 1006 void ceph_invalidate_dentry_lease(struct dentry *dentry) 1007 { 1008 spin_lock(&dentry->d_lock); 1009 dentry->d_time = jiffies; 1010 ceph_dentry(dentry)->lease_shared_gen = 0; 1011 spin_unlock(&dentry->d_lock); 1012 } 1013 1014 /* 1015 * Check if dentry lease is valid. If not, delete the lease. Try to 1016 * renew if the least is more than half up. 1017 */ 1018 static int dentry_lease_is_valid(struct dentry *dentry) 1019 { 1020 struct ceph_dentry_info *di; 1021 struct ceph_mds_session *s; 1022 int valid = 0; 1023 u32 gen; 1024 unsigned long ttl; 1025 struct ceph_mds_session *session = NULL; 1026 struct inode *dir = NULL; 1027 u32 seq = 0; 1028 1029 spin_lock(&dentry->d_lock); 1030 di = ceph_dentry(dentry); 1031 if (di->lease_session) { 1032 s = di->lease_session; 1033 spin_lock(&s->s_gen_ttl_lock); 1034 gen = s->s_cap_gen; 1035 ttl = s->s_cap_ttl; 1036 spin_unlock(&s->s_gen_ttl_lock); 1037 1038 if (di->lease_gen == gen && 1039 time_before(jiffies, dentry->d_time) && 1040 time_before(jiffies, ttl)) { 1041 valid = 1; 1042 if (di->lease_renew_after && 1043 time_after(jiffies, di->lease_renew_after)) { 1044 /* we should renew */ 1045 dir = d_inode(dentry->d_parent); 1046 session = ceph_get_mds_session(s); 1047 seq = di->lease_seq; 1048 di->lease_renew_after = 0; 1049 di->lease_renew_from = jiffies; 1050 } 1051 } 1052 } 1053 spin_unlock(&dentry->d_lock); 1054 1055 if (session) { 1056 ceph_mdsc_lease_send_msg(session, dir, dentry, 1057 CEPH_MDS_LEASE_RENEW, seq); 1058 ceph_put_mds_session(session); 1059 } 1060 dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid); 1061 return valid; 1062 } 1063 1064 /* 1065 * Check if directory-wide content lease/cap is valid. 1066 */ 1067 static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) 1068 { 1069 struct ceph_inode_info *ci = ceph_inode(dir); 1070 struct ceph_dentry_info *di = ceph_dentry(dentry); 1071 int valid = 0; 1072 1073 spin_lock(&ci->i_ceph_lock); 1074 if (ci->i_shared_gen == di->lease_shared_gen) 1075 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); 1076 spin_unlock(&ci->i_ceph_lock); 1077 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", 1078 dir, (unsigned)ci->i_shared_gen, dentry, 1079 (unsigned)di->lease_shared_gen, valid); 1080 return valid; 1081 } 1082 1083 /* 1084 * Check if cached dentry can be trusted. 
1085 */ 1086 static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) 1087 { 1088 int valid = 0; 1089 struct inode *dir; 1090 1091 if (flags & LOOKUP_RCU) 1092 return -ECHILD; 1093 1094 dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry, 1095 dentry, d_inode(dentry), ceph_dentry(dentry)->offset); 1096 1097 dir = ceph_get_dentry_parent_inode(dentry); 1098 1099 /* always trust cached snapped dentries, snapdir dentry */ 1100 if (ceph_snap(dir) != CEPH_NOSNAP) { 1101 dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry, 1102 dentry, d_inode(dentry)); 1103 valid = 1; 1104 } else if (d_really_is_positive(dentry) && 1105 ceph_snap(d_inode(dentry)) == CEPH_SNAPDIR) { 1106 valid = 1; 1107 } else if (dentry_lease_is_valid(dentry) || 1108 dir_lease_is_valid(dir, dentry)) { 1109 if (d_really_is_positive(dentry)) 1110 valid = ceph_is_any_caps(d_inode(dentry)); 1111 else 1112 valid = 1; 1113 } 1114 1115 dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid"); 1116 if (valid) { 1117 ceph_dentry_lru_touch(dentry); 1118 } else { 1119 ceph_dir_clear_complete(dir); 1120 } 1121 iput(dir); 1122 return valid; 1123 } 1124 1125 /* 1126 * Release our ceph_dentry_info. 1127 */ 1128 static void ceph_d_release(struct dentry *dentry) 1129 { 1130 struct ceph_dentry_info *di = ceph_dentry(dentry); 1131 1132 dout("d_release %p\n", dentry); 1133 ceph_dentry_lru_del(dentry); 1134 if (di->lease_session) 1135 ceph_put_mds_session(di->lease_session); 1136 kmem_cache_free(ceph_dentry_cachep, di); 1137 dentry->d_fsdata = NULL; 1138 } 1139 1140 static int ceph_snapdir_d_revalidate(struct dentry *dentry, 1141 unsigned int flags) 1142 { 1143 /* 1144 * Eventually, we'll want to revalidate snapped metadata 1145 * too... probably... 1146 */ 1147 return 1; 1148 } 1149 1150 /* 1151 * When the VFS prunes a dentry from the cache, we need to clear the 1152 * complete flag on the parent directory. 1153 * 1154 * Called under dentry->d_lock. 1155 */ 1156 static void ceph_d_prune(struct dentry *dentry) 1157 { 1158 dout("ceph_d_prune %p\n", dentry); 1159 1160 /* do we have a valid parent? */ 1161 if (IS_ROOT(dentry)) 1162 return; 1163 1164 /* if we are not hashed, we don't affect dir's completeness */ 1165 if (d_unhashed(dentry)) 1166 return; 1167 1168 /* 1169 * we hold d_lock, so d_parent is stable, and d_fsdata is never 1170 * cleared until d_release 1171 */ 1172 ceph_dir_clear_complete(d_inode(dentry->d_parent)); 1173 } 1174 1175 /* 1176 * read() on a dir. This weird interface hack only works if mounted 1177 * with '-o dirstat'. 
1178 */ 1179 static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, 1180 loff_t *ppos) 1181 { 1182 struct ceph_file_info *cf = file->private_data; 1183 struct inode *inode = file_inode(file); 1184 struct ceph_inode_info *ci = ceph_inode(inode); 1185 int left; 1186 const int bufsize = 1024; 1187 1188 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) 1189 return -EISDIR; 1190 1191 if (!cf->dir_info) { 1192 cf->dir_info = kmalloc(bufsize, GFP_NOFS); 1193 if (!cf->dir_info) 1194 return -ENOMEM; 1195 cf->dir_info_len = 1196 snprintf(cf->dir_info, bufsize, 1197 "entries: %20lld\n" 1198 " files: %20lld\n" 1199 " subdirs: %20lld\n" 1200 "rentries: %20lld\n" 1201 " rfiles: %20lld\n" 1202 " rsubdirs: %20lld\n" 1203 "rbytes: %20lld\n" 1204 "rctime: %10ld.%09ld\n", 1205 ci->i_files + ci->i_subdirs, 1206 ci->i_files, 1207 ci->i_subdirs, 1208 ci->i_rfiles + ci->i_rsubdirs, 1209 ci->i_rfiles, 1210 ci->i_rsubdirs, 1211 ci->i_rbytes, 1212 (long)ci->i_rctime.tv_sec, 1213 (long)ci->i_rctime.tv_nsec); 1214 } 1215 1216 if (*ppos >= cf->dir_info_len) 1217 return 0; 1218 size = min_t(unsigned, size, cf->dir_info_len-*ppos); 1219 left = copy_to_user(buf, cf->dir_info + *ppos, size); 1220 if (left == size) 1221 return -EFAULT; 1222 *ppos += (size - left); 1223 return size - left; 1224 } 1225 1226 /* 1227 * an fsync() on a dir will wait for any uncommitted directory 1228 * operations to commit. 1229 */ 1230 static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, 1231 int datasync) 1232 { 1233 struct inode *inode = file_inode(file); 1234 struct ceph_inode_info *ci = ceph_inode(inode); 1235 struct list_head *head = &ci->i_unsafe_dirops; 1236 struct ceph_mds_request *req; 1237 u64 last_tid; 1238 int ret = 0; 1239 1240 dout("dir_fsync %p\n", inode); 1241 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 1242 if (ret) 1243 return ret; 1244 mutex_lock(&inode->i_mutex); 1245 1246 spin_lock(&ci->i_unsafe_lock); 1247 if (list_empty(head)) 1248 goto out; 1249 1250 req = list_entry(head->prev, 1251 struct ceph_mds_request, r_unsafe_dir_item); 1252 last_tid = req->r_tid; 1253 1254 do { 1255 ceph_mdsc_get_request(req); 1256 spin_unlock(&ci->i_unsafe_lock); 1257 1258 dout("dir_fsync %p wait on tid %llu (until %llu)\n", 1259 inode, req->r_tid, last_tid); 1260 if (req->r_timeout) { 1261 unsigned long time_left = wait_for_completion_timeout( 1262 &req->r_safe_completion, 1263 req->r_timeout); 1264 if (time_left > 0) 1265 ret = 0; 1266 else 1267 ret = -EIO; /* timed out */ 1268 } else { 1269 wait_for_completion(&req->r_safe_completion); 1270 } 1271 ceph_mdsc_put_request(req); 1272 1273 spin_lock(&ci->i_unsafe_lock); 1274 if (ret || list_empty(head)) 1275 break; 1276 req = list_entry(head->next, 1277 struct ceph_mds_request, r_unsafe_dir_item); 1278 } while (req->r_tid < last_tid); 1279 out: 1280 spin_unlock(&ci->i_unsafe_lock); 1281 mutex_unlock(&inode->i_mutex); 1282 1283 return ret; 1284 } 1285 1286 /* 1287 * We maintain a private dentry LRU. 1288 * 1289 * FIXME: this needs to be changed to a per-mds lru to be useful. 
1290 */ 1291 void ceph_dentry_lru_add(struct dentry *dn) 1292 { 1293 struct ceph_dentry_info *di = ceph_dentry(dn); 1294 struct ceph_mds_client *mdsc; 1295 1296 dout("dentry_lru_add %p %p '%pd'\n", di, dn, dn); 1297 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1298 spin_lock(&mdsc->dentry_lru_lock); 1299 list_add_tail(&di->lru, &mdsc->dentry_lru); 1300 mdsc->num_dentry++; 1301 spin_unlock(&mdsc->dentry_lru_lock); 1302 } 1303 1304 void ceph_dentry_lru_touch(struct dentry *dn) 1305 { 1306 struct ceph_dentry_info *di = ceph_dentry(dn); 1307 struct ceph_mds_client *mdsc; 1308 1309 dout("dentry_lru_touch %p %p '%pd' (offset %lld)\n", di, dn, dn, 1310 di->offset); 1311 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1312 spin_lock(&mdsc->dentry_lru_lock); 1313 list_move_tail(&di->lru, &mdsc->dentry_lru); 1314 spin_unlock(&mdsc->dentry_lru_lock); 1315 } 1316 1317 void ceph_dentry_lru_del(struct dentry *dn) 1318 { 1319 struct ceph_dentry_info *di = ceph_dentry(dn); 1320 struct ceph_mds_client *mdsc; 1321 1322 dout("dentry_lru_del %p %p '%pd'\n", di, dn, dn); 1323 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1324 spin_lock(&mdsc->dentry_lru_lock); 1325 list_del_init(&di->lru); 1326 mdsc->num_dentry--; 1327 spin_unlock(&mdsc->dentry_lru_lock); 1328 } 1329 1330 /* 1331 * Return name hash for a given dentry. This is dependent on 1332 * the parent directory's hash function. 1333 */ 1334 unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) 1335 { 1336 struct ceph_inode_info *dci = ceph_inode(dir); 1337 1338 switch (dci->i_dir_layout.dl_dir_hash) { 1339 case 0: /* for backward compat */ 1340 case CEPH_STR_HASH_LINUX: 1341 return dn->d_name.hash; 1342 1343 default: 1344 return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, 1345 dn->d_name.name, dn->d_name.len); 1346 } 1347 } 1348 1349 const struct file_operations ceph_dir_fops = { 1350 .read = ceph_read_dir, 1351 .iterate = ceph_readdir, 1352 .llseek = ceph_dir_llseek, 1353 .open = ceph_open, 1354 .release = ceph_release, 1355 .unlocked_ioctl = ceph_ioctl, 1356 .fsync = ceph_dir_fsync, 1357 }; 1358 1359 const struct file_operations ceph_snapdir_fops = { 1360 .iterate = ceph_readdir, 1361 .llseek = ceph_dir_llseek, 1362 .open = ceph_open, 1363 .release = ceph_release, 1364 }; 1365 1366 const struct inode_operations ceph_dir_iops = { 1367 .lookup = ceph_lookup, 1368 .permission = ceph_permission, 1369 .getattr = ceph_getattr, 1370 .setattr = ceph_setattr, 1371 .setxattr = ceph_setxattr, 1372 .getxattr = ceph_getxattr, 1373 .listxattr = ceph_listxattr, 1374 .removexattr = ceph_removexattr, 1375 .get_acl = ceph_get_acl, 1376 .set_acl = ceph_set_acl, 1377 .mknod = ceph_mknod, 1378 .symlink = ceph_symlink, 1379 .mkdir = ceph_mkdir, 1380 .link = ceph_link, 1381 .unlink = ceph_unlink, 1382 .rmdir = ceph_unlink, 1383 .rename = ceph_rename, 1384 .create = ceph_create, 1385 .atomic_open = ceph_atomic_open, 1386 }; 1387 1388 const struct inode_operations ceph_snapdir_iops = { 1389 .lookup = ceph_lookup, 1390 .permission = ceph_permission, 1391 .getattr = ceph_getattr, 1392 .mkdir = ceph_mkdir, 1393 .rmdir = ceph_unlink, 1394 .rename = ceph_rename, 1395 }; 1396 1397 const struct dentry_operations ceph_dentry_ops = { 1398 .d_revalidate = ceph_d_revalidate, 1399 .d_release = ceph_d_release, 1400 .d_prune = ceph_d_prune, 1401 }; 1402 1403 const struct dentry_operations ceph_snapdir_dentry_ops = { 1404 .d_revalidate = ceph_snapdir_d_revalidate, 1405 .d_release = ceph_d_release, 1406 }; 1407 1408 const struct dentry_operations ceph_snap_dentry_ops = { 1409 .d_release = 
const struct file_operations ceph_dir_fops = {
	.read = ceph_read_dir,
	.iterate = ceph_readdir,
	.llseek = ceph_dir_llseek,
	.open = ceph_open,
	.release = ceph_release,
	.unlocked_ioctl = ceph_ioctl,
	.fsync = ceph_dir_fsync,
};

const struct file_operations ceph_snapdir_fops = {
	.iterate = ceph_readdir,
	.llseek = ceph_dir_llseek,
	.open = ceph_open,
	.release = ceph_release,
};

const struct inode_operations ceph_dir_iops = {
	.lookup = ceph_lookup,
	.permission = ceph_permission,
	.getattr = ceph_getattr,
	.setattr = ceph_setattr,
	.setxattr = ceph_setxattr,
	.getxattr = ceph_getxattr,
	.listxattr = ceph_listxattr,
	.removexattr = ceph_removexattr,
	.get_acl = ceph_get_acl,
	.set_acl = ceph_set_acl,
	.mknod = ceph_mknod,
	.symlink = ceph_symlink,
	.mkdir = ceph_mkdir,
	.link = ceph_link,
	.unlink = ceph_unlink,
	.rmdir = ceph_unlink,
	.rename = ceph_rename,
	.create = ceph_create,
	.atomic_open = ceph_atomic_open,
};

const struct inode_operations ceph_snapdir_iops = {
	.lookup = ceph_lookup,
	.permission = ceph_permission,
	.getattr = ceph_getattr,
	.mkdir = ceph_mkdir,
	.rmdir = ceph_unlink,
	.rename = ceph_rename,
};

const struct dentry_operations ceph_dentry_ops = {
	.d_revalidate = ceph_d_revalidate,
	.d_release = ceph_d_release,
	.d_prune = ceph_d_prune,
};

const struct dentry_operations ceph_snapdir_dentry_ops = {
	.d_revalidate = ceph_snapdir_d_revalidate,
	.d_release = ceph_d_release,
};

const struct dentry_operations ceph_snap_dentry_ops = {
	.d_release = ceph_d_release,
	.d_prune = ceph_d_prune,
};
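/*
 * Note (added commentary, not from the original source): which of the
 * three dentry_operations tables a dentry gets is chosen in
 * ceph_init_dentry() above, keyed on the parent's snap id:
 * ceph_dentry_ops for CEPH_NOSNAP, ceph_snapdir_dentry_ops for children
 * of the .snap dir, and ceph_snap_dentry_ops for snapshotted dentries.
 */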