1 #include <linux/ceph/ceph_debug.h> 2 3 #include <linux/spinlock.h> 4 #include <linux/fs_struct.h> 5 #include <linux/namei.h> 6 #include <linux/slab.h> 7 #include <linux/sched.h> 8 9 #include "super.h" 10 #include "mds_client.h" 11 12 /* 13 * Directory operations: readdir, lookup, create, link, unlink, 14 * rename, etc. 15 */ 16 17 /* 18 * Ceph MDS operations are specified in terms of a base ino and 19 * relative path. Thus, the client can specify an operation on a 20 * specific inode (e.g., a getattr due to fstat(2)), or as a path 21 * relative to, say, the root directory. 22 * 23 * Normally, we limit ourselves to strict inode ops (no path component) 24 * or dentry operations (a single path component relative to an ino). The 25 * exception to this is open_root_dentry(), which will open the mount 26 * point by name. 27 */ 28 29 const struct inode_operations ceph_dir_iops; 30 const struct file_operations ceph_dir_fops; 31 const struct dentry_operations ceph_dentry_ops; 32 33 /* 34 * Initialize ceph dentry state. 35 */ 36 int ceph_init_dentry(struct dentry *dentry) 37 { 38 struct ceph_dentry_info *di; 39 40 if (dentry->d_fsdata) 41 return 0; 42 43 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); 44 if (!di) 45 return -ENOMEM; /* oh well */ 46 47 spin_lock(&dentry->d_lock); 48 if (dentry->d_fsdata) { 49 /* lost a race */ 50 kmem_cache_free(ceph_dentry_cachep, di); 51 goto out_unlock; 52 } 53 54 if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ 55 ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) 56 d_set_d_op(dentry, &ceph_dentry_ops); 57 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) 58 d_set_d_op(dentry, &ceph_snapdir_dentry_ops); 59 else 60 d_set_d_op(dentry, &ceph_snap_dentry_ops); 61 62 di->dentry = dentry; 63 di->lease_session = NULL; 64 dentry->d_time = jiffies; 65 /* avoid reordering d_fsdata setup so that the check above is safe */ 66 smp_mb(); 67 dentry->d_fsdata = di; 68 ceph_dentry_lru_add(dentry); 69 out_unlock: 70 spin_unlock(&dentry->d_lock); 71 return 0; 72 } 73 74 struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry) 75 { 76 struct inode *inode = NULL; 77 78 if (!dentry) 79 return NULL; 80 81 spin_lock(&dentry->d_lock); 82 if (dentry->d_parent) { 83 inode = dentry->d_parent->d_inode; 84 ihold(inode); 85 } 86 spin_unlock(&dentry->d_lock); 87 return inode; 88 } 89 90 91 /* 92 * for readdir, we encode the directory frag and offset within that 93 * frag into f_pos. 94 */ 95 static unsigned fpos_frag(loff_t p) 96 { 97 return p >> 32; 98 } 99 static unsigned fpos_off(loff_t p) 100 { 101 return p & 0xffffffff; 102 } 103 104 /* 105 * When possible, we try to satisfy a readdir by peeking at the 106 * dcache. We make this work by carefully ordering dentries on 107 * d_u.d_child when we initially get results back from the MDS, and 108 * falling back to a "normal" sync readdir if any dentries in the dir 109 * are dropped. 110 * 111 * D_COMPLETE tells indicates we have all dentries in the dir. It is 112 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by 113 * the MDS if/when the directory is modified). 114 */ 115 static int __dcache_readdir(struct file *filp, 116 void *dirent, filldir_t filldir) 117 { 118 struct ceph_file_info *fi = filp->private_data; 119 struct dentry *parent = filp->f_dentry; 120 struct inode *dir = parent->d_inode; 121 struct list_head *p; 122 struct dentry *dentry, *last; 123 struct ceph_dentry_info *di; 124 int err = 0; 125 126 /* claim ref on last dentry we returned */ 127 last = fi->dentry; 128 fi->dentry = NULL; 129 130 dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos, 131 last); 132 133 spin_lock(&parent->d_lock); 134 135 /* start at beginning? */ 136 if (filp->f_pos == 2 || last == NULL || 137 filp->f_pos < ceph_dentry(last)->offset) { 138 if (list_empty(&parent->d_subdirs)) 139 goto out_unlock; 140 p = parent->d_subdirs.prev; 141 dout(" initial p %p/%p\n", p->prev, p->next); 142 } else { 143 p = last->d_u.d_child.prev; 144 } 145 146 more: 147 dentry = list_entry(p, struct dentry, d_u.d_child); 148 di = ceph_dentry(dentry); 149 while (1) { 150 dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, 151 d_unhashed(dentry) ? "!hashed" : "hashed", 152 parent->d_subdirs.prev, parent->d_subdirs.next); 153 if (p == &parent->d_subdirs) { 154 fi->flags |= CEPH_F_ATEND; 155 goto out_unlock; 156 } 157 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 158 if (!d_unhashed(dentry) && dentry->d_inode && 159 ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && 160 ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && 161 filp->f_pos <= di->offset) 162 break; 163 dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry, 164 dentry->d_name.len, dentry->d_name.name, di->offset, 165 filp->f_pos, d_unhashed(dentry) ? " unhashed" : "", 166 !dentry->d_inode ? " null" : ""); 167 spin_unlock(&dentry->d_lock); 168 p = p->prev; 169 dentry = list_entry(p, struct dentry, d_u.d_child); 170 di = ceph_dentry(dentry); 171 } 172 173 dget_dlock(dentry); 174 spin_unlock(&dentry->d_lock); 175 spin_unlock(&parent->d_lock); 176 177 dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, 178 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 179 filp->f_pos = di->offset; 180 err = filldir(dirent, dentry->d_name.name, 181 dentry->d_name.len, di->offset, 182 ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), 183 dentry->d_inode->i_mode >> 12); 184 185 if (last) { 186 if (err < 0) { 187 /* remember our position */ 188 fi->dentry = last; 189 fi->next_offset = di->offset; 190 } else { 191 dput(last); 192 } 193 } 194 last = dentry; 195 196 if (err < 0) 197 goto out; 198 199 filp->f_pos++; 200 201 /* make sure a dentry wasn't dropped while we didn't have parent lock */ 202 if (!ceph_dir_test_complete(dir)) { 203 dout(" lost D_COMPLETE on %p; falling back to mds\n", dir); 204 err = -EAGAIN; 205 goto out; 206 } 207 208 spin_lock(&parent->d_lock); 209 p = p->prev; /* advance to next dentry */ 210 goto more; 211 212 out_unlock: 213 spin_unlock(&parent->d_lock); 214 out: 215 if (last) 216 dput(last); 217 return err; 218 } 219 220 /* 221 * make note of the last dentry we read, so we can 222 * continue at the same lexicographical point, 223 * regardless of what dir changes take place on the 224 * server. 225 */ 226 static int note_last_dentry(struct ceph_file_info *fi, const char *name, 227 int len) 228 { 229 kfree(fi->last_name); 230 fi->last_name = kmalloc(len+1, GFP_NOFS); 231 if (!fi->last_name) 232 return -ENOMEM; 233 memcpy(fi->last_name, name, len); 234 fi->last_name[len] = 0; 235 dout("note_last_dentry '%s'\n", fi->last_name); 236 return 0; 237 } 238 239 static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) 240 { 241 struct ceph_file_info *fi = filp->private_data; 242 struct inode *inode = filp->f_dentry->d_inode; 243 struct ceph_inode_info *ci = ceph_inode(inode); 244 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 245 struct ceph_mds_client *mdsc = fsc->mdsc; 246 unsigned frag = fpos_frag(filp->f_pos); 247 int off = fpos_off(filp->f_pos); 248 int err; 249 u32 ftype; 250 struct ceph_mds_reply_info_parsed *rinfo; 251 const int max_entries = fsc->mount_options->max_readdir; 252 const int max_bytes = fsc->mount_options->max_readdir_bytes; 253 254 dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); 255 if (fi->flags & CEPH_F_ATEND) 256 return 0; 257 258 /* always start with . and .. */ 259 if (filp->f_pos == 0) { 260 /* note dir version at start of readdir so we can tell 261 * if any dentries get dropped */ 262 fi->dir_release_count = ci->i_release_count; 263 264 dout("readdir off 0 -> '.'\n"); 265 if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0), 266 ceph_translate_ino(inode->i_sb, inode->i_ino), 267 inode->i_mode >> 12) < 0) 268 return 0; 269 filp->f_pos = 1; 270 off = 1; 271 } 272 if (filp->f_pos == 1) { 273 ino_t ino = parent_ino(filp->f_dentry); 274 dout("readdir off 1 -> '..'\n"); 275 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1), 276 ceph_translate_ino(inode->i_sb, ino), 277 inode->i_mode >> 12) < 0) 278 return 0; 279 filp->f_pos = 2; 280 off = 2; 281 } 282 283 /* can we use the dcache? */ 284 spin_lock(&ci->i_ceph_lock); 285 if ((filp->f_pos == 2 || fi->dentry) && 286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 287 ceph_snap(inode) != CEPH_SNAPDIR && 288 ceph_dir_test_complete(inode) && 289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 290 spin_unlock(&ci->i_ceph_lock); 291 err = __dcache_readdir(filp, dirent, filldir); 292 if (err != -EAGAIN) 293 return err; 294 } else { 295 spin_unlock(&ci->i_ceph_lock); 296 } 297 if (fi->dentry) { 298 err = note_last_dentry(fi, fi->dentry->d_name.name, 299 fi->dentry->d_name.len); 300 if (err) 301 return err; 302 dput(fi->dentry); 303 fi->dentry = NULL; 304 } 305 306 /* proceed with a normal readdir */ 307 308 more: 309 /* do we have the correct frag content buffered? */ 310 if (fi->frag != frag || fi->last_readdir == NULL) { 311 struct ceph_mds_request *req; 312 int op = ceph_snap(inode) == CEPH_SNAPDIR ? 313 CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; 314 315 /* discard old result, if any */ 316 if (fi->last_readdir) { 317 ceph_mdsc_put_request(fi->last_readdir); 318 fi->last_readdir = NULL; 319 } 320 321 /* requery frag tree, as the frag topology may have changed */ 322 frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); 323 324 dout("readdir fetching %llx.%llx frag %x offset '%s'\n", 325 ceph_vinop(inode), frag, fi->last_name); 326 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 327 if (IS_ERR(req)) 328 return PTR_ERR(req); 329 req->r_inode = inode; 330 ihold(inode); 331 req->r_dentry = dget(filp->f_dentry); 332 /* hints to request -> mds selection code */ 333 req->r_direct_mode = USE_AUTH_MDS; 334 req->r_direct_hash = ceph_frag_value(frag); 335 req->r_direct_is_hash = true; 336 req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); 337 req->r_readdir_offset = fi->next_offset; 338 req->r_args.readdir.frag = cpu_to_le32(frag); 339 req->r_args.readdir.max_entries = cpu_to_le32(max_entries); 340 req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes); 341 req->r_num_caps = max_entries + 1; 342 err = ceph_mdsc_do_request(mdsc, NULL, req); 343 if (err < 0) { 344 ceph_mdsc_put_request(req); 345 return err; 346 } 347 dout("readdir got and parsed readdir result=%d" 348 " on frag %x, end=%d, complete=%d\n", err, frag, 349 (int)req->r_reply_info.dir_end, 350 (int)req->r_reply_info.dir_complete); 351 352 if (!req->r_did_prepopulate) { 353 dout("readdir !did_prepopulate"); 354 fi->dir_release_count--; /* preclude D_COMPLETE */ 355 } 356 357 /* note next offset and last dentry name */ 358 fi->offset = fi->next_offset; 359 fi->last_readdir = req; 360 361 if (req->r_reply_info.dir_end) { 362 kfree(fi->last_name); 363 fi->last_name = NULL; 364 if (ceph_frag_is_rightmost(frag)) 365 fi->next_offset = 2; 366 else 367 fi->next_offset = 0; 368 } else { 369 rinfo = &req->r_reply_info; 370 err = note_last_dentry(fi, 371 rinfo->dir_dname[rinfo->dir_nr-1], 372 rinfo->dir_dname_len[rinfo->dir_nr-1]); 373 if (err) 374 return err; 375 fi->next_offset += rinfo->dir_nr; 376 } 377 } 378 379 rinfo = &fi->last_readdir->r_reply_info; 380 dout("readdir frag %x num %d off %d chunkoff %d\n", frag, 381 rinfo->dir_nr, off, fi->offset); 382 while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) { 383 u64 pos = ceph_make_fpos(frag, off); 384 struct ceph_mds_reply_inode *in = 385 rinfo->dir_in[off - fi->offset].in; 386 struct ceph_vino vino; 387 ino_t ino; 388 389 dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", 390 off, off - fi->offset, rinfo->dir_nr, pos, 391 rinfo->dir_dname_len[off - fi->offset], 392 rinfo->dir_dname[off - fi->offset], in); 393 BUG_ON(!in); 394 ftype = le32_to_cpu(in->mode) >> 12; 395 vino.ino = le64_to_cpu(in->ino); 396 vino.snap = le64_to_cpu(in->snapid); 397 ino = ceph_vino_to_ino(vino); 398 if (filldir(dirent, 399 rinfo->dir_dname[off - fi->offset], 400 rinfo->dir_dname_len[off - fi->offset], 401 pos, 402 ceph_translate_ino(inode->i_sb, ino), ftype) < 0) { 403 dout("filldir stopping us...\n"); 404 return 0; 405 } 406 off++; 407 filp->f_pos = pos + 1; 408 } 409 410 if (fi->last_name) { 411 ceph_mdsc_put_request(fi->last_readdir); 412 fi->last_readdir = NULL; 413 goto more; 414 } 415 416 /* more frags? */ 417 if (!ceph_frag_is_rightmost(frag)) { 418 frag = ceph_frag_next(frag); 419 off = 0; 420 filp->f_pos = ceph_make_fpos(frag, off); 421 dout("readdir next frag is %x\n", frag); 422 goto more; 423 } 424 fi->flags |= CEPH_F_ATEND; 425 426 /* 427 * if dir_release_count still matches the dir, no dentries 428 * were released during the whole readdir, and we should have 429 * the complete dir contents in our cache. 430 */ 431 spin_lock(&ci->i_ceph_lock); 432 if (ci->i_release_count == fi->dir_release_count) { 433 ceph_dir_set_complete(inode); 434 ci->i_max_offset = filp->f_pos; 435 } 436 spin_unlock(&ci->i_ceph_lock); 437 438 dout("readdir %p filp %p done.\n", inode, filp); 439 return 0; 440 } 441 442 static void reset_readdir(struct ceph_file_info *fi) 443 { 444 if (fi->last_readdir) { 445 ceph_mdsc_put_request(fi->last_readdir); 446 fi->last_readdir = NULL; 447 } 448 kfree(fi->last_name); 449 fi->last_name = NULL; 450 fi->next_offset = 2; /* compensate for . and .. */ 451 if (fi->dentry) { 452 dput(fi->dentry); 453 fi->dentry = NULL; 454 } 455 fi->flags &= ~CEPH_F_ATEND; 456 } 457 458 static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) 459 { 460 struct ceph_file_info *fi = file->private_data; 461 struct inode *inode = file->f_mapping->host; 462 loff_t old_offset = offset; 463 loff_t retval; 464 465 mutex_lock(&inode->i_mutex); 466 retval = -EINVAL; 467 switch (origin) { 468 case SEEK_END: 469 offset += inode->i_size + 2; /* FIXME */ 470 break; 471 case SEEK_CUR: 472 offset += file->f_pos; 473 case SEEK_SET: 474 break; 475 default: 476 goto out; 477 } 478 479 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { 480 if (offset != file->f_pos) { 481 file->f_pos = offset; 482 file->f_version = 0; 483 fi->flags &= ~CEPH_F_ATEND; 484 } 485 retval = offset; 486 487 /* 488 * discard buffered readdir content on seekdir(0), or 489 * seek to new frag, or seek prior to current chunk. 490 */ 491 if (offset == 0 || 492 fpos_frag(offset) != fpos_frag(old_offset) || 493 fpos_off(offset) < fi->offset) { 494 dout("dir_llseek dropping %p content\n", file); 495 reset_readdir(fi); 496 } 497 498 /* bump dir_release_count if we did a forward seek */ 499 if (offset > old_offset) 500 fi->dir_release_count--; 501 } 502 out: 503 mutex_unlock(&inode->i_mutex); 504 return retval; 505 } 506 507 /* 508 * Handle lookups for the hidden .snap directory. 509 */ 510 int ceph_handle_snapdir(struct ceph_mds_request *req, 511 struct dentry *dentry, int err) 512 { 513 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 514 struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */ 515 516 /* .snap dir? */ 517 if (err == -ENOENT && 518 ceph_snap(parent) == CEPH_NOSNAP && 519 strcmp(dentry->d_name.name, 520 fsc->mount_options->snapdir_name) == 0) { 521 struct inode *inode = ceph_get_snapdir(parent); 522 dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", 523 dentry, dentry->d_name.len, dentry->d_name.name, inode); 524 BUG_ON(!d_unhashed(dentry)); 525 d_add(dentry, inode); 526 err = 0; 527 } 528 return err; 529 } 530 531 /* 532 * Figure out final result of a lookup/open request. 533 * 534 * Mainly, make sure we return the final req->r_dentry (if it already 535 * existed) in place of the original VFS-provided dentry when they 536 * differ. 537 * 538 * Gracefully handle the case where the MDS replies with -ENOENT and 539 * no trace (which it may do, at its discretion, e.g., if it doesn't 540 * care to issue a lease on the negative dentry). 541 */ 542 struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, 543 struct dentry *dentry, int err) 544 { 545 if (err == -ENOENT) { 546 /* no trace? */ 547 err = 0; 548 if (!req->r_reply_info.head->is_dentry) { 549 dout("ENOENT and no trace, dentry %p inode %p\n", 550 dentry, dentry->d_inode); 551 if (dentry->d_inode) { 552 d_drop(dentry); 553 err = -ENOENT; 554 } else { 555 d_add(dentry, NULL); 556 } 557 } 558 } 559 if (err) 560 dentry = ERR_PTR(err); 561 else if (dentry != req->r_dentry) 562 dentry = dget(req->r_dentry); /* we got spliced */ 563 else 564 dentry = NULL; 565 return dentry; 566 } 567 568 static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) 569 { 570 return ceph_ino(inode) == CEPH_INO_ROOT && 571 strncmp(dentry->d_name.name, ".ceph", 5) == 0; 572 } 573 574 /* 575 * Look up a single dir entry. If there is a lookup intent, inform 576 * the MDS so that it gets our 'caps wanted' value in a single op. 577 */ 578 static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, 579 unsigned int flags) 580 { 581 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 582 struct ceph_mds_client *mdsc = fsc->mdsc; 583 struct ceph_mds_request *req; 584 int op; 585 int err; 586 587 dout("lookup %p dentry %p '%.*s'\n", 588 dir, dentry, dentry->d_name.len, dentry->d_name.name); 589 590 if (dentry->d_name.len > NAME_MAX) 591 return ERR_PTR(-ENAMETOOLONG); 592 593 err = ceph_init_dentry(dentry); 594 if (err < 0) 595 return ERR_PTR(err); 596 597 /* can we conclude ENOENT locally? */ 598 if (dentry->d_inode == NULL) { 599 struct ceph_inode_info *ci = ceph_inode(dir); 600 struct ceph_dentry_info *di = ceph_dentry(dentry); 601 602 spin_lock(&ci->i_ceph_lock); 603 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); 604 if (strncmp(dentry->d_name.name, 605 fsc->mount_options->snapdir_name, 606 dentry->d_name.len) && 607 !is_root_ceph_dentry(dir, dentry) && 608 ceph_dir_test_complete(dir) && 609 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 610 spin_unlock(&ci->i_ceph_lock); 611 dout(" dir %p complete, -ENOENT\n", dir); 612 d_add(dentry, NULL); 613 di->lease_shared_gen = ci->i_shared_gen; 614 return NULL; 615 } 616 spin_unlock(&ci->i_ceph_lock); 617 } 618 619 op = ceph_snap(dir) == CEPH_SNAPDIR ? 620 CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; 621 req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); 622 if (IS_ERR(req)) 623 return ERR_CAST(req); 624 req->r_dentry = dget(dentry); 625 req->r_num_caps = 2; 626 /* we only need inode linkage */ 627 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 628 req->r_locked_dir = dir; 629 err = ceph_mdsc_do_request(mdsc, NULL, req); 630 err = ceph_handle_snapdir(req, dentry, err); 631 dentry = ceph_finish_lookup(req, dentry, err); 632 ceph_mdsc_put_request(req); /* will dput(dentry) */ 633 dout("lookup result=%p\n", dentry); 634 return dentry; 635 } 636 637 int ceph_atomic_open(struct inode *dir, struct dentry *dentry, 638 struct file *file, unsigned flags, umode_t mode, 639 int *opened) 640 { 641 int err; 642 struct dentry *res = NULL; 643 644 if (!(flags & O_CREAT)) { 645 if (dentry->d_name.len > NAME_MAX) 646 return -ENAMETOOLONG; 647 648 err = ceph_init_dentry(dentry); 649 if (err < 0) 650 return err; 651 652 return ceph_lookup_open(dir, dentry, file, flags, mode, opened); 653 } 654 655 if (d_unhashed(dentry)) { 656 res = ceph_lookup(dir, dentry, 0); 657 if (IS_ERR(res)) 658 return PTR_ERR(res); 659 660 if (res) 661 dentry = res; 662 } 663 664 /* We don't deal with positive dentries here */ 665 if (dentry->d_inode) 666 return finish_no_open(file, res); 667 668 *opened |= FILE_CREATED; 669 err = ceph_lookup_open(dir, dentry, file, flags, mode, opened); 670 dput(res); 671 672 return err; 673 } 674 675 /* 676 * If we do a create but get no trace back from the MDS, follow up with 677 * a lookup (the VFS expects us to link up the provided dentry). 678 */ 679 int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) 680 { 681 struct dentry *result = ceph_lookup(dir, dentry, 0); 682 683 if (result && !IS_ERR(result)) { 684 /* 685 * We created the item, then did a lookup, and found 686 * it was already linked to another inode we already 687 * had in our cache (and thus got spliced). Link our 688 * dentry to that inode, but don't hash it, just in 689 * case the VFS wants to dereference it. 690 */ 691 BUG_ON(!result->d_inode); 692 d_instantiate(dentry, result->d_inode); 693 return 0; 694 } 695 return PTR_ERR(result); 696 } 697 698 static int ceph_mknod(struct inode *dir, struct dentry *dentry, 699 umode_t mode, dev_t rdev) 700 { 701 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 702 struct ceph_mds_client *mdsc = fsc->mdsc; 703 struct ceph_mds_request *req; 704 int err; 705 706 if (ceph_snap(dir) != CEPH_NOSNAP) 707 return -EROFS; 708 709 dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n", 710 dir, dentry, mode, rdev); 711 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS); 712 if (IS_ERR(req)) { 713 d_drop(dentry); 714 return PTR_ERR(req); 715 } 716 req->r_dentry = dget(dentry); 717 req->r_num_caps = 2; 718 req->r_locked_dir = dir; 719 req->r_args.mknod.mode = cpu_to_le32(mode); 720 req->r_args.mknod.rdev = cpu_to_le32(rdev); 721 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 722 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 723 err = ceph_mdsc_do_request(mdsc, dir, req); 724 if (!err && !req->r_reply_info.head->is_dentry) 725 err = ceph_handle_notrace_create(dir, dentry); 726 ceph_mdsc_put_request(req); 727 if (err) 728 d_drop(dentry); 729 return err; 730 } 731 732 static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, 733 bool excl) 734 { 735 return ceph_mknod(dir, dentry, mode, 0); 736 } 737 738 static int ceph_symlink(struct inode *dir, struct dentry *dentry, 739 const char *dest) 740 { 741 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 742 struct ceph_mds_client *mdsc = fsc->mdsc; 743 struct ceph_mds_request *req; 744 int err; 745 746 if (ceph_snap(dir) != CEPH_NOSNAP) 747 return -EROFS; 748 749 dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); 750 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); 751 if (IS_ERR(req)) { 752 d_drop(dentry); 753 return PTR_ERR(req); 754 } 755 req->r_dentry = dget(dentry); 756 req->r_num_caps = 2; 757 req->r_path2 = kstrdup(dest, GFP_NOFS); 758 req->r_locked_dir = dir; 759 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 760 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 761 err = ceph_mdsc_do_request(mdsc, dir, req); 762 if (!err && !req->r_reply_info.head->is_dentry) 763 err = ceph_handle_notrace_create(dir, dentry); 764 ceph_mdsc_put_request(req); 765 if (err) 766 d_drop(dentry); 767 return err; 768 } 769 770 static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 771 { 772 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 773 struct ceph_mds_client *mdsc = fsc->mdsc; 774 struct ceph_mds_request *req; 775 int err = -EROFS; 776 int op; 777 778 if (ceph_snap(dir) == CEPH_SNAPDIR) { 779 /* mkdir .snap/foo is a MKSNAP */ 780 op = CEPH_MDS_OP_MKSNAP; 781 dout("mksnap dir %p snap '%.*s' dn %p\n", dir, 782 dentry->d_name.len, dentry->d_name.name, dentry); 783 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 784 dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode); 785 op = CEPH_MDS_OP_MKDIR; 786 } else { 787 goto out; 788 } 789 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 790 if (IS_ERR(req)) { 791 err = PTR_ERR(req); 792 goto out; 793 } 794 795 req->r_dentry = dget(dentry); 796 req->r_num_caps = 2; 797 req->r_locked_dir = dir; 798 req->r_args.mkdir.mode = cpu_to_le32(mode); 799 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 800 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 801 err = ceph_mdsc_do_request(mdsc, dir, req); 802 if (!err && !req->r_reply_info.head->is_dentry) 803 err = ceph_handle_notrace_create(dir, dentry); 804 ceph_mdsc_put_request(req); 805 out: 806 if (err < 0) 807 d_drop(dentry); 808 return err; 809 } 810 811 static int ceph_link(struct dentry *old_dentry, struct inode *dir, 812 struct dentry *dentry) 813 { 814 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 815 struct ceph_mds_client *mdsc = fsc->mdsc; 816 struct ceph_mds_request *req; 817 int err; 818 819 if (ceph_snap(dir) != CEPH_NOSNAP) 820 return -EROFS; 821 822 dout("link in dir %p old_dentry %p dentry %p\n", dir, 823 old_dentry, dentry); 824 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS); 825 if (IS_ERR(req)) { 826 d_drop(dentry); 827 return PTR_ERR(req); 828 } 829 req->r_dentry = dget(dentry); 830 req->r_num_caps = 2; 831 req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */ 832 req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); 833 req->r_locked_dir = dir; 834 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 835 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 836 err = ceph_mdsc_do_request(mdsc, dir, req); 837 if (err) { 838 d_drop(dentry); 839 } else if (!req->r_reply_info.head->is_dentry) { 840 ihold(old_dentry->d_inode); 841 d_instantiate(dentry, old_dentry->d_inode); 842 } 843 ceph_mdsc_put_request(req); 844 return err; 845 } 846 847 /* 848 * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it 849 * looks like the link count will hit 0, drop any other caps (other 850 * than PIN) we don't specifically want (due to the file still being 851 * open). 852 */ 853 static int drop_caps_for_unlink(struct inode *inode) 854 { 855 struct ceph_inode_info *ci = ceph_inode(inode); 856 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; 857 858 spin_lock(&ci->i_ceph_lock); 859 if (inode->i_nlink == 1) { 860 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); 861 ci->i_ceph_flags |= CEPH_I_NODELAY; 862 } 863 spin_unlock(&ci->i_ceph_lock); 864 return drop; 865 } 866 867 /* 868 * rmdir and unlink are differ only by the metadata op code 869 */ 870 static int ceph_unlink(struct inode *dir, struct dentry *dentry) 871 { 872 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 873 struct ceph_mds_client *mdsc = fsc->mdsc; 874 struct inode *inode = dentry->d_inode; 875 struct ceph_mds_request *req; 876 int err = -EROFS; 877 int op; 878 879 if (ceph_snap(dir) == CEPH_SNAPDIR) { 880 /* rmdir .snap/foo is RMSNAP */ 881 dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len, 882 dentry->d_name.name, dentry); 883 op = CEPH_MDS_OP_RMSNAP; 884 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 885 dout("unlink/rmdir dir %p dn %p inode %p\n", 886 dir, dentry, inode); 887 op = S_ISDIR(dentry->d_inode->i_mode) ? 888 CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; 889 } else 890 goto out; 891 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 892 if (IS_ERR(req)) { 893 err = PTR_ERR(req); 894 goto out; 895 } 896 req->r_dentry = dget(dentry); 897 req->r_num_caps = 2; 898 req->r_locked_dir = dir; 899 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 900 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 901 req->r_inode_drop = drop_caps_for_unlink(inode); 902 err = ceph_mdsc_do_request(mdsc, dir, req); 903 if (!err && !req->r_reply_info.head->is_dentry) 904 d_delete(dentry); 905 ceph_mdsc_put_request(req); 906 out: 907 return err; 908 } 909 910 static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, 911 struct inode *new_dir, struct dentry *new_dentry) 912 { 913 struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); 914 struct ceph_mds_client *mdsc = fsc->mdsc; 915 struct ceph_mds_request *req; 916 int err; 917 918 if (ceph_snap(old_dir) != ceph_snap(new_dir)) 919 return -EXDEV; 920 if (ceph_snap(old_dir) != CEPH_NOSNAP || 921 ceph_snap(new_dir) != CEPH_NOSNAP) 922 return -EROFS; 923 dout("rename dir %p dentry %p to dir %p dentry %p\n", 924 old_dir, old_dentry, new_dir, new_dentry); 925 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); 926 if (IS_ERR(req)) 927 return PTR_ERR(req); 928 req->r_dentry = dget(new_dentry); 929 req->r_num_caps = 2; 930 req->r_old_dentry = dget(old_dentry); 931 req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); 932 req->r_locked_dir = new_dir; 933 req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; 934 req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; 935 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 936 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 937 /* release LINK_RDCACHE on source inode (mds will lock it) */ 938 req->r_old_inode_drop = CEPH_CAP_LINK_SHARED; 939 if (new_dentry->d_inode) 940 req->r_inode_drop = drop_caps_for_unlink(new_dentry->d_inode); 941 err = ceph_mdsc_do_request(mdsc, old_dir, req); 942 if (!err && !req->r_reply_info.head->is_dentry) { 943 /* 944 * Normally d_move() is done by fill_trace (called by 945 * do_request, above). If there is no trace, we need 946 * to do it here. 947 */ 948 949 /* d_move screws up d_subdirs order */ 950 ceph_dir_clear_complete(new_dir); 951 952 d_move(old_dentry, new_dentry); 953 954 /* ensure target dentry is invalidated, despite 955 rehashing bug in vfs_rename_dir */ 956 ceph_invalidate_dentry_lease(new_dentry); 957 } 958 ceph_mdsc_put_request(req); 959 return err; 960 } 961 962 /* 963 * Ensure a dentry lease will no longer revalidate. 964 */ 965 void ceph_invalidate_dentry_lease(struct dentry *dentry) 966 { 967 spin_lock(&dentry->d_lock); 968 dentry->d_time = jiffies; 969 ceph_dentry(dentry)->lease_shared_gen = 0; 970 spin_unlock(&dentry->d_lock); 971 } 972 973 /* 974 * Check if dentry lease is valid. If not, delete the lease. Try to 975 * renew if the least is more than half up. 976 */ 977 static int dentry_lease_is_valid(struct dentry *dentry) 978 { 979 struct ceph_dentry_info *di; 980 struct ceph_mds_session *s; 981 int valid = 0; 982 u32 gen; 983 unsigned long ttl; 984 struct ceph_mds_session *session = NULL; 985 struct inode *dir = NULL; 986 u32 seq = 0; 987 988 spin_lock(&dentry->d_lock); 989 di = ceph_dentry(dentry); 990 if (di->lease_session) { 991 s = di->lease_session; 992 spin_lock(&s->s_gen_ttl_lock); 993 gen = s->s_cap_gen; 994 ttl = s->s_cap_ttl; 995 spin_unlock(&s->s_gen_ttl_lock); 996 997 if (di->lease_gen == gen && 998 time_before(jiffies, dentry->d_time) && 999 time_before(jiffies, ttl)) { 1000 valid = 1; 1001 if (di->lease_renew_after && 1002 time_after(jiffies, di->lease_renew_after)) { 1003 /* we should renew */ 1004 dir = dentry->d_parent->d_inode; 1005 session = ceph_get_mds_session(s); 1006 seq = di->lease_seq; 1007 di->lease_renew_after = 0; 1008 di->lease_renew_from = jiffies; 1009 } 1010 } 1011 } 1012 spin_unlock(&dentry->d_lock); 1013 1014 if (session) { 1015 ceph_mdsc_lease_send_msg(session, dir, dentry, 1016 CEPH_MDS_LEASE_RENEW, seq); 1017 ceph_put_mds_session(session); 1018 } 1019 dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid); 1020 return valid; 1021 } 1022 1023 /* 1024 * Check if directory-wide content lease/cap is valid. 1025 */ 1026 static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) 1027 { 1028 struct ceph_inode_info *ci = ceph_inode(dir); 1029 struct ceph_dentry_info *di = ceph_dentry(dentry); 1030 int valid = 0; 1031 1032 spin_lock(&ci->i_ceph_lock); 1033 if (ci->i_shared_gen == di->lease_shared_gen) 1034 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); 1035 spin_unlock(&ci->i_ceph_lock); 1036 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", 1037 dir, (unsigned)ci->i_shared_gen, dentry, 1038 (unsigned)di->lease_shared_gen, valid); 1039 return valid; 1040 } 1041 1042 /* 1043 * Check if cached dentry can be trusted. 1044 */ 1045 static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) 1046 { 1047 int valid = 0; 1048 struct inode *dir; 1049 1050 if (flags & LOOKUP_RCU) 1051 return -ECHILD; 1052 1053 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, 1054 dentry->d_name.len, dentry->d_name.name, dentry->d_inode, 1055 ceph_dentry(dentry)->offset); 1056 1057 dir = ceph_get_dentry_parent_inode(dentry); 1058 1059 /* always trust cached snapped dentries, snapdir dentry */ 1060 if (ceph_snap(dir) != CEPH_NOSNAP) { 1061 dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry, 1062 dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 1063 valid = 1; 1064 } else if (dentry->d_inode && 1065 ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) { 1066 valid = 1; 1067 } else if (dentry_lease_is_valid(dentry) || 1068 dir_lease_is_valid(dir, dentry)) { 1069 valid = 1; 1070 } 1071 1072 dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid"); 1073 if (valid) 1074 ceph_dentry_lru_touch(dentry); 1075 else 1076 d_drop(dentry); 1077 iput(dir); 1078 return valid; 1079 } 1080 1081 /* 1082 * Release our ceph_dentry_info. 1083 */ 1084 static void ceph_d_release(struct dentry *dentry) 1085 { 1086 struct ceph_dentry_info *di = ceph_dentry(dentry); 1087 1088 dout("d_release %p\n", dentry); 1089 ceph_dentry_lru_del(dentry); 1090 if (di->lease_session) 1091 ceph_put_mds_session(di->lease_session); 1092 kmem_cache_free(ceph_dentry_cachep, di); 1093 dentry->d_fsdata = NULL; 1094 } 1095 1096 static int ceph_snapdir_d_revalidate(struct dentry *dentry, 1097 unsigned int flags) 1098 { 1099 /* 1100 * Eventually, we'll want to revalidate snapped metadata 1101 * too... probably... 1102 */ 1103 return 1; 1104 } 1105 1106 /* 1107 * Set/clear/test dir complete flag on the dir's dentry. 1108 */ 1109 void ceph_dir_set_complete(struct inode *inode) 1110 { 1111 struct dentry *dentry = d_find_any_alias(inode); 1112 1113 if (dentry && ceph_dentry(dentry) && 1114 ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) { 1115 dout(" marking %p (%p) complete\n", inode, dentry); 1116 set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); 1117 } 1118 dput(dentry); 1119 } 1120 1121 void ceph_dir_clear_complete(struct inode *inode) 1122 { 1123 struct dentry *dentry = d_find_any_alias(inode); 1124 1125 if (dentry && ceph_dentry(dentry)) { 1126 dout(" marking %p (%p) complete\n", inode, dentry); 1127 set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); 1128 } 1129 dput(dentry); 1130 } 1131 1132 bool ceph_dir_test_complete(struct inode *inode) 1133 { 1134 struct dentry *dentry = d_find_any_alias(inode); 1135 1136 if (dentry && ceph_dentry(dentry)) { 1137 dout(" marking %p (%p) NOT complete\n", inode, dentry); 1138 clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); 1139 } 1140 dput(dentry); 1141 return false; 1142 } 1143 1144 /* 1145 * When the VFS prunes a dentry from the cache, we need to clear the 1146 * complete flag on the parent directory. 1147 * 1148 * Called under dentry->d_lock. 1149 */ 1150 static void ceph_d_prune(struct dentry *dentry) 1151 { 1152 struct ceph_dentry_info *di; 1153 1154 dout("ceph_d_prune %p\n", dentry); 1155 1156 /* do we have a valid parent? */ 1157 if (!dentry->d_parent || IS_ROOT(dentry)) 1158 return; 1159 1160 /* if we are not hashed, we don't affect D_COMPLETE */ 1161 if (d_unhashed(dentry)) 1162 return; 1163 1164 /* 1165 * we hold d_lock, so d_parent is stable, and d_fsdata is never 1166 * cleared until d_release 1167 */ 1168 di = ceph_dentry(dentry->d_parent); 1169 clear_bit(CEPH_D_COMPLETE, &di->flags); 1170 } 1171 1172 /* 1173 * read() on a dir. This weird interface hack only works if mounted 1174 * with '-o dirstat'. 1175 */ 1176 static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, 1177 loff_t *ppos) 1178 { 1179 struct ceph_file_info *cf = file->private_data; 1180 struct inode *inode = file->f_dentry->d_inode; 1181 struct ceph_inode_info *ci = ceph_inode(inode); 1182 int left; 1183 const int bufsize = 1024; 1184 1185 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) 1186 return -EISDIR; 1187 1188 if (!cf->dir_info) { 1189 cf->dir_info = kmalloc(bufsize, GFP_NOFS); 1190 if (!cf->dir_info) 1191 return -ENOMEM; 1192 cf->dir_info_len = 1193 snprintf(cf->dir_info, bufsize, 1194 "entries: %20lld\n" 1195 " files: %20lld\n" 1196 " subdirs: %20lld\n" 1197 "rentries: %20lld\n" 1198 " rfiles: %20lld\n" 1199 " rsubdirs: %20lld\n" 1200 "rbytes: %20lld\n" 1201 "rctime: %10ld.%09ld\n", 1202 ci->i_files + ci->i_subdirs, 1203 ci->i_files, 1204 ci->i_subdirs, 1205 ci->i_rfiles + ci->i_rsubdirs, 1206 ci->i_rfiles, 1207 ci->i_rsubdirs, 1208 ci->i_rbytes, 1209 (long)ci->i_rctime.tv_sec, 1210 (long)ci->i_rctime.tv_nsec); 1211 } 1212 1213 if (*ppos >= cf->dir_info_len) 1214 return 0; 1215 size = min_t(unsigned, size, cf->dir_info_len-*ppos); 1216 left = copy_to_user(buf, cf->dir_info + *ppos, size); 1217 if (left == size) 1218 return -EFAULT; 1219 *ppos += (size - left); 1220 return size - left; 1221 } 1222 1223 /* 1224 * an fsync() on a dir will wait for any uncommitted directory 1225 * operations to commit. 1226 */ 1227 static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, 1228 int datasync) 1229 { 1230 struct inode *inode = file->f_path.dentry->d_inode; 1231 struct ceph_inode_info *ci = ceph_inode(inode); 1232 struct list_head *head = &ci->i_unsafe_dirops; 1233 struct ceph_mds_request *req; 1234 u64 last_tid; 1235 int ret = 0; 1236 1237 dout("dir_fsync %p\n", inode); 1238 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 1239 if (ret) 1240 return ret; 1241 mutex_lock(&inode->i_mutex); 1242 1243 spin_lock(&ci->i_unsafe_lock); 1244 if (list_empty(head)) 1245 goto out; 1246 1247 req = list_entry(head->prev, 1248 struct ceph_mds_request, r_unsafe_dir_item); 1249 last_tid = req->r_tid; 1250 1251 do { 1252 ceph_mdsc_get_request(req); 1253 spin_unlock(&ci->i_unsafe_lock); 1254 1255 dout("dir_fsync %p wait on tid %llu (until %llu)\n", 1256 inode, req->r_tid, last_tid); 1257 if (req->r_timeout) { 1258 ret = wait_for_completion_timeout( 1259 &req->r_safe_completion, req->r_timeout); 1260 if (ret > 0) 1261 ret = 0; 1262 else if (ret == 0) 1263 ret = -EIO; /* timed out */ 1264 } else { 1265 wait_for_completion(&req->r_safe_completion); 1266 } 1267 ceph_mdsc_put_request(req); 1268 1269 spin_lock(&ci->i_unsafe_lock); 1270 if (ret || list_empty(head)) 1271 break; 1272 req = list_entry(head->next, 1273 struct ceph_mds_request, r_unsafe_dir_item); 1274 } while (req->r_tid < last_tid); 1275 out: 1276 spin_unlock(&ci->i_unsafe_lock); 1277 mutex_unlock(&inode->i_mutex); 1278 1279 return ret; 1280 } 1281 1282 /* 1283 * We maintain a private dentry LRU. 1284 * 1285 * FIXME: this needs to be changed to a per-mds lru to be useful. 1286 */ 1287 void ceph_dentry_lru_add(struct dentry *dn) 1288 { 1289 struct ceph_dentry_info *di = ceph_dentry(dn); 1290 struct ceph_mds_client *mdsc; 1291 1292 dout("dentry_lru_add %p %p '%.*s'\n", di, dn, 1293 dn->d_name.len, dn->d_name.name); 1294 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1295 spin_lock(&mdsc->dentry_lru_lock); 1296 list_add_tail(&di->lru, &mdsc->dentry_lru); 1297 mdsc->num_dentry++; 1298 spin_unlock(&mdsc->dentry_lru_lock); 1299 } 1300 1301 void ceph_dentry_lru_touch(struct dentry *dn) 1302 { 1303 struct ceph_dentry_info *di = ceph_dentry(dn); 1304 struct ceph_mds_client *mdsc; 1305 1306 dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, 1307 dn->d_name.len, dn->d_name.name, di->offset); 1308 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1309 spin_lock(&mdsc->dentry_lru_lock); 1310 list_move_tail(&di->lru, &mdsc->dentry_lru); 1311 spin_unlock(&mdsc->dentry_lru_lock); 1312 } 1313 1314 void ceph_dentry_lru_del(struct dentry *dn) 1315 { 1316 struct ceph_dentry_info *di = ceph_dentry(dn); 1317 struct ceph_mds_client *mdsc; 1318 1319 dout("dentry_lru_del %p %p '%.*s'\n", di, dn, 1320 dn->d_name.len, dn->d_name.name); 1321 mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; 1322 spin_lock(&mdsc->dentry_lru_lock); 1323 list_del_init(&di->lru); 1324 mdsc->num_dentry--; 1325 spin_unlock(&mdsc->dentry_lru_lock); 1326 } 1327 1328 /* 1329 * Return name hash for a given dentry. This is dependent on 1330 * the parent directory's hash function. 1331 */ 1332 unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) 1333 { 1334 struct ceph_inode_info *dci = ceph_inode(dir); 1335 1336 switch (dci->i_dir_layout.dl_dir_hash) { 1337 case 0: /* for backward compat */ 1338 case CEPH_STR_HASH_LINUX: 1339 return dn->d_name.hash; 1340 1341 default: 1342 return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, 1343 dn->d_name.name, dn->d_name.len); 1344 } 1345 } 1346 1347 const struct file_operations ceph_dir_fops = { 1348 .read = ceph_read_dir, 1349 .readdir = ceph_readdir, 1350 .llseek = ceph_dir_llseek, 1351 .open = ceph_open, 1352 .release = ceph_release, 1353 .unlocked_ioctl = ceph_ioctl, 1354 .fsync = ceph_dir_fsync, 1355 }; 1356 1357 const struct inode_operations ceph_dir_iops = { 1358 .lookup = ceph_lookup, 1359 .permission = ceph_permission, 1360 .getattr = ceph_getattr, 1361 .setattr = ceph_setattr, 1362 .setxattr = ceph_setxattr, 1363 .getxattr = ceph_getxattr, 1364 .listxattr = ceph_listxattr, 1365 .removexattr = ceph_removexattr, 1366 .mknod = ceph_mknod, 1367 .symlink = ceph_symlink, 1368 .mkdir = ceph_mkdir, 1369 .link = ceph_link, 1370 .unlink = ceph_unlink, 1371 .rmdir = ceph_unlink, 1372 .rename = ceph_rename, 1373 .create = ceph_create, 1374 .atomic_open = ceph_atomic_open, 1375 }; 1376 1377 const struct dentry_operations ceph_dentry_ops = { 1378 .d_revalidate = ceph_d_revalidate, 1379 .d_release = ceph_d_release, 1380 .d_prune = ceph_d_prune, 1381 }; 1382 1383 const struct dentry_operations ceph_snapdir_dentry_ops = { 1384 .d_revalidate = ceph_snapdir_d_revalidate, 1385 .d_release = ceph_d_release, 1386 }; 1387 1388 const struct dentry_operations ceph_snap_dentry_ops = { 1389 .d_release = ceph_d_release, 1390 .d_prune = ceph_d_prune, 1391 }; 1392