1 2 #include <linux/ceph/ceph_debug.h> 3 4 #include <linux/backing-dev.h> 5 #include <linux/ctype.h> 6 #include <linux/fs.h> 7 #include <linux/inet.h> 8 #include <linux/in6.h> 9 #include <linux/module.h> 10 #include <linux/mount.h> 11 #include <linux/parser.h> 12 #include <linux/sched.h> 13 #include <linux/seq_file.h> 14 #include <linux/slab.h> 15 #include <linux/statfs.h> 16 #include <linux/string.h> 17 18 #include "super.h" 19 #include "mds_client.h" 20 #include "cache.h" 21 22 #include <linux/ceph/ceph_features.h> 23 #include <linux/ceph/decode.h> 24 #include <linux/ceph/mon_client.h> 25 #include <linux/ceph/auth.h> 26 #include <linux/ceph/debugfs.h> 27 28 /* 29 * Ceph superblock operations 30 * 31 * Handle the basics of mounting, unmounting. 32 */ 33 34 /* 35 * super ops 36 */ 37 static void ceph_put_super(struct super_block *s) 38 { 39 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 40 41 dout("put_super\n"); 42 ceph_mdsc_close_sessions(fsc->mdsc); 43 } 44 45 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 46 { 47 struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode); 48 struct ceph_monmap *monmap = fsc->client->monc.monmap; 49 struct ceph_statfs st; 50 u64 fsid; 51 int err; 52 53 dout("statfs\n"); 54 err = ceph_monc_do_statfs(&fsc->client->monc, &st); 55 if (err < 0) 56 return err; 57 58 /* fill in kstatfs */ 59 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 60 61 /* 62 * express utilization in terms of large blocks to avoid 63 * overflow on 32-bit machines. 64 * 65 * NOTE: for the time being, we make bsize == frsize to humor 66 * not-yet-ancient versions of glibc that are broken. 67 * Someday, we will probably want to report a real block 68 * size... whatever that may mean for a network file system! 69 */ 70 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 71 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 72 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 73 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 74 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 75 76 buf->f_files = le64_to_cpu(st.num_objects); 77 buf->f_ffree = -1; 78 buf->f_namelen = NAME_MAX; 79 80 /* leave fsid little-endian, regardless of host endianness */ 81 fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); 82 buf->f_fsid.val[0] = fsid & 0xffffffff; 83 buf->f_fsid.val[1] = fsid >> 32; 84 85 return 0; 86 } 87 88 89 static int ceph_sync_fs(struct super_block *sb, int wait) 90 { 91 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 92 93 if (!wait) { 94 dout("sync_fs (non-blocking)\n"); 95 ceph_flush_dirty_caps(fsc->mdsc); 96 dout("sync_fs (non-blocking) done\n"); 97 return 0; 98 } 99 100 dout("sync_fs (blocking)\n"); 101 ceph_osdc_sync(&fsc->client->osdc); 102 ceph_mdsc_sync(fsc->mdsc); 103 dout("sync_fs (blocking) done\n"); 104 return 0; 105 } 106 107 /* 108 * mount options 109 */ 110 enum { 111 Opt_wsize, 112 Opt_rsize, 113 Opt_rasize, 114 Opt_caps_wanted_delay_min, 115 Opt_caps_wanted_delay_max, 116 Opt_cap_release_safety, 117 Opt_readdir_max_entries, 118 Opt_readdir_max_bytes, 119 Opt_congestion_kb, 120 Opt_last_int, 121 /* int args above */ 122 Opt_snapdirname, 123 Opt_last_string, 124 /* string args above */ 125 Opt_dirstat, 126 Opt_nodirstat, 127 Opt_rbytes, 128 Opt_norbytes, 129 Opt_asyncreaddir, 130 Opt_noasyncreaddir, 131 Opt_dcache, 132 Opt_nodcache, 133 Opt_ino32, 134 Opt_noino32, 135 Opt_fscache, 136 Opt_nofscache, 137 #ifdef CONFIG_CEPH_FS_POSIX_ACL 138 Opt_acl, 139 #endif 140 Opt_noacl 141 }; 142 143 static match_table_t fsopt_tokens = { 144 {Opt_wsize, "wsize=%d"}, 145 {Opt_rsize, "rsize=%d"}, 146 {Opt_rasize, "rasize=%d"}, 147 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 148 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 149 {Opt_cap_release_safety, "cap_release_safety=%d"}, 150 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 151 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, 152 {Opt_congestion_kb, "write_congestion_kb=%d"}, 153 /* int args above */ 154 {Opt_snapdirname, "snapdirname=%s"}, 155 /* string args above */ 156 {Opt_dirstat, "dirstat"}, 157 {Opt_nodirstat, "nodirstat"}, 158 {Opt_rbytes, "rbytes"}, 159 {Opt_norbytes, "norbytes"}, 160 {Opt_asyncreaddir, "asyncreaddir"}, 161 {Opt_noasyncreaddir, "noasyncreaddir"}, 162 {Opt_dcache, "dcache"}, 163 {Opt_nodcache, "nodcache"}, 164 {Opt_ino32, "ino32"}, 165 {Opt_noino32, "noino32"}, 166 {Opt_fscache, "fsc"}, 167 {Opt_nofscache, "nofsc"}, 168 #ifdef CONFIG_CEPH_FS_POSIX_ACL 169 {Opt_acl, "acl"}, 170 #endif 171 {Opt_noacl, "noacl"}, 172 {-1, NULL} 173 }; 174 175 static int parse_fsopt_token(char *c, void *private) 176 { 177 struct ceph_mount_options *fsopt = private; 178 substring_t argstr[MAX_OPT_ARGS]; 179 int token, intval, ret; 180 181 token = match_token((char *)c, fsopt_tokens, argstr); 182 if (token < 0) 183 return -EINVAL; 184 185 if (token < Opt_last_int) { 186 ret = match_int(&argstr[0], &intval); 187 if (ret < 0) { 188 pr_err("bad mount option arg (not int) " 189 "at '%s'\n", c); 190 return ret; 191 } 192 dout("got int token %d val %d\n", token, intval); 193 } else if (token > Opt_last_int && token < Opt_last_string) { 194 dout("got string token %d val %s\n", token, 195 argstr[0].from); 196 } else { 197 dout("got token %d\n", token); 198 } 199 200 switch (token) { 201 case Opt_snapdirname: 202 kfree(fsopt->snapdir_name); 203 fsopt->snapdir_name = kstrndup(argstr[0].from, 204 argstr[0].to-argstr[0].from, 205 GFP_KERNEL); 206 if (!fsopt->snapdir_name) 207 return -ENOMEM; 208 break; 209 210 /* misc */ 211 case Opt_wsize: 212 fsopt->wsize = intval; 213 break; 214 case Opt_rsize: 215 fsopt->rsize = intval; 216 break; 217 case Opt_rasize: 218 fsopt->rasize = intval; 219 break; 220 case Opt_caps_wanted_delay_min: 221 fsopt->caps_wanted_delay_min = intval; 222 break; 223 case Opt_caps_wanted_delay_max: 224 fsopt->caps_wanted_delay_max = intval; 225 break; 226 case Opt_readdir_max_entries: 227 fsopt->max_readdir = intval; 228 break; 229 case Opt_readdir_max_bytes: 230 fsopt->max_readdir_bytes = intval; 231 break; 232 case Opt_congestion_kb: 233 fsopt->congestion_kb = intval; 234 break; 235 case Opt_dirstat: 236 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 237 break; 238 case Opt_nodirstat: 239 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 240 break; 241 case Opt_rbytes: 242 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 243 break; 244 case Opt_norbytes: 245 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 246 break; 247 case Opt_asyncreaddir: 248 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 249 break; 250 case Opt_noasyncreaddir: 251 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 252 break; 253 case Opt_dcache: 254 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 255 break; 256 case Opt_nodcache: 257 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 258 break; 259 case Opt_ino32: 260 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 261 break; 262 case Opt_noino32: 263 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 264 break; 265 case Opt_fscache: 266 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 267 break; 268 case Opt_nofscache: 269 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 270 break; 271 #ifdef CONFIG_CEPH_FS_POSIX_ACL 272 case Opt_acl: 273 fsopt->sb_flags |= MS_POSIXACL; 274 break; 275 #endif 276 case Opt_noacl: 277 fsopt->sb_flags &= ~MS_POSIXACL; 278 break; 279 default: 280 BUG_ON(token); 281 } 282 return 0; 283 } 284 285 static void destroy_mount_options(struct ceph_mount_options *args) 286 { 287 dout("destroy_mount_options %p\n", args); 288 kfree(args->snapdir_name); 289 kfree(args); 290 } 291 292 static int strcmp_null(const char *s1, const char *s2) 293 { 294 if (!s1 && !s2) 295 return 0; 296 if (s1 && !s2) 297 return -1; 298 if (!s1 && s2) 299 return 1; 300 return strcmp(s1, s2); 301 } 302 303 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 304 struct ceph_options *new_opt, 305 struct ceph_fs_client *fsc) 306 { 307 struct ceph_mount_options *fsopt1 = new_fsopt; 308 struct ceph_mount_options *fsopt2 = fsc->mount_options; 309 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 310 int ret; 311 312 ret = memcmp(fsopt1, fsopt2, ofs); 313 if (ret) 314 return ret; 315 316 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 317 if (ret) 318 return ret; 319 320 return ceph_compare_options(new_opt, fsc->client); 321 } 322 323 static int parse_mount_options(struct ceph_mount_options **pfsopt, 324 struct ceph_options **popt, 325 int flags, char *options, 326 const char *dev_name, 327 const char **path) 328 { 329 struct ceph_mount_options *fsopt; 330 const char *dev_name_end; 331 int err; 332 333 if (!dev_name || !*dev_name) 334 return -EINVAL; 335 336 fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); 337 if (!fsopt) 338 return -ENOMEM; 339 340 dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); 341 342 fsopt->sb_flags = flags; 343 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 344 345 fsopt->rsize = CEPH_RSIZE_DEFAULT; 346 fsopt->rasize = CEPH_RASIZE_DEFAULT; 347 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 348 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 349 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 350 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; 351 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 352 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 353 fsopt->congestion_kb = default_congestion_kb(); 354 355 /* 356 * Distinguish the server list from the path in "dev_name". 357 * Internally we do not include the leading '/' in the path. 358 * 359 * "dev_name" will look like: 360 * <server_spec>[,<server_spec>...]:[<path>] 361 * where 362 * <server_spec> is <ip>[:<port>] 363 * <path> is optional, but if present must begin with '/' 364 */ 365 dev_name_end = strchr(dev_name, '/'); 366 if (dev_name_end) { 367 /* skip over leading '/' for path */ 368 *path = dev_name_end + 1; 369 } else { 370 /* path is empty */ 371 dev_name_end = dev_name + strlen(dev_name); 372 *path = dev_name_end; 373 } 374 err = -EINVAL; 375 dev_name_end--; /* back up to ':' separator */ 376 if (dev_name_end < dev_name || *dev_name_end != ':') { 377 pr_err("device name is missing path (no : separator in %s)\n", 378 dev_name); 379 goto out; 380 } 381 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 382 dout("server path '%s'\n", *path); 383 384 *popt = ceph_parse_options(options, dev_name, dev_name_end, 385 parse_fsopt_token, (void *)fsopt); 386 if (IS_ERR(*popt)) { 387 err = PTR_ERR(*popt); 388 goto out; 389 } 390 391 /* success */ 392 *pfsopt = fsopt; 393 return 0; 394 395 out: 396 destroy_mount_options(fsopt); 397 return err; 398 } 399 400 /** 401 * ceph_show_options - Show mount options in /proc/mounts 402 * @m: seq_file to write to 403 * @root: root of that (sub)tree 404 */ 405 static int ceph_show_options(struct seq_file *m, struct dentry *root) 406 { 407 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 408 struct ceph_mount_options *fsopt = fsc->mount_options; 409 struct ceph_options *opt = fsc->client->options; 410 411 if (opt->flags & CEPH_OPT_FSID) 412 seq_printf(m, ",fsid=%pU", &opt->fsid); 413 if (opt->flags & CEPH_OPT_NOSHARE) 414 seq_puts(m, ",noshare"); 415 if (opt->flags & CEPH_OPT_NOCRC) 416 seq_puts(m, ",nocrc"); 417 if (opt->flags & CEPH_OPT_NOMSGAUTH) 418 seq_puts(m, ",nocephx_require_signatures"); 419 if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) 420 seq_puts(m, ",notcp_nodelay"); 421 422 if (opt->name) 423 seq_printf(m, ",name=%s", opt->name); 424 if (opt->key) 425 seq_puts(m, ",secret=<hidden>"); 426 427 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) 428 seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); 429 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) 430 seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); 431 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) 432 seq_printf(m, ",osdkeepalivetimeout=%d", 433 opt->osd_keepalive_timeout); 434 435 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 436 seq_puts(m, ",dirstat"); 437 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0) 438 seq_puts(m, ",norbytes"); 439 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 440 seq_puts(m, ",noasyncreaddir"); 441 if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE) 442 seq_puts(m, ",dcache"); 443 else 444 seq_puts(m, ",nodcache"); 445 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) 446 seq_puts(m, ",fsc"); 447 else 448 seq_puts(m, ",nofsc"); 449 450 #ifdef CONFIG_CEPH_FS_POSIX_ACL 451 if (fsopt->sb_flags & MS_POSIXACL) 452 seq_puts(m, ",acl"); 453 else 454 seq_puts(m, ",noacl"); 455 #endif 456 457 if (fsopt->wsize) 458 seq_printf(m, ",wsize=%d", fsopt->wsize); 459 if (fsopt->rsize != CEPH_RSIZE_DEFAULT) 460 seq_printf(m, ",rsize=%d", fsopt->rsize); 461 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 462 seq_printf(m, ",rasize=%d", fsopt->rasize); 463 if (fsopt->congestion_kb != default_congestion_kb()) 464 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 465 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 466 seq_printf(m, ",caps_wanted_delay_min=%d", 467 fsopt->caps_wanted_delay_min); 468 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 469 seq_printf(m, ",caps_wanted_delay_max=%d", 470 fsopt->caps_wanted_delay_max); 471 if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) 472 seq_printf(m, ",cap_release_safety=%d", 473 fsopt->cap_release_safety); 474 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 475 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); 476 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 477 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); 478 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 479 seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); 480 return 0; 481 } 482 483 /* 484 * handle any mon messages the standard library doesn't understand. 485 * return error if we don't either. 486 */ 487 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 488 { 489 struct ceph_fs_client *fsc = client->private; 490 int type = le16_to_cpu(msg->hdr.type); 491 492 switch (type) { 493 case CEPH_MSG_MDS_MAP: 494 ceph_mdsc_handle_map(fsc->mdsc, msg); 495 return 0; 496 497 default: 498 return -1; 499 } 500 } 501 502 /* 503 * create a new fs client 504 */ 505 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 506 struct ceph_options *opt) 507 { 508 struct ceph_fs_client *fsc; 509 const u64 supported_features = 510 CEPH_FEATURE_FLOCK | 511 CEPH_FEATURE_DIRLAYOUTHASH | 512 CEPH_FEATURE_MDS_INLINE_DATA; 513 const u64 required_features = 0; 514 int page_count; 515 size_t size; 516 int err = -ENOMEM; 517 518 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 519 if (!fsc) 520 return ERR_PTR(-ENOMEM); 521 522 fsc->client = ceph_create_client(opt, fsc, supported_features, 523 required_features); 524 if (IS_ERR(fsc->client)) { 525 err = PTR_ERR(fsc->client); 526 goto fail; 527 } 528 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 529 fsc->client->monc.want_mdsmap = 1; 530 531 fsc->mount_options = fsopt; 532 533 fsc->sb = NULL; 534 fsc->mount_state = CEPH_MOUNT_MOUNTING; 535 536 atomic_long_set(&fsc->writeback_count, 0); 537 538 err = bdi_init(&fsc->backing_dev_info); 539 if (err < 0) 540 goto fail_client; 541 542 err = -ENOMEM; 543 /* 544 * The number of concurrent works can be high but they don't need 545 * to be processed in parallel, limit concurrency. 546 */ 547 fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); 548 if (fsc->wb_wq == NULL) 549 goto fail_bdi; 550 fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); 551 if (fsc->pg_inv_wq == NULL) 552 goto fail_wb_wq; 553 fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1); 554 if (fsc->trunc_wq == NULL) 555 goto fail_pg_inv_wq; 556 557 /* set up mempools */ 558 err = -ENOMEM; 559 page_count = fsc->mount_options->wsize >> PAGE_CACHE_SHIFT; 560 size = sizeof (struct page *) * (page_count ? page_count : 1); 561 fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); 562 if (!fsc->wb_pagevec_pool) 563 goto fail_trunc_wq; 564 565 /* setup fscache */ 566 if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) && 567 (ceph_fscache_register_fs(fsc) != 0)) 568 goto fail_fscache; 569 570 /* caps */ 571 fsc->min_caps = fsopt->max_readdir; 572 573 return fsc; 574 575 fail_fscache: 576 ceph_fscache_unregister_fs(fsc); 577 fail_trunc_wq: 578 destroy_workqueue(fsc->trunc_wq); 579 fail_pg_inv_wq: 580 destroy_workqueue(fsc->pg_inv_wq); 581 fail_wb_wq: 582 destroy_workqueue(fsc->wb_wq); 583 fail_bdi: 584 bdi_destroy(&fsc->backing_dev_info); 585 fail_client: 586 ceph_destroy_client(fsc->client); 587 fail: 588 kfree(fsc); 589 return ERR_PTR(err); 590 } 591 592 static void destroy_fs_client(struct ceph_fs_client *fsc) 593 { 594 dout("destroy_fs_client %p\n", fsc); 595 596 ceph_fscache_unregister_fs(fsc); 597 598 destroy_workqueue(fsc->wb_wq); 599 destroy_workqueue(fsc->pg_inv_wq); 600 destroy_workqueue(fsc->trunc_wq); 601 602 bdi_destroy(&fsc->backing_dev_info); 603 604 mempool_destroy(fsc->wb_pagevec_pool); 605 606 destroy_mount_options(fsc->mount_options); 607 608 ceph_fs_debugfs_cleanup(fsc); 609 610 ceph_destroy_client(fsc->client); 611 612 kfree(fsc); 613 dout("destroy_fs_client %p done\n", fsc); 614 } 615 616 /* 617 * caches 618 */ 619 struct kmem_cache *ceph_inode_cachep; 620 struct kmem_cache *ceph_cap_cachep; 621 struct kmem_cache *ceph_dentry_cachep; 622 struct kmem_cache *ceph_file_cachep; 623 624 static void ceph_inode_init_once(void *foo) 625 { 626 struct ceph_inode_info *ci = foo; 627 inode_init_once(&ci->vfs_inode); 628 } 629 630 static int __init init_caches(void) 631 { 632 int error = -ENOMEM; 633 634 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 635 sizeof(struct ceph_inode_info), 636 __alignof__(struct ceph_inode_info), 637 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), 638 ceph_inode_init_once); 639 if (ceph_inode_cachep == NULL) 640 return -ENOMEM; 641 642 ceph_cap_cachep = KMEM_CACHE(ceph_cap, 643 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 644 if (ceph_cap_cachep == NULL) 645 goto bad_cap; 646 647 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 648 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 649 if (ceph_dentry_cachep == NULL) 650 goto bad_dentry; 651 652 ceph_file_cachep = KMEM_CACHE(ceph_file_info, 653 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 654 if (ceph_file_cachep == NULL) 655 goto bad_file; 656 657 if ((error = ceph_fscache_register())) 658 goto bad_file; 659 660 return 0; 661 bad_file: 662 kmem_cache_destroy(ceph_dentry_cachep); 663 bad_dentry: 664 kmem_cache_destroy(ceph_cap_cachep); 665 bad_cap: 666 kmem_cache_destroy(ceph_inode_cachep); 667 return error; 668 } 669 670 static void destroy_caches(void) 671 { 672 /* 673 * Make sure all delayed rcu free inodes are flushed before we 674 * destroy cache. 675 */ 676 rcu_barrier(); 677 678 kmem_cache_destroy(ceph_inode_cachep); 679 kmem_cache_destroy(ceph_cap_cachep); 680 kmem_cache_destroy(ceph_dentry_cachep); 681 kmem_cache_destroy(ceph_file_cachep); 682 683 ceph_fscache_unregister(); 684 } 685 686 687 /* 688 * ceph_umount_begin - initiate forced umount. Tear down down the 689 * mount, skipping steps that may hang while waiting for server(s). 690 */ 691 static void ceph_umount_begin(struct super_block *sb) 692 { 693 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 694 695 dout("ceph_umount_begin - starting forced umount\n"); 696 if (!fsc) 697 return; 698 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 699 return; 700 } 701 702 static const struct super_operations ceph_super_ops = { 703 .alloc_inode = ceph_alloc_inode, 704 .destroy_inode = ceph_destroy_inode, 705 .write_inode = ceph_write_inode, 706 .drop_inode = ceph_drop_inode, 707 .sync_fs = ceph_sync_fs, 708 .put_super = ceph_put_super, 709 .show_options = ceph_show_options, 710 .statfs = ceph_statfs, 711 .umount_begin = ceph_umount_begin, 712 }; 713 714 /* 715 * Bootstrap mount by opening the root directory. Note the mount 716 * @started time from caller, and time out if this takes too long. 717 */ 718 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 719 const char *path, 720 unsigned long started) 721 { 722 struct ceph_mds_client *mdsc = fsc->mdsc; 723 struct ceph_mds_request *req = NULL; 724 int err; 725 struct dentry *root; 726 727 /* open dir */ 728 dout("open_root_inode opening '%s'\n", path); 729 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 730 if (IS_ERR(req)) 731 return ERR_CAST(req); 732 req->r_path1 = kstrdup(path, GFP_NOFS); 733 req->r_ino1.ino = CEPH_INO_ROOT; 734 req->r_ino1.snap = CEPH_NOSNAP; 735 req->r_started = started; 736 req->r_timeout = fsc->client->options->mount_timeout * HZ; 737 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 738 req->r_num_caps = 2; 739 err = ceph_mdsc_do_request(mdsc, NULL, req); 740 if (err == 0) { 741 struct inode *inode = req->r_target_inode; 742 req->r_target_inode = NULL; 743 dout("open_root_inode success\n"); 744 if (ceph_ino(inode) == CEPH_INO_ROOT && 745 fsc->sb->s_root == NULL) { 746 root = d_make_root(inode); 747 if (!root) { 748 root = ERR_PTR(-ENOMEM); 749 goto out; 750 } 751 } else { 752 root = d_obtain_root(inode); 753 } 754 ceph_init_dentry(root); 755 dout("open_root_inode success, root dentry is %p\n", root); 756 } else { 757 root = ERR_PTR(err); 758 } 759 out: 760 ceph_mdsc_put_request(req); 761 return root; 762 } 763 764 765 766 767 /* 768 * mount: join the ceph cluster, and open root directory. 769 */ 770 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, 771 const char *path) 772 { 773 int err; 774 unsigned long started = jiffies; /* note the start time */ 775 struct dentry *root; 776 int first = 0; /* first vfsmount for this super_block */ 777 778 dout("mount start\n"); 779 mutex_lock(&fsc->client->mount_mutex); 780 781 err = __ceph_open_session(fsc->client, started); 782 if (err < 0) 783 goto out; 784 785 dout("mount opening root\n"); 786 root = open_root_dentry(fsc, "", started); 787 if (IS_ERR(root)) { 788 err = PTR_ERR(root); 789 goto out; 790 } 791 if (fsc->sb->s_root) { 792 dput(root); 793 } else { 794 fsc->sb->s_root = root; 795 first = 1; 796 797 err = ceph_fs_debugfs_init(fsc); 798 if (err < 0) 799 goto fail; 800 } 801 802 if (path[0] == 0) { 803 dget(root); 804 } else { 805 dout("mount opening base mountpoint\n"); 806 root = open_root_dentry(fsc, path, started); 807 if (IS_ERR(root)) { 808 err = PTR_ERR(root); 809 goto fail; 810 } 811 } 812 813 fsc->mount_state = CEPH_MOUNT_MOUNTED; 814 dout("mount success\n"); 815 mutex_unlock(&fsc->client->mount_mutex); 816 return root; 817 818 out: 819 mutex_unlock(&fsc->client->mount_mutex); 820 return ERR_PTR(err); 821 822 fail: 823 if (first) { 824 dput(fsc->sb->s_root); 825 fsc->sb->s_root = NULL; 826 } 827 goto out; 828 } 829 830 static int ceph_set_super(struct super_block *s, void *data) 831 { 832 struct ceph_fs_client *fsc = data; 833 int ret; 834 835 dout("set_super %p data %p\n", s, data); 836 837 s->s_flags = fsc->mount_options->sb_flags; 838 s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ 839 840 s->s_xattr = ceph_xattr_handlers; 841 s->s_fs_info = fsc; 842 fsc->sb = s; 843 844 s->s_op = &ceph_super_ops; 845 s->s_export_op = &ceph_export_ops; 846 847 s->s_time_gran = 1000; /* 1000 ns == 1 us */ 848 849 ret = set_anon_super(s, NULL); /* what is that second arg for? */ 850 if (ret != 0) 851 goto fail; 852 853 return ret; 854 855 fail: 856 s->s_fs_info = NULL; 857 fsc->sb = NULL; 858 return ret; 859 } 860 861 /* 862 * share superblock if same fs AND options 863 */ 864 static int ceph_compare_super(struct super_block *sb, void *data) 865 { 866 struct ceph_fs_client *new = data; 867 struct ceph_mount_options *fsopt = new->mount_options; 868 struct ceph_options *opt = new->client->options; 869 struct ceph_fs_client *other = ceph_sb_to_client(sb); 870 871 dout("ceph_compare_super %p\n", sb); 872 873 if (compare_mount_options(fsopt, opt, other)) { 874 dout("monitor(s)/mount options don't match\n"); 875 return 0; 876 } 877 if ((opt->flags & CEPH_OPT_FSID) && 878 ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { 879 dout("fsid doesn't match\n"); 880 return 0; 881 } 882 if (fsopt->sb_flags != other->mount_options->sb_flags) { 883 dout("flags differ\n"); 884 return 0; 885 } 886 return 1; 887 } 888 889 /* 890 * construct our own bdi so we can control readahead, etc. 891 */ 892 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 893 894 static int ceph_register_bdi(struct super_block *sb, 895 struct ceph_fs_client *fsc) 896 { 897 int err; 898 899 /* set ra_pages based on rasize mount option? */ 900 if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE) 901 fsc->backing_dev_info.ra_pages = 902 (fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1) 903 >> PAGE_SHIFT; 904 else 905 fsc->backing_dev_info.ra_pages = 906 VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE; 907 908 err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", 909 atomic_long_inc_return(&bdi_seq)); 910 if (!err) 911 sb->s_bdi = &fsc->backing_dev_info; 912 return err; 913 } 914 915 static struct dentry *ceph_mount(struct file_system_type *fs_type, 916 int flags, const char *dev_name, void *data) 917 { 918 struct super_block *sb; 919 struct ceph_fs_client *fsc; 920 struct dentry *res; 921 int err; 922 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 923 const char *path = NULL; 924 struct ceph_mount_options *fsopt = NULL; 925 struct ceph_options *opt = NULL; 926 927 dout("ceph_mount\n"); 928 929 #ifdef CONFIG_CEPH_FS_POSIX_ACL 930 flags |= MS_POSIXACL; 931 #endif 932 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path); 933 if (err < 0) { 934 res = ERR_PTR(err); 935 goto out_final; 936 } 937 938 /* create client (which we may/may not use) */ 939 fsc = create_fs_client(fsopt, opt); 940 if (IS_ERR(fsc)) { 941 res = ERR_CAST(fsc); 942 destroy_mount_options(fsopt); 943 ceph_destroy_options(opt); 944 goto out_final; 945 } 946 947 err = ceph_mdsc_init(fsc); 948 if (err < 0) { 949 res = ERR_PTR(err); 950 goto out; 951 } 952 953 if (ceph_test_opt(fsc->client, NOSHARE)) 954 compare_super = NULL; 955 sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc); 956 if (IS_ERR(sb)) { 957 res = ERR_CAST(sb); 958 goto out; 959 } 960 961 if (ceph_sb_to_client(sb) != fsc) { 962 ceph_mdsc_destroy(fsc); 963 destroy_fs_client(fsc); 964 fsc = ceph_sb_to_client(sb); 965 dout("get_sb got existing client %p\n", fsc); 966 } else { 967 dout("get_sb using new client %p\n", fsc); 968 err = ceph_register_bdi(sb, fsc); 969 if (err < 0) { 970 res = ERR_PTR(err); 971 goto out_splat; 972 } 973 } 974 975 res = ceph_real_mount(fsc, path); 976 if (IS_ERR(res)) 977 goto out_splat; 978 dout("root %p inode %p ino %llx.%llx\n", res, 979 res->d_inode, ceph_vinop(res->d_inode)); 980 return res; 981 982 out_splat: 983 ceph_mdsc_close_sessions(fsc->mdsc); 984 deactivate_locked_super(sb); 985 goto out_final; 986 987 out: 988 ceph_mdsc_destroy(fsc); 989 destroy_fs_client(fsc); 990 out_final: 991 dout("ceph_mount fail %ld\n", PTR_ERR(res)); 992 return res; 993 } 994 995 static void ceph_kill_sb(struct super_block *s) 996 { 997 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 998 dev_t dev = s->s_dev; 999 1000 dout("kill_sb %p\n", s); 1001 1002 ceph_mdsc_pre_umount(fsc->mdsc); 1003 generic_shutdown_super(s); 1004 ceph_mdsc_destroy(fsc); 1005 1006 destroy_fs_client(fsc); 1007 free_anon_bdev(dev); 1008 } 1009 1010 static struct file_system_type ceph_fs_type = { 1011 .owner = THIS_MODULE, 1012 .name = "ceph", 1013 .mount = ceph_mount, 1014 .kill_sb = ceph_kill_sb, 1015 .fs_flags = FS_RENAME_DOES_D_MOVE, 1016 }; 1017 MODULE_ALIAS_FS("ceph"); 1018 1019 static int __init init_ceph(void) 1020 { 1021 int ret = init_caches(); 1022 if (ret) 1023 goto out; 1024 1025 ceph_flock_init(); 1026 ceph_xattr_init(); 1027 ret = ceph_snap_init(); 1028 if (ret) 1029 goto out_xattr; 1030 ret = register_filesystem(&ceph_fs_type); 1031 if (ret) 1032 goto out_snap; 1033 1034 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1035 1036 return 0; 1037 1038 out_snap: 1039 ceph_snap_exit(); 1040 out_xattr: 1041 ceph_xattr_exit(); 1042 destroy_caches(); 1043 out: 1044 return ret; 1045 } 1046 1047 static void __exit exit_ceph(void) 1048 { 1049 dout("exit_ceph\n"); 1050 unregister_filesystem(&ceph_fs_type); 1051 ceph_snap_exit(); 1052 ceph_xattr_exit(); 1053 destroy_caches(); 1054 } 1055 1056 module_init(init_ceph); 1057 module_exit(exit_ceph); 1058 1059 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1060 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1061 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1062 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1063 MODULE_LICENSE("GPL"); 1064