1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/parser.h> 13 #include <linux/sched.h> 14 #include <linux/seq_file.h> 15 #include <linux/slab.h> 16 #include <linux/statfs.h> 17 #include <linux/string.h> 18 19 #include "super.h" 20 #include "mds_client.h" 21 #include "cache.h" 22 23 #include <linux/ceph/ceph_features.h> 24 #include <linux/ceph/decode.h> 25 #include <linux/ceph/mon_client.h> 26 #include <linux/ceph/auth.h> 27 #include <linux/ceph/debugfs.h> 28 29 /* 30 * Ceph superblock operations 31 * 32 * Handle the basics of mounting, unmounting. 33 */ 34 35 /* 36 * super ops 37 */ 38 static void ceph_put_super(struct super_block *s) 39 { 40 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 41 42 dout("put_super\n"); 43 ceph_mdsc_close_sessions(fsc->mdsc); 44 } 45 46 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 47 { 48 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 49 struct ceph_mon_client *monc = &fsc->client->monc; 50 struct ceph_statfs st; 51 u64 fsid; 52 int err; 53 u64 data_pool; 54 55 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 56 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 57 } else { 58 data_pool = CEPH_NOPOOL; 59 } 60 61 dout("statfs\n"); 62 err = ceph_monc_do_statfs(monc, data_pool, &st); 63 if (err < 0) 64 return err; 65 66 /* fill in kstatfs */ 67 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 68 69 /* 70 * express utilization in terms of large blocks to avoid 71 * overflow on 32-bit machines. 72 * 73 * NOTE: for the time being, we make bsize == frsize to humor 74 * not-yet-ancient versions of glibc that are broken. 75 * Someday, we will probably want to report a real block 76 * size... whatever that may mean for a network file system! 77 */ 78 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 79 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 80 81 /* 82 * By default use root quota for stats; fallback to overall filesystem 83 * usage if using 'noquotadf' mount option or if the root dir doesn't 84 * have max_bytes quota set. 85 */ 86 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 87 !ceph_quota_update_statfs(fsc, buf)) { 88 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 89 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 90 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 91 } 92 93 buf->f_files = le64_to_cpu(st.num_objects); 94 buf->f_ffree = -1; 95 buf->f_namelen = NAME_MAX; 96 97 /* Must convert the fsid, for consistent values across arches */ 98 mutex_lock(&monc->mutex); 99 fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^ 100 le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1)); 101 mutex_unlock(&monc->mutex); 102 103 buf->f_fsid.val[0] = fsid & 0xffffffff; 104 buf->f_fsid.val[1] = fsid >> 32; 105 106 return 0; 107 } 108 109 110 static int ceph_sync_fs(struct super_block *sb, int wait) 111 { 112 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 113 114 if (!wait) { 115 dout("sync_fs (non-blocking)\n"); 116 ceph_flush_dirty_caps(fsc->mdsc); 117 dout("sync_fs (non-blocking) done\n"); 118 return 0; 119 } 120 121 dout("sync_fs (blocking)\n"); 122 ceph_osdc_sync(&fsc->client->osdc); 123 ceph_mdsc_sync(fsc->mdsc); 124 dout("sync_fs (blocking) done\n"); 125 return 0; 126 } 127 128 /* 129 * mount options 130 */ 131 enum { 132 Opt_wsize, 133 Opt_rsize, 134 Opt_rasize, 135 Opt_caps_wanted_delay_min, 136 Opt_caps_wanted_delay_max, 137 Opt_caps_max, 138 Opt_readdir_max_entries, 139 Opt_readdir_max_bytes, 140 Opt_congestion_kb, 141 Opt_last_int, 142 /* int args above */ 143 Opt_snapdirname, 144 Opt_mds_namespace, 145 Opt_fscache_uniq, 146 Opt_recover_session, 147 Opt_last_string, 148 /* string args above */ 149 Opt_dirstat, 150 Opt_nodirstat, 151 Opt_rbytes, 152 Opt_norbytes, 153 Opt_asyncreaddir, 154 Opt_noasyncreaddir, 155 Opt_dcache, 156 Opt_nodcache, 157 Opt_ino32, 158 Opt_noino32, 159 Opt_fscache, 160 Opt_nofscache, 161 Opt_poolperm, 162 Opt_nopoolperm, 163 Opt_require_active_mds, 164 Opt_norequire_active_mds, 165 #ifdef CONFIG_CEPH_FS_POSIX_ACL 166 Opt_acl, 167 #endif 168 Opt_noacl, 169 Opt_quotadf, 170 Opt_noquotadf, 171 Opt_copyfrom, 172 Opt_nocopyfrom, 173 }; 174 175 static match_table_t fsopt_tokens = { 176 {Opt_wsize, "wsize=%d"}, 177 {Opt_rsize, "rsize=%d"}, 178 {Opt_rasize, "rasize=%d"}, 179 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 180 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 181 {Opt_caps_max, "caps_max=%d"}, 182 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 183 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, 184 {Opt_congestion_kb, "write_congestion_kb=%d"}, 185 /* int args above */ 186 {Opt_snapdirname, "snapdirname=%s"}, 187 {Opt_mds_namespace, "mds_namespace=%s"}, 188 {Opt_recover_session, "recover_session=%s"}, 189 {Opt_fscache_uniq, "fsc=%s"}, 190 /* string args above */ 191 {Opt_dirstat, "dirstat"}, 192 {Opt_nodirstat, "nodirstat"}, 193 {Opt_rbytes, "rbytes"}, 194 {Opt_norbytes, "norbytes"}, 195 {Opt_asyncreaddir, "asyncreaddir"}, 196 {Opt_noasyncreaddir, "noasyncreaddir"}, 197 {Opt_dcache, "dcache"}, 198 {Opt_nodcache, "nodcache"}, 199 {Opt_ino32, "ino32"}, 200 {Opt_noino32, "noino32"}, 201 {Opt_fscache, "fsc"}, 202 {Opt_nofscache, "nofsc"}, 203 {Opt_poolperm, "poolperm"}, 204 {Opt_nopoolperm, "nopoolperm"}, 205 {Opt_require_active_mds, "require_active_mds"}, 206 {Opt_norequire_active_mds, "norequire_active_mds"}, 207 #ifdef CONFIG_CEPH_FS_POSIX_ACL 208 {Opt_acl, "acl"}, 209 #endif 210 {Opt_noacl, "noacl"}, 211 {Opt_quotadf, "quotadf"}, 212 {Opt_noquotadf, "noquotadf"}, 213 {Opt_copyfrom, "copyfrom"}, 214 {Opt_nocopyfrom, "nocopyfrom"}, 215 {-1, NULL} 216 }; 217 218 static int parse_fsopt_token(char *c, void *private) 219 { 220 struct ceph_mount_options *fsopt = private; 221 substring_t argstr[MAX_OPT_ARGS]; 222 int token, intval, ret; 223 224 token = match_token((char *)c, fsopt_tokens, argstr); 225 if (token < 0) 226 return -EINVAL; 227 228 if (token < Opt_last_int) { 229 ret = match_int(&argstr[0], &intval); 230 if (ret < 0) { 231 pr_err("bad option arg (not int) at '%s'\n", c); 232 return ret; 233 } 234 dout("got int token %d val %d\n", token, intval); 235 } else if (token > Opt_last_int && token < Opt_last_string) { 236 dout("got string token %d val %s\n", token, 237 argstr[0].from); 238 } else { 239 dout("got token %d\n", token); 240 } 241 242 switch (token) { 243 case Opt_snapdirname: 244 kfree(fsopt->snapdir_name); 245 fsopt->snapdir_name = kstrndup(argstr[0].from, 246 argstr[0].to-argstr[0].from, 247 GFP_KERNEL); 248 if (!fsopt->snapdir_name) 249 return -ENOMEM; 250 break; 251 case Opt_mds_namespace: 252 kfree(fsopt->mds_namespace); 253 fsopt->mds_namespace = kstrndup(argstr[0].from, 254 argstr[0].to-argstr[0].from, 255 GFP_KERNEL); 256 if (!fsopt->mds_namespace) 257 return -ENOMEM; 258 break; 259 case Opt_recover_session: 260 if (!strncmp(argstr[0].from, "no", 261 argstr[0].to - argstr[0].from)) { 262 fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; 263 } else if (!strncmp(argstr[0].from, "clean", 264 argstr[0].to - argstr[0].from)) { 265 fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; 266 } else { 267 return -EINVAL; 268 } 269 break; 270 case Opt_fscache_uniq: 271 #ifdef CONFIG_CEPH_FSCACHE 272 kfree(fsopt->fscache_uniq); 273 fsopt->fscache_uniq = kstrndup(argstr[0].from, 274 argstr[0].to-argstr[0].from, 275 GFP_KERNEL); 276 if (!fsopt->fscache_uniq) 277 return -ENOMEM; 278 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 279 break; 280 #else 281 pr_err("fscache support is disabled\n"); 282 return -EINVAL; 283 #endif 284 case Opt_wsize: 285 if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) 286 return -EINVAL; 287 fsopt->wsize = ALIGN(intval, PAGE_SIZE); 288 break; 289 case Opt_rsize: 290 if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_READ_SIZE) 291 return -EINVAL; 292 fsopt->rsize = ALIGN(intval, PAGE_SIZE); 293 break; 294 case Opt_rasize: 295 if (intval < 0) 296 return -EINVAL; 297 fsopt->rasize = ALIGN(intval, PAGE_SIZE); 298 break; 299 case Opt_caps_wanted_delay_min: 300 if (intval < 1) 301 return -EINVAL; 302 fsopt->caps_wanted_delay_min = intval; 303 break; 304 case Opt_caps_wanted_delay_max: 305 if (intval < 1) 306 return -EINVAL; 307 fsopt->caps_wanted_delay_max = intval; 308 break; 309 case Opt_caps_max: 310 if (intval < 0) 311 return -EINVAL; 312 fsopt->caps_max = intval; 313 break; 314 case Opt_readdir_max_entries: 315 if (intval < 1) 316 return -EINVAL; 317 fsopt->max_readdir = intval; 318 break; 319 case Opt_readdir_max_bytes: 320 if (intval < (int)PAGE_SIZE && intval != 0) 321 return -EINVAL; 322 fsopt->max_readdir_bytes = intval; 323 break; 324 case Opt_congestion_kb: 325 if (intval < 1024) /* at least 1M */ 326 return -EINVAL; 327 fsopt->congestion_kb = intval; 328 break; 329 case Opt_dirstat: 330 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 331 break; 332 case Opt_nodirstat: 333 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 334 break; 335 case Opt_rbytes: 336 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 337 break; 338 case Opt_norbytes: 339 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 340 break; 341 case Opt_asyncreaddir: 342 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 343 break; 344 case Opt_noasyncreaddir: 345 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 346 break; 347 case Opt_dcache: 348 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 349 break; 350 case Opt_nodcache: 351 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 352 break; 353 case Opt_ino32: 354 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 355 break; 356 case Opt_noino32: 357 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 358 break; 359 case Opt_fscache: 360 #ifdef CONFIG_CEPH_FSCACHE 361 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 362 kfree(fsopt->fscache_uniq); 363 fsopt->fscache_uniq = NULL; 364 break; 365 #else 366 pr_err("fscache support is disabled\n"); 367 return -EINVAL; 368 #endif 369 case Opt_nofscache: 370 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 371 kfree(fsopt->fscache_uniq); 372 fsopt->fscache_uniq = NULL; 373 break; 374 case Opt_poolperm: 375 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 376 break; 377 case Opt_nopoolperm: 378 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 379 break; 380 case Opt_require_active_mds: 381 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 382 break; 383 case Opt_norequire_active_mds: 384 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 385 break; 386 case Opt_quotadf: 387 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 388 break; 389 case Opt_noquotadf: 390 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 391 break; 392 case Opt_copyfrom: 393 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 394 break; 395 case Opt_nocopyfrom: 396 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 397 break; 398 #ifdef CONFIG_CEPH_FS_POSIX_ACL 399 case Opt_acl: 400 fsopt->sb_flags |= SB_POSIXACL; 401 break; 402 #endif 403 case Opt_noacl: 404 fsopt->sb_flags &= ~SB_POSIXACL; 405 break; 406 default: 407 BUG_ON(token); 408 } 409 return 0; 410 } 411 412 static void destroy_mount_options(struct ceph_mount_options *args) 413 { 414 dout("destroy_mount_options %p\n", args); 415 kfree(args->snapdir_name); 416 kfree(args->mds_namespace); 417 kfree(args->server_path); 418 kfree(args->fscache_uniq); 419 kfree(args); 420 } 421 422 static int strcmp_null(const char *s1, const char *s2) 423 { 424 if (!s1 && !s2) 425 return 0; 426 if (s1 && !s2) 427 return -1; 428 if (!s1 && s2) 429 return 1; 430 return strcmp(s1, s2); 431 } 432 433 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 434 struct ceph_options *new_opt, 435 struct ceph_fs_client *fsc) 436 { 437 struct ceph_mount_options *fsopt1 = new_fsopt; 438 struct ceph_mount_options *fsopt2 = fsc->mount_options; 439 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 440 int ret; 441 442 ret = memcmp(fsopt1, fsopt2, ofs); 443 if (ret) 444 return ret; 445 446 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 447 if (ret) 448 return ret; 449 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 450 if (ret) 451 return ret; 452 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 453 if (ret) 454 return ret; 455 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 456 if (ret) 457 return ret; 458 459 return ceph_compare_options(new_opt, fsc->client); 460 } 461 462 static int parse_mount_options(struct ceph_mount_options **pfsopt, 463 struct ceph_options **popt, 464 int flags, char *options, 465 const char *dev_name) 466 { 467 struct ceph_mount_options *fsopt; 468 const char *dev_name_end; 469 int err; 470 471 if (!dev_name || !*dev_name) 472 return -EINVAL; 473 474 fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); 475 if (!fsopt) 476 return -ENOMEM; 477 478 dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); 479 480 fsopt->sb_flags = flags; 481 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 482 483 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 484 fsopt->rsize = CEPH_MAX_READ_SIZE; 485 fsopt->rasize = CEPH_RASIZE_DEFAULT; 486 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 487 if (!fsopt->snapdir_name) { 488 err = -ENOMEM; 489 goto out; 490 } 491 492 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 493 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 494 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 495 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 496 fsopt->congestion_kb = default_congestion_kb(); 497 498 /* 499 * Distinguish the server list from the path in "dev_name". 500 * Internally we do not include the leading '/' in the path. 501 * 502 * "dev_name" will look like: 503 * <server_spec>[,<server_spec>...]:[<path>] 504 * where 505 * <server_spec> is <ip>[:<port>] 506 * <path> is optional, but if present must begin with '/' 507 */ 508 dev_name_end = strchr(dev_name, '/'); 509 if (dev_name_end) { 510 if (strlen(dev_name_end) > 1) { 511 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 512 if (!fsopt->server_path) { 513 err = -ENOMEM; 514 goto out; 515 } 516 } 517 } else { 518 dev_name_end = dev_name + strlen(dev_name); 519 } 520 err = -EINVAL; 521 dev_name_end--; /* back up to ':' separator */ 522 if (dev_name_end < dev_name || *dev_name_end != ':') { 523 pr_err("device name is missing path (no : separator in %s)\n", 524 dev_name); 525 goto out; 526 } 527 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 528 if (fsopt->server_path) 529 dout("server path '%s'\n", fsopt->server_path); 530 531 *popt = ceph_parse_options(options, dev_name, dev_name_end, 532 parse_fsopt_token, (void *)fsopt); 533 if (IS_ERR(*popt)) { 534 err = PTR_ERR(*popt); 535 goto out; 536 } 537 538 /* success */ 539 *pfsopt = fsopt; 540 return 0; 541 542 out: 543 destroy_mount_options(fsopt); 544 return err; 545 } 546 547 /** 548 * ceph_show_options - Show mount options in /proc/mounts 549 * @m: seq_file to write to 550 * @root: root of that (sub)tree 551 */ 552 static int ceph_show_options(struct seq_file *m, struct dentry *root) 553 { 554 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 555 struct ceph_mount_options *fsopt = fsc->mount_options; 556 size_t pos; 557 int ret; 558 559 /* a comma between MNT/MS and client options */ 560 seq_putc(m, ','); 561 pos = m->count; 562 563 ret = ceph_print_client_options(m, fsc->client, false); 564 if (ret) 565 return ret; 566 567 /* retract our comma if no client options */ 568 if (m->count == pos) 569 m->count--; 570 571 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 572 seq_puts(m, ",dirstat"); 573 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 574 seq_puts(m, ",rbytes"); 575 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 576 seq_puts(m, ",noasyncreaddir"); 577 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 578 seq_puts(m, ",nodcache"); 579 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 580 seq_puts(m, ",ino32"); 581 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 582 seq_show_option(m, "fsc", fsopt->fscache_uniq); 583 } 584 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 585 seq_puts(m, ",nopoolperm"); 586 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 587 seq_puts(m, ",noquotadf"); 588 589 #ifdef CONFIG_CEPH_FS_POSIX_ACL 590 if (fsopt->sb_flags & SB_POSIXACL) 591 seq_puts(m, ",acl"); 592 else 593 seq_puts(m, ",noacl"); 594 #endif 595 596 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 597 seq_puts(m, ",copyfrom"); 598 599 if (fsopt->mds_namespace) 600 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 601 602 if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) 603 seq_show_option(m, "recover_session", "clean"); 604 605 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 606 seq_printf(m, ",wsize=%d", fsopt->wsize); 607 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 608 seq_printf(m, ",rsize=%d", fsopt->rsize); 609 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 610 seq_printf(m, ",rasize=%d", fsopt->rasize); 611 if (fsopt->congestion_kb != default_congestion_kb()) 612 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 613 if (fsopt->caps_max) 614 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 615 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 616 seq_printf(m, ",caps_wanted_delay_min=%d", 617 fsopt->caps_wanted_delay_min); 618 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 619 seq_printf(m, ",caps_wanted_delay_max=%d", 620 fsopt->caps_wanted_delay_max); 621 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 622 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); 623 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 624 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); 625 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 626 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 627 628 return 0; 629 } 630 631 /* 632 * handle any mon messages the standard library doesn't understand. 633 * return error if we don't either. 634 */ 635 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 636 { 637 struct ceph_fs_client *fsc = client->private; 638 int type = le16_to_cpu(msg->hdr.type); 639 640 switch (type) { 641 case CEPH_MSG_MDS_MAP: 642 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 643 return 0; 644 case CEPH_MSG_FS_MAP_USER: 645 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 646 return 0; 647 default: 648 return -1; 649 } 650 } 651 652 /* 653 * create a new fs client 654 * 655 * Success or not, this function consumes @fsopt and @opt. 656 */ 657 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 658 struct ceph_options *opt) 659 { 660 struct ceph_fs_client *fsc; 661 int page_count; 662 size_t size; 663 int err; 664 665 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 666 if (!fsc) { 667 err = -ENOMEM; 668 goto fail; 669 } 670 671 fsc->client = ceph_create_client(opt, fsc); 672 if (IS_ERR(fsc->client)) { 673 err = PTR_ERR(fsc->client); 674 goto fail; 675 } 676 opt = NULL; /* fsc->client now owns this */ 677 678 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 679 ceph_set_opt(fsc->client, ABORT_ON_FULL); 680 681 if (!fsopt->mds_namespace) { 682 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 683 0, true); 684 } else { 685 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 686 0, false); 687 } 688 689 fsc->mount_options = fsopt; 690 691 fsc->sb = NULL; 692 fsc->mount_state = CEPH_MOUNT_MOUNTING; 693 fsc->filp_gen = 1; 694 695 atomic_long_set(&fsc->writeback_count, 0); 696 697 err = -ENOMEM; 698 /* 699 * The number of concurrent works can be high but they don't need 700 * to be processed in parallel, limit concurrency. 701 */ 702 fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); 703 if (!fsc->inode_wq) 704 goto fail_client; 705 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); 706 if (!fsc->cap_wq) 707 goto fail_inode_wq; 708 709 /* set up mempools */ 710 err = -ENOMEM; 711 page_count = fsc->mount_options->wsize >> PAGE_SHIFT; 712 size = sizeof (struct page *) * (page_count ? page_count : 1); 713 fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); 714 if (!fsc->wb_pagevec_pool) 715 goto fail_cap_wq; 716 717 return fsc; 718 719 fail_cap_wq: 720 destroy_workqueue(fsc->cap_wq); 721 fail_inode_wq: 722 destroy_workqueue(fsc->inode_wq); 723 fail_client: 724 ceph_destroy_client(fsc->client); 725 fail: 726 kfree(fsc); 727 if (opt) 728 ceph_destroy_options(opt); 729 destroy_mount_options(fsopt); 730 return ERR_PTR(err); 731 } 732 733 static void flush_fs_workqueues(struct ceph_fs_client *fsc) 734 { 735 flush_workqueue(fsc->inode_wq); 736 flush_workqueue(fsc->cap_wq); 737 } 738 739 static void destroy_fs_client(struct ceph_fs_client *fsc) 740 { 741 dout("destroy_fs_client %p\n", fsc); 742 743 ceph_mdsc_destroy(fsc); 744 destroy_workqueue(fsc->inode_wq); 745 destroy_workqueue(fsc->cap_wq); 746 747 mempool_destroy(fsc->wb_pagevec_pool); 748 749 destroy_mount_options(fsc->mount_options); 750 751 ceph_destroy_client(fsc->client); 752 753 kfree(fsc); 754 dout("destroy_fs_client %p done\n", fsc); 755 } 756 757 /* 758 * caches 759 */ 760 struct kmem_cache *ceph_inode_cachep; 761 struct kmem_cache *ceph_cap_cachep; 762 struct kmem_cache *ceph_cap_flush_cachep; 763 struct kmem_cache *ceph_dentry_cachep; 764 struct kmem_cache *ceph_file_cachep; 765 struct kmem_cache *ceph_dir_file_cachep; 766 767 static void ceph_inode_init_once(void *foo) 768 { 769 struct ceph_inode_info *ci = foo; 770 inode_init_once(&ci->vfs_inode); 771 } 772 773 static int __init init_caches(void) 774 { 775 int error = -ENOMEM; 776 777 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 778 sizeof(struct ceph_inode_info), 779 __alignof__(struct ceph_inode_info), 780 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 781 SLAB_ACCOUNT, ceph_inode_init_once); 782 if (!ceph_inode_cachep) 783 return -ENOMEM; 784 785 ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); 786 if (!ceph_cap_cachep) 787 goto bad_cap; 788 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 789 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 790 if (!ceph_cap_flush_cachep) 791 goto bad_cap_flush; 792 793 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 794 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 795 if (!ceph_dentry_cachep) 796 goto bad_dentry; 797 798 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 799 if (!ceph_file_cachep) 800 goto bad_file; 801 802 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); 803 if (!ceph_dir_file_cachep) 804 goto bad_dir_file; 805 806 error = ceph_fscache_register(); 807 if (error) 808 goto bad_fscache; 809 810 return 0; 811 812 bad_fscache: 813 kmem_cache_destroy(ceph_dir_file_cachep); 814 bad_dir_file: 815 kmem_cache_destroy(ceph_file_cachep); 816 bad_file: 817 kmem_cache_destroy(ceph_dentry_cachep); 818 bad_dentry: 819 kmem_cache_destroy(ceph_cap_flush_cachep); 820 bad_cap_flush: 821 kmem_cache_destroy(ceph_cap_cachep); 822 bad_cap: 823 kmem_cache_destroy(ceph_inode_cachep); 824 return error; 825 } 826 827 static void destroy_caches(void) 828 { 829 /* 830 * Make sure all delayed rcu free inodes are flushed before we 831 * destroy cache. 832 */ 833 rcu_barrier(); 834 835 kmem_cache_destroy(ceph_inode_cachep); 836 kmem_cache_destroy(ceph_cap_cachep); 837 kmem_cache_destroy(ceph_cap_flush_cachep); 838 kmem_cache_destroy(ceph_dentry_cachep); 839 kmem_cache_destroy(ceph_file_cachep); 840 kmem_cache_destroy(ceph_dir_file_cachep); 841 842 ceph_fscache_unregister(); 843 } 844 845 846 /* 847 * ceph_umount_begin - initiate forced umount. Tear down down the 848 * mount, skipping steps that may hang while waiting for server(s). 849 */ 850 static void ceph_umount_begin(struct super_block *sb) 851 { 852 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 853 854 dout("ceph_umount_begin - starting forced umount\n"); 855 if (!fsc) 856 return; 857 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 858 ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); 859 ceph_mdsc_force_umount(fsc->mdsc); 860 fsc->filp_gen++; // invalidate open files 861 } 862 863 static int ceph_remount(struct super_block *sb, int *flags, char *data) 864 { 865 sync_filesystem(sb); 866 return 0; 867 } 868 869 static const struct super_operations ceph_super_ops = { 870 .alloc_inode = ceph_alloc_inode, 871 .free_inode = ceph_free_inode, 872 .write_inode = ceph_write_inode, 873 .drop_inode = generic_delete_inode, 874 .evict_inode = ceph_evict_inode, 875 .sync_fs = ceph_sync_fs, 876 .put_super = ceph_put_super, 877 .remount_fs = ceph_remount, 878 .show_options = ceph_show_options, 879 .statfs = ceph_statfs, 880 .umount_begin = ceph_umount_begin, 881 }; 882 883 /* 884 * Bootstrap mount by opening the root directory. Note the mount 885 * @started time from caller, and time out if this takes too long. 886 */ 887 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 888 const char *path, 889 unsigned long started) 890 { 891 struct ceph_mds_client *mdsc = fsc->mdsc; 892 struct ceph_mds_request *req = NULL; 893 int err; 894 struct dentry *root; 895 896 /* open dir */ 897 dout("open_root_inode opening '%s'\n", path); 898 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 899 if (IS_ERR(req)) 900 return ERR_CAST(req); 901 req->r_path1 = kstrdup(path, GFP_NOFS); 902 if (!req->r_path1) { 903 root = ERR_PTR(-ENOMEM); 904 goto out; 905 } 906 907 req->r_ino1.ino = CEPH_INO_ROOT; 908 req->r_ino1.snap = CEPH_NOSNAP; 909 req->r_started = started; 910 req->r_timeout = fsc->client->options->mount_timeout; 911 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 912 req->r_num_caps = 2; 913 err = ceph_mdsc_do_request(mdsc, NULL, req); 914 if (err == 0) { 915 struct inode *inode = req->r_target_inode; 916 req->r_target_inode = NULL; 917 dout("open_root_inode success\n"); 918 root = d_make_root(inode); 919 if (!root) { 920 root = ERR_PTR(-ENOMEM); 921 goto out; 922 } 923 dout("open_root_inode success, root dentry is %p\n", root); 924 } else { 925 root = ERR_PTR(err); 926 } 927 out: 928 ceph_mdsc_put_request(req); 929 return root; 930 } 931 932 933 934 935 /* 936 * mount: join the ceph cluster, and open root directory. 937 */ 938 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) 939 { 940 int err; 941 unsigned long started = jiffies; /* note the start time */ 942 struct dentry *root; 943 944 dout("mount start %p\n", fsc); 945 mutex_lock(&fsc->client->mount_mutex); 946 947 if (!fsc->sb->s_root) { 948 const char *path; 949 err = __ceph_open_session(fsc->client, started); 950 if (err < 0) 951 goto out; 952 953 /* setup fscache */ 954 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 955 err = ceph_fscache_register_fs(fsc); 956 if (err < 0) 957 goto out; 958 } 959 960 if (!fsc->mount_options->server_path) { 961 path = ""; 962 dout("mount opening path \\t\n"); 963 } else { 964 path = fsc->mount_options->server_path + 1; 965 dout("mount opening path %s\n", path); 966 } 967 968 ceph_fs_debugfs_init(fsc); 969 970 root = open_root_dentry(fsc, path, started); 971 if (IS_ERR(root)) { 972 err = PTR_ERR(root); 973 goto out; 974 } 975 fsc->sb->s_root = dget(root); 976 } else { 977 root = dget(fsc->sb->s_root); 978 } 979 980 fsc->mount_state = CEPH_MOUNT_MOUNTED; 981 dout("mount success\n"); 982 mutex_unlock(&fsc->client->mount_mutex); 983 return root; 984 985 out: 986 mutex_unlock(&fsc->client->mount_mutex); 987 return ERR_PTR(err); 988 } 989 990 static int ceph_set_super(struct super_block *s, void *data) 991 { 992 struct ceph_fs_client *fsc = data; 993 int ret; 994 995 dout("set_super %p data %p\n", s, data); 996 997 s->s_flags = fsc->mount_options->sb_flags; 998 s->s_maxbytes = MAX_LFS_FILESIZE; 999 1000 s->s_xattr = ceph_xattr_handlers; 1001 s->s_fs_info = fsc; 1002 fsc->sb = s; 1003 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 1004 1005 s->s_op = &ceph_super_ops; 1006 s->s_d_op = &ceph_dentry_ops; 1007 s->s_export_op = &ceph_export_ops; 1008 1009 s->s_time_gran = 1; 1010 s->s_time_min = 0; 1011 s->s_time_max = U32_MAX; 1012 1013 ret = set_anon_super(s, NULL); /* what is that second arg for? */ 1014 if (ret != 0) 1015 goto fail; 1016 1017 return ret; 1018 1019 fail: 1020 s->s_fs_info = NULL; 1021 fsc->sb = NULL; 1022 return ret; 1023 } 1024 1025 /* 1026 * share superblock if same fs AND options 1027 */ 1028 static int ceph_compare_super(struct super_block *sb, void *data) 1029 { 1030 struct ceph_fs_client *new = data; 1031 struct ceph_mount_options *fsopt = new->mount_options; 1032 struct ceph_options *opt = new->client->options; 1033 struct ceph_fs_client *other = ceph_sb_to_client(sb); 1034 1035 dout("ceph_compare_super %p\n", sb); 1036 1037 if (compare_mount_options(fsopt, opt, other)) { 1038 dout("monitor(s)/mount options don't match\n"); 1039 return 0; 1040 } 1041 if ((opt->flags & CEPH_OPT_FSID) && 1042 ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { 1043 dout("fsid doesn't match\n"); 1044 return 0; 1045 } 1046 if (fsopt->sb_flags != other->mount_options->sb_flags) { 1047 dout("flags differ\n"); 1048 return 0; 1049 } 1050 return 1; 1051 } 1052 1053 /* 1054 * construct our own bdi so we can control readahead, etc. 1055 */ 1056 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1057 1058 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1059 { 1060 int err; 1061 1062 err = super_setup_bdi_name(sb, "ceph-%ld", 1063 atomic_long_inc_return(&bdi_seq)); 1064 if (err) 1065 return err; 1066 1067 /* set ra_pages based on rasize mount option? */ 1068 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1069 1070 /* set io_pages based on max osd read size */ 1071 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1072 1073 return 0; 1074 } 1075 1076 static struct dentry *ceph_mount(struct file_system_type *fs_type, 1077 int flags, const char *dev_name, void *data) 1078 { 1079 struct super_block *sb; 1080 struct ceph_fs_client *fsc; 1081 struct dentry *res; 1082 int err; 1083 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 1084 struct ceph_mount_options *fsopt = NULL; 1085 struct ceph_options *opt = NULL; 1086 1087 dout("ceph_mount\n"); 1088 1089 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1090 flags |= SB_POSIXACL; 1091 #endif 1092 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name); 1093 if (err < 0) { 1094 res = ERR_PTR(err); 1095 goto out_final; 1096 } 1097 1098 /* create client (which we may/may not use) */ 1099 fsc = create_fs_client(fsopt, opt); 1100 if (IS_ERR(fsc)) { 1101 res = ERR_CAST(fsc); 1102 goto out_final; 1103 } 1104 1105 err = ceph_mdsc_init(fsc); 1106 if (err < 0) { 1107 res = ERR_PTR(err); 1108 goto out; 1109 } 1110 1111 if (ceph_test_opt(fsc->client, NOSHARE)) 1112 compare_super = NULL; 1113 sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc); 1114 if (IS_ERR(sb)) { 1115 res = ERR_CAST(sb); 1116 goto out; 1117 } 1118 1119 if (ceph_sb_to_client(sb) != fsc) { 1120 destroy_fs_client(fsc); 1121 fsc = ceph_sb_to_client(sb); 1122 dout("get_sb got existing client %p\n", fsc); 1123 } else { 1124 dout("get_sb using new client %p\n", fsc); 1125 err = ceph_setup_bdi(sb, fsc); 1126 if (err < 0) { 1127 res = ERR_PTR(err); 1128 goto out_splat; 1129 } 1130 } 1131 1132 res = ceph_real_mount(fsc); 1133 if (IS_ERR(res)) 1134 goto out_splat; 1135 dout("root %p inode %p ino %llx.%llx\n", res, 1136 d_inode(res), ceph_vinop(d_inode(res))); 1137 return res; 1138 1139 out_splat: 1140 ceph_mdsc_close_sessions(fsc->mdsc); 1141 deactivate_locked_super(sb); 1142 goto out_final; 1143 1144 out: 1145 destroy_fs_client(fsc); 1146 out_final: 1147 dout("ceph_mount fail %ld\n", PTR_ERR(res)); 1148 return res; 1149 } 1150 1151 static void ceph_kill_sb(struct super_block *s) 1152 { 1153 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1154 dev_t dev = s->s_dev; 1155 1156 dout("kill_sb %p\n", s); 1157 1158 ceph_mdsc_pre_umount(fsc->mdsc); 1159 flush_fs_workqueues(fsc); 1160 1161 generic_shutdown_super(s); 1162 1163 fsc->client->extra_mon_dispatch = NULL; 1164 ceph_fs_debugfs_cleanup(fsc); 1165 1166 ceph_fscache_unregister_fs(fsc); 1167 1168 destroy_fs_client(fsc); 1169 free_anon_bdev(dev); 1170 } 1171 1172 static struct file_system_type ceph_fs_type = { 1173 .owner = THIS_MODULE, 1174 .name = "ceph", 1175 .mount = ceph_mount, 1176 .kill_sb = ceph_kill_sb, 1177 .fs_flags = FS_RENAME_DOES_D_MOVE, 1178 }; 1179 MODULE_ALIAS_FS("ceph"); 1180 1181 int ceph_force_reconnect(struct super_block *sb) 1182 { 1183 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 1184 int err = 0; 1185 1186 ceph_umount_begin(sb); 1187 1188 /* Make sure all page caches get invalidated. 1189 * see remove_session_caps_cb() */ 1190 flush_workqueue(fsc->inode_wq); 1191 1192 /* In case that we were blacklisted. This also reset 1193 * all mon/osd connections */ 1194 ceph_reset_client_addr(fsc->client); 1195 1196 ceph_osdc_clear_abort_err(&fsc->client->osdc); 1197 1198 fsc->blacklisted = false; 1199 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1200 1201 if (sb->s_root) { 1202 err = __ceph_do_getattr(d_inode(sb->s_root), NULL, 1203 CEPH_STAT_CAP_INODE, true); 1204 } 1205 return err; 1206 } 1207 1208 static int __init init_ceph(void) 1209 { 1210 int ret = init_caches(); 1211 if (ret) 1212 goto out; 1213 1214 ceph_flock_init(); 1215 ret = register_filesystem(&ceph_fs_type); 1216 if (ret) 1217 goto out_caches; 1218 1219 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1220 1221 return 0; 1222 1223 out_caches: 1224 destroy_caches(); 1225 out: 1226 return ret; 1227 } 1228 1229 static void __exit exit_ceph(void) 1230 { 1231 dout("exit_ceph\n"); 1232 unregister_filesystem(&ceph_fs_type); 1233 destroy_caches(); 1234 } 1235 1236 module_init(init_ceph); 1237 module_exit(exit_ceph); 1238 1239 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1240 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1241 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1242 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1243 MODULE_LICENSE("GPL"); 1244