1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/fs_context.h> 13 #include <linux/fs_parser.h> 14 #include <linux/sched.h> 15 #include <linux/seq_file.h> 16 #include <linux/slab.h> 17 #include <linux/statfs.h> 18 #include <linux/string.h> 19 20 #include "super.h" 21 #include "mds_client.h" 22 #include "cache.h" 23 24 #include <linux/ceph/ceph_features.h> 25 #include <linux/ceph/decode.h> 26 #include <linux/ceph/mon_client.h> 27 #include <linux/ceph/auth.h> 28 #include <linux/ceph/debugfs.h> 29 30 #include <uapi/linux/magic.h> 31 32 static DEFINE_SPINLOCK(ceph_fsc_lock); 33 static LIST_HEAD(ceph_fsc_list); 34 35 /* 36 * Ceph superblock operations 37 * 38 * Handle the basics of mounting, unmounting. 39 */ 40 41 /* 42 * super ops 43 */ 44 static void ceph_put_super(struct super_block *s) 45 { 46 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 47 48 dout("put_super\n"); 49 ceph_mdsc_close_sessions(fsc->mdsc); 50 } 51 52 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 53 { 54 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 55 struct ceph_mon_client *monc = &fsc->client->monc; 56 struct ceph_statfs st; 57 int i, err; 58 u64 data_pool; 59 60 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 61 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 62 } else { 63 data_pool = CEPH_NOPOOL; 64 } 65 66 dout("statfs\n"); 67 err = ceph_monc_do_statfs(monc, data_pool, &st); 68 if (err < 0) 69 return err; 70 71 /* fill in kstatfs */ 72 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 73 74 /* 75 * express utilization in terms of large blocks to avoid 76 * overflow on 32-bit machines. 77 * 78 * NOTE: for the time being, we make bsize == frsize to humor 79 * not-yet-ancient versions of glibc that are broken. 80 * Someday, we will probably want to report a real block 81 * size... whatever that may mean for a network file system! 82 */ 83 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 84 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 85 86 /* 87 * By default use root quota for stats; fallback to overall filesystem 88 * usage if using 'noquotadf' mount option or if the root dir doesn't 89 * have max_bytes quota set. 90 */ 91 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 92 !ceph_quota_update_statfs(fsc, buf)) { 93 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 94 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 95 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 96 } 97 98 buf->f_files = le64_to_cpu(st.num_objects); 99 buf->f_ffree = -1; 100 buf->f_namelen = NAME_MAX; 101 102 /* Must convert the fsid, for consistent values across arches */ 103 buf->f_fsid.val[0] = 0; 104 mutex_lock(&monc->mutex); 105 for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i) 106 buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]); 107 mutex_unlock(&monc->mutex); 108 109 /* fold the fs_cluster_id into the upper bits */ 110 buf->f_fsid.val[1] = monc->fs_cluster_id; 111 112 return 0; 113 } 114 115 static int ceph_sync_fs(struct super_block *sb, int wait) 116 { 117 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 118 119 if (!wait) { 120 dout("sync_fs (non-blocking)\n"); 121 ceph_flush_dirty_caps(fsc->mdsc); 122 dout("sync_fs (non-blocking) done\n"); 123 return 0; 124 } 125 126 dout("sync_fs (blocking)\n"); 127 ceph_osdc_sync(&fsc->client->osdc); 128 ceph_mdsc_sync(fsc->mdsc); 129 dout("sync_fs (blocking) done\n"); 130 return 0; 131 } 132 133 /* 134 * mount options 135 */ 136 enum { 137 Opt_wsize, 138 Opt_rsize, 139 Opt_rasize, 140 Opt_caps_wanted_delay_min, 141 Opt_caps_wanted_delay_max, 142 Opt_caps_max, 143 Opt_readdir_max_entries, 144 Opt_readdir_max_bytes, 145 Opt_congestion_kb, 146 /* int args above */ 147 Opt_snapdirname, 148 Opt_mds_namespace, 149 Opt_recover_session, 150 Opt_source, 151 Opt_mon_addr, 152 /* string args above */ 153 Opt_dirstat, 154 Opt_rbytes, 155 Opt_asyncreaddir, 156 Opt_dcache, 157 Opt_ino32, 158 Opt_fscache, 159 Opt_poolperm, 160 Opt_require_active_mds, 161 Opt_acl, 162 Opt_quotadf, 163 Opt_copyfrom, 164 Opt_wsync, 165 Opt_pagecache, 166 }; 167 168 enum ceph_recover_session_mode { 169 ceph_recover_session_no, 170 ceph_recover_session_clean 171 }; 172 173 static const struct constant_table ceph_param_recover[] = { 174 { "no", ceph_recover_session_no }, 175 { "clean", ceph_recover_session_clean }, 176 {} 177 }; 178 179 static const struct fs_parameter_spec ceph_mount_parameters[] = { 180 fsparam_flag_no ("acl", Opt_acl), 181 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), 182 fsparam_s32 ("caps_max", Opt_caps_max), 183 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), 184 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), 185 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), 186 fsparam_flag_no ("copyfrom", Opt_copyfrom), 187 fsparam_flag_no ("dcache", Opt_dcache), 188 fsparam_flag_no ("dirstat", Opt_dirstat), 189 fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc 190 fsparam_string ("fsc", Opt_fscache), // fsc=... 191 fsparam_flag_no ("ino32", Opt_ino32), 192 fsparam_string ("mds_namespace", Opt_mds_namespace), 193 fsparam_flag_no ("poolperm", Opt_poolperm), 194 fsparam_flag_no ("quotadf", Opt_quotadf), 195 fsparam_u32 ("rasize", Opt_rasize), 196 fsparam_flag_no ("rbytes", Opt_rbytes), 197 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), 198 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), 199 fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), 200 fsparam_flag_no ("require_active_mds", Opt_require_active_mds), 201 fsparam_u32 ("rsize", Opt_rsize), 202 fsparam_string ("snapdirname", Opt_snapdirname), 203 fsparam_string ("source", Opt_source), 204 fsparam_string ("mon_addr", Opt_mon_addr), 205 fsparam_u32 ("wsize", Opt_wsize), 206 fsparam_flag_no ("wsync", Opt_wsync), 207 fsparam_flag_no ("pagecache", Opt_pagecache), 208 {} 209 }; 210 211 struct ceph_parse_opts_ctx { 212 struct ceph_options *copts; 213 struct ceph_mount_options *opts; 214 }; 215 216 /* 217 * Remove adjacent slashes and then the trailing slash, unless it is 218 * the only remaining character. 219 * 220 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". 221 */ 222 static void canonicalize_path(char *path) 223 { 224 int i, j = 0; 225 226 for (i = 0; path[i] != '\0'; i++) { 227 if (path[i] != '/' || j < 1 || path[j - 1] != '/') 228 path[j++] = path[i]; 229 } 230 231 if (j > 1 && path[j - 1] == '/') 232 j--; 233 path[j] = '\0'; 234 } 235 236 /* 237 * Check if the mds namespace in ceph_mount_options matches 238 * the passed in namespace string. First time match (when 239 * ->mds_namespace is NULL) is treated specially, since 240 * ->mds_namespace needs to be initialized by the caller. 241 */ 242 static int namespace_equals(struct ceph_mount_options *fsopt, 243 const char *namespace, size_t len) 244 { 245 return !(fsopt->mds_namespace && 246 (strlen(fsopt->mds_namespace) != len || 247 strncmp(fsopt->mds_namespace, namespace, len))); 248 } 249 250 static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end, 251 struct fs_context *fc) 252 { 253 int r; 254 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 255 struct ceph_mount_options *fsopt = pctx->opts; 256 257 if (*dev_name_end != ':') 258 return invalfc(fc, "separator ':' missing in source"); 259 260 r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name, 261 pctx->copts, fc->log.log, ','); 262 if (r) 263 return r; 264 265 fsopt->new_dev_syntax = false; 266 return 0; 267 } 268 269 static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end, 270 struct fs_context *fc) 271 { 272 size_t len; 273 struct ceph_fsid fsid; 274 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 275 struct ceph_mount_options *fsopt = pctx->opts; 276 char *fsid_start, *fs_name_start; 277 278 if (*dev_name_end != '=') { 279 dout("separator '=' missing in source"); 280 return -EINVAL; 281 } 282 283 fsid_start = strchr(dev_name, '@'); 284 if (!fsid_start) 285 return invalfc(fc, "missing cluster fsid"); 286 ++fsid_start; /* start of cluster fsid */ 287 288 fs_name_start = strchr(fsid_start, '.'); 289 if (!fs_name_start) 290 return invalfc(fc, "missing file system name"); 291 292 if (ceph_parse_fsid(fsid_start, &fsid)) 293 return invalfc(fc, "Invalid FSID"); 294 295 ++fs_name_start; /* start of file system name */ 296 len = dev_name_end - fs_name_start; 297 298 if (!namespace_equals(fsopt, fs_name_start, len)) 299 return invalfc(fc, "Mismatching mds_namespace"); 300 kfree(fsopt->mds_namespace); 301 fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL); 302 if (!fsopt->mds_namespace) 303 return -ENOMEM; 304 dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace); 305 306 fsopt->new_dev_syntax = true; 307 return 0; 308 } 309 310 /* 311 * Parse the source parameter for new device format. Distinguish the device 312 * spec from the path. Try parsing new device format and fallback to old 313 * format if needed. 314 * 315 * New device syntax will looks like: 316 * <device_spec>=/<path> 317 * where 318 * <device_spec> is name@fsid.fsname 319 * <path> is optional, but if present must begin with '/' 320 * (monitor addresses are passed via mount option) 321 * 322 * Old device syntax is: 323 * <server_spec>[,<server_spec>...]:[<path>] 324 * where 325 * <server_spec> is <ip>[:<port>] 326 * <path> is optional, but if present must begin with '/' 327 */ 328 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) 329 { 330 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 331 struct ceph_mount_options *fsopt = pctx->opts; 332 char *dev_name = param->string, *dev_name_end; 333 int ret; 334 335 dout("%s '%s'\n", __func__, dev_name); 336 if (!dev_name || !*dev_name) 337 return invalfc(fc, "Empty source"); 338 339 dev_name_end = strchr(dev_name, '/'); 340 if (dev_name_end) { 341 /* 342 * The server_path will include the whole chars from userland 343 * including the leading '/'. 344 */ 345 kfree(fsopt->server_path); 346 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 347 if (!fsopt->server_path) 348 return -ENOMEM; 349 350 canonicalize_path(fsopt->server_path); 351 } else { 352 dev_name_end = dev_name + strlen(dev_name); 353 } 354 355 dev_name_end--; /* back up to separator */ 356 if (dev_name_end < dev_name) 357 return invalfc(fc, "Path missing in source"); 358 359 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 360 if (fsopt->server_path) 361 dout("server path '%s'\n", fsopt->server_path); 362 363 dout("trying new device syntax"); 364 ret = ceph_parse_new_source(dev_name, dev_name_end, fc); 365 if (ret) { 366 if (ret != -EINVAL) 367 return ret; 368 dout("trying old device syntax"); 369 ret = ceph_parse_old_source(dev_name, dev_name_end, fc); 370 if (ret) 371 return ret; 372 } 373 374 fc->source = param->string; 375 param->string = NULL; 376 return 0; 377 } 378 379 static int ceph_parse_mon_addr(struct fs_parameter *param, 380 struct fs_context *fc) 381 { 382 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 383 struct ceph_mount_options *fsopt = pctx->opts; 384 385 kfree(fsopt->mon_addr); 386 fsopt->mon_addr = param->string; 387 param->string = NULL; 388 389 return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr), 390 pctx->copts, fc->log.log, '/'); 391 } 392 393 static int ceph_parse_mount_param(struct fs_context *fc, 394 struct fs_parameter *param) 395 { 396 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 397 struct ceph_mount_options *fsopt = pctx->opts; 398 struct fs_parse_result result; 399 unsigned int mode; 400 int token, ret; 401 402 ret = ceph_parse_param(param, pctx->copts, fc->log.log); 403 if (ret != -ENOPARAM) 404 return ret; 405 406 token = fs_parse(fc, ceph_mount_parameters, param, &result); 407 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); 408 if (token < 0) 409 return token; 410 411 switch (token) { 412 case Opt_snapdirname: 413 kfree(fsopt->snapdir_name); 414 fsopt->snapdir_name = param->string; 415 param->string = NULL; 416 break; 417 case Opt_mds_namespace: 418 if (!namespace_equals(fsopt, param->string, strlen(param->string))) 419 return invalfc(fc, "Mismatching mds_namespace"); 420 kfree(fsopt->mds_namespace); 421 fsopt->mds_namespace = param->string; 422 param->string = NULL; 423 break; 424 case Opt_recover_session: 425 mode = result.uint_32; 426 if (mode == ceph_recover_session_no) 427 fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; 428 else if (mode == ceph_recover_session_clean) 429 fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; 430 else 431 BUG(); 432 break; 433 case Opt_source: 434 if (fc->source) 435 return invalfc(fc, "Multiple sources specified"); 436 return ceph_parse_source(param, fc); 437 case Opt_mon_addr: 438 return ceph_parse_mon_addr(param, fc); 439 case Opt_wsize: 440 if (result.uint_32 < PAGE_SIZE || 441 result.uint_32 > CEPH_MAX_WRITE_SIZE) 442 goto out_of_range; 443 fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); 444 break; 445 case Opt_rsize: 446 if (result.uint_32 < PAGE_SIZE || 447 result.uint_32 > CEPH_MAX_READ_SIZE) 448 goto out_of_range; 449 fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); 450 break; 451 case Opt_rasize: 452 fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); 453 break; 454 case Opt_caps_wanted_delay_min: 455 if (result.uint_32 < 1) 456 goto out_of_range; 457 fsopt->caps_wanted_delay_min = result.uint_32; 458 break; 459 case Opt_caps_wanted_delay_max: 460 if (result.uint_32 < 1) 461 goto out_of_range; 462 fsopt->caps_wanted_delay_max = result.uint_32; 463 break; 464 case Opt_caps_max: 465 if (result.int_32 < 0) 466 goto out_of_range; 467 fsopt->caps_max = result.int_32; 468 break; 469 case Opt_readdir_max_entries: 470 if (result.uint_32 < 1) 471 goto out_of_range; 472 fsopt->max_readdir = result.uint_32; 473 break; 474 case Opt_readdir_max_bytes: 475 if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) 476 goto out_of_range; 477 fsopt->max_readdir_bytes = result.uint_32; 478 break; 479 case Opt_congestion_kb: 480 if (result.uint_32 < 1024) /* at least 1M */ 481 goto out_of_range; 482 fsopt->congestion_kb = result.uint_32; 483 break; 484 case Opt_dirstat: 485 if (!result.negated) 486 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 487 else 488 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 489 break; 490 case Opt_rbytes: 491 if (!result.negated) 492 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 493 else 494 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 495 break; 496 case Opt_asyncreaddir: 497 if (!result.negated) 498 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 499 else 500 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 501 break; 502 case Opt_dcache: 503 if (!result.negated) 504 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 505 else 506 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 507 break; 508 case Opt_ino32: 509 if (!result.negated) 510 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 511 else 512 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 513 break; 514 515 case Opt_fscache: 516 #ifdef CONFIG_CEPH_FSCACHE 517 kfree(fsopt->fscache_uniq); 518 fsopt->fscache_uniq = NULL; 519 if (result.negated) { 520 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 521 } else { 522 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 523 fsopt->fscache_uniq = param->string; 524 param->string = NULL; 525 } 526 break; 527 #else 528 return invalfc(fc, "fscache support is disabled"); 529 #endif 530 case Opt_poolperm: 531 if (!result.negated) 532 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 533 else 534 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 535 break; 536 case Opt_require_active_mds: 537 if (!result.negated) 538 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 539 else 540 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 541 break; 542 case Opt_quotadf: 543 if (!result.negated) 544 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 545 else 546 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 547 break; 548 case Opt_copyfrom: 549 if (!result.negated) 550 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 551 else 552 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 553 break; 554 case Opt_acl: 555 if (!result.negated) { 556 #ifdef CONFIG_CEPH_FS_POSIX_ACL 557 fc->sb_flags |= SB_POSIXACL; 558 #else 559 return invalfc(fc, "POSIX ACL support is disabled"); 560 #endif 561 } else { 562 fc->sb_flags &= ~SB_POSIXACL; 563 } 564 break; 565 case Opt_wsync: 566 if (!result.negated) 567 fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS; 568 else 569 fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; 570 break; 571 case Opt_pagecache: 572 if (result.negated) 573 fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE; 574 else 575 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE; 576 break; 577 default: 578 BUG(); 579 } 580 return 0; 581 582 out_of_range: 583 return invalfc(fc, "%s out of range", param->key); 584 } 585 586 static void destroy_mount_options(struct ceph_mount_options *args) 587 { 588 dout("destroy_mount_options %p\n", args); 589 if (!args) 590 return; 591 592 kfree(args->snapdir_name); 593 kfree(args->mds_namespace); 594 kfree(args->server_path); 595 kfree(args->fscache_uniq); 596 kfree(args->mon_addr); 597 kfree(args); 598 } 599 600 static int strcmp_null(const char *s1, const char *s2) 601 { 602 if (!s1 && !s2) 603 return 0; 604 if (s1 && !s2) 605 return -1; 606 if (!s1 && s2) 607 return 1; 608 return strcmp(s1, s2); 609 } 610 611 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 612 struct ceph_options *new_opt, 613 struct ceph_fs_client *fsc) 614 { 615 struct ceph_mount_options *fsopt1 = new_fsopt; 616 struct ceph_mount_options *fsopt2 = fsc->mount_options; 617 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 618 int ret; 619 620 ret = memcmp(fsopt1, fsopt2, ofs); 621 if (ret) 622 return ret; 623 624 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 625 if (ret) 626 return ret; 627 628 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 629 if (ret) 630 return ret; 631 632 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 633 if (ret) 634 return ret; 635 636 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 637 if (ret) 638 return ret; 639 640 ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr); 641 if (ret) 642 return ret; 643 644 return ceph_compare_options(new_opt, fsc->client); 645 } 646 647 /** 648 * ceph_show_options - Show mount options in /proc/mounts 649 * @m: seq_file to write to 650 * @root: root of that (sub)tree 651 */ 652 static int ceph_show_options(struct seq_file *m, struct dentry *root) 653 { 654 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 655 struct ceph_mount_options *fsopt = fsc->mount_options; 656 size_t pos; 657 int ret; 658 659 /* a comma between MNT/MS and client options */ 660 seq_putc(m, ','); 661 pos = m->count; 662 663 ret = ceph_print_client_options(m, fsc->client, false); 664 if (ret) 665 return ret; 666 667 /* retract our comma if no client options */ 668 if (m->count == pos) 669 m->count--; 670 671 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 672 seq_puts(m, ",dirstat"); 673 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 674 seq_puts(m, ",rbytes"); 675 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 676 seq_puts(m, ",noasyncreaddir"); 677 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 678 seq_puts(m, ",nodcache"); 679 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 680 seq_puts(m, ",ino32"); 681 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 682 seq_show_option(m, "fsc", fsopt->fscache_uniq); 683 } 684 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 685 seq_puts(m, ",nopoolperm"); 686 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 687 seq_puts(m, ",noquotadf"); 688 689 #ifdef CONFIG_CEPH_FS_POSIX_ACL 690 if (root->d_sb->s_flags & SB_POSIXACL) 691 seq_puts(m, ",acl"); 692 else 693 seq_puts(m, ",noacl"); 694 #endif 695 696 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 697 seq_puts(m, ",copyfrom"); 698 699 /* dump mds_namespace when old device syntax is in use */ 700 if (fsopt->mds_namespace && !fsopt->new_dev_syntax) 701 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 702 703 if (fsopt->mon_addr) 704 seq_printf(m, ",mon_addr=%s", fsopt->mon_addr); 705 706 if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) 707 seq_show_option(m, "recover_session", "clean"); 708 709 if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)) 710 seq_puts(m, ",wsync"); 711 712 if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE) 713 seq_puts(m, ",nopagecache"); 714 715 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 716 seq_printf(m, ",wsize=%u", fsopt->wsize); 717 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 718 seq_printf(m, ",rsize=%u", fsopt->rsize); 719 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 720 seq_printf(m, ",rasize=%u", fsopt->rasize); 721 if (fsopt->congestion_kb != default_congestion_kb()) 722 seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); 723 if (fsopt->caps_max) 724 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 725 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 726 seq_printf(m, ",caps_wanted_delay_min=%u", 727 fsopt->caps_wanted_delay_min); 728 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 729 seq_printf(m, ",caps_wanted_delay_max=%u", 730 fsopt->caps_wanted_delay_max); 731 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 732 seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); 733 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 734 seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); 735 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 736 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 737 738 return 0; 739 } 740 741 /* 742 * handle any mon messages the standard library doesn't understand. 743 * return error if we don't either. 744 */ 745 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 746 { 747 struct ceph_fs_client *fsc = client->private; 748 int type = le16_to_cpu(msg->hdr.type); 749 750 switch (type) { 751 case CEPH_MSG_MDS_MAP: 752 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 753 return 0; 754 case CEPH_MSG_FS_MAP_USER: 755 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 756 return 0; 757 default: 758 return -1; 759 } 760 } 761 762 /* 763 * create a new fs client 764 * 765 * Success or not, this function consumes @fsopt and @opt. 766 */ 767 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 768 struct ceph_options *opt) 769 { 770 struct ceph_fs_client *fsc; 771 int err; 772 773 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 774 if (!fsc) { 775 err = -ENOMEM; 776 goto fail; 777 } 778 779 fsc->client = ceph_create_client(opt, fsc); 780 if (IS_ERR(fsc->client)) { 781 err = PTR_ERR(fsc->client); 782 goto fail; 783 } 784 opt = NULL; /* fsc->client now owns this */ 785 786 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 787 ceph_set_opt(fsc->client, ABORT_ON_FULL); 788 789 if (!fsopt->mds_namespace) { 790 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 791 0, true); 792 } else { 793 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 794 0, false); 795 } 796 797 fsc->mount_options = fsopt; 798 799 fsc->sb = NULL; 800 fsc->mount_state = CEPH_MOUNT_MOUNTING; 801 fsc->filp_gen = 1; 802 fsc->have_copy_from2 = true; 803 804 atomic_long_set(&fsc->writeback_count, 0); 805 fsc->write_congested = false; 806 807 err = -ENOMEM; 808 /* 809 * The number of concurrent works can be high but they don't need 810 * to be processed in parallel, limit concurrency. 811 */ 812 fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); 813 if (!fsc->inode_wq) 814 goto fail_client; 815 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); 816 if (!fsc->cap_wq) 817 goto fail_inode_wq; 818 819 spin_lock(&ceph_fsc_lock); 820 list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list); 821 spin_unlock(&ceph_fsc_lock); 822 823 return fsc; 824 825 fail_inode_wq: 826 destroy_workqueue(fsc->inode_wq); 827 fail_client: 828 ceph_destroy_client(fsc->client); 829 fail: 830 kfree(fsc); 831 if (opt) 832 ceph_destroy_options(opt); 833 destroy_mount_options(fsopt); 834 return ERR_PTR(err); 835 } 836 837 static void flush_fs_workqueues(struct ceph_fs_client *fsc) 838 { 839 flush_workqueue(fsc->inode_wq); 840 flush_workqueue(fsc->cap_wq); 841 } 842 843 static void destroy_fs_client(struct ceph_fs_client *fsc) 844 { 845 dout("destroy_fs_client %p\n", fsc); 846 847 spin_lock(&ceph_fsc_lock); 848 list_del(&fsc->metric_wakeup); 849 spin_unlock(&ceph_fsc_lock); 850 851 ceph_mdsc_destroy(fsc); 852 destroy_workqueue(fsc->inode_wq); 853 destroy_workqueue(fsc->cap_wq); 854 855 destroy_mount_options(fsc->mount_options); 856 857 ceph_destroy_client(fsc->client); 858 859 kfree(fsc); 860 dout("destroy_fs_client %p done\n", fsc); 861 } 862 863 /* 864 * caches 865 */ 866 struct kmem_cache *ceph_inode_cachep; 867 struct kmem_cache *ceph_cap_cachep; 868 struct kmem_cache *ceph_cap_snap_cachep; 869 struct kmem_cache *ceph_cap_flush_cachep; 870 struct kmem_cache *ceph_dentry_cachep; 871 struct kmem_cache *ceph_file_cachep; 872 struct kmem_cache *ceph_dir_file_cachep; 873 struct kmem_cache *ceph_mds_request_cachep; 874 mempool_t *ceph_wb_pagevec_pool; 875 876 static void ceph_inode_init_once(void *foo) 877 { 878 struct ceph_inode_info *ci = foo; 879 inode_init_once(&ci->vfs_inode); 880 } 881 882 static int __init init_caches(void) 883 { 884 int error = -ENOMEM; 885 886 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 887 sizeof(struct ceph_inode_info), 888 __alignof__(struct ceph_inode_info), 889 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 890 SLAB_ACCOUNT, ceph_inode_init_once); 891 if (!ceph_inode_cachep) 892 return -ENOMEM; 893 894 ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); 895 if (!ceph_cap_cachep) 896 goto bad_cap; 897 ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, SLAB_MEM_SPREAD); 898 if (!ceph_cap_snap_cachep) 899 goto bad_cap_snap; 900 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 901 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 902 if (!ceph_cap_flush_cachep) 903 goto bad_cap_flush; 904 905 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 906 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 907 if (!ceph_dentry_cachep) 908 goto bad_dentry; 909 910 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 911 if (!ceph_file_cachep) 912 goto bad_file; 913 914 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); 915 if (!ceph_dir_file_cachep) 916 goto bad_dir_file; 917 918 ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD); 919 if (!ceph_mds_request_cachep) 920 goto bad_mds_req; 921 922 ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); 923 if (!ceph_wb_pagevec_pool) 924 goto bad_pagevec_pool; 925 926 return 0; 927 928 bad_pagevec_pool: 929 kmem_cache_destroy(ceph_mds_request_cachep); 930 bad_mds_req: 931 kmem_cache_destroy(ceph_dir_file_cachep); 932 bad_dir_file: 933 kmem_cache_destroy(ceph_file_cachep); 934 bad_file: 935 kmem_cache_destroy(ceph_dentry_cachep); 936 bad_dentry: 937 kmem_cache_destroy(ceph_cap_flush_cachep); 938 bad_cap_flush: 939 kmem_cache_destroy(ceph_cap_snap_cachep); 940 bad_cap_snap: 941 kmem_cache_destroy(ceph_cap_cachep); 942 bad_cap: 943 kmem_cache_destroy(ceph_inode_cachep); 944 return error; 945 } 946 947 static void destroy_caches(void) 948 { 949 /* 950 * Make sure all delayed rcu free inodes are flushed before we 951 * destroy cache. 952 */ 953 rcu_barrier(); 954 955 kmem_cache_destroy(ceph_inode_cachep); 956 kmem_cache_destroy(ceph_cap_cachep); 957 kmem_cache_destroy(ceph_cap_snap_cachep); 958 kmem_cache_destroy(ceph_cap_flush_cachep); 959 kmem_cache_destroy(ceph_dentry_cachep); 960 kmem_cache_destroy(ceph_file_cachep); 961 kmem_cache_destroy(ceph_dir_file_cachep); 962 kmem_cache_destroy(ceph_mds_request_cachep); 963 mempool_destroy(ceph_wb_pagevec_pool); 964 } 965 966 static void __ceph_umount_begin(struct ceph_fs_client *fsc) 967 { 968 ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); 969 ceph_mdsc_force_umount(fsc->mdsc); 970 fsc->filp_gen++; // invalidate open files 971 } 972 973 /* 974 * ceph_umount_begin - initiate forced umount. Tear down the 975 * mount, skipping steps that may hang while waiting for server(s). 976 */ 977 void ceph_umount_begin(struct super_block *sb) 978 { 979 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 980 981 dout("ceph_umount_begin - starting forced umount\n"); 982 if (!fsc) 983 return; 984 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 985 __ceph_umount_begin(fsc); 986 } 987 988 static const struct super_operations ceph_super_ops = { 989 .alloc_inode = ceph_alloc_inode, 990 .free_inode = ceph_free_inode, 991 .write_inode = ceph_write_inode, 992 .drop_inode = generic_delete_inode, 993 .evict_inode = ceph_evict_inode, 994 .sync_fs = ceph_sync_fs, 995 .put_super = ceph_put_super, 996 .show_options = ceph_show_options, 997 .statfs = ceph_statfs, 998 .umount_begin = ceph_umount_begin, 999 }; 1000 1001 /* 1002 * Bootstrap mount by opening the root directory. Note the mount 1003 * @started time from caller, and time out if this takes too long. 1004 */ 1005 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 1006 const char *path, 1007 unsigned long started) 1008 { 1009 struct ceph_mds_client *mdsc = fsc->mdsc; 1010 struct ceph_mds_request *req = NULL; 1011 int err; 1012 struct dentry *root; 1013 1014 /* open dir */ 1015 dout("open_root_inode opening '%s'\n", path); 1016 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1017 if (IS_ERR(req)) 1018 return ERR_CAST(req); 1019 req->r_path1 = kstrdup(path, GFP_NOFS); 1020 if (!req->r_path1) { 1021 root = ERR_PTR(-ENOMEM); 1022 goto out; 1023 } 1024 1025 req->r_ino1.ino = CEPH_INO_ROOT; 1026 req->r_ino1.snap = CEPH_NOSNAP; 1027 req->r_started = started; 1028 req->r_timeout = fsc->client->options->mount_timeout; 1029 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 1030 req->r_num_caps = 2; 1031 err = ceph_mdsc_do_request(mdsc, NULL, req); 1032 if (err == 0) { 1033 struct inode *inode = req->r_target_inode; 1034 req->r_target_inode = NULL; 1035 dout("open_root_inode success\n"); 1036 root = d_make_root(inode); 1037 if (!root) { 1038 root = ERR_PTR(-ENOMEM); 1039 goto out; 1040 } 1041 dout("open_root_inode success, root dentry is %p\n", root); 1042 } else { 1043 root = ERR_PTR(err); 1044 } 1045 out: 1046 ceph_mdsc_put_request(req); 1047 return root; 1048 } 1049 1050 /* 1051 * mount: join the ceph cluster, and open root directory. 1052 */ 1053 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, 1054 struct fs_context *fc) 1055 { 1056 int err; 1057 unsigned long started = jiffies; /* note the start time */ 1058 struct dentry *root; 1059 1060 dout("mount start %p\n", fsc); 1061 mutex_lock(&fsc->client->mount_mutex); 1062 1063 if (!fsc->sb->s_root) { 1064 const char *path = fsc->mount_options->server_path ? 1065 fsc->mount_options->server_path + 1 : ""; 1066 1067 err = __ceph_open_session(fsc->client, started); 1068 if (err < 0) 1069 goto out; 1070 1071 /* setup fscache */ 1072 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 1073 err = ceph_fscache_register_fs(fsc, fc); 1074 if (err < 0) 1075 goto out; 1076 } 1077 1078 dout("mount opening path '%s'\n", path); 1079 1080 ceph_fs_debugfs_init(fsc); 1081 1082 root = open_root_dentry(fsc, path, started); 1083 if (IS_ERR(root)) { 1084 err = PTR_ERR(root); 1085 goto out; 1086 } 1087 fsc->sb->s_root = dget(root); 1088 } else { 1089 root = dget(fsc->sb->s_root); 1090 } 1091 1092 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1093 dout("mount success\n"); 1094 mutex_unlock(&fsc->client->mount_mutex); 1095 return root; 1096 1097 out: 1098 mutex_unlock(&fsc->client->mount_mutex); 1099 return ERR_PTR(err); 1100 } 1101 1102 static int ceph_set_super(struct super_block *s, struct fs_context *fc) 1103 { 1104 struct ceph_fs_client *fsc = s->s_fs_info; 1105 int ret; 1106 1107 dout("set_super %p\n", s); 1108 1109 s->s_maxbytes = MAX_LFS_FILESIZE; 1110 1111 s->s_xattr = ceph_xattr_handlers; 1112 fsc->sb = s; 1113 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 1114 1115 s->s_op = &ceph_super_ops; 1116 s->s_d_op = &ceph_dentry_ops; 1117 s->s_export_op = &ceph_export_ops; 1118 1119 s->s_time_gran = 1; 1120 s->s_time_min = 0; 1121 s->s_time_max = U32_MAX; 1122 1123 ret = set_anon_super_fc(s, fc); 1124 if (ret != 0) 1125 fsc->sb = NULL; 1126 return ret; 1127 } 1128 1129 /* 1130 * share superblock if same fs AND options 1131 */ 1132 static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) 1133 { 1134 struct ceph_fs_client *new = fc->s_fs_info; 1135 struct ceph_mount_options *fsopt = new->mount_options; 1136 struct ceph_options *opt = new->client->options; 1137 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 1138 1139 dout("ceph_compare_super %p\n", sb); 1140 1141 if (compare_mount_options(fsopt, opt, fsc)) { 1142 dout("monitor(s)/mount options don't match\n"); 1143 return 0; 1144 } 1145 if ((opt->flags & CEPH_OPT_FSID) && 1146 ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { 1147 dout("fsid doesn't match\n"); 1148 return 0; 1149 } 1150 if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { 1151 dout("flags differ\n"); 1152 return 0; 1153 } 1154 1155 if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { 1156 dout("client is blocklisted (and CLEANRECOVER is not set)\n"); 1157 return 0; 1158 } 1159 1160 if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { 1161 dout("client has been forcibly unmounted\n"); 1162 return 0; 1163 } 1164 1165 return 1; 1166 } 1167 1168 /* 1169 * construct our own bdi so we can control readahead, etc. 1170 */ 1171 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1172 1173 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1174 { 1175 int err; 1176 1177 err = super_setup_bdi_name(sb, "ceph-%ld", 1178 atomic_long_inc_return(&bdi_seq)); 1179 if (err) 1180 return err; 1181 1182 /* set ra_pages based on rasize mount option? */ 1183 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1184 1185 /* set io_pages based on max osd read size */ 1186 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1187 1188 return 0; 1189 } 1190 1191 static int ceph_get_tree(struct fs_context *fc) 1192 { 1193 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1194 struct ceph_mount_options *fsopt = pctx->opts; 1195 struct super_block *sb; 1196 struct ceph_fs_client *fsc; 1197 struct dentry *res; 1198 int (*compare_super)(struct super_block *, struct fs_context *) = 1199 ceph_compare_super; 1200 int err; 1201 1202 dout("ceph_get_tree\n"); 1203 1204 if (!fc->source) 1205 return invalfc(fc, "No source"); 1206 if (fsopt->new_dev_syntax && !fsopt->mon_addr) 1207 return invalfc(fc, "No monitor address"); 1208 1209 /* create client (which we may/may not use) */ 1210 fsc = create_fs_client(pctx->opts, pctx->copts); 1211 pctx->opts = NULL; 1212 pctx->copts = NULL; 1213 if (IS_ERR(fsc)) { 1214 err = PTR_ERR(fsc); 1215 goto out_final; 1216 } 1217 1218 err = ceph_mdsc_init(fsc); 1219 if (err < 0) 1220 goto out; 1221 1222 if (ceph_test_opt(fsc->client, NOSHARE)) 1223 compare_super = NULL; 1224 1225 fc->s_fs_info = fsc; 1226 sb = sget_fc(fc, compare_super, ceph_set_super); 1227 fc->s_fs_info = NULL; 1228 if (IS_ERR(sb)) { 1229 err = PTR_ERR(sb); 1230 goto out; 1231 } 1232 1233 if (ceph_sb_to_client(sb) != fsc) { 1234 destroy_fs_client(fsc); 1235 fsc = ceph_sb_to_client(sb); 1236 dout("get_sb got existing client %p\n", fsc); 1237 } else { 1238 dout("get_sb using new client %p\n", fsc); 1239 err = ceph_setup_bdi(sb, fsc); 1240 if (err < 0) 1241 goto out_splat; 1242 } 1243 1244 res = ceph_real_mount(fsc, fc); 1245 if (IS_ERR(res)) { 1246 err = PTR_ERR(res); 1247 goto out_splat; 1248 } 1249 dout("root %p inode %p ino %llx.%llx\n", res, 1250 d_inode(res), ceph_vinop(d_inode(res))); 1251 fc->root = fsc->sb->s_root; 1252 return 0; 1253 1254 out_splat: 1255 if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { 1256 pr_info("No mds server is up or the cluster is laggy\n"); 1257 err = -EHOSTUNREACH; 1258 } 1259 1260 ceph_mdsc_close_sessions(fsc->mdsc); 1261 deactivate_locked_super(sb); 1262 goto out_final; 1263 1264 out: 1265 destroy_fs_client(fsc); 1266 out_final: 1267 dout("ceph_get_tree fail %d\n", err); 1268 return err; 1269 } 1270 1271 static void ceph_free_fc(struct fs_context *fc) 1272 { 1273 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1274 1275 if (pctx) { 1276 destroy_mount_options(pctx->opts); 1277 ceph_destroy_options(pctx->copts); 1278 kfree(pctx); 1279 } 1280 } 1281 1282 static int ceph_reconfigure_fc(struct fs_context *fc) 1283 { 1284 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1285 struct ceph_mount_options *fsopt = pctx->opts; 1286 struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb); 1287 1288 if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) 1289 ceph_set_mount_opt(fsc, ASYNC_DIROPS); 1290 else 1291 ceph_clear_mount_opt(fsc, ASYNC_DIROPS); 1292 1293 if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) { 1294 kfree(fsc->mount_options->mon_addr); 1295 fsc->mount_options->mon_addr = fsopt->mon_addr; 1296 fsopt->mon_addr = NULL; 1297 pr_notice("ceph: monitor addresses recorded, but not used for reconnection"); 1298 } 1299 1300 sync_filesystem(fc->root->d_sb); 1301 return 0; 1302 } 1303 1304 static const struct fs_context_operations ceph_context_ops = { 1305 .free = ceph_free_fc, 1306 .parse_param = ceph_parse_mount_param, 1307 .get_tree = ceph_get_tree, 1308 .reconfigure = ceph_reconfigure_fc, 1309 }; 1310 1311 /* 1312 * Set up the filesystem mount context. 1313 */ 1314 static int ceph_init_fs_context(struct fs_context *fc) 1315 { 1316 struct ceph_parse_opts_ctx *pctx; 1317 struct ceph_mount_options *fsopt; 1318 1319 pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); 1320 if (!pctx) 1321 return -ENOMEM; 1322 1323 pctx->copts = ceph_alloc_options(); 1324 if (!pctx->copts) 1325 goto nomem; 1326 1327 pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); 1328 if (!pctx->opts) 1329 goto nomem; 1330 1331 fsopt = pctx->opts; 1332 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 1333 1334 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 1335 fsopt->rsize = CEPH_MAX_READ_SIZE; 1336 fsopt->rasize = CEPH_RASIZE_DEFAULT; 1337 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 1338 if (!fsopt->snapdir_name) 1339 goto nomem; 1340 1341 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 1342 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 1343 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 1344 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 1345 fsopt->congestion_kb = default_congestion_kb(); 1346 1347 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1348 fc->sb_flags |= SB_POSIXACL; 1349 #endif 1350 1351 fc->fs_private = pctx; 1352 fc->ops = &ceph_context_ops; 1353 return 0; 1354 1355 nomem: 1356 destroy_mount_options(pctx->opts); 1357 ceph_destroy_options(pctx->copts); 1358 kfree(pctx); 1359 return -ENOMEM; 1360 } 1361 1362 static void ceph_kill_sb(struct super_block *s) 1363 { 1364 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1365 1366 dout("kill_sb %p\n", s); 1367 1368 ceph_mdsc_pre_umount(fsc->mdsc); 1369 flush_fs_workqueues(fsc); 1370 1371 kill_anon_super(s); 1372 1373 fsc->client->extra_mon_dispatch = NULL; 1374 ceph_fs_debugfs_cleanup(fsc); 1375 1376 ceph_fscache_unregister_fs(fsc); 1377 1378 destroy_fs_client(fsc); 1379 } 1380 1381 static struct file_system_type ceph_fs_type = { 1382 .owner = THIS_MODULE, 1383 .name = "ceph", 1384 .init_fs_context = ceph_init_fs_context, 1385 .kill_sb = ceph_kill_sb, 1386 .fs_flags = FS_RENAME_DOES_D_MOVE, 1387 }; 1388 MODULE_ALIAS_FS("ceph"); 1389 1390 int ceph_force_reconnect(struct super_block *sb) 1391 { 1392 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 1393 int err = 0; 1394 1395 fsc->mount_state = CEPH_MOUNT_RECOVER; 1396 __ceph_umount_begin(fsc); 1397 1398 /* Make sure all page caches get invalidated. 1399 * see remove_session_caps_cb() */ 1400 flush_workqueue(fsc->inode_wq); 1401 1402 /* In case that we were blocklisted. This also reset 1403 * all mon/osd connections */ 1404 ceph_reset_client_addr(fsc->client); 1405 1406 ceph_osdc_clear_abort_err(&fsc->client->osdc); 1407 1408 fsc->blocklisted = false; 1409 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1410 1411 if (sb->s_root) { 1412 err = __ceph_do_getattr(d_inode(sb->s_root), NULL, 1413 CEPH_STAT_CAP_INODE, true); 1414 } 1415 return err; 1416 } 1417 1418 static int __init init_ceph(void) 1419 { 1420 int ret = init_caches(); 1421 if (ret) 1422 goto out; 1423 1424 ceph_flock_init(); 1425 ret = register_filesystem(&ceph_fs_type); 1426 if (ret) 1427 goto out_caches; 1428 1429 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1430 1431 return 0; 1432 1433 out_caches: 1434 destroy_caches(); 1435 out: 1436 return ret; 1437 } 1438 1439 static void __exit exit_ceph(void) 1440 { 1441 dout("exit_ceph\n"); 1442 unregister_filesystem(&ceph_fs_type); 1443 destroy_caches(); 1444 } 1445 1446 static int param_set_metrics(const char *val, const struct kernel_param *kp) 1447 { 1448 struct ceph_fs_client *fsc; 1449 int ret; 1450 1451 ret = param_set_bool(val, kp); 1452 if (ret) { 1453 pr_err("Failed to parse sending metrics switch value '%s'\n", 1454 val); 1455 return ret; 1456 } else if (!disable_send_metrics) { 1457 // wake up all the mds clients 1458 spin_lock(&ceph_fsc_lock); 1459 list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { 1460 metric_schedule_delayed(&fsc->mdsc->metric); 1461 } 1462 spin_unlock(&ceph_fsc_lock); 1463 } 1464 1465 return 0; 1466 } 1467 1468 static const struct kernel_param_ops param_ops_metrics = { 1469 .set = param_set_metrics, 1470 .get = param_get_bool, 1471 }; 1472 1473 bool disable_send_metrics = false; 1474 module_param_cb(disable_send_metrics, ¶m_ops_metrics, &disable_send_metrics, 0644); 1475 MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); 1476 1477 /* for both v1 and v2 syntax */ 1478 static bool mount_support = true; 1479 static const struct kernel_param_ops param_ops_mount_syntax = { 1480 .get = param_get_bool, 1481 }; 1482 module_param_cb(mount_syntax_v1, ¶m_ops_mount_syntax, &mount_support, 0444); 1483 module_param_cb(mount_syntax_v2, ¶m_ops_mount_syntax, &mount_support, 0444); 1484 1485 module_init(init_ceph); 1486 module_exit(exit_ceph); 1487 1488 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1489 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1490 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1491 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1492 MODULE_LICENSE("GPL"); 1493